手把手教学YOLOV5在RK3568的部署应用及代码实现

引言

RK3568支持NPU，提供0.8Tops的算力，可以用于部署深度学习项目。本篇文章介绍Yolov5代码开发、模型转化、部署。

RKNN-Toolkit2环境安装

RKNN-Toolkit2是用来把pytorch、tf等训练模型导出为rknn模型，供后续NPU加速使用。

1.RKNN-Toolkit2下载

下载地址：

https://github.com/rockchip-linux/rknn-toolkit2

2.安装

建议使用conda虚拟环境，找到对应的packages进行安装RKNN-Toolkit2，具体参考doc/Rockchip_Quick_Start_RKNN_Toolkit2_CN-1.4.0.pdf

packages地址：https://github.com/rockchip-linux/rknn-toolkit2/tree/master/packages

doc地址：https://github.com/rockchip-linux/rknn-toolkit2/tree/master/doc

安装部署

安装所需python版本的conda虚拟环境

安装相关依赖

sudo apt-get install libxslt1-dev zlib1g zlib1g-dev libglib2.0-0 libsm6 \ libgl1-mesa-glx libprotobuf-dev gcc

3.获取 RKNN-Toolkit2 安装包，然后执行以下步骤：

安装 Python 依赖：

pip3 install -r doc/requirements*.txt

进入 package 目录：

cd package/

安装 RKNN-Toolkit2

sudo pip3 install rknn_toolkit2*.whl

检查 RKNN-Toolkit2 是否安装成功

 python3
 from rknn.api import RKNN

如果导入 RKNN 模块没有失败，说明安装成功。

YOLOV5模型转化为RKNN模型

YOLOV5代码下载：

git clone https://github.com/ultralytics/yolov5.git

2.修改models/yolo.py–line56行

    def forward(self, x):
        z = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            if self.export:
               return x
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                if isinstance(self, Segment):  # (boxes + masks)
                    xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                    xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
                else:  # Detect (boxes only)
                    xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf), 4)
                z.append(y.view(bs, self.na * nx * ny, self.no))

        return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)

3.onnx模型导出

python export.py --weights xxx.pt --include onnx --simplify --opset 12

4.onnx2rknn

import os
import urllib
import traceback
import time
import sys
import numpy as np
import cv2
from rknn.api import RKNN

ONNX_MODEL = 'yolov5s.onnx'
RKNN_MODEL = 'yolov5s.rknn'
IMG_PATH = './bus.jpg'
DATASET = './dataset.txt'

QUANTIZE_ON = True

OBJ_THRESH = 0.25
NMS_THRESH = 0.45
IMG_SIZE = 640

CLASSES = ("person", "bicycle", "car", "motorbike ", "aeroplane ", "bus ", "train", "truck ", "boat", "traffic light",
           "fire hydrant", "stop sign ", "parking meter", "bench", "bird", "cat", "dog ", "horse ", "sheep", "cow", "elephant",
           "bear", "zebra ", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
           "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife ",
           "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza ", "donut", "cake", "chair", "sofa",
           "pottedplant", "bed", "diningtable", "toilet ", "tvmonitor", "laptop	", "mouse	", "remote ", "keyboard ", "cell phone", "microwave ",
           "oven ", "toaster", "sink", "refrigerator ", "book", "clock", "vase", "scissors ", "teddy bear ", "hair drier", "toothbrush ")


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def xywh2xyxy(x):
    # Convert [x, y, w, h] to [x1, y1, x2, y2]
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y


def process(input, mask, anchors):

    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])

    box_confidence = sigmoid(input[..., 4])
    box_confidence = np.expand_dims(box_confidence, axis=-1)

    box_class_probs = sigmoid(input[..., 5:])

    box_xy = sigmoid(input[..., :2])*2 - 0.5

    col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
    row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = np.concatenate((col, row), axis=-1)
    box_xy += grid
    box_xy *= int(IMG_SIZE/grid_h)

    box_wh = pow(sigmoid(input[..., 2:4])*2, 2)
    box_wh = box_wh * anchors

    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs


def filter_boxes(boxes, box_confidences, box_class_probs):
    """Filter boxes with box threshold. It's a bit different with origin yolov5 post process!
    # Arguments
        boxes: ndarray, boxes of objects.
        box_confidences: ndarray, confidences of objects.
        box_class_probs: ndarray, class_probs of objects.
    # Returns
        boxes: ndarray, filtered boxes.
        classes: ndarray, classes for boxes.
        scores: ndarray, scores for boxes.
    """
    boxes = boxes.reshape(-1, 4)
    box_confidences = box_confidences.reshape(-1)
    box_class_probs = box_class_probs.reshape(-1, box_class_probs.shape[-1])

    _box_pos = np.where(box_confidences >= OBJ_THRESH)
    boxes = boxes[_box_pos]
    box_confidences = box_confidences[_box_pos]
    box_class_probs = box_class_probs[_box_pos]

    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)
    _class_pos = np.where(class_max_score >= OBJ_THRESH)

    boxes = boxes[_class_pos]
    classes = classes[_class_pos]
    scores = (class_max_score* box_confidences)[_class_pos]

    return boxes, classes, scores


def nms_boxes(boxes, scores):
    """Suppress non-maximal boxes.
    # Arguments
        boxes: ndarray, boxes of objects.
        scores: ndarray, scores of objects.
    # Returns
        keep: ndarray, index of effective boxes.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])

        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1

        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= NMS_THRESH)[0]
        order = order[inds + 1]
    keep = np.array(keep)
    return keep


def yolov5_post_process(input_data):
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
               [59, 119], [116, 90], [156, 198], [373, 326]]

    boxes, classes, scores = [], [], []
    for input, mask in zip(input_data, masks):
        b, c, s = process(input, mask, anchors)
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)

    boxes = np.concatenate(boxes)
    boxes = xywh2xyxy(boxes)
    classes = np.concatenate(classes)
    scores = np.concatenate(scores)

    nboxes, nclasses, nscores = [], [], []
    for c in set(classes):
        inds = np.where(classes == c)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]

        keep = nms_boxes(b, s)

        nboxes.append(b[keep])
        nclasses.append(c[keep])
        nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None

    boxes = np.concatenate(nboxes)
    classes = np.concatenate(nclasses)
    scores = np.concatenate(nscores)

    return boxes, classes, scores


def draw(image, boxes, scores, classes):
    """Draw the boxes on the image.
    # Argument:
        image: original image.
        boxes: ndarray, boxes of objects.
        classes: ndarray, classes of objects.
        scores: ndarray, scores of objects.
        all_classes: all classes name.
    """
    for box, score, cl in zip(boxes, scores, classes):
        top, left, right, bottom = box
        print('class: {}, score: {}'.format(CLASSES[cl], score))
        print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(top, left, right, bottom))
        top = int(top)
        left = int(left)
        right = int(right)
        bottom = int(bottom)

        cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (top, left - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 2)


def letterbox(im, new_shape=(640, 640), color=(0, 0, 0)):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)


if __name__ == '__main__':

    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> Config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]])
    print('done')

    # Load ONNX model
    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET)
    if ret != 0:
        print('Build model failed!')
        exit(ret)
    print('done')

    # Export RKNN model
    print('--> Export rknn model')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export rknn model failed!')
        exit(ret)
    print('done')

    # Init runtime environment
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    # ret = rknn.init_runtime('rk3566')
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    # Set inputs
    img = cv2.imread(IMG_PATH)
    # img, ratio, (dw, dh) = letterbox(img, new_shape=(IMG_SIZE, IMG_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

    # Inference
    print('--> Running model')
    outputs = rknn.inference(inputs=[img])
    np.save('./onnx_yolov5_0.npy', outputs[0])
    np.save('./onnx_yolov5_1.npy', outputs[1])
    np.save('./onnx_yolov5_2.npy', outputs[2])
    print('done')

    # post process
    input0_data = outputs[0]
    input1_data = outputs[1]
    input2_data = outputs[2]

    input0_data = input0_data.reshape([3, -1]+list(input0_data.shape[-2:]))
    input1_data = input1_data.reshape([3, -1]+list(input1_data.shape[-2:]))
    input2_data = input2_data.reshape([3, -1]+list(input2_data.shape[-2:]))

    input_data = list()
    input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))

    boxes, classes, scores = yolov5_post_process(input_data)

    img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(img_1, boxes, scores, classes)
    # show output
    # cv2.imshow("post process result", img_1)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

    rknn.release()

至此YOLOV5在RK3568部署完成，但推理部分为python端

C++代码Inference

#include "zql-app-yolov5.hpp"
#include <atomic>
#include <mutex>
#include <queue>
#include <condition_variable>
#include "zql-producer.hpp"
#include "zql-infer-rknn.hpp"
#include "zql-logger.hpp"

namespace yolov5
{
    using namespace std;
    using namespace cv;
    using namespace zql;
    const int anchor0[6] = {10, 13, 16, 30, 33, 23};
    const int anchor1[6] = {30, 61, 62, 45, 59, 119};
    const int anchor2[6] = {116, 90, 156, 198, 373, 326};
    // const int anchor0[6] = {12, 16, 19, 36, 40, 28};
    // const int anchor1[6] = {36, 75, 76, 55, 72, 146};
    // const int anchor2[6] = {142, 110, 192, 243, 459, 401};
    //       - [12,16, 19,36, 40,28]  # P3/8
    //   - [36,75, 76,55, 72,146]  # P4/16
    //   - [142,110, 192,243, 459,401]  # P5/32
    static float iou(const zql::Box &a, const zql::Box &b)
    {
        float cleft = max(a.left, b.left);
        float ctop = max(a.top, b.top);
        float cright = min(a.right, b.right);
        float cbottom = min(a.bottom, b.bottom);

        float c_area = max(cright - cleft, 0.0f) * max(cbottom - ctop, 0.0f);
        if (c_area == 0.0f)
            return 0.0f;

        float a_area = max(0.0f, a.right - a.left) * max(0.0f, a.bottom - a.top);
        float b_area = max(0.0f, b.right - b.left) * max(0.0f, b.bottom - b.top);
        return c_area / (a_area + b_area - c_area);
    }
    static void cpu_nms(zql::BoxArray &boxes, zql::BoxArray &output, float threshold)
    {

        std::sort(boxes.begin(), boxes.end(), [](zql::BoxArray::const_reference a, zql::BoxArray::const_reference b)
                  { return a.confidence > b.confidence; });

        output.clear();
        vector<bool> remove_flags(boxes.size());
        for (int i = 0; i < boxes.size(); ++i)
        {

            if (remove_flags[i])
                continue;

            auto &a = boxes[i];
            output.emplace_back(a);

            for (int j = i + 1; j < boxes.size(); ++j)
            {
                if (remove_flags[j])
                    continue;

                auto &b = boxes[j];
                if (b.class_label == a.class_label)
                {
                    if (iou(a, b) >= threshold)
                        remove_flags[j] = true;
                }
            }
        }
    }
    static float sigmoid(float x)
    {
        return 1.0 / (1.0 + expf(-x));
    }

    static float unsigmoid(float y)
    {
        return -1.0 * logf((1.0 / y) - 1.0);
    }

    inline static int32_t __clip(float val, float min, float max)
    {
        float f = val <= min ? min : (val >= max ? max : val);
        return f;
    }

    static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale)
    {
        float dst_val = (f32 / scale) + zp;
        int8_t res = (int8_t)__clip(dst_val, -128, 127);
        return res;
    }

    static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale) { return ((float)qnt - (float)zp) * scale; }

    static int process(int8_t *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride,
                       std::vector<float> &boxes, std::vector<float> &objProbs, std::vector<int> &classId,
                       float threshold, int32_t zp, float scale, int num_class)
    {

        int validCount = 0;
        int grid_len = grid_h * grid_w;
        float thres = unsigmoid(threshold);
        int8_t thres_u8 = qnt_f32_to_affine(thres, zp, scale);

        for (int a = 0; a < 3; a++)
        {
            for (int i = 0; i < grid_h; i++)
            {
                for (int j = 0; j < grid_w; j++)
                {
                    int8_t box_confidence = input[(num_class * a + 4) * grid_len + i * grid_w + j];
                    if (box_confidence >= thres_u8)
                    {
                        int offset = (num_class * a) * grid_len + i * grid_w + j;
                        int8_t *in_ptr = input + offset;
                        int8_t maxClassProbs = in_ptr[5 * grid_len];
                        int maxClassId = 0;
                        for (int k = 1; k < num_class - 5; ++k)
                        {
                            int8_t prob = in_ptr[(5 + k) * grid_len];
                            if (prob > maxClassProbs)
                            {
                                maxClassId = k;
                                maxClassProbs = prob;
                            }
                        }
                        float final_score = sigmoid(deqnt_affine_to_f32(maxClassProbs, zp, scale)) * sigmoid(deqnt_affine_to_f32(box_confidence, zp, scale));

                        if (final_score > threshold)
                        {
                            float box_x = sigmoid(deqnt_affine_to_f32(*in_ptr, zp, scale)) * 2.0 - 0.5;
                            float box_y = sigmoid(deqnt_affine_to_f32(in_ptr[grid_len], zp, scale)) * 2.0 - 0.5;
                            float box_w = sigmoid(deqnt_affine_to_f32(in_ptr[2 * grid_len], zp, scale)) * 2.0;
                            float box_h = sigmoid(deqnt_affine_to_f32(in_ptr[3 * grid_len], zp, scale)) * 2.0;
                            box_x = (box_x + j) * (float)stride;
                            box_y = (box_y + i) * (float)stride;
                            box_w = box_w * box_w * (float)anchor[a * 2];
                            box_h = box_h * box_h * (float)anchor[a * 2 + 1];
                            box_x -= (box_w / 2.0);
                            box_y -= (box_h / 2.0);
                            boxes.emplace_back(box_x);
                            boxes.emplace_back(box_y);
                            boxes.emplace_back(box_w);
                            boxes.emplace_back(box_h);

                            objProbs.emplace_back(final_score);
                            classId.emplace_back(maxClassId);
                            validCount++;
                        }
                    }
                }
            }
        }
        return validCount;
    }

    inline static int clamp(float val, int min, int max)
    {
        return val > min ? (val < max ? val : max) : min;
    }

    using ControllerImpl = zql::Producer<
        Mat,                            // input
        zql::BoxArray,                  // output
        std::tuple<std::string, string> // start param
        >;

    class InferImpl : public Infer, public ControllerImpl
    {
    public:
        /** 要求在InferImpl里面执行stop，而不是在基类执行stop **/
        virtual ~InferImpl()
        {
            stop();
        }

        virtual bool startup(
            const std::string &engine_file, const std::string &engine_label,
            float confidence_threshold = 0.25f, float nms_threshold = 0.5f)
        {
            confidence_threshold_ = confidence_threshold;
            nms_threshold_ = nms_threshold;
            std::tuple<string, string> param_ = make_tuple(engine_file, engine_label);
            return ControllerImpl::startup(param_);
        }

        virtual void worker(promise<bool> &result) override
        {

            string model_file = std::get<0>(start_param_);
            string lable_file = std::get<1>(start_param_);
            bool use_float = false;
            auto engine = rknn::load_infer(model_file, lable_file, use_float);
            if (engine == nullptr)
            {
                INFOE("Engine %s load failed", model_file.c_str());
                result.set_value(false);
                return;
            }
            engine->print();
            auto input = engine->input();
            num_class = engine->output()->size(1) / 3;
            input_width_ = input->size(2);
            input_height_ = input->size(1);
            m_lables = engine->get_lables();
            result.set_value(true);

            Job fetch_job;
            zql::BoxArray keep_output_objs, output_objs, proposals;
            cv::Size target_size(input_width_, input_height_);
            // cv::Mat input_image(input_height_, input_width_, CV_8UC3, input->cpu());

            keep_output_objs.reserve(100);
            output_objs.reserve(100);

            float sx = 0;
            float sy = 0;
            while (get_job_and_wait(fetch_job))
            {


                float scale_letterbox;
                int resize_rows;
                int resize_cols;
                std::vector<float> filterBoxes;
                std::vector<float> objProbs;
                std::vector<int> classId;
                int img_width = fetch_job.input.cols;
                int img_height = fetch_job.input.rows;
                if (fetch_job.input.size() != target_size)
                {

                    if ((input_height_ * 1.0 / img_height) < (input_width_ * 1.0 / img_width))
                    {
                        scale_letterbox = input_height_ * 1.0 / img_height;
                    }
                    else
                    {
                        scale_letterbox = input_width_ * 1.0 / img_width;
                    }
                    resize_cols = int(scale_letterbox * img_width);
                    resize_rows = int(scale_letterbox * img_height);

                    cv::Mat input_image;
                    cv::cvtColor(fetch_job.input, input_image, COLOR_BGR2RGB);

                    cv::resize(input_image, input_image, cv::Size(resize_cols, resize_rows));

                    // Generate a gray image for letterbox using opencv
                    cv::Mat img_new(input_height_, input_width_, CV_8UC3, cv::Scalar(114, 114, 114));
                    int top = (input_height_ - resize_rows) / 2;
                    int bot = (input_height_ - resize_rows + 1) / 2;
                    int left = (input_width_ - resize_cols) / 2;
                    int right = (input_width_ - resize_cols + 1) / 2;
                    // Letterbox filling
                    cv::copyMakeBorder(input_image, img_new, top, bot, left, right, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
                    memcpy(input->cpu(), img_new.data, input->bytes());

                }
                else
                {
                    memcpy(input->cpu(), fetch_job.input.data, input->bytes());
                    // resize_scale = 1.f;
                }

                if (!engine->forward())
                {
                    fetch_job.pro->set_value({});
                    continue;
                }

                // stride 8
                int stride0 = 8;
                int grid_h0 = input_height_ / stride0;
                int grid_w0 = input_width_ / stride0;
                int validCount0 = 0;
                validCount0 = process(engine->output(0).get()->cpu<int8_t>(), (int *)anchor0, grid_h0, grid_w0, input_height_, input_width_,
                                      stride0, filterBoxes, objProbs, classId, confidence_threshold_, engine->output(0).get()->get_zp(), engine->output(0).get()->get_scale(), num_class);

                // stride 16
                int stride1 = 16;
                int grid_h1 = input_height_ / stride1;
                int grid_w1 = input_width_ / stride1;
                int validCount1 = 0;
                validCount1 = process(engine->output(1).get()->cpu<int8_t>(), (int *)anchor1, grid_h1, grid_w1, input_height_, input_width_,
                                      stride1, filterBoxes, objProbs, classId, confidence_threshold_, engine->output(1).get()->get_zp(), engine->output(1).get()->get_scale(), num_class);

                // stride 32
                int stride2 = 32;
                int grid_h2 = input_height_ / stride2;
                int grid_w2 = input_width_ / stride2;
                int validCount2 = 0;
                validCount2 = process(engine->output(2).get()->cpu<int8_t>(), (int *)anchor2, grid_h2, grid_w2, input_height_, input_width_,
                                      stride2, filterBoxes, objProbs, classId, confidence_threshold_, engine->output(2).get()->get_zp(), engine->output(2).get()->get_scale(), num_class);

                int validCount = validCount0 + validCount1 + validCount2;

                int tmp_h = (input_height_ - resize_rows) / 2;
                int tmp_w = (input_width_ - resize_cols) / 2;

                float ratio_x = (float)img_height / resize_rows;
                float ratio_y = (float)img_width / resize_cols;
                for (int n = 0; n < validCount; ++n)
                {


                    float x1 = filterBoxes[n * 4 + 0];
                    float y1 = filterBoxes[n * 4 + 1];
                    float x2 = x1 + filterBoxes[n * 4 + 2];
                    float y2 = y1 + filterBoxes[n * 4 + 3];
                    int id = classId[n];
                    float obj_conf = objProbs[n];

                    x1 = (x1 - tmp_w) * ratio_x;
                    y1 = (y1 - tmp_h) * ratio_y;
                    x2 = (x2 - tmp_w) * ratio_x;
                    y2 = (y2 - tmp_h) * ratio_y;
                    x1 = std::max(std::min(x1, (float)(img_width- 1)), 0.f);
                    y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f);
                    x2 = std::max(std::min(x2, (float)(img_width - 1)), 0.f);
                    y2 = std::max(std::min(y2, (float)(img_height - 1)), 0.f);
                    output_objs.emplace_back(x1, y1, x2, y2, obj_conf, id, m_lables[id]);

                }
                cpu_nms(output_objs, keep_output_objs, nms_threshold_);
                fetch_job.pro->set_value(keep_output_objs);

                keep_output_objs.clear();
                output_objs.clear();
                // keep_output_objs.clear();
            }
            INFO("Engine destroy.");
        }

        virtual bool preprocess(Job &job, const Mat &image) override
        {
            job.input = image;

            return !image.empty();
        }

        virtual std::shared_future<zql::BoxArray> commit(const Mat &image) override
        {
            return ControllerImpl::commit(image);
        }

    private:
        int input_width_ = 0;
        int input_height_ = 0;
        int num_class = 0;
        int gpu_ = 0;
        float confidence_threshold_ = 0;
        float nms_threshold_ = 0;
        std::vector<string> m_lables;
    };

    shared_ptr<Infer> create_infer(
        const std::string &engine_file, const std::string &engine_label,
        float confidence_threshold, float nms_threshold)
    {

        shared_ptr<InferImpl> instance(new InferImpl());
        if (!instance->startup(
                engine_file, engine_label, confidence_threshold,
                nms_threshold))
        {
            instance.reset();
        }
        return instance;
    }
}

代码部分进行了封装，披露了重要的代码块，如预处理、后处理、坐标映射。model_forward部分API使用请参考官方文档doc/Rockchip_User_Guide_RKNN_Toolkit2_CN-1.4.0.pdf。

结语

至此，YOLOV5已经完全部署移植到RK3568上，有问题请随时留言，下边会介绍如何移植分类模型如repvgg,姿势识别如litehrnet、动作序列识别如tsm算法。

文章出处登录后可见！

已经登录？立即刷新