From dd455879b365582aca7a6427b789e36883a5c409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=A4=E7=9F=A5=E6=99=BA=E8=83=BD?= <2386089024@qq.com> Date: Mon, 17 Mar 2025 19:26:30 +0800 Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?= =?UTF-8?q?=20/?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- drm_func.c | 186 ++++++++++++++ main.h | 95 +++++++ rga_func.c | 129 ++++++++++ yolov5_detect.cpp | 374 +++++++++++++++++++++++++++ yolov5_detect_postprocess.cpp | 469 ++++++++++++++++++++++++++++++++++ 5 files changed, 1253 insertions(+) create mode 100644 drm_func.c create mode 100644 main.h create mode 100644 rga_func.c create mode 100644 yolov5_detect.cpp create mode 100644 yolov5_detect_postprocess.cpp diff --git a/drm_func.c b/drm_func.c new file mode 100644 index 0000000..eb10de1 --- /dev/null +++ b/drm_func.c @@ -0,0 +1,186 @@ +// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "drm_func.h" +#include + +int drm_init(drm_context *drm_ctx) +{ + static const char *card = "/dev/dri/card0"; + int flag = O_RDWR; + int drm_fd = -1; + + drm_fd = open(card, flag); + if (drm_fd < 0) + { + printf("failed to open %s\n", card); + return -1; + } + + drm_ctx->drm_handle = dlopen("libdrm.so", RTLD_LAZY); + + if (!drm_ctx->drm_handle) + { + printf("failed to dlopen libdrm.so\n"); + printf("dlopen error: %s\n", dlerror()); + drm_deinit(drm_ctx, drm_fd); + return -1; + } + + drm_ctx->io_func = (FUNC_DRM_IOCTL)dlsym(drm_ctx->drm_handle, "drmIoctl"); + if (drm_ctx->io_func == NULL) + { + dlclose(drm_ctx->drm_handle); + drm_ctx->drm_handle = NULL; + drm_deinit(drm_ctx, drm_fd); + printf("failed to dlsym drmIoctl\n"); + return -1; + } + return drm_fd; +} + +void drm_deinit(drm_context *drm_ctx, int drm_fd) +{ + if (drm_ctx->drm_handle) + { + dlclose(drm_ctx->drm_handle); + drm_ctx->drm_handle = NULL; + } + if (drm_fd > 0) + { + close(drm_fd); + } +} + +void *drm_buf_alloc(drm_context *drm_ctx, int drm_fd, int TexWidth, int TexHeight, int bpp, int *fd, unsigned int *handle, size_t *actual_size) +{ + // printf("0: Width %d, Heigh %d, bpp %d\n", TexWidth, TexHeight, bpp); + // printf("fd: %d, handle: %d, size: %d\n", fd, handle, actual_size); + int ret; + if (drm_ctx == NULL) + { + printf("drm context is unvalid\n"); + return NULL; + } + char *map = NULL; + + // printf("Width %d, Heigh %d\n", TexWidth, TexHeight); + + void *vir_addr = NULL; + struct drm_prime_handle fd_args; + struct drm_mode_map_dumb mmap_arg; + struct drm_mode_destroy_dumb destory_arg; + + struct drm_mode_create_dumb alloc_arg; + + memset(&alloc_arg, 0, sizeof(alloc_arg)); + alloc_arg.bpp = bpp; + alloc_arg.width = TexWidth; + alloc_arg.height = TexHeight; + // alloc_arg.flags = ROCKCHIP_BO_CONTIG; + + // printf("2: Width %d, Heigh %d\n", TexWidth, TexHeight); + + //获取handle和size + ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &alloc_arg); + // printf("3: Width %d, Heigh %d\n", TexWidth, TexHeight); + if (ret) + { + printf("failed to create dumb buffer: %s\n", strerror(errno)); + return NULL; + } + // printf("4: Width %d, Heigh %d\n", TexWidth, TexHeight); + if (handle != NULL) + { + *handle = alloc_arg.handle; + } + // printf("5: Width %d, Heigh %d\n", TexWidth, TexHeight); + if (actual_size != NULL) + { + *actual_size = alloc_arg.size; + } + // printf("create width=%u, height=%u, bpp=%u, size=%lu dumb buffer\n",alloc_arg.width,alloc_arg.height,alloc_arg.bpp,alloc_arg.size); + // printf("out handle= %d\n",alloc_arg.handle); + + //获取fd + memset(&fd_args, 0, sizeof(fd_args)); + fd_args.fd = -1; + fd_args.handle = alloc_arg.handle; + ; + fd_args.flags = 0; + ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &fd_args); + if (ret) + { + printf("rk-debug handle_to_fd failed ret=%d,err=%s, handle=%x \n", ret, strerror(errno), fd_args.handle); + return NULL; + } + // printf("out fd = %d, drm fd: %d\n",fd_args.fd,drm_fd); + if (fd != NULL) + { + *fd = fd_args.fd; + } + + //获取虚拟地址 + memset(&mmap_arg, 0, sizeof(mmap_arg)); + mmap_arg.handle = alloc_arg.handle; + + ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &mmap_arg); + if (ret) + { + printf("failed to create map dumb: %s\n", strerror(errno)); + vir_addr = NULL; + goto destory_dumb; + } + vir_addr = map = mmap(0, alloc_arg.size, PROT_READ | PROT_WRITE, MAP_SHARED, drm_fd, mmap_arg.offset); + if (map == MAP_FAILED) + { + printf("failed to mmap buffer: %s\n", strerror(errno)); + vir_addr = NULL; + goto destory_dumb; + } + // printf("alloc map=%x \n",map); + return vir_addr; +destory_dumb: + memset(&destory_arg, 0, sizeof(destory_arg)); + destory_arg.handle = alloc_arg.handle; + ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destory_arg); + if (ret) + printf("failed to destory dumb %d\n", ret); + return vir_addr; +} + +int drm_buf_destroy(drm_context *drm_ctx, int drm_fd, int buf_fd, int handle, void *drm_buf, size_t size) +{ + int ret = -1; + if (drm_buf == NULL) + { + printf("drm buffer is NULL\n"); + return -1; + } + + munmap(drm_buf, size); + + struct drm_mode_destroy_dumb destory_arg; + memset(&destory_arg, 0, sizeof(destory_arg)); + destory_arg.handle = handle; + ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destory_arg); + if (ret) + printf("failed to destory dumb %d, error=%s\n", ret, strerror(errno)); + if (buf_fd > 0) + { + close(buf_fd); + } + + return ret; +} diff --git a/main.h b/main.h new file mode 100644 index 0000000..dd91d70 --- /dev/null +++ b/main.h @@ -0,0 +1,95 @@ +#ifndef _ATK_YOLOV5_OBJECT_RECOGNIZE_H +#define _ATK_YOLOV5_OBJECT_RECOGNIZE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define _BASETSD_H + +#include "im2d.h" +#include "rga.h" +#include "drm_func.h" +#include "rga_func.h" +#include "rknn_api.h" +#include "rkmedia_api.h" +#include "rkmedia_venc.h" +#include "sample_common.h" +#include "opencv2/opencv.hpp" + +#include "librtsp/rtsp_demo.h" +#include "rk_aiq_user_api_sysctl.h" +#include "rk_aiq_user_api_afec.h" +// #include "rk_aiq_uapi_afec_int.h" + +#include +#include +#include "opencv2/imgcodecs.hpp" + +using namespace cv; + + + +RK_U32 video_width = 1920; +RK_U32 video_height = 1072; +// RK_U32 video_height = 720; +int disp_width = 720; +int disp_height = 1280; + +static bool quit = false; +rtsp_demo_handle g_rtsplive = NULL; +static rtsp_session_handle g_rtsp_session; + +int rgb24_resize(unsigned char *input_rgb, unsigned char *output_rgb, int width,int height, int outwidth, int outheight); + +static unsigned char *load_model(const char *filename, int *model_size); + +static void printRKNNTensor(rknn_tensor_attr *attr); + +void *rkmedia_rknn_thread(void *args); +void *venc_rtsp_tidp(void *args); +void *upload_message(void *args); //上传检测信息与截取的图片 +void *upload_message_controller(void *args); +void *heart_beat(void *args); //上传心跳检测 +void *distortion(void *args); //矫正 +void *read_serial_thread(void *args); //读取串口传来的红外温度数据 + +struct Alarm { + int ifalarm; + int ifwarn; + std::string alarmMsg; + std::string alarmType; + std::string alarmBase64; + std::string alarmCoverage; + float alarmprop; +}; + +void initializeAlarm(Alarm* Alarm) { + // 初始化结构体成员 + Alarm->ifalarm = 0; + Alarm->ifwarn = 0; + Alarm->alarmMsg = "无"; + Alarm->alarmType = "无"; + Alarm->alarmBase64 = "无"; + Alarm->alarmCoverage = "无"; + Alarm->alarmprop = 0.0; +} + +void NV12ToRGB(int width, int height, unsigned char* nv12, Mat& rgb) { + // Step 1: Create a cv::Mat object for NV12 data + Mat nv12Mat(height + height / 2, width, CV_8UC1, nv12); + + // Step 2: Convert NV12 to RGB + cvtColor(nv12Mat, rgb, COLOR_YUV2RGB_NV12); +} + +#endif \ No newline at end of file diff --git a/rga_func.c b/rga_func.c new file mode 100644 index 0000000..aaea364 --- /dev/null +++ b/rga_func.c @@ -0,0 +1,129 @@ +// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "rga_func.h" + +int RGA_init(rga_context *rga_ctx) +{ + rga_ctx->rga_handle = dlopen("librga.so", RTLD_LAZY); + if (!rga_ctx->rga_handle) + { + printf("dlopen librga.so failed\n"); + printf("dlopen error: %s\n", dlerror()); + return -1; + } + rga_ctx->init_func = (FUNC_RGA_INIT)dlsym(rga_ctx->rga_handle, "c_RkRgaInit"); + rga_ctx->deinit_func = (FUNC_RGA_DEINIT)dlsym(rga_ctx->rga_handle, "c_RkRgaDeInit"); + rga_ctx->blit_func = (FUNC_RGA_BLIT)dlsym(rga_ctx->rga_handle, "c_RkRgaBlit"); + rga_ctx->init_func(); + return 0; +} + +void img_resize_fast(rga_context *rga_ctx, int src_fd, int src_w, int src_h, uint64_t dst_phys, int dst_w, int dst_h) +{ + // printf("rga use fd, src(%dx%d) -> dst(%dx%d)\n", src_w, src_h, dst_w, dst_h); + + if (rga_ctx->rga_handle) + { + int ret = 0; + rga_info_t src, dst; + + memset(&src, 0, sizeof(rga_info_t)); + src.fd = src_fd; + src.mmuFlag = 1; + // src.virAddr = (void *)psrc; + + memset(&dst, 0, sizeof(rga_info_t)); + dst.fd = -1; + dst.mmuFlag = 0; + +#if defined(__arm__) + dst.phyAddr = (void *)((uint32_t)dst_phys); +#else + dst.phyAddr = (void *)dst_phys; +#endif + + dst.nn.nn_flag = 0; + + rga_set_rect(&src.rect, 0, 0, src_w, src_h, src_w, src_h, RK_FORMAT_RGB_888); + rga_set_rect(&dst.rect, 0, 0, dst_w, dst_h, dst_w, dst_h, RK_FORMAT_RGB_888); + + ret = rga_ctx->blit_func(&src, &dst, NULL); + if (ret) + { + printf("c_RkRgaBlit error : %s\n", strerror(errno)); + } + + return; + } + return; +} + +void img_resize_slow(rga_context *rga_ctx, void *src_virt, int src_w, int src_h, void *dst_virt, int dst_w, int dst_h, + int w_offset, int h_offset, RgaSURF_FORMAT color, bool add_extra_sz_w, bool add_extra_sz_h) +{ + // printf("rga use virtual, src(%dx%d) -> dst(%dx%d)\n", src_w, src_h, dst_w, dst_h); + if (rga_ctx->rga_handle) + { + int ret = 0; + rga_info_t src, dst; + + memset(&src, 0, sizeof(rga_info_t)); + src.fd = -1; + src.mmuFlag = 1; + src.virAddr = (void *)src_virt; + + memset(&dst, 0, sizeof(rga_info_t)); + dst.fd = -1; + dst.mmuFlag = 1; + dst.virAddr = dst_virt; + + dst.nn.nn_flag = 0; + // printf("input to rga para, w_offset: %d, h_offset: %d, dst_w: %d, dst_h: %d\n", + // w_offset, h_offset, dst_w, dst_h); + + rga_set_rect(&src.rect, 0, 0, src_w, src_h, src_w, src_h, RK_FORMAT_RGB_888); + int src_w = dst_w + 2*w_offset; + int src_h = dst_h + 2*h_offset; + if (add_extra_sz_w){ + src_w += 1; + // printf("Adding extra sz w: %d\n", src_w); + } + else if (add_extra_sz_h){ + src_h += 1; + // printf("Adding extra sz h: %d\n", src_h); + } + + //rga_set_rect(&dst.rect, w_offset, h_offset, dst_w, dst_h, src_w, dst_h + 2*h_offset, color); + rga_set_rect(&dst.rect, w_offset, h_offset, dst_w, dst_h, src_w, src_h, color); + + ret = rga_ctx->blit_func(&src, &dst, NULL); + if (ret){ + printf("c_RkRgaBlit error : %s\n", strerror(errno)); + } + + return; + } + return; +} + +int RGA_deinit(rga_context *rga_ctx) +{ + if(rga_ctx->rga_handle) + { + dlclose(rga_ctx->rga_handle); + rga_ctx->rga_handle = NULL; + } +} \ No newline at end of file diff --git a/yolov5_detect.cpp b/yolov5_detect.cpp new file mode 100644 index 0000000..96f082d --- /dev/null +++ b/yolov5_detect.cpp @@ -0,0 +1,374 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "yolov5_detect.h" +#include "rknn_api.h" + +#include + +using namespace std; +using namespace cv; + + +//unsigned char *model; +//detection* dets; + +static void printRKNNTensor(rknn_tensor_attr *attr) +{ + printf("index=%d name=%s n_dims=%d dims=[%d %d %d %d] n_elems=%d size=%d " + "fmt=%d type=%d qnt_type=%d fl=%d zp=%d scale=%f\n", + attr->index, attr->name, attr->n_dims, attr->dims[3], attr->dims[2], + attr->dims[1], attr->dims[0], attr->n_elems, attr->size, 0, attr->type, + attr->qnt_type, attr->fl, attr->zp, attr->scale); +} + +static int letter_box(cv::Mat input_image, cv::Mat *output_image, int model_input_size) +{ + int input_width, input_height; + + input_width = input_image.cols; + input_height = input_image.rows; + float ratio; + ratio = min((float)model_input_size / input_width, (float)model_input_size / input_height); + + int new_width, new_height; + new_width = round(ratio * input_width ); + new_height = round(ratio * input_height); + + + int height_padding = 0; + int width_padding = 0; + int top = 0; + int bottom = 0; + int left = 0; + int right = 0; + if( new_width >= new_height) + { + height_padding = new_width - new_height; + if( (height_padding % 2) == 0 ) + { + top = (int)((float)(height_padding/2)); + bottom = (int)((float)(height_padding/2)); + } + else + { + top = (int)((float)(height_padding/2)); + bottom = (int)((float)(height_padding/2))+1; + } + } + else + { + width_padding = new_height - new_width; + if( (width_padding % 2) == 0 ) + { + left = (int)((float)(width_padding/2)); + right = (int)((float)(width_padding/2)); + } + else + { + left = (int)((float)(width_padding/2)); + right = (int)((float)(width_padding/2))+1; + } + + } + + cv::Mat resize_img; + + cv::resize(input_image, resize_img, cv::Size(new_width, new_height)); + cv::copyMakeBorder(resize_img, *output_image, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); + + return 0; +} + +int yolov5_detect_init(rknn_context *ctx, const char * path) +{ + int ret; + + // Load model + FILE *fp = fopen(path, "rb"); + if(fp == NULL) + { + printf("fopen %s fail!\n", path); + return -1; + } + fseek(fp, 0, SEEK_END); //fp指向end,fseek(FILE *stream, long offset, int fromwhere); + int model_len = ftell(fp); //相对文件首偏移 + unsigned char *model_data = (unsigned char*)malloc(model_len); + + fseek(fp, 0, SEEK_SET); //SEEK_SET为文件头 + if(model_len != fread(model_data, 1, model_len, fp)) + { + printf("fread %s fail!\n", path); + free(model_data); + return -1; + } + fclose(fp); + + //init + ret = rknn_init(ctx, model_data, model_len, RKNN_FLAG_PRIOR_MEDIUM); + if(ret < 0) + { + printf("rknn_init fail! ret=%d\n", ret); + return -1; + } + + free(model_data); + + return 0; +} + +static int scale_coords(yolov5_detect_result_group_t *detect_result_group, int img_width, int img_height, int model_size) +{ + for (int i = 0; i < detect_result_group->count; i++) + { + yolov5_detect_result_t *det_result = &(detect_result_group->results[i]); + + + int x1 = det_result->box.left; + int y1 = det_result->box.top; + int x2 = det_result->box.right; + int y2 = det_result->box.bottom; + + + if( img_width >= img_height ) + { + int image_max_len = img_width; + float gain; + gain = (float)model_size / image_max_len; + int resized_height = img_height * gain; + int height_pading = (model_size - resized_height)/2; + y1 = (y1 - height_pading); + y2 = (y2 - height_pading); + x1 = int(x1 / gain); + y1 = int(y1 / gain); + x2 = int(x2 / gain); + y2 = int(y2 / gain); + + det_result->box.left = x1; + det_result->box.top = y1; + det_result->box.right = x2; + det_result->box.bottom = y2; + } + else + { + int image_max_len = img_height; + float gain; + gain = (float)model_size / image_max_len; + int resized_width = img_width * gain; + int width_pading = (model_size - resized_width)/2; + x1 = (x1 - width_pading); + x2 = (x2 - width_pading); + x1 = int(x1 / gain); + y1 = int(y1 / gain); + x2 = int(x2 / gain); + y2 = int(y2 / gain); + + det_result->box.left = x1; + det_result->box.top = y1; + det_result->box.right = x2; + det_result->box.bottom = y2; + } + + } + + return 0; +} + + +int yolov5_detect_run(rknn_context ctx, cv::Mat input_image, yolov5_detect_result_group_t *detect_result_group) +{ + int img_width = 0; + int img_height = 0; + int img_channel = 0; + + size_t actual_size = 0; + const float vis_threshold = 0.1; + const float nms_threshold = 0.5; + const float conf_threshold = 0.2; + int ret; + + img_width = input_image.cols; + img_height = input_image.rows; + + rknn_sdk_version version; + ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &version, + sizeof(rknn_sdk_version)); + if (ret < 0) + { + printf("rknn_init error ret=%d\n", ret); + return -1; + } + /* + printf("sdk version: %s driver version: %s\n", version.api_version, + version.drv_version); + */ + + rknn_input_output_num io_num; + ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num)); + if (ret < 0) + { + printf("rknn_init error ret=%d\n", ret); + return -1; + } + /* + printf("model input num: %d, output num: %d\n", io_num.n_input, + io_num.n_output); + */ + + rknn_tensor_attr input_attrs[io_num.n_input]; + memset(input_attrs, 0, sizeof(input_attrs)); + for (int i = 0; i < io_num.n_input; i++) + { + input_attrs[i].index = i; + ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), + sizeof(rknn_tensor_attr)); + if (ret < 0) + { + printf("rknn_init error ret=%d\n", ret); + return -1; + } + //printRKNNTensor(&(input_attrs[i])); + } + + rknn_tensor_attr output_attrs[io_num.n_output]; + memset(output_attrs, 0, sizeof(output_attrs)); + for (int i = 0; i < io_num.n_output; i++) + { + output_attrs[i].index = i; + ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), + sizeof(rknn_tensor_attr)); + //printRKNNTensor(&(output_attrs[i])); + } + + int input_channel = 3; + int input_width = 0; + int input_height = 0; + if (input_attrs[0].fmt == RKNN_TENSOR_NCHW) + { + //printf("model is NCHW input fmt\n"); + input_width = input_attrs[0].dims[0]; + input_height = input_attrs[0].dims[1]; + } + else + { + //printf("model is NHWC input fmt\n"); + input_width = input_attrs[0].dims[1]; + input_height = input_attrs[0].dims[2]; + } + + /* + printf("model input height=%d, width=%d, channel=%d\n", height, width, + channel); + */ + + /* Init input tensor */ + rknn_input inputs[1]; + memset(inputs, 0, sizeof(inputs)); + inputs[0].index = 0; + inputs[0].type = RKNN_TENSOR_UINT8; + inputs[0].size = input_width * input_height * input_channel; + inputs[0].fmt = RKNN_TENSOR_NHWC; + inputs[0].pass_through = 0; + + /* Init output tensor */ + rknn_output outputs[io_num.n_output]; + memset(outputs, 0, sizeof(outputs)); + + for (int i = 0; i < io_num.n_output; i++) + { + outputs[i].want_float = 0; + } + + cv::Mat letter_image; + letter_box(input_image, &letter_image, input_width); + inputs[0].buf = letter_image.data; + + rknn_inputs_set(ctx, io_num.n_input, inputs); + ret = rknn_run(ctx, NULL); + ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL); + + // Post process + std::vector out_scales; + std::vector out_zps; + for (int i = 0; i < io_num.n_output; ++i) + { + out_scales.push_back(output_attrs[i].scale); + out_zps.push_back(output_attrs[i].zp); + } + + + yolov5_post_process_u8((uint8_t *)outputs[0].buf, (uint8_t *)outputs[1].buf, (uint8_t *)outputs[2].buf, input_height, input_width, + conf_threshold, nms_threshold, out_zps, out_scales, detect_result_group); + + + /* + yolov5_post_process_fp((float *)outputs[0].buf, (float *)outputs[1].buf, (float *)outputs[2].buf, input_height, input_width, + conf_threshold, nms_threshold, &detect_result_group); + */ + + rknn_outputs_release(ctx, io_num.n_output, outputs); + + scale_coords(detect_result_group, img_width, img_height, input_width); + + return 0; +} + +int yolov5_detect_release(rknn_context ctx) +{ + rknn_destroy(ctx); + return 0; +} + + +std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) { + std::string ret; + int i = 0; + int j = 0; + unsigned char char_array_3[3]; + unsigned char char_array_4[4]; + + while (in_len--) { + char_array_3[i++] = *(bytes_to_encode++); + if (i == 3) { + char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; + char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); + char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); + char_array_4[3] = char_array_3[2] & 0x3f; + + for(i = 0; (i <4) ; i++) + ret += base64_chars[char_array_4[i]]; + i = 0; + } + } + + if (i) { + for(j = i; j < 3; j++) + char_array_3[j] = '\0'; + + char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; + char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); + char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); + char_array_4[3] = char_array_3[2] & 0x3f; + + for (j = 0; (j < i + 1); j++) + ret += base64_chars[char_array_4[j]]; + + while((i++ < 3)) + ret += '='; + } + + return ret; +} + +static inline bool is_base64(unsigned char c) { + return (isalnum(c) || (c == '+') || (c == '/')); +} \ No newline at end of file diff --git a/yolov5_detect_postprocess.cpp b/yolov5_detect_postprocess.cpp new file mode 100644 index 0000000..e57518d --- /dev/null +++ b/yolov5_detect_postprocess.cpp @@ -0,0 +1,469 @@ +// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include "yolov5_detect_postprocess.h" +#include + + +static char labels[YOLOV5_CLASS_NUM][30] = {"0", "1"}; + +const int anchor0[6] = {10, 13, 16, 30, 33, 23}; +const int anchor1[6] = {30, 61, 62, 45, 59, 119}; +const int anchor2[6] = {116, 90, 156, 198, 373, 326}; + +inline static int clamp(float val, int min, int max) +{ + return val > min ? (val < max ? val : max) : min; +} + +static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1, float ymax1) +{ + float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0); + float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0); + float i = w * h; + float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i; + return u <= 0.f ? 0.f : (i / u); +} + +static int nms(int validCount, std::vector &outputLocations, std::vector &order, float threshold) +{ + for (int i = 0; i < validCount; ++i) + { + if (order[i] == -1) + { + continue; + } + int n = order[i]; + for (int j = i + 1; j < validCount; ++j) + { + int m = order[j]; + if (m == -1) + { + continue; + } + float xmin0 = outputLocations[n * 4 + 0]; + float ymin0 = outputLocations[n * 4 + 1]; + float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2]; + float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3]; + + float xmin1 = outputLocations[m * 4 + 0]; + float ymin1 = outputLocations[m * 4 + 1]; + float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2]; + float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3]; + + float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1); + + if (iou > threshold) + { + order[j] = -1; + } + } + } + return 0; +} + +static int quick_sort_indice_inverse( + std::vector &input, + int left, + int right, + std::vector &indices) +{ + float key; + int key_index; + int low = left; + int high = right; + if (left < right) + { + key_index = indices[left]; + key = input[left]; + while (low < high) + { + while (low < high && input[high] <= key) + { + high--; + } + input[low] = input[high]; + indices[low] = indices[high]; + while (low < high && input[low] >= key) + { + low++; + } + input[high] = input[low]; + indices[high] = indices[low]; + } + input[low] = key; + indices[low] = key_index; + quick_sort_indice_inverse(input, left, low - 1, indices); + quick_sort_indice_inverse(input, low + 1, right, indices); + } + return low; +} + +static float sigmoid(float x) +{ + return 1.0 / (1.0 + expf(-x)); +} + +static float unsigmoid(float y) +{ + return -1.0 * logf((1.0 / y) - 1.0); +} + +inline static int32_t __clip(float val, float min, float max) +{ + float f = val <= min ? min : (val >= max ? max : val); + return f; +} + +static uint8_t qnt_f32_to_affine(float f32, uint8_t zp, float scale) +{ + float dst_val = (f32 / scale) + zp; + uint8_t res = (uint8_t)__clip(dst_val, 0, 255); + return res; +} + +static float deqnt_affine_to_f32(uint8_t qnt, uint8_t zp, float scale) +{ + return ((float)qnt - (float)zp) * scale; +} + +static int process_u8(uint8_t *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride, + std::vector &boxes, std::vector &boxScores, std::vector &classId, + float threshold, uint8_t zp, float scale) +{ + + int validCount = 0; + int grid_len = grid_h * grid_w; + float thres = unsigmoid(threshold); + uint8_t thres_u8 = qnt_f32_to_affine(thres, zp, scale); + for (int a = 0; a < 3; a++) + { + for (int i = 0; i < grid_h; i++) + { + for (int j = 0; j < grid_w; j++) + { + uint8_t box_confidence = input[(YOLOV5_PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j]; + if (box_confidence >= thres_u8) + { + int offset = (YOLOV5_PROP_BOX_SIZE * a) * grid_len + i * grid_w + j; + uint8_t *in_ptr = input + offset; + float box_x = sigmoid(deqnt_affine_to_f32(*in_ptr, zp, scale)) * 2.0 - 0.5; + float box_y = sigmoid(deqnt_affine_to_f32(in_ptr[grid_len], zp, scale)) * 2.0 - 0.5; + float box_w = sigmoid(deqnt_affine_to_f32(in_ptr[2 * grid_len], zp, scale)) * 2.0; + float box_h = sigmoid(deqnt_affine_to_f32(in_ptr[3 * grid_len], zp, scale)) * 2.0; + box_x = (box_x + j) * (float)stride; + box_y = (box_y + i) * (float)stride; + box_w = box_w * box_w * (float)anchor[a * 2]; + box_h = box_h * box_h * (float)anchor[a * 2 + 1]; + box_x -= (box_w / 2.0); + box_y -= (box_h / 2.0); + boxes.push_back(box_x); + boxes.push_back(box_y); + boxes.push_back(box_w); + boxes.push_back(box_h); + + uint8_t maxClassProbs = in_ptr[5 * grid_len]; + int maxClassId = 0; + for (int k = 1; k < YOLOV5_CLASS_NUM; ++k) + { + uint8_t prob = in_ptr[(5 + k) * grid_len]; + if (prob > maxClassProbs) + { + maxClassId = k; + maxClassProbs = prob; + } + } + float box_conf_f32 = sigmoid(deqnt_affine_to_f32(box_confidence, zp, scale)); + float class_prob_f32 = sigmoid(deqnt_affine_to_f32(maxClassProbs, zp, scale)); + boxScores.push_back(box_conf_f32* class_prob_f32); + classId.push_back(maxClassId); + validCount++; + } + } + } + } + return validCount; +} + +static int process_fp(float *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride, + std::vector &boxes, std::vector &boxScores, std::vector &classId, + float threshold) +{ + + int validCount = 0; + int grid_len = grid_h * grid_w; + float thres_sigmoid = unsigmoid(threshold); + for (int a = 0; a < 3; a++) + { + for (int i = 0; i < grid_h; i++) + { + for (int j = 0; j < grid_w; j++) + { + float box_confidence = input[(YOLOV5_PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j]; + if (box_confidence >= thres_sigmoid) + { + int offset = (YOLOV5_PROP_BOX_SIZE * a) * grid_len + i * grid_w + j; + float *in_ptr = input + offset; + float box_x = sigmoid(*in_ptr) * 2.0 - 0.5; + float box_y = sigmoid(in_ptr[grid_len]) * 2.0 - 0.5; + float box_w = sigmoid(in_ptr[2 * grid_len]) * 2.0; + float box_h = sigmoid(in_ptr[3 * grid_len]) * 2.0; + box_x = (box_x + j) * (float)stride; + box_y = (box_y + i) * (float)stride; + box_w = box_w * box_w * (float)anchor[a * 2]; + box_h = box_h * box_h * (float)anchor[a * 2 + 1]; + box_x -= (box_w / 2.0); + box_y -= (box_h / 2.0); + boxes.push_back(box_x); + boxes.push_back(box_y); + boxes.push_back(box_w); + boxes.push_back(box_h); + + float maxClassProbs = in_ptr[5 * grid_len]; + int maxClassId = 0; + for (int k = 1; k < YOLOV5_CLASS_NUM; ++k) + { + float prob = in_ptr[(5 + k) * grid_len]; + if (prob > maxClassProbs) + { + maxClassId = k; + maxClassProbs = prob; + } + } + float box_conf_f32 = sigmoid(box_confidence); + float class_prob_f32 = sigmoid(maxClassProbs); + boxScores.push_back(box_conf_f32* class_prob_f32); + classId.push_back(maxClassId); + validCount++; + } + } + } + } + return validCount; +} + +int yolov5_post_process_u8(uint8_t *input0, uint8_t *input1, uint8_t *input2, int model_in_h, int model_in_w, + float conf_threshold, float nms_threshold, + std::vector &qnt_zps, std::vector &qnt_scales, + yolov5_detect_result_group_t *group) +{ + static int init = -1; + if (init == -1) + { + /* + int ret = 0; + ret = loadLabelName(LABEL_NALE_TXT_PATH, labels); + if (ret < 0) + { + return -1; + } + */ + init = 0; + } + memset(group, 0, sizeof(yolov5_detect_result_group_t)); + + std::vector filterBoxes; + std::vector boxesScore; + std::vector classId; + int stride0 = 8; + int grid_h0 = model_in_h / stride0; + int grid_w0 = model_in_w / stride0; + int validCount0 = 0; + validCount0 = process_u8(input0, (int *)anchor0, grid_h0, grid_w0, model_in_h, model_in_w, + stride0, filterBoxes, boxesScore, classId, conf_threshold, qnt_zps[0], qnt_scales[0]); + + int stride1 = 16; + int grid_h1 = model_in_h / stride1; + int grid_w1 = model_in_w / stride1; + int validCount1 = 0; + validCount1 = process_u8(input1, (int *)anchor1, grid_h1, grid_w1, model_in_h, model_in_w, + stride1, filterBoxes, boxesScore, classId, conf_threshold, qnt_zps[1], qnt_scales[1]); + + int stride2 = 32; + int grid_h2 = model_in_h / stride2; + int grid_w2 = model_in_w / stride2; + int validCount2 = 0; + validCount2 = process_u8(input2, (int *)anchor2, grid_h2, grid_w2, model_in_h, model_in_w, + stride2, filterBoxes, boxesScore, classId, conf_threshold, qnt_zps[2], qnt_scales[2]); + + int validCount = validCount0 + validCount1 + validCount2; + // no object detect + if (validCount <= 0) + { + return 0; + } + + std::vector indexArray; + for (int i = 0; i < validCount; ++i) + { + indexArray.push_back(i); + } + + quick_sort_indice_inverse(boxesScore, 0, validCount - 1, indexArray); + + nms(validCount, filterBoxes, indexArray, nms_threshold); + + int last_count = 0; + group->count = 0; + /* box valid detect target */ + for (int i = 0; i < validCount; ++i) + { + + if (indexArray[i] == -1 || boxesScore[i] < conf_threshold || last_count >= YOLOV5_NUMB_MAX_SIZE) + { + continue; + } + int n = indexArray[i]; + + float x1 = filterBoxes[n * 4 + 0]; + float y1 = filterBoxes[n * 4 + 1]; + float x2 = x1 + filterBoxes[n * 4 + 2]; + float y2 = y1 + filterBoxes[n * 4 + 3]; + int id = classId[n]; + + /* + group->results[last_count].box.left = (int)((clamp(x1, 0, model_in_w) - w_offset) / resize_scale); + group->results[last_count].box.top = (int)((clamp(y1, 0, model_in_h) - h_offset) / resize_scale); + group->results[last_count].box.right = (int)((clamp(x2, 0, model_in_w) - w_offset) / resize_scale); + group->results[last_count].box.bottom = (int)((clamp(y2, 0, model_in_h) - h_offset) / resize_scale); + */ + group->results[last_count].box.left = (int) clamp(x1, 0, model_in_w); + group->results[last_count].box.top = (int) clamp(y1, 0, model_in_h); + group->results[last_count].box.right = (int) clamp(x2, 0, model_in_w); + group->results[last_count].box.bottom = (int) clamp(y2, 0, model_in_h); + + group->results[last_count].prop = boxesScore[i]; + group->results[last_count].class_index = id; + char *label = labels[id]; + strncpy(group->results[last_count].name, label, YOLOV5_NAME_MAX_SIZE); + + // printf("result %2d: (%4d, %4d, %4d, %4d), %s\n", i, group->results[last_count].box.left, group->results[last_count].box.top, + // group->results[last_count].box.right, group->results[last_count].box.bottom, label); + last_count++; + } + group->count = last_count; + + return 0; +} + + +int yolov5_post_process_fp(float *input0, float *input1, float *input2, int model_in_h, int model_in_w, + float conf_threshold, float nms_threshold, + yolov5_detect_result_group_t *group) +{ + static int init = -1; + if (init == -1) + { + /* + int ret = 0; + ret = loadLabelName(LABEL_NALE_TXT_PATH, labels); + if (ret < 0) + { + return -1; + } + */ + + init = 0; + } + memset(group, 0, sizeof(yolov5_detect_result_group_t)); + + std::vector filterBoxes; + std::vector boxesScore; + std::vector classId; + int stride0 = 8; + int grid_h0 = model_in_h / stride0; + int grid_w0 = model_in_w / stride0; + int validCount0 = 0; + validCount0 = process_fp(input0, (int *)anchor0, grid_h0, grid_w0, model_in_h, model_in_w, + stride0, filterBoxes, boxesScore, classId, conf_threshold); + + int stride1 = 16; + int grid_h1 = model_in_h / stride1; + int grid_w1 = model_in_w / stride1; + int validCount1 = 0; + validCount1 = process_fp(input1, (int *)anchor1, grid_h1, grid_w1, model_in_h, model_in_w, + stride1, filterBoxes, boxesScore, classId, conf_threshold); + + int stride2 = 32; + int grid_h2 = model_in_h / stride2; + int grid_w2 = model_in_w / stride2; + int validCount2 = 0; + validCount2 = process_fp(input2, (int *)anchor2, grid_h2, grid_w2, model_in_h, model_in_w, + stride2, filterBoxes, boxesScore, classId, conf_threshold); + + int validCount = validCount0 + validCount1 + validCount2; + // no object detect + if (validCount <= 0) + { + return 0; + } + + std::vector indexArray; + for (int i = 0; i < validCount; ++i) + { + indexArray.push_back(i); + } + + quick_sort_indice_inverse(boxesScore, 0, validCount - 1, indexArray); + + nms(validCount, filterBoxes, indexArray, nms_threshold); + + int last_count = 0; + group->count = 0; + /* box valid detect target */ + for (int i = 0; i < validCount; ++i) + { + + if (indexArray[i] == -1 || boxesScore[i] < conf_threshold || last_count >= YOLOV5_NUMB_MAX_SIZE) + { + continue; + } + int n = indexArray[i]; + + float x1 = filterBoxes[n * 4 + 0]; + float y1 = filterBoxes[n * 4 + 1]; + float x2 = x1 + filterBoxes[n * 4 + 2]; + float y2 = y1 + filterBoxes[n * 4 + 3]; + int id = classId[n]; + + /* + group->results[last_count].box.left = (int)((clamp(x1, 0, model_in_w) - w_offset) / resize_scale); + group->results[last_count].box.top = (int)((clamp(y1, 0, model_in_h) - h_offset) / resize_scale); + group->results[last_count].box.right = (int)((clamp(x2, 0, model_in_w) - w_offset) / resize_scale); + group->results[last_count].box.bottom = (int)((clamp(y2, 0, model_in_h) - h_offset) / resize_scale); + */ + group->results[last_count].box.left = (int) clamp(x1, 0, model_in_w); + group->results[last_count].box.top = (int) clamp(y1, 0, model_in_h); + group->results[last_count].box.right = (int) clamp(x2, 0, model_in_w); + group->results[last_count].box.bottom = (int) clamp(y2, 0, model_in_h); + + group->results[last_count].prop = boxesScore[i]; + group->results[last_count].class_index = id; + char *label = labels[id]; + strncpy(group->results[last_count].name, label, YOLOV5_NAME_MAX_SIZE); + + // printf("result %2d: (%4d, %4d, %4d, %4d), %s\n", i, group->results[last_count].box.left, group->results[last_count].box.top, + // group->results[last_count].box.right, group->results[last_count].box.bottom, label); + last_count++; + } + group->count = last_count; + + return 0; +}