From 7c1feb9857c75eef201f8fb8c999e49f67bf2a3a Mon Sep 17 00:00:00 2001 From: SikongJueluo Date: Sun, 23 Jun 2024 20:27:17 +0800 Subject: [PATCH] finish nnie and succeed to use yolov3 to detect --- .gitignore | 2 + Hi3516_SDK.lua | 15 +- src/isp/eb3516_video.c | 71 +- src/isp/eb3516_video.h | 2 +- src/main.cpp | 7 +- src/nnie/eb3516_nnie.c | 705 ++++++ src/nnie/eb3516_nnie.h | 17 + src/nnie/sample_svp_nnie_software.c | 3666 +++++++++++++++++++++++++++ src/nnie/sample_svp_nnie_software.h | 134 + xmake.lua | 79 +- 10 files changed, 4594 insertions(+), 104 deletions(-) create mode 100644 src/nnie/eb3516_nnie.c create mode 100644 src/nnie/eb3516_nnie.h create mode 100644 src/nnie/sample_svp_nnie_software.c create mode 100644 src/nnie/sample_svp_nnie_software.h diff --git a/.gitignore b/.gitignore index 24799ea..f1d7132 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ Hi3516CV500_SDK/ build/ Rouring/ Rouring-Vision/ +smart_trash_bin/ +nnie-yolov3-demo/ # MacOS Cache .DS_Store diff --git a/Hi3516_SDK.lua b/Hi3516_SDK.lua index 7d8843c..bfa2678 100644 --- a/Hi3516_SDK.lua +++ b/Hi3516_SDK.lua @@ -60,6 +60,7 @@ target("hi_library") "dl", {public = true} ) + -- scipt on_load(function (target) print(target:get("links")) @@ -80,4 +81,16 @@ target("sample_common") "SENSOR1_TYPE=GALAXYCORE_GC2053_MIPI_2M_30FPS_10BIT", {public = true} ) - add_deps("sample_audio") \ No newline at end of file + add_deps("sample_audio") + +target("sample_svp") + set_kind("static") + add_files("Hi3516CV500_SDK/smp/a7_linux/mpp/sample/svp/common/*.c") + add_includedirs("Hi3516CV500_SDK/smp/a7_linux/mpp/sample/svp/common", {public = true}) + add_deps("sample_common") + add_links( + lib.."libive.a", + lib.."libmd.a", + lib.."libnnie.a", + {public=true} + ) \ No newline at end of file diff --git a/src/isp/eb3516_video.c b/src/isp/eb3516_video.c index 3bedb42..33b4f5a 100644 --- a/src/isp/eb3516_video.c +++ b/src/isp/eb3516_video.c @@ -16,7 +16,7 @@ typedef struct hiSAMPLE_VPSS_CONFIG_S { VPSS_CHN_ATTR_S astChnAttrs[VPSS_MAX_PHY_CHN_NUM]; } SAMPLE_VPSS_CONFIG_S; -static zlog_category_t* log = NULL; +static zlog_category_t* log_video = NULL; // Sensor Info static SAMPLE_VI_CONFIG_S g_stViConfig = {0}; @@ -25,6 +25,7 @@ static SIZE_S stSize; static HI_U32 u32Framerate; static VB_CONFIG_S g_stVbConfig = {0}; +static SAMPLE_VO_CONFIG_S stVoConfig; static SAMPLE_VPSS_CONFIG_S g_stVpss0Config = {0}; static SAMPLE_VPSS_CONFIG_S g_stVpss1Config = {0}; @@ -91,8 +92,11 @@ static HI_S32 VPSS_GetParams(SAMPLE_VPSS_CONFIG_S* pstVpssConfig, return HI_SUCCESS; } -void eb3516VideoInit(void) { - log = zlog_get_category("eb3516_video"); +bool eb3516VideoInit(void) { + log_video = zlog_get_category("eb3516_video"); + if (!log_video) { + return false; + } // Get Sensor(VI) Info: picture size , frame rate SAMPLE_COMM_VI_GetSensorInfo(&g_stViConfig); @@ -126,7 +130,7 @@ void eb3516VideoInit(void) { g_stViConfig.astViInfo[0].stDevInfo.ViDev = 0; g_stViConfig.astViInfo[0].stDevInfo.enWDRMode = WDR_MODE_NONE; // 设置 VI PIPE 信息 - g_stViConfig.astViInfo[0].stPipeInfo.aPipe[0] = 0; // video pipe + g_stViConfig.astViInfo[0].stPipeInfo.aPipe[0] = 0; // video pipe g_stViConfig.astViInfo[0].stPipeInfo.aPipe[1] = -1; // snap pipe g_stViConfig.astViInfo[0].stPipeInfo.aPipe[2] = -1; g_stViConfig.astViInfo[0].stPipeInfo.aPipe[3] = -1; @@ -151,9 +155,9 @@ void eb3516VideoInit(void) { // 配置 VPSS VPSS_GetParams(&g_stVpss0Config, &stSize, false); - VPSS_GetParams(&g_stVpss1Config, &stSize, true); + // VPSS_GetParams(&g_stVpss1Config, &stSize, true); - zlog_info(log, "eb3516 Video Init Finish..."); + zlog_info(log_video, "eb3516 Video Init Finish..."); } bool eb3516VideoStart(void) { @@ -163,7 +167,7 @@ bool eb3516VideoStart(void) { s32Ret = SAMPLE_COMM_SYS_InitWithVbSupplement(&g_stVbConfig, VB_SUPPLEMENT_JPEG_MASK); if (s32Ret != HI_SUCCESS) { - zlog_error(log, "Init VB and MPI failed with %#x!\n", s32Ret); + zlog_error(log_video, "Init VB and MPI failed with %#x!\n", s32Ret); goto EXIT_SYS_RST_CLK; } @@ -178,32 +182,31 @@ bool eb3516VideoStart(void) { g_stViConfig.astViInfo[0].stPipeInfo.aPipe[0], &stIspCtrlParam); s32Ret = SAMPLE_COMM_VI_StartVi(&g_stViConfig); if (s32Ret != HI_SUCCESS) { - zlog_error(log, "Start VI failed with %#x!\n", s32Ret); + zlog_error(log_video, "Start VI failed with %#x!\n", s32Ret); goto EXIT_SYS_STOP; } - zlog_info(log, "eb3516 VI Start Success..."); + zlog_info(log_video, "eb3516 VI Start Success..."); // 启动 VPSS s32Ret = SAMPLE_COMM_VPSS_Start( g_stVpss0Config.s32GrpId, g_stVpss0Config.abChnEnable, &g_stVpss0Config.stGrpAttr, g_stVpss0Config.astChnAttrs); if (s32Ret != HI_SUCCESS) { - zlog_error(log, "Start VPSS group %d failed with %#x!\n", + zlog_error(log_video, "Start VPSS group %d failed with %#x!\n", g_stVpss0Config.s32GrpId, s32Ret); goto EXIT_VI_STOP; } - zlog_info(log, "eb3516 VPSS Start Success..."); + zlog_info(log_video, "eb3516 VPSS Start Success..."); // 绑定 VI->VPSS s32Ret = SAMPLE_COMM_VI_Bind_VPSS( g_stViConfig.astViInfo[0].stPipeInfo.aPipe[0], g_stViConfig.astViInfo[0].stChnInfo.ViChn, g_stVpss0Config.s32GrpId); if (s32Ret != HI_SUCCESS) { - zlog_error(log, "VI Video Pipe bind VPSS failed with %#x!\n", s32Ret); + zlog_error(log_video, "VI Video Pipe bind VPSS failed with %#x!\n", s32Ret); goto EXIT_VPSS_STOP; } - // Get Venc Config VENC_GOP_ATTR_S stGopAttr; SAMPLE_COMM_VENC_GetGopAttr(VENC_GOPMODE_NORMALP, &stGopAttr); @@ -213,14 +216,13 @@ bool eb3516VideoStart(void) { SAMPLE_COMM_VENC_Creat(g_VencChn[0], g_CodecTypes[0], g_PicSizes[0], SAMPLE_RC_VBR, 0, HI_FALSE, &stGopAttr); if (s32Ret != HI_SUCCESS) { - zlog_error(log, "Create Venc Chn %d failed with %#x!\n", g_VencChn[0], + zlog_error(log_video, "Create Venc Chn %d failed with %#x!\n", g_VencChn[0], s32Ret); goto EXIT_VI_VPSS_UNBIND; } - zlog_info(log, "eb3516 VENC Start Success..."); + zlog_info(log_video, "eb3516 VENC Start Success..."); /*config vo*/ - SAMPLE_VO_CONFIG_S stVoConfig; SAMPLE_COMM_VO_GetDefConfig(&stVoConfig); stVoConfig.enDstDynamicRange = DYNAMIC_RANGE_SDR8; stVoConfig.enVoIntfType = VO_INTF_HDMI; @@ -228,27 +230,26 @@ bool eb3516VideoStart(void) { /*start vo*/ s32Ret = SAMPLE_COMM_VO_StartVO(&stVoConfig); - if (HI_SUCCESS != s32Ret) - { + if (HI_SUCCESS != s32Ret) { SAMPLE_PRT("start vo failed. s32Ret: 0x%x !\n", s32Ret); goto EXIT_VENC_H264_STOP; } - zlog_info(log, "eb3516 VO Start Success..."); + zlog_info(log_video, "eb3516 VO Start Success..."); /*vpss bind vo*/ - s32Ret = SAMPLE_COMM_VPSS_Bind_VO(g_stVpss0Config.s32GrpId, g_Vpss0Chn[0], stVoConfig.VoDev, g_VencChn[0]); - if (HI_SUCCESS != s32Ret) - { + s32Ret = SAMPLE_COMM_VPSS_Bind_VO(g_stVpss0Config.s32GrpId, g_Vpss0Chn[0], + stVoConfig.VoDev, g_VencChn[0]); + if (HI_SUCCESS != s32Ret) { SAMPLE_PRT("vo bind vpss failed. s32Ret: 0x%x !\n", s32Ret); goto EXIT_VO_STOP; } - // // 绑定 VPSS[0,0]->VENC[0] - // s32Ret = SAMPLE_COMM_VPSS_Bind_VENC(g_stVpss0Config.s32GrpId, g_Vpss0Chn[0], + // s32Ret = SAMPLE_COMM_VPSS_Bind_VENC(g_stVpss0Config.s32GrpId, + // g_Vpss0Chn[0], // g_VencChn[0]); // if (s32Ret != HI_SUCCESS) { - // zlog_error(log, "Venc bind Vpss failed with %#x!\n", s32Ret); + // zlog_error(log_video, "Venc bind Vpss failed with %#x!\n", s32Ret); // goto EXIT_VENC_H265_STOP; // } @@ -256,16 +257,14 @@ bool eb3516VideoStart(void) { // s32Ret = SAMPLE_COMM_VENC_StartGetStream( // g_VencChn + 1, sizeof(g_VencChn) / sizeof(VENC_CHN) - 2); // if (HI_SUCCESS != s32Ret) { - // zlog_error(log, "Get venc stream failed!\n"); + // zlog_error(log_video, "Get venc stream failed!\n"); // goto EXIT_VENC_H264_STOP; // } - // zlog_info(log, "eb3516 Stream Start Success..."); + // zlog_info(log_video, "eb3516 Stream Start Success..."); PAUSE(); // SAMPLE_COMM_VENC_StopGetStream(); - zlog_info(log, "Exit process Sucessfully!"); - EXIT_VO_STOP: SAMPLE_COMM_VO_StopVO(&stVoConfig); @@ -287,4 +286,18 @@ EXIT_SYS_STOP: EXIT_SYS_RST_CLK: EXIT: return s32Ret; +} + +void eb3516VideoStop(void) { + SAMPLE_COMM_VO_StopVO(&stVoConfig); + SAMPLE_COMM_VENC_Stop(g_VencChn[1]); + SAMPLE_COMM_VI_UnBind_VPSS(g_stViConfig.astViInfo[0].stPipeInfo.aPipe[0], + g_stViConfig.astViInfo[0].stChnInfo.ViChn, + g_stVpss0Config.s32GrpId); + SAMPLE_COMM_VPSS_Stop(g_stVpss0Config.s32GrpId, + g_stVpss0Config.abChnEnable); + SAMPLE_COMM_VI_StopVi(&g_stViConfig); + SAMPLE_COMM_SYS_Exit(); + + zlog_info(log_video, "Exit process Sucessfully!"); } \ No newline at end of file diff --git a/src/isp/eb3516_video.h b/src/isp/eb3516_video.h index ed762f7..6f7447a 100644 --- a/src/isp/eb3516_video.h +++ b/src/isp/eb3516_video.h @@ -7,7 +7,7 @@ extern "C"{ #include "stdbool.h" -void eb3516VideoInit(void); +bool eb3516VideoInit(void); bool eb3516VideoStart(void); #ifdef __cplusplus diff --git a/src/main.cpp b/src/main.cpp index 83df009..8e6e65e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -5,6 +5,7 @@ #include "zlog.h" #include "isp/eb3516_video.h" +#include "nnie/eb3516_nnie.h" int main(int argc, char** argv) { int result = 0; @@ -35,8 +36,10 @@ int main(int argc, char** argv) { zlog_warn(log, "Can't Create log dir FOR %s", file_error.message().c_str()); } - eb3516VideoInit(); - eb3516VideoStart(); + // eb3516VideoInit(); + // eb3516VideoStart(); + eb3516Yolov3Init("./coco_yolov3_detect.wk", "./dog_bike_car_416x416.bgr"); + eb3516Yolov3Exit(); zlog_fini(); diff --git a/src/nnie/eb3516_nnie.c b/src/nnie/eb3516_nnie.c new file mode 100644 index 0000000..0c8f442 --- /dev/null +++ b/src/nnie/eb3516_nnie.c @@ -0,0 +1,705 @@ +#include "eb3516_nnie.h" + +#include "hi_comm_svp.h" +#include "hi_comm_sys.h" +#include "hi_common.h" +#include "sample_comm.h" +#include "sample_comm_ive.h" +#include "sample_comm_nnie.h" +#include "sample_comm_svp.h" +#include "sample_svp_nnie_software.h" +#include "zlog.h" + +static zlog_category_t *log_nnie = NULL; + +/*yolov3 para*/ +static SAMPLE_SVP_NNIE_MODEL_S g_stNnieModel = {0}; +static SAMPLE_SVP_NNIE_PARAM_S g_stNnieParam = {0}; +static SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S g_stYolov3Param = {0}; + +static HI_S32 SAMPLE_SVP_NNIE_Detection_PrintResult( + SVP_BLOB_S *pstDstScore, SVP_BLOB_S *pstDstRoi, SVP_BLOB_S *pstClassRoiNum, + HI_FLOAT f32PrintResultThresh) { + HI_U32 i = 0, j = 0; + HI_U32 u32RoiNumBias = 0; + HI_U32 u32ScoreBias = 0; + HI_U32 u32BboxBias = 0; + HI_FLOAT f32Score = 0.0f; + HI_S32 *ps32Score = + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstDstScore->u64VirAddr); + HI_S32 *ps32Roi = + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstDstRoi->u64VirAddr); + HI_S32 *ps32ClassRoiNum = + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstClassRoiNum->u64VirAddr); + HI_U32 u32ClassNum = pstClassRoiNum->unShape.stWhc.u32Width; + HI_S32 s32XMin = 0, s32YMin = 0, s32XMax = 0, s32YMax = 0; + + u32RoiNumBias += ps32ClassRoiNum[0]; + for (i = 1; i < u32ClassNum; i++) { + u32ScoreBias = u32RoiNumBias; + u32BboxBias = u32RoiNumBias * SAMPLE_SVP_NNIE_COORDI_NUM; + /*if the confidence score greater than result threshold, the result will + * be printed*/ + if ((HI_FLOAT)ps32Score[u32ScoreBias] / SAMPLE_SVP_NNIE_QUANT_BASE >= + f32PrintResultThresh && + ps32ClassRoiNum[i] != 0) { + SAMPLE_SVP_TRACE_INFO("==== The %dth class box info====\n", i); + } + for (j = 0; j < (HI_U32)ps32ClassRoiNum[i]; j++) { + f32Score = (HI_FLOAT)ps32Score[u32ScoreBias + j] / + SAMPLE_SVP_NNIE_QUANT_BASE; + if (f32Score < f32PrintResultThresh) { + break; + } + s32XMin = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM]; + s32YMin = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + 1]; + s32XMax = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + 2]; + s32YMax = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + 3]; + SAMPLE_SVP_TRACE_INFO("%d %d %d %d %f\n", s32XMin, s32YMin, s32XMax, + s32YMax, f32Score); + } + u32RoiNumBias += ps32ClassRoiNum[i]; + } + return HI_SUCCESS; +} + +static HI_S32 SAMPLE_SVP_NNIE_Yolov3_SoftwareDeinit( + SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S *pstSoftWareParam) { + HI_S32 s32Ret = HI_SUCCESS; + SAMPLE_SVP_CHECK_EXPR_RET(NULL == pstSoftWareParam, HI_INVALID_VALUE, + SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error, pstSoftWareParam can't be NULL!\n"); + if (0 != pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr && + 0 != pstSoftWareParam->stGetResultTmpBuf.u64VirAddr) { + SAMPLE_SVP_MMZ_FREE(pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr, + pstSoftWareParam->stGetResultTmpBuf.u64VirAddr); + pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr = 0; + pstSoftWareParam->stGetResultTmpBuf.u64VirAddr = 0; + pstSoftWareParam->stDstRoi.u64PhyAddr = 0; + pstSoftWareParam->stDstRoi.u64VirAddr = 0; + pstSoftWareParam->stDstScore.u64PhyAddr = 0; + pstSoftWareParam->stDstScore.u64VirAddr = 0; + pstSoftWareParam->stClassRoiNum.u64PhyAddr = 0; + pstSoftWareParam->stClassRoiNum.u64VirAddr = 0; + } + return s32Ret; +} + +static HI_S32 SAMPLE_SVP_NNIE_Yolov3_Deinit( + SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S *pstSoftWareParam, + SAMPLE_SVP_NNIE_MODEL_S *pstNnieModel) { + HI_S32 s32Ret = HI_SUCCESS; + /*hardware deinit*/ + if (pstNnieParam != NULL) { + s32Ret = SAMPLE_COMM_SVP_NNIE_ParamDeinit(pstNnieParam); + SAMPLE_SVP_CHECK_EXPR_TRACE( + HI_SUCCESS != s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,SAMPLE_COMM_SVP_NNIE_ParamDeinit failed!\n"); + } + /*software deinit*/ + if (pstSoftWareParam != NULL) { + s32Ret = SAMPLE_SVP_NNIE_Yolov3_SoftwareDeinit(pstSoftWareParam); + SAMPLE_SVP_CHECK_EXPR_TRACE( + HI_SUCCESS != s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,SAMPLE_SVP_NNIE_Yolov3_SoftwareDeinit failed!\n"); + } + /*model deinit*/ + if (pstNnieModel != NULL) { + s32Ret = SAMPLE_COMM_SVP_NNIE_UnloadModel(pstNnieModel); + SAMPLE_SVP_CHECK_EXPR_TRACE( + HI_SUCCESS != s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,SAMPLE_COMM_SVP_NNIE_UnloadModel failed!\n"); + } + return s32Ret; +} + +static HI_S32 SAMPLE_SVP_NNIE_Yolov3_SoftwareInit( + SAMPLE_SVP_NNIE_CFG_S *pstCfg, SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S *pstSoftWareParam) { + HI_S32 s32Ret = HI_SUCCESS; + HI_U32 u32ClassNum = 0; + HI_U32 u32TotalSize = 0; + HI_U32 u32DstRoiSize = 0; + HI_U32 u32DstScoreSize = 0; + HI_U32 u32ClassRoiNumSize = 0; + HI_U32 u32TmpBufTotalSize = 0; + HI_U64 u64PhyAddr = 0; + HI_U8 *pu8VirAddr = NULL; + + pstSoftWareParam->u32OriImHeight = + pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Height; + pstSoftWareParam->u32OriImWidth = + pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Width; + pstSoftWareParam->u32BboxNumEachGrid = 3; + pstSoftWareParam->u32ClassNum = 80; + pstSoftWareParam->au32GridNumHeight[0] = 13; + pstSoftWareParam->au32GridNumHeight[1] = 26; + pstSoftWareParam->au32GridNumHeight[2] = 52; + pstSoftWareParam->au32GridNumWidth[0] = 13; + pstSoftWareParam->au32GridNumWidth[1] = 26; + pstSoftWareParam->au32GridNumWidth[2] = 52; + pstSoftWareParam->u32NmsThresh = + (HI_U32)(0.3f * SAMPLE_SVP_NNIE_QUANT_BASE); + pstSoftWareParam->u32ConfThresh = + (HI_U32)(0.5f * SAMPLE_SVP_NNIE_QUANT_BASE); + pstSoftWareParam->u32MaxRoiNum = 10; + pstSoftWareParam->af32Bias[0][0] = 116; + pstSoftWareParam->af32Bias[0][1] = 90; + pstSoftWareParam->af32Bias[0][2] = 156; + pstSoftWareParam->af32Bias[0][3] = 198; + pstSoftWareParam->af32Bias[0][4] = 373; + pstSoftWareParam->af32Bias[0][5] = 326; + pstSoftWareParam->af32Bias[1][0] = 30; + pstSoftWareParam->af32Bias[1][1] = 61; + pstSoftWareParam->af32Bias[1][2] = 62; + pstSoftWareParam->af32Bias[1][3] = 45; + pstSoftWareParam->af32Bias[1][4] = 59; + pstSoftWareParam->af32Bias[1][5] = 119; + pstSoftWareParam->af32Bias[2][0] = 10; + pstSoftWareParam->af32Bias[2][1] = 13; + pstSoftWareParam->af32Bias[2][2] = 16; + pstSoftWareParam->af32Bias[2][3] = 30; + pstSoftWareParam->af32Bias[2][4] = 33; + pstSoftWareParam->af32Bias[2][5] = 23; + + /*Malloc assist buffer memory*/ + u32ClassNum = pstSoftWareParam->u32ClassNum + 1; + + SAMPLE_SVP_CHECK_EXPR_RET( + SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM != + pstNnieParam->pstModel->astSeg[0].u16DstNum, + HI_FAILURE, SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,pstNnieParam->pstModel->astSeg[0].u16DstNum(%d) should be %d!\n", + pstNnieParam->pstModel->astSeg[0].u16DstNum, + SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM); + u32TmpBufTotalSize = + SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf(pstNnieParam, pstSoftWareParam); + u32DstRoiSize = + SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum * pstSoftWareParam->u32MaxRoiNum * + sizeof(HI_U32) * SAMPLE_SVP_NNIE_COORDI_NUM); + u32DstScoreSize = SAMPLE_SVP_NNIE_ALIGN16( + u32ClassNum * pstSoftWareParam->u32MaxRoiNum * sizeof(HI_U32)); + u32ClassRoiNumSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum * sizeof(HI_U32)); + u32TotalSize = u32TotalSize + u32DstRoiSize + u32DstScoreSize + + u32ClassRoiNumSize + u32TmpBufTotalSize; + s32Ret = SAMPLE_COMM_SVP_MallocCached("SAMPLE_YOLOV3_INIT", NULL, + (HI_U64 *)&u64PhyAddr, + (void **)&pu8VirAddr, u32TotalSize); + SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, + SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,Malloc memory failed!\n"); + memset(pu8VirAddr, 0, u32TotalSize); + SAMPLE_COMM_SVP_FlushCache(u64PhyAddr, (void *)pu8VirAddr, u32TotalSize); + + /*set each tmp buffer addr*/ + pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr = u64PhyAddr; + pstSoftWareParam->stGetResultTmpBuf.u64VirAddr = + (HI_U64)((HI_UL)pu8VirAddr); + + /*set result blob*/ + pstSoftWareParam->stDstRoi.enType = SVP_BLOB_TYPE_S32; + pstSoftWareParam->stDstRoi.u64PhyAddr = u64PhyAddr + u32TmpBufTotalSize; + pstSoftWareParam->stDstRoi.u64VirAddr = + (HI_U64)((HI_UL)pu8VirAddr + u32TmpBufTotalSize); + pstSoftWareParam->stDstRoi.u32Stride = + SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum * pstSoftWareParam->u32MaxRoiNum * + sizeof(HI_U32) * SAMPLE_SVP_NNIE_COORDI_NUM); + pstSoftWareParam->stDstRoi.u32Num = 1; + pstSoftWareParam->stDstRoi.unShape.stWhc.u32Chn = 1; + pstSoftWareParam->stDstRoi.unShape.stWhc.u32Height = 1; + pstSoftWareParam->stDstRoi.unShape.stWhc.u32Width = + u32ClassNum * pstSoftWareParam->u32MaxRoiNum * + SAMPLE_SVP_NNIE_COORDI_NUM; + + pstSoftWareParam->stDstScore.enType = SVP_BLOB_TYPE_S32; + pstSoftWareParam->stDstScore.u64PhyAddr = + u64PhyAddr + u32TmpBufTotalSize + u32DstRoiSize; + pstSoftWareParam->stDstScore.u64VirAddr = + (HI_U64)((HI_UL)pu8VirAddr + u32TmpBufTotalSize + u32DstRoiSize); + pstSoftWareParam->stDstScore.u32Stride = SAMPLE_SVP_NNIE_ALIGN16( + u32ClassNum * pstSoftWareParam->u32MaxRoiNum * sizeof(HI_U32)); + pstSoftWareParam->stDstScore.u32Num = 1; + pstSoftWareParam->stDstScore.unShape.stWhc.u32Chn = 1; + pstSoftWareParam->stDstScore.unShape.stWhc.u32Height = 1; + pstSoftWareParam->stDstScore.unShape.stWhc.u32Width = + u32ClassNum * pstSoftWareParam->u32MaxRoiNum; + + pstSoftWareParam->stClassRoiNum.enType = SVP_BLOB_TYPE_S32; + pstSoftWareParam->stClassRoiNum.u64PhyAddr = + u64PhyAddr + u32TmpBufTotalSize + u32DstRoiSize + u32DstScoreSize; + pstSoftWareParam->stClassRoiNum.u64VirAddr = + (HI_U64)((HI_UL)pu8VirAddr + u32TmpBufTotalSize + u32DstRoiSize + + u32DstScoreSize); + pstSoftWareParam->stClassRoiNum.u32Stride = + SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum * sizeof(HI_U32)); + pstSoftWareParam->stClassRoiNum.u32Num = 1; + pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Chn = 1; + pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Height = 1; + pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Width = u32ClassNum; + + return s32Ret; +} + +static HI_S32 SAMPLE_SVP_NNIE_Yolov3_ParamInit( + SAMPLE_SVP_NNIE_CFG_S *pstCfg, SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S *pstSoftWareParam) { + HI_S32 s32Ret = HI_SUCCESS; + /*init hardware para*/ + s32Ret = SAMPLE_COMM_SVP_NNIE_ParamInit(pstCfg, pstNnieParam); + SAMPLE_SVP_CHECK_EXPR_GOTO( + HI_SUCCESS != s32Ret, INIT_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error(%#x),SAMPLE_COMM_SVP_NNIE_ParamInit failed!\n", s32Ret); + + /*init software para*/ + s32Ret = SAMPLE_SVP_NNIE_Yolov3_SoftwareInit(pstCfg, pstNnieParam, + pstSoftWareParam); + SAMPLE_SVP_CHECK_EXPR_GOTO( + HI_SUCCESS != s32Ret, INIT_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error(%#x),SAMPLE_SVP_NNIE_Yolov3_SoftwareInit failed!\n", s32Ret); + + return s32Ret; +INIT_FAIL_0: + s32Ret = + SAMPLE_SVP_NNIE_Yolov3_Deinit(pstNnieParam, pstSoftWareParam, NULL); + SAMPLE_SVP_CHECK_EXPR_RET( + HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error(%#x),SAMPLE_SVP_NNIE_Yolov3_Deinit failed!\n", s32Ret); + return HI_FAILURE; +} + +static HI_S32 SAMPLE_SVP_NNIE_FillSrcData( + SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg, SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, + SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S *pstInputDataIdx) { + FILE *fp = NULL; + HI_U32 i = 0, j = 0, n = 0; + HI_U32 u32Height = 0, u32Width = 0, u32Chn = 0, u32Stride = 0, u32Dim = 0; + HI_U32 u32VarSize = 0; + HI_S32 s32Ret = HI_SUCCESS; + HI_U8 *pu8PicAddr = NULL; + HI_U32 *pu32StepAddr = NULL; + HI_U32 u32SegIdx = pstInputDataIdx->u32SegIdx; + HI_U32 u32NodeIdx = pstInputDataIdx->u32NodeIdx; + HI_U32 u32TotalStepNum = 0; + + /*open file*/ + if (NULL != pstNnieCfg->pszPic) { + fp = fopen(pstNnieCfg->pszPic, "rb"); + SAMPLE_SVP_CHECK_EXPR_RET(NULL == fp, HI_INVALID_VALUE, + SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error, open file failed!\n"); + } + + /*get data size*/ + if (SVP_BLOB_TYPE_U8 <= + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType && + SVP_BLOB_TYPE_YVU422SP >= + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType) { + u32VarSize = sizeof(HI_U8); + } else { + u32VarSize = sizeof(HI_U32); + } + + /*fill src data*/ + if (SVP_BLOB_TYPE_SEQ_S32 == + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType) { + u32Dim = pstNnieParam->astSegData[u32SegIdx] + .astSrc[u32NodeIdx] + .unShape.stSeq.u32Dim; + u32Stride = + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Stride; + pu32StepAddr = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR( + HI_U32, pstNnieParam->astSegData[u32SegIdx] + .astSrc[u32NodeIdx] + .unShape.stSeq.u64VirAddrStep); + pu8PicAddr = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR( + HI_U8, + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr); + for (n = 0; + n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; + n++) { + for (i = 0; i < *(pu32StepAddr + n); i++) { + s32Ret = fread(pu8PicAddr, u32Dim * u32VarSize, 1, fp); + SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret, FAIL, + SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,Read image file failed!\n"); + pu8PicAddr += u32Stride; + } + u32TotalStepNum += *(pu32StepAddr + n); + } + SAMPLE_COMM_SVP_FlushCache( + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64PhyAddr, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR( + HI_VOID, pstNnieParam->astSegData[u32SegIdx] + .astSrc[u32NodeIdx] + .u64VirAddr), + u32TotalStepNum * u32Stride); + } else { + u32Height = pstNnieParam->astSegData[u32SegIdx] + .astSrc[u32NodeIdx] + .unShape.stWhc.u32Height; + u32Width = pstNnieParam->astSegData[u32SegIdx] + .astSrc[u32NodeIdx] + .unShape.stWhc.u32Width; + u32Chn = pstNnieParam->astSegData[u32SegIdx] + .astSrc[u32NodeIdx] + .unShape.stWhc.u32Chn; + u32Stride = + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Stride; + pu8PicAddr = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR( + HI_U8, + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr); + if (SVP_BLOB_TYPE_YVU420SP == + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType) { + for (n = 0; + n < + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; + n++) { + for (i = 0; i < u32Chn * u32Height / 2; i++) { + s32Ret = fread(pu8PicAddr, u32Width * u32VarSize, 1, fp); + SAMPLE_SVP_CHECK_EXPR_GOTO( + 1 != s32Ret, FAIL, SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,Read image file failed!\n"); + pu8PicAddr += u32Stride; + } + } + } else if (SVP_BLOB_TYPE_YVU422SP == pstNnieParam->astSegData[u32SegIdx] + .astSrc[u32NodeIdx] + .enType) { + for (n = 0; + n < + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; + n++) { + for (i = 0; i < u32Height * 2; i++) { + s32Ret = fread(pu8PicAddr, u32Width * u32VarSize, 1, fp); + SAMPLE_SVP_CHECK_EXPR_GOTO( + 1 != s32Ret, FAIL, SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,Read image file failed!\n"); + pu8PicAddr += u32Stride; + } + } + } else { + for (n = 0; + n < + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; + n++) { + for (i = 0; i < u32Chn; i++) { + for (j = 0; j < u32Height; j++) { + s32Ret = + fread(pu8PicAddr, u32Width * u32VarSize, 1, fp); + SAMPLE_SVP_CHECK_EXPR_GOTO( + 1 != s32Ret, FAIL, SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,Read image file failed!\n"); + pu8PicAddr += u32Stride; + } + } + } + } + SAMPLE_COMM_SVP_FlushCache( + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64PhyAddr, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR( + HI_VOID, pstNnieParam->astSegData[u32SegIdx] + .astSrc[u32NodeIdx] + .u64VirAddr), + pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num * + u32Chn * u32Height * u32Stride); + } + + fclose(fp); + return HI_SUCCESS; +FAIL: + + fclose(fp); + return HI_FAILURE; +} + +static HI_S32 SAMPLE_SVP_NNIE_Forward( + SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, + SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S *pstInputDataIdx, + SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S *pstProcSegIdx, HI_BOOL bInstant) { + HI_S32 s32Ret = HI_SUCCESS; + HI_U32 i = 0, j = 0; + HI_BOOL bFinish = HI_FALSE; + SVP_NNIE_HANDLE hSvpNnieHandle = 0; + HI_U32 u32TotalStepNum = 0; + SAMPLE_SVP_NIE_PERF_STAT_DEF_VAR() + + SAMPLE_COMM_SVP_FlushCache( + pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx] + .stTskBuf.u64PhyAddr, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR( + HI_VOID, pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx] + .stTskBuf.u64VirAddr), + pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx] + .stTskBuf.u32Size); + + SAMPLE_SVP_NNIE_PERF_STAT_BEGIN() + for (i = 0; + i < pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].u32DstNum; + i++) { + if (SVP_BLOB_TYPE_SEQ_S32 == + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .enType) { + for (j = 0; j < pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u32Num; + j++) { + u32TotalStepNum += + *(SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR( + HI_U32, + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .unShape.stSeq.u64VirAddrStep) + + j); + } + SAMPLE_COMM_SVP_FlushCache( + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u64PhyAddr, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR( + HI_VOID, pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u64VirAddr), + u32TotalStepNum * + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u32Stride); + + } else { + SAMPLE_COMM_SVP_FlushCache( + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u64PhyAddr, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR( + HI_VOID, pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u64VirAddr), + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u32Num * + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .unShape.stWhc.u32Chn * + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .unShape.stWhc.u32Height * + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u32Stride); + } + } + + /*set input blob according to node name*/ + if (pstInputDataIdx->u32SegIdx != pstProcSegIdx->u32SegIdx) { + for (i = 0; + i < + pstNnieParam->pstModel->astSeg[pstProcSegIdx->u32SegIdx].u16SrcNum; + i++) { + for (j = 0; + j < pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx] + .u16DstNum; + j++) { + if (0 == strncmp(pstNnieParam->pstModel + ->astSeg[pstInputDataIdx->u32SegIdx] + .astDstNode[j] + .szName, + pstNnieParam->pstModel + ->astSeg[pstProcSegIdx->u32SegIdx] + .astSrcNode[i] + .szName, + SVP_NNIE_NODE_NAME_LEN)) { + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astSrc[i] = + pstNnieParam->astSegData[pstInputDataIdx->u32SegIdx] + .astDst[j]; + break; + } + } + SAMPLE_SVP_CHECK_EXPR_RET( + (j == pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx] + .u16DstNum), + HI_FAILURE, SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,can't find %d-th seg's %d-th src blob!\n", + pstProcSegIdx->u32SegIdx, i); + } + } + + /*NNIE_Forward*/ + SAMPLE_SVP_NNIE_PERF_STAT_BEGIN() + s32Ret = HI_MPI_SVP_NNIE_Forward( + &hSvpNnieHandle, + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astSrc, + pstNnieParam->pstModel, + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst, + &pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx], bInstant); + SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, + SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,HI_MPI_SVP_NNIE_Forward failed!\n"); + + if (bInstant) { + /*Wait NNIE finish*/ + while (HI_ERR_SVP_NNIE_QUERY_TIMEOUT == + (s32Ret = HI_MPI_SVP_NNIE_Query( + pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx] + .enNnieId, + hSvpNnieHandle, &bFinish, HI_TRUE))) { + usleep(100); + SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_INFO, + "HI_MPI_SVP_NNIE_Query Query timeout!\n"); + } + } + u32TotalStepNum = 0; + + SAMPLE_SVP_NNIE_PERF_STAT_BEGIN() + for (i = 0; + i < pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].u32DstNum; + i++) { + if (SVP_BLOB_TYPE_SEQ_S32 == + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .enType) { + for (j = 0; j < pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u32Num; + j++) { + u32TotalStepNum += + *(SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR( + HI_U32, + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .unShape.stSeq.u64VirAddrStep) + + j); + } + SAMPLE_COMM_SVP_FlushCache( + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u64PhyAddr, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR( + HI_VOID, pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u64VirAddr), + u32TotalStepNum * + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u32Stride); + + } else { + SAMPLE_COMM_SVP_FlushCache( + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u64PhyAddr, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR( + HI_VOID, pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u64VirAddr), + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u32Num * + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .unShape.stWhc.u32Chn * + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .unShape.stWhc.u32Height * + pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx] + .astDst[i] + .u32Stride); + } + } + + return s32Ret; +} + +bool eb3516Yolov3Init(const char *model_file, const char *pic_file) { + HI_S32 ret = HI_SUCCESS; + + SAMPLE_SVP_NNIE_CFG_S stNnieCfg = {0}; + SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S stInputDataIdx = {0}; + SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S stProcSegIdx = {0}; + HI_FLOAT f32PrintResultThresh = 0.0f; + + /*Set configuration parameter*/ + f32PrintResultThresh = 0.8f; + stNnieCfg.pszPic= pic_file; + stNnieCfg.u32MaxInputNum = 1; //max input image num in each batch + stNnieCfg.u32MaxRoiNum = 0; + stNnieCfg.aenNnieCoreId[0] = SVP_NNIE_ID_0;//set NNIE core + + // Get logger + log_nnie = zlog_get_category("eb3516_nnie"); + if (!log_nnie) { + return false; + } + + // Init Sys + SAMPLE_COMM_SVP_CheckSysInit(); + zlog_info(log_nnie, "Sys succeed to init"); + + // Load model + ret = SAMPLE_COMM_SVP_NNIE_LoadModel((HI_CHAR *)model_file, &g_stNnieModel); + if (HI_SUCCESS != ret) { + zlog_error(log_nnie, "NNIE failed to load model"); + goto EXIT_SYS; + } + zlog_info(log_nnie, "Model succeed to be loaded"); + + // Init parameter + g_stNnieParam.pstModel = &g_stNnieModel.stModel; + ret = SAMPLE_SVP_NNIE_Yolov3_ParamInit(&stNnieCfg, &g_stNnieParam, + &g_stYolov3Param); + if (HI_SUCCESS != ret) { + zlog_error(log_nnie, "NNIE failed to init parameter"); + goto EXIT_DEINIT; + } + + // Read src pic + stInputDataIdx.u32SegIdx = 0; + stInputDataIdx.u32NodeIdx = 0; + ret = SAMPLE_SVP_NNIE_FillSrcData(&stNnieCfg, &g_stNnieParam, + &stInputDataIdx); + if (HI_SUCCESS != ret) { + zlog_error(log_nnie, "NNIE failed to read picture"); + goto EXIT_DEINIT; + } + + // NNIE Forward + stProcSegIdx.u32SegIdx = 0; + ret = SAMPLE_SVP_NNIE_Forward(&g_stNnieParam, &stInputDataIdx, + &stProcSegIdx, HI_TRUE); + if (HI_SUCCESS != ret) { + zlog_error(log_nnie, "NNIE failed to start forward"); + goto EXIT_DEINIT; + } + + // Get Result + ret = SAMPLE_SVP_NNIE_Yolov3_GetResult(&g_stNnieParam, &g_stYolov3Param); + if (HI_SUCCESS != ret) { + zlog_error(log_nnie, "NNIE failed to get result"); + goto EXIT_DEINIT; + } + + // Print Result + (void)SAMPLE_SVP_NNIE_Detection_PrintResult( + &g_stYolov3Param.stDstScore, &g_stYolov3Param.stDstRoi, + &g_stYolov3Param.stClassRoiNum, f32PrintResultThresh); + +EXIT_DEINIT: + SAMPLE_SVP_NNIE_Yolov3_Deinit(&g_stNnieParam, &g_stYolov3Param, + &g_stNnieModel); +EXIT_SYS: + SAMPLE_COMM_SVP_CheckSysExit(); + return false; +} + +void eb3516Yolov3Exit(void) { + SAMPLE_SVP_NNIE_Yolov3_Deinit(&g_stNnieParam, &g_stYolov3Param, + &g_stNnieModel); + memset(&g_stNnieParam, 0, sizeof(SAMPLE_SVP_NNIE_PARAM_S)); + memset(&g_stYolov3Param, 0, + sizeof(SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S)); + memset(&g_stNnieModel, 0, sizeof(SAMPLE_SVP_NNIE_MODEL_S)); + SAMPLE_COMM_SVP_CheckSysExit(); + + zlog_info(log_nnie, "Yolov3 Exit Successfully"); +} diff --git a/src/nnie/eb3516_nnie.h b/src/nnie/eb3516_nnie.h new file mode 100644 index 0000000..536264d --- /dev/null +++ b/src/nnie/eb3516_nnie.h @@ -0,0 +1,17 @@ +#ifndef __EB3516_NNIE_H__ +#define __EB3516_NNIE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +bool eb3516Yolov3Init(const char *model_file, const char *pic_file); +void eb3516Yolov3Exit(void); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/src/nnie/sample_svp_nnie_software.c b/src/nnie/sample_svp_nnie_software.c new file mode 100644 index 0000000..4028204 --- /dev/null +++ b/src/nnie/sample_svp_nnie_software.c @@ -0,0 +1,3666 @@ +#include"sample_svp_nnie_software.h" +#include + +#ifdef __cplusplus // If used by C++ code, +extern "C" { // we need to export the C interface +#endif + +#ifdef SAMPLE_SVP_NNIE_PERF_STAT + +#define SAMPLE_SVP_NNIE_PERF_STAT_RPN_CLREAR() memset(&g_stOpRpnPerfTmp,0,sizeof(g_stOpRpnPerfTmp)); +#define SAMPLE_SVP_NNIE_PERF_STAT_RPN_SRC_FLUSH_TIME() SAMPLE_SVP_NNIE_PERF_STAT_GET_DIFF_TIME(g_stOpRpnPerfTmp.u64SrcFlushTime) +#define SAMPLE_SVP_NNIE_PERF_STAT_RPN_PRE_DST_FLUSH_TIME() SAMPLE_SVP_NNIE_PERF_STAT_GET_DIFF_TIME(g_stOpRpnPerfTmp.u64PreDstFulshTime) +#define SAMPLE_SVP_NNIE_PERF_STAT_RPN_AFTER_DST_FLUSH_TIME() SAMPLE_SVP_NNIE_PERF_STAT_GET_DIFF_TIME(g_stOpRpnPerfTmp.u64AferDstFulshTime) +#define SAMPLE_SVP_NNIE_PERF_STAT_RPN_OP_TIME() SAMPLE_SVP_NNIE_PERF_STAT_GET_DIFF_TIME(g_stOpRpnPerfTmp.u64OPTime) + +SAMPLE_SVP_NNIE_OP_PERF_STAT_S g_stOpRpnPerfTmp = {0}; + +#else +#define SAMPLE_SVP_NNIE_PERF_STAT_RPN_CLREAR() +#define SAMPLE_SVP_NNIE_PERF_STAT_RPN_SRC_FLUSH_TIME() +#define SAMPLE_SVP_NNIE_PERF_STAT_RPN_PRE_DST_FLUSH_TIME() +#define SAMPLE_SVP_NNIE_PERF_STAT_RPN_AFTER_DST_FLUSH_TIME() +#define SAMPLE_SVP_NNIE_PERF_STAT_RPN_OP_TIME() +#endif + +static HI_FLOAT s_af32ExpCoef[10][16] = { + {1.0f, 1.00024f, 1.00049f, 1.00073f, 1.00098f, 1.00122f, 1.00147f, 1.00171f, 1.00196f, 1.0022f, 1.00244f, 1.00269f, 1.00293f, 1.00318f, 1.00342f, 1.00367f}, + {1.0f, 1.00391f, 1.00784f, 1.01179f, 1.01575f, 1.01972f, 1.02371f, 1.02772f, 1.03174f, 1.03578f, 1.03984f, 1.04391f, 1.04799f, 1.05209f, 1.05621f, 1.06034f}, + {1.0f, 1.06449f, 1.13315f, 1.20623f, 1.28403f, 1.36684f, 1.45499f, 1.54883f, 1.64872f, 1.75505f, 1.86825f, 1.98874f, 2.117f, 2.25353f, 2.39888f, 2.55359f}, + {1.0f, 2.71828f, 7.38906f, 20.0855f, 54.5981f, 148.413f, 403.429f, 1096.63f, 2980.96f, 8103.08f, 22026.5f, 59874.1f, 162755.0f, 442413.0f, 1.2026e+006f, 3.26902e+006f}, + {1.0f, 8.88611e+006f, 7.8963e+013f, 7.01674e+020f, 6.23515e+027f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f}, + {1.0f, 0.999756f, 0.999512f, 0.999268f, 0.999024f, 0.99878f, 0.998536f, 0.998292f, 0.998049f, 0.997805f, 0.997562f, 0.997318f, 0.997075f, 0.996831f, 0.996588f, 0.996345f}, + {1.0f, 0.996101f, 0.992218f, 0.98835f, 0.984496f, 0.980658f, 0.976835f, 0.973027f, 0.969233f, 0.965455f, 0.961691f, 0.957941f, 0.954207f, 0.950487f, 0.946781f, 0.94309f}, + {1.0f, 0.939413f, 0.882497f, 0.829029f, 0.778801f, 0.731616f, 0.687289f, 0.645649f, 0.606531f, 0.569783f, 0.535261f, 0.502832f, 0.472367f, 0.443747f, 0.416862f, 0.391606f}, + {1.0f, 0.367879f, 0.135335f, 0.0497871f, 0.0183156f, 0.00673795f, 0.00247875f, 0.000911882f, 0.000335463f, 0.00012341f, 4.53999e-005f, 1.67017e-005f, 6.14421e-006f, 2.26033e-006f, 8.31529e-007f, 3.05902e-007f}, + {1.0f, 1.12535e-007f, 1.26642e-014f, 1.42516e-021f, 1.60381e-028f, 1.80485e-035f, 2.03048e-042f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f} +}; + +/***************************************************************************** +* Prototype : SVP_NNIE_QuickExp +* Description : this function is used to quickly get exp result +* Input : HI_S32 s32Value [IN] input value +* +* +* +* +* Output : +* Return Value : HI_FLOAT: output value. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_FLOAT SVP_NNIE_QuickExp( HI_S32 s32Value ) +{ + if( s32Value & 0x80000000 ) + { + s32Value = ~s32Value + 0x00000001; + return s_af32ExpCoef[5][s32Value & 0x0000000F] * s_af32ExpCoef[6][(s32Value>>4) & 0x0000000F] * s_af32ExpCoef[7][(s32Value>>8) & 0x0000000F] * s_af32ExpCoef[8][(s32Value>>12) & 0x0000000F] * s_af32ExpCoef[9][(s32Value>>16) & 0x0000000F ]; + } + else + { + return s_af32ExpCoef[0][s32Value & 0x0000000F] * s_af32ExpCoef[1][(s32Value>>4) & 0x0000000F] * s_af32ExpCoef[2][(s32Value>>8) & 0x0000000F] * s_af32ExpCoef[3][(s32Value>>12) & 0x0000000F] * s_af32ExpCoef[4][(s32Value>>16) & 0x0000000F ]; + } +} + +/***************************************************************************** +* Prototype : SVP_NNIE_SoftMax +* Description : this function is used to do softmax +* Input : HI_FLOAT* pf32Src [IN] the pointer to source data +* HI_U32 u32Num [IN] the num of source data +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_SoftMax( HI_FLOAT* pf32Src, HI_U32 u32Num) +{ + HI_FLOAT f32Max = 0; + HI_FLOAT f32Sum = 0; + HI_U32 i = 0; + + for(i = 0; i < u32Num; ++i) + { + if(f32Max < pf32Src[i]) + { + f32Max = pf32Src[i]; + } + } + + for(i = 0; i < u32Num; ++i) + { + pf32Src[i] = (HI_FLOAT)SVP_NNIE_QuickExp((HI_S32)((pf32Src[i] - f32Max)*SAMPLE_SVP_NNIE_QUANT_BASE)); + f32Sum += pf32Src[i]; + } + + for(i = 0; i < u32Num; ++i) + { + pf32Src[i] /= f32Sum; + } + return HI_SUCCESS; +} + +/***************************************************************************** +* Prototype : SVP_NNIE_Sigmoid +* Description : this function is used to do sigmoid +* Input : HI_FLOAT* pf32Src [IN] the pointer to source data +* HI_U32 u32Num [IN] the num of source data +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Sigmoid( HI_FLOAT* pf32Src, HI_U32 u32Num) +{ + HI_U32 i = 0; + + for(i = 0; i < u32Num; i++) + { + pf32Src[i] = SAMPLE_SVP_NNIE_SIGMOID(pf32Src[i]); + } + return HI_SUCCESS; +} + +/***************************************************************************** +* Prototype : SVP_NNIE_SSD_SoftMax +* Description : this function is used to do softmax for SSD +* Input : HI_S32* pf32Src [IN] the pointer to input array +* HI_S32 s32ArraySize [IN] the array size +* HI_S32* ps32Dst [OUT] the pointer to output array +* +* +* +* +* Output : +* Return Value : void +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-03-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_SSD_SoftMax(HI_S32* ps32Src, HI_S32 s32ArraySize, HI_S32* ps32Dst) +{ + /***** define parameters ****/ + HI_S32 s32Max = 0; + HI_S32 s32Sum = 0; + HI_S32 i = 0; + for (i = 0; i < s32ArraySize; ++i) + { + if (s32Max < ps32Src[i]) + { + s32Max = ps32Src[i]; + } + } + for (i = 0; i < s32ArraySize; ++i) + { + ps32Dst[i] = (HI_S32)(SAMPLE_SVP_NNIE_QUANT_BASE* exp((HI_FLOAT)(ps32Src[i] - s32Max) / SAMPLE_SVP_NNIE_QUANT_BASE)); + s32Sum += ps32Dst[i]; + } + for (i = 0; i < s32ArraySize; ++i) + { + ps32Dst[i] = (HI_S32)(((HI_FLOAT)ps32Dst[i] / (HI_FLOAT)s32Sum) * SAMPLE_SVP_NNIE_QUANT_BASE); + } + return HI_SUCCESS; +} + + +/***************************************************************************** +* Prototype : SVP_NNIE_Argswap +* Description : this function is used to exchange array data +* Input : HI_FLOAT* pf32Src1 [IN] the pointer to the first array +* HI_S32* ps32Src2 [OUT] the pointer to the second array +* +* +* +* +* Output : +* Return Value : void +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-03-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static void SVP_NNIE_Argswap(HI_S32* ps32Src1, HI_S32* ps32Src2) +{ + HI_U32 i = 0; + HI_S32 u32Tmp = 0; + for( i = 0; i < SAMPLE_SVP_NNIE_PROPOSAL_WIDTH; i++ ) + { + u32Tmp = ps32Src1[i]; + ps32Src1[i] = ps32Src2[i]; + ps32Src2[i] = u32Tmp; + } +} + + +/***************************************************************************** +* Prototype : SVP_NNIE_NonRecursiveArgQuickSort +* Description : this function is used to do quick sort +* Input : HI_S32* ps32Array [IN] the array need to be sorted +* HI_S32 s32Low [IN] the start position of quick sort +* HI_S32 s32High [IN] the end position of quick sort +* SAMPLE_SVP_NNIE_STACK_S * pstStack [IN] the buffer used to store start positions and end positions +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-03-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_NonRecursiveArgQuickSort(HI_S32* ps32Array, + HI_S32 s32Low, HI_S32 s32High, SAMPLE_SVP_NNIE_STACK_S *pstStack,HI_U32 u32MaxNum) +{ + HI_S32 i = s32Low; + HI_S32 j = s32High; + HI_S32 s32Top = 0; + HI_S32 s32KeyConfidence = ps32Array[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * s32Low + 4]; + pstStack[s32Top].s32Min = s32Low; + pstStack[s32Top].s32Max = s32High; + + while(s32Top > -1) + { + s32Low = pstStack[s32Top].s32Min; + s32High = pstStack[s32Top].s32Max; + i = s32Low; + j = s32High; + s32Top--; + + s32KeyConfidence = ps32Array[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * s32Low + 4]; + + while(i < j) + { + while((i < j) && (s32KeyConfidence > ps32Array[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4])) + { + j--; + } + if(i < j) + { + SVP_NNIE_Argswap(&ps32Array[i*SAMPLE_SVP_NNIE_PROPOSAL_WIDTH], &ps32Array[j*SAMPLE_SVP_NNIE_PROPOSAL_WIDTH]); + i++; + } + + while((i < j) && (s32KeyConfidence < ps32Array[i*SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4])) + { + i++; + } + if(i < j) + { + SVP_NNIE_Argswap(&ps32Array[i*SAMPLE_SVP_NNIE_PROPOSAL_WIDTH], &ps32Array[j*SAMPLE_SVP_NNIE_PROPOSAL_WIDTH]); + j--; + } + } + + if(s32Low <= u32MaxNum) + { + if(s32Low < i-1) + { + s32Top++; + pstStack[s32Top].s32Min = s32Low; + pstStack[s32Top].s32Max = i-1; + } + + if(s32High > i+1) + { + s32Top++; + pstStack[s32Top].s32Min = i+1; + pstStack[s32Top].s32Max = s32High; + } + } + } + return HI_SUCCESS; +} + + +/***************************************************************************** +* Prototype : SVP_NNIE_Overlap +* Description : this function is used to calculate the overlap ratio of two proposals +* Input : HI_S32 s32XMin1 [IN] first input proposal's minimum value of x coordinate +* HI_S32 s32YMin1 [IN] first input proposal's minimum value of y coordinate of first input proposal +* HI_S32 s32XMax1 [IN] first input proposal's maximum value of x coordinate of first input proposal +* HI_S32 s32YMax1 [IN] first input proposal's maximum value of y coordinate of first input proposal +* HI_S32 s32XMin1 [IN] second input proposal's minimum value of x coordinate +* HI_S32 s32YMin1 [IN] second input proposal's minimum value of y coordinate of first input proposal +* HI_S32 s32XMax1 [IN] second input proposal's maximum value of x coordinate of first input proposal +* HI_S32 s32YMax1 [IN] second input proposal's maximum value of y coordinate of first input proposal +* HI_FLOAT *pf32IoU [INOUT]the pointer of the IoU value +* +* +* Output : +* Return Value : HI_FLOAT f32Iou. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-03-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Overlap(HI_S32 s32XMin1, HI_S32 s32YMin1, HI_S32 s32XMax1, HI_S32 s32YMax1, HI_S32 s32XMin2, + HI_S32 s32YMin2, HI_S32 s32XMax2, HI_S32 s32YMax2, HI_S32* s32AreaSum, HI_S32* s32AreaInter) +{ + /*** Check the input, and change the Return value ***/ + HI_S32 s32Inter = 0; + HI_S32 s32Total = 0; + HI_S32 s32XMin = 0; + HI_S32 s32YMin = 0; + HI_S32 s32XMax = 0; + HI_S32 s32YMax = 0; + HI_S32 s32Area1 = 0; + HI_S32 s32Area2 = 0; + HI_S32 s32InterWidth = 0; + HI_S32 s32InterHeight = 0; + + s32XMin = SAMPLE_SVP_NNIE_MAX(s32XMin1, s32XMin2); + s32YMin = SAMPLE_SVP_NNIE_MAX(s32YMin1, s32YMin2); + s32XMax = SAMPLE_SVP_NNIE_MIN(s32XMax1, s32XMax2); + s32YMax = SAMPLE_SVP_NNIE_MIN(s32YMax1, s32YMax2); + + s32InterWidth = s32XMax - s32XMin + 1; + s32InterHeight = s32YMax - s32YMin + 1; + + s32InterWidth = ( s32InterWidth >= 0 ) ? s32InterWidth : 0; + s32InterHeight = ( s32InterHeight >= 0 ) ? s32InterHeight : 0; + + s32Inter = s32InterWidth * s32InterHeight; + s32Area1 = (s32XMax1 - s32XMin1 + 1) * (s32YMax1 - s32YMin1 + 1); + s32Area2 = (s32XMax2 - s32XMin2 + 1) * (s32YMax2 - s32YMin2 + 1); + + s32Total = s32Area1 + s32Area2 - s32Inter; + + *s32AreaSum = s32Total; + *s32AreaInter = s32Inter; + return HI_SUCCESS; +} + +/***************************************************************************** +* Prototype : SVP_NNIE_FilterLowScoreBbox +* Description : this function is used to remove low score bboxes, in order to speed-up Sort & RPN procedures. +* Input : HI_S32* ps32Proposals [IN] proposals +* HI_U32 u32NumAnchors [IN] input anchors' num +* HI_U32 u32FilterThresh [IN] rpn configuration +* HI_U32* u32NumAfterFilter [OUT] output num of anchors after low score filtering +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-03-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_FilterLowScoreBbox(HI_S32* ps32Proposals, HI_U32 u32AnchorsNum, + HI_U32 u32FilterThresh, HI_U32* u32NumAfterFilter) +{ + HI_U32 u32ProposalCnt = u32AnchorsNum; + HI_U32 i = 0; + + if( u32FilterThresh > 0 ) + { + for( i = 0; i < u32AnchorsNum; i++ ) + { + if( ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + 4 ] < (HI_S32)u32FilterThresh ) + { + ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + 5 ] = 1; + } + } + + u32ProposalCnt = 0; + for( i = 0; i < u32AnchorsNum; i++ ) + { + if( 0 == ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + 5 ] ) + { + ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt ] = ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i ]; + ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + 1 ] = ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + 1 ]; + ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + 2 ] = ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + 2 ]; + ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + 3 ] = ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + 3 ]; + ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + 4 ] = ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + 4 ]; + ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + 5 ] = ps32Proposals[ SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + 5 ]; + u32ProposalCnt++; + } + } + } + *u32NumAfterFilter = u32ProposalCnt; + return HI_SUCCESS; +} +/***************************************************************************** +* Prototype : SVP_NNIE_NonMaxSuppression +* Description : this function is used to do non maximum suppression +* Input : HI_S32* ps32Proposals [IN] proposals +* HI_U32 u32AnchorsNum [IN] anchors num +* HI_U32 u32NmsThresh [IN] non maximum suppression threshold +* HI_U32 u32MaxRoiNum [IN] The max roi num for the roi pooling +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-03-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_NonMaxSuppression( HI_S32* ps32Proposals, HI_U32 u32AnchorsNum, + HI_U32 u32NmsThresh,HI_U32 u32MaxRoiNum) +{ + /****** define variables *******/ + HI_S32 s32XMin1 = 0; + HI_S32 s32YMin1 = 0; + HI_S32 s32XMax1 = 0; + HI_S32 s32YMax1 = 0; + HI_S32 s32XMin2 = 0; + HI_S32 s32YMin2 = 0; + HI_S32 s32XMax2 = 0; + HI_S32 s32YMax2 = 0; + HI_S32 s32AreaTotal = 0; + HI_S32 s32AreaInter = 0; + HI_U32 i = 0; + HI_U32 j = 0; + HI_U32 u32Num = 0; + HI_BOOL bNoOverlap = HI_TRUE; + for (i = 0; i < u32AnchorsNum && u32Num < u32MaxRoiNum; i++) + { + if( ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*i+5] == 0 ) + { + u32Num++; + s32XMin1 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*i]; + s32YMin1 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*i+1]; + s32XMax1 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*i+2]; + s32YMax1 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*i+3]; + for(j= i+1;j< u32AnchorsNum; j++) + { + if( ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*j+5] == 0 ) + { + s32XMin2 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*j]; + s32YMin2 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*j+1]; + s32XMax2 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*j+2]; + s32YMax2 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*j+3]; + bNoOverlap = (s32XMin2>s32XMax1)||(s32XMax2s32YMax1)||(s32YMax2 ((HI_S32)u32NmsThresh*s32AreaTotal)) + { + if( ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*i+4] >= ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*j+4] ) + { + ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*j+5] = 1; + } + else + { + ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*i+5] = 1; + } + } + } + } + } + } + return HI_SUCCESS; +} + +/***************************************************************************** +* Prototype : SVP_NNIE_Cnn_GetTopN +* Description : Cnn get top N +* Input : HI_S32 *ps32Fc [IN] FC data pointer +* HI_U32 u32FcStride [IN] FC stride +* HI_U32 u32ClassNum [IN] Class Num +* HI_U32 u32BatchNum [IN] Batch Num +* HI_U32 u32TopN [IN] TopN +* HI_S32 *ps32TmpBuf [IN] assist buffer pointer +* HI_U32 u32TopNStride [IN] TopN result stride +* HI_S32 *ps32GetTopN [OUT] TopN result +* +* Output : +* Return Value : HI_S32 +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-03-14 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Cnn_GetTopN(HI_S32 *ps32Fc, HI_U32 u32FcStride, + HI_U32 u32ClassNum,HI_U32 u32BatchNum, HI_U32 u32TopN, HI_S32 *ps32TmpBuf, + HI_U32 u32TopNStride,HI_S32*ps32GetTopN) +{ + HI_U32 i = 0, j = 0, n = 0; + HI_U32 u32Id = 0; + HI_S32* ps32Score = NULL; + SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S stTmp = {0}; + SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S *pstTopN = NULL; + SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S *pstTmpBuf = (SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S*)ps32TmpBuf; + for(n = 0; n < u32BatchNum; n++) + { + ps32Score = (HI_S32 *)((HI_U8*)ps32Fc + n * u32FcStride); + pstTopN = (SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S*)((HI_U8*)ps32GetTopN + n * u32TopNStride); + for(i = 0; i < u32ClassNum; i++) + { + pstTmpBuf[i].u32ClassId = i; + pstTmpBuf[i].u32Confidence = (HI_U32)ps32Score[i]; + } + + for(i = 0; i < u32TopN; i++) + { + u32Id = i; + pstTopN[i].u32ClassId = pstTmpBuf[i].u32ClassId; + pstTopN[i].u32Confidence = pstTmpBuf[i].u32Confidence; + for(j = i+1; j < u32ClassNum; j++) + { + if(pstTmpBuf[u32Id].u32Confidence < pstTmpBuf[j].u32Confidence) + { + u32Id = j; + } + } + + stTmp.u32ClassId = pstTmpBuf[u32Id].u32ClassId; + stTmp.u32Confidence = pstTmpBuf[u32Id].u32Confidence; + + if(i!=u32Id) + { + pstTmpBuf[u32Id].u32ClassId = pstTmpBuf[i].u32ClassId; + pstTmpBuf[u32Id].u32Confidence = pstTmpBuf[i].u32Confidence; + pstTmpBuf[i].u32ClassId = stTmp.u32ClassId; + pstTmpBuf[i].u32Confidence = stTmp.u32Confidence; + + pstTopN[i].u32ClassId = stTmp.u32ClassId; + pstTopN[i].u32Confidence = stTmp.u32Confidence; + } + } + } + + return HI_SUCCESS; +} + + +/***************************************************************************** +* Prototype : SVP_NNIE_Rpn +* Description : this function is used to do RPN +* Input : HI_S32** pps32Src [IN] convolution data +* HI_U32 u32NumRatioAnchors [IN] Ratio anchor num +* HI_U32 u32NumScaleAnchors [IN] scale anchor num +* HI_U32* au32Scales [IN] scale value +* HI_U32* au32Ratios [IN] ratio value +* HI_U32 u32OriImHeight [IN] input image height +* HI_U32 u32OriImWidth [IN] input image width +* HI_U32* pu32ConvHeight [IN] convolution height +* HI_U32* pu32ConvWidth [IN] convolution width +* HI_U32* pu32ConvChannel [IN] convolution channel +* HI_U32 u32ConvStride [IN] convolution stride +* HI_U32 u32MaxRois [IN] max roi num +* HI_U32 u32MinSize [IN] min size +* HI_U32 u32SpatialScale [IN] spatial scale +* HI_U32 u32NmsThresh [IN] NMS thresh +* HI_U32 u32FilterThresh [IN] filter thresh +* HI_U32 u32NumBeforeNms [IN] num before doing NMS +* HI_U32 *pu32MemPool [IN] assist buffer +* HI_S32 *ps32ProposalResult [OUT] proposal result +* HI_U32* pu32NumRois [OUT] proposal num +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Rpn(HI_S32** pps32Src,HI_U32 u32NumRatioAnchors, + HI_U32 u32NumScaleAnchors,HI_U32* au32Scales,HI_U32* au32Ratios,HI_U32 u32OriImHeight, + HI_U32 u32OriImWidth,HI_U32* pu32ConvHeight,HI_U32* pu32ConvWidth,HI_U32* pu32ConvChannel, + HI_U32 u32ConvStride,HI_U32 u32MaxRois,HI_U32 u32MinSize,HI_U32 u32SpatialScale, + HI_U32 u32NmsThresh,HI_U32 u32FilterThresh,HI_U32 u32NumBeforeNms,HI_U32 *pu32MemPool, + HI_S32 *ps32ProposalResult,HI_U32* pu32NumRois) +{ + /******************** define parameters ****************/ + HI_U32 u32Size = 0; + HI_S32* ps32Anchors = NULL; + HI_S32* ps32BboxDelta = NULL; + HI_S32* ps32Proposals = NULL; + HI_U32* pu32Ptr = NULL; + HI_S32* ps32Ptr = NULL; + HI_U32 u32NumAfterFilter = 0; + HI_U32 u32NumAnchors = 0; + HI_FLOAT f32BaseW = 0; + HI_FLOAT f32BaseH = 0; + HI_FLOAT f32BaseXCtr = 0; + HI_FLOAT f32BaseYCtr = 0; + HI_FLOAT f32SizeRatios = 0; + HI_FLOAT* pf32RatioAnchors = NULL; + HI_FLOAT* pf32Ptr = NULL; + HI_FLOAT *pf32Ptr2 = NULL; + HI_FLOAT* pf32ScaleAnchors = NULL; + HI_FLOAT* pf32Scores = NULL; + HI_FLOAT f32Ratios = 0; + HI_FLOAT f32Size = 0; + HI_U32 u32PixelInterval = 0; + HI_U32 u32SrcBboxIndex = 0; + HI_U32 u32SrcFgProbIndex = 0; + HI_U32 u32SrcBgProbIndex = 0; + HI_U32 u32SrcBboxBias = 0; + HI_U32 u32SrcProbBias = 0; + HI_U32 u32DesBox = 0; + HI_U32 u32BgBlobSize = 0; + HI_U32 u32AnchorsPerPixel = 0; + HI_U32 u32MapSize = 0; + HI_U32 u32LineSize = 0; + HI_S32* ps32Ptr2 = NULL; + HI_S32* ps32Ptr3 = NULL; + HI_S32 s32ProposalWidth = 0; + HI_S32 s32ProposalHeight = 0; + HI_S32 s32ProposalCenterX = 0; + HI_S32 s32ProposalCenterY = 0; + HI_S32 s32PredW = 0; + HI_S32 s32PredH = 0; + HI_S32 s32PredCenterX = 0; + HI_S32 s32PredCenterY = 0; + HI_U32 u32DesBboxDeltaIndex = 0; + HI_U32 u32DesScoreIndex = 0; + HI_U32 u32RoiCount = 0; + SAMPLE_SVP_NNIE_STACK_S* pstStack = NULL; + HI_S32 s32Ret = HI_SUCCESS; + HI_U32 c = 0; + HI_U32 h = 0; + HI_U32 w = 0; + HI_U32 i = 0; + HI_U32 j = 0; + HI_U32 p = 0; + HI_U32 q = 0; + HI_U32 z = 0; + HI_U32 au32BaseAnchor[4] = {0, 0, (u32MinSize -1), (u32MinSize -1)}; + + /*********************************** Faster RCNN *********************************************/ + /********* calculate the start pointer of each part in MemPool *********/ + pu32Ptr = (HI_U32*)pu32MemPool; + ps32Anchors = (HI_S32*)pu32Ptr; + u32NumAnchors = u32NumRatioAnchors * u32NumScaleAnchors * ( pu32ConvHeight[0] * pu32ConvWidth[0] ); + u32Size = SAMPLE_SVP_NNIE_COORDI_NUM * u32NumAnchors; + pu32Ptr += u32Size; + + ps32BboxDelta = (HI_S32*)pu32Ptr; + pu32Ptr += u32Size; + + ps32Proposals = (HI_S32*)pu32Ptr; + u32Size = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32NumAnchors; + pu32Ptr += u32Size; + + pf32RatioAnchors = (HI_FLOAT*)pu32Ptr; + pf32Ptr = (HI_FLOAT*)pu32Ptr; + u32Size = u32NumRatioAnchors * SAMPLE_SVP_NNIE_COORDI_NUM; + pf32Ptr = pf32Ptr + u32Size; + + pf32ScaleAnchors = pf32Ptr; + u32Size = u32NumScaleAnchors * u32NumRatioAnchors * SAMPLE_SVP_NNIE_COORDI_NUM; + pf32Ptr = pf32Ptr + u32Size; + + pf32Scores = pf32Ptr; + u32Size = u32NumAnchors * SAMPLE_SVP_NNIE_SCORE_NUM; + pf32Ptr = pf32Ptr + u32Size; + + pstStack = (SAMPLE_SVP_NNIE_STACK_S*)pf32Ptr; + + /********************* Generate the base anchor ***********************/ + f32BaseW = (HI_FLOAT)(au32BaseAnchor[2] - au32BaseAnchor[0] + 1 ); + f32BaseH = (HI_FLOAT)(au32BaseAnchor[3] - au32BaseAnchor[1] + 1 ); + f32BaseXCtr = (HI_FLOAT)(au32BaseAnchor[0] + ( ( f32BaseW - 1 ) * 0.5 ) ); + f32BaseYCtr = (HI_FLOAT)(au32BaseAnchor[1] + ( ( f32BaseH - 1 ) * 0.5 ) ); + + /*************** Generate Ratio Anchors for the base anchor ***********/ + pf32Ptr = pf32RatioAnchors; + f32Size = f32BaseW * f32BaseH; + for (i = 0; i < u32NumRatioAnchors; i++) + { + f32Ratios = (HI_FLOAT)au32Ratios[i]/SAMPLE_SVP_NNIE_QUANT_BASE; + f32SizeRatios = f32Size / f32Ratios; + f32BaseW = sqrt(f32SizeRatios); + f32BaseW = (HI_FLOAT)(1.0 * ( (f32BaseW) >= 0 ? (HI_S32)(f32BaseW+SAMPLE_SVP_NNIE_HALF) : (HI_S32)(f32BaseW-SAMPLE_SVP_NNIE_HALF))); + f32BaseH = f32BaseW * f32Ratios; + f32BaseH = (HI_FLOAT)(1.0 * ( (f32BaseH) >= 0 ? (HI_S32)(f32BaseH+SAMPLE_SVP_NNIE_HALF) : (HI_S32)(f32BaseH-SAMPLE_SVP_NNIE_HALF))); + + *pf32Ptr++ = (HI_FLOAT)(f32BaseXCtr - ( ( f32BaseW - 1 ) * SAMPLE_SVP_NNIE_HALF )); + *(pf32Ptr++) = (HI_FLOAT)(f32BaseYCtr - ( ( f32BaseH - 1 ) * SAMPLE_SVP_NNIE_HALF )); + *(pf32Ptr++) = (HI_FLOAT)(f32BaseXCtr + ( ( f32BaseW - 1 ) * SAMPLE_SVP_NNIE_HALF )); + *(pf32Ptr++) = (HI_FLOAT)( f32BaseYCtr + ( ( f32BaseH - 1 ) * SAMPLE_SVP_NNIE_HALF )); + } + + /********* Generate Scale Anchors for each Ratio Anchor **********/ + pf32Ptr = pf32RatioAnchors; + pf32Ptr2 = pf32ScaleAnchors; + /* Generate Scale Anchors for one pixel */ + for( i = 0; i < u32NumRatioAnchors; i++ ) + { + for( j = 0; j < u32NumScaleAnchors; j++ ) + { + f32BaseW = *( pf32Ptr + 2 ) - *( pf32Ptr ) + 1; + f32BaseH = *( pf32Ptr + 3 ) - *( pf32Ptr + 1 ) + 1; + f32BaseXCtr = (HI_FLOAT)( *( pf32Ptr ) + ( ( f32BaseW - 1 ) * SAMPLE_SVP_NNIE_HALF )); + f32BaseYCtr = (HI_FLOAT)( *( pf32Ptr + 1 ) + ( ( f32BaseH - 1 ) * SAMPLE_SVP_NNIE_HALF )); + + *( pf32Ptr2++ ) = (HI_FLOAT) (f32BaseXCtr - ((f32BaseW * ((HI_FLOAT)au32Scales[j]/SAMPLE_SVP_NNIE_QUANT_BASE) - 1) * SAMPLE_SVP_NNIE_HALF)); + *( pf32Ptr2++ ) = (HI_FLOAT)(f32BaseYCtr - ((f32BaseH * ((HI_FLOAT)au32Scales[j]/SAMPLE_SVP_NNIE_QUANT_BASE) - 1) * SAMPLE_SVP_NNIE_HALF)); + *( pf32Ptr2++ ) = (HI_FLOAT)(f32BaseXCtr + ((f32BaseW * ((HI_FLOAT)au32Scales[j]/SAMPLE_SVP_NNIE_QUANT_BASE) - 1) * SAMPLE_SVP_NNIE_HALF)); + *( pf32Ptr2++ ) = (HI_FLOAT)(f32BaseYCtr + ((f32BaseH * ((HI_FLOAT)au32Scales[j]/SAMPLE_SVP_NNIE_QUANT_BASE) - 1) * SAMPLE_SVP_NNIE_HALF)); + } + pf32Ptr += SAMPLE_SVP_NNIE_COORDI_NUM; + } + + + /******************* Copy the anchors to every pixel in the feature map ******************/ + ps32Ptr = ps32Anchors; + u32PixelInterval = SAMPLE_SVP_NNIE_QUANT_BASE/ u32SpatialScale; + + for ( p = 0; p < pu32ConvHeight[0]; p++ ) + { + for ( q = 0; q < pu32ConvWidth[0]; q++ ) + { + pf32Ptr2 = pf32ScaleAnchors; + for ( z = 0 ; z < u32NumScaleAnchors * u32NumRatioAnchors; z++ ) + { + *(ps32Ptr++) = (HI_S32)(q * u32PixelInterval + *(pf32Ptr2++) ); + *(ps32Ptr++) = (HI_S32)( p * u32PixelInterval + *( pf32Ptr2++ )); + *(ps32Ptr++) = (HI_S32)( q * u32PixelInterval + *( pf32Ptr2++ )); + *(ps32Ptr++) = (HI_S32)( p * u32PixelInterval + *( pf32Ptr2++ )); + } + } + } + + /********** do transpose, convert the blob from (M,C,H,W) to (M,H,W,C) **********/ + u32MapSize = pu32ConvHeight[1] * u32ConvStride / sizeof(HI_U32); + u32AnchorsPerPixel = u32NumRatioAnchors * u32NumScaleAnchors; + u32BgBlobSize = u32AnchorsPerPixel * u32MapSize; + u32LineSize = u32ConvStride / sizeof(HI_U32); + u32SrcProbBias = 0; + u32SrcBboxBias = 0; + + for ( c = 0; c < pu32ConvChannel[1]; c++ ) + { + for ( h = 0; h < pu32ConvHeight[1]; h++ ) + { + for ( w = 0; w < pu32ConvWidth[1]; w++ ) + { + u32SrcBboxIndex = u32SrcBboxBias + c * u32MapSize + h * u32LineSize + w; + u32SrcBgProbIndex = u32SrcProbBias + (c/SAMPLE_SVP_NNIE_COORDI_NUM) * u32MapSize + h * u32LineSize + w; + u32SrcFgProbIndex = u32BgBlobSize + u32SrcBgProbIndex; + + u32DesBox = ( u32AnchorsPerPixel ) * ( h * pu32ConvWidth[1] + w) + c/SAMPLE_SVP_NNIE_COORDI_NUM ; + + u32DesBboxDeltaIndex = SAMPLE_SVP_NNIE_COORDI_NUM * u32DesBox + c % SAMPLE_SVP_NNIE_COORDI_NUM; + ps32BboxDelta[u32DesBboxDeltaIndex] = (HI_S32)pps32Src[1][u32SrcBboxIndex]; + + u32DesScoreIndex = ( SAMPLE_SVP_NNIE_SCORE_NUM ) * u32DesBox; + pf32Scores[u32DesScoreIndex] = (HI_FLOAT)((HI_S32)pps32Src[0][u32SrcBgProbIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE; + pf32Scores[u32DesScoreIndex + 1] = (HI_FLOAT)((HI_S32)pps32Src[0][u32SrcFgProbIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE; + } + } + } + + + /************************* do softmax ****************************/ + pf32Ptr = pf32Scores; + for( i = 0; i= u32MaxRois) + { + break; + } + } + + *pu32NumRois = u32RoiCount; + + return s32Ret; + +} + +/***************************************************************************** +* Prototype : SVP_NNIE_FasterRcnn_GetResult +* Description : this function is used to get FasterRcnn result +* Input : HI_S32* ps32FcBbox [IN] Bbox for Roi +* HI_S32 *ps32FcScore [IN] Score for roi +* HI_S32 *ps32Proposals [IN] proposal +* HI_U32 u32RoiCnt [IN] Roi num +* HI_U32 *pu32ConfThresh [IN] each class confidence thresh +* HI_U32 u32NmsThresh [IN] Nms thresh +* HI_U32 u32MaxRoi [IN] max roi +* HI_U32 u32ClassNum [IN] class num +* HI_U32 u32OriImWidth [IN] input image width +* HI_U32 u32OriImHeight [IN] input image height +* HI_U32* pu32MemPool [IN] assist buffer +* HI_S32* ps32DstScore [OUT] result of score +* HI_S32* ps32DstRoi [OUT] result of Bbox +* HI_S32* ps32ClassRoiNum [OUT] result of the roi num of each classs +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_FasterRcnn_GetResult(HI_S32*ps32FcBbox,HI_U32 u32BboxStride, + HI_S32*ps32FcScore, HI_U32 u32ScoreStride,HI_S32* ps32Proposal,HI_U32 u32RoiCnt, + HI_U32* pu32ConfThresh,HI_U32 u32NmsThresh,HI_U32 u32MaxRoi,HI_U32 u32ClassNum, + HI_U32 u32OriImWidth,HI_U32 u32OriImHeight,HI_U32* pu32MemPool,HI_S32* ps32DstScore, + HI_S32* ps32DstBbox,HI_S32* ps32ClassRoiNum) +{ + /************* define variables *****************/ + HI_U32 u32Size = 0; + HI_U32 u32ClsScoreChannels = 0; + HI_S32* ps32Proposals = NULL; + HI_U32 u32FcScoreWidth = 0; + HI_U32 u32FcBboxWidth = 0; + HI_FLOAT f32ProposalWidth = 0.0; + HI_FLOAT f32ProposalHeight = 0.0; + HI_FLOAT f32ProposalCenterX = 0.0; + HI_FLOAT f32ProposalCenterY = 0.0; + HI_FLOAT f32PredW = 0.0; + HI_FLOAT f32PredH = 0.0; + HI_FLOAT f32PredCenterX = 0.0; + HI_FLOAT f32PredCenterY = 0.0; + HI_FLOAT* pf32FcScoresMemPool = NULL; + HI_S32* ps32ProposalMemPool = NULL; + HI_S32* ps32ProposalTmp = NULL; + HI_U32 u32FcBboxIndex = 0; + HI_U32 u32ProposalMemPoolIndex = 0; + HI_FLOAT* pf32Ptr = NULL; + HI_S32* ps32Ptr = NULL; + HI_S32* ps32Score = NULL; + HI_S32* ps32Bbox = NULL; + HI_S32* ps32RoiCnt = NULL; + HI_U32 u32RoiOutCnt = 0; + HI_U32 u32SrcIndex = 0; + HI_U32 u32DstIndex = 0; + HI_U32 i = 0; + HI_U32 j = 0; + HI_U32 k = 0; + SAMPLE_SVP_NNIE_STACK_S* pstStack=NULL; + HI_S32 s32Ret = HI_SUCCESS; + HI_U32 u32OffSet = 0; + + /******************* Get or calculate parameters **********************/ + u32ClsScoreChannels = u32ClassNum; /*channel num is equal to class size, cls_score class*/ + u32FcScoreWidth = u32ScoreStride / sizeof(HI_U32); + u32FcBboxWidth = u32BboxStride / sizeof(HI_U32); + + /*************** Get Start Pointer of MemPool ******************/ + pf32FcScoresMemPool = (HI_FLOAT*)pu32MemPool; + pf32Ptr = pf32FcScoresMemPool; + u32Size = u32MaxRoi * u32ClsScoreChannels; + pf32Ptr += u32Size; + + ps32ProposalMemPool = (HI_S32*)pf32Ptr; + ps32Ptr = ps32ProposalMemPool; + u32Size = u32MaxRoi * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH ; + ps32Ptr += u32Size; + pstStack = (SAMPLE_SVP_NNIE_STACK_S* )ps32Ptr; + + u32DstIndex = 0; + + for( i = 0; i < u32RoiCnt; i++ ) + { + for( k = 0; k < u32ClsScoreChannels; k++ ) + { + u32SrcIndex = i * u32FcScoreWidth + k; + pf32FcScoresMemPool[u32DstIndex++] = (HI_FLOAT)((HI_S32)ps32FcScore[u32SrcIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE; + } + } + ps32Proposals = (HI_S32*)ps32Proposal; + + /************** bbox tranform ************/ + for(j = 0; j < u32ClsScoreChannels; j++) + { + for(i = 0; i < u32RoiCnt; i++) + { + f32ProposalWidth = (HI_FLOAT)(ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i + 2] - ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i] + 1); + f32ProposalHeight = (HI_FLOAT)(ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i + 3] - ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i + 1] + 1); + f32ProposalCenterX = (HI_FLOAT)(ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i] + SAMPLE_SVP_NNIE_HALF * f32ProposalWidth); + f32ProposalCenterY = (HI_FLOAT)(ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i + 1] + SAMPLE_SVP_NNIE_HALF * f32ProposalHeight); + + u32FcBboxIndex = u32FcBboxWidth * i + SAMPLE_SVP_NNIE_COORDI_NUM * j; + f32PredCenterX = ((HI_FLOAT)ps32FcBbox[u32FcBboxIndex]/SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalWidth + f32ProposalCenterX; + f32PredCenterY = ((HI_FLOAT)ps32FcBbox[u32FcBboxIndex + 1]/SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalHeight + f32ProposalCenterY; + f32PredW = f32ProposalWidth * SVP_NNIE_QuickExp((HI_S32)( ps32FcBbox[u32FcBboxIndex+2] )); + f32PredH = f32ProposalHeight * SVP_NNIE_QuickExp((HI_S32)( ps32FcBbox[u32FcBboxIndex+3] )); + + u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i; + ps32ProposalMemPool[u32ProposalMemPoolIndex] = (HI_S32)(f32PredCenterX - SAMPLE_SVP_NNIE_HALF * f32PredW); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] = (HI_S32)(f32PredCenterY - SAMPLE_SVP_NNIE_HALF * f32PredH); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 2] = (HI_S32)(f32PredCenterX + SAMPLE_SVP_NNIE_HALF * f32PredW); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 3] = (HI_S32)(f32PredCenterY + SAMPLE_SVP_NNIE_HALF * f32PredH); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 4] = (HI_S32)( pf32FcScoresMemPool[u32ClsScoreChannels*i+j] * SAMPLE_SVP_NNIE_QUANT_BASE ); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 5] = 0; /* suprressed flag */ + + } + + /* clip bbox */ + for(i = 0; i < u32RoiCnt; i++) + { + u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i; + ps32ProposalMemPool[u32ProposalMemPoolIndex] = ( (ps32ProposalMemPool[u32ProposalMemPoolIndex]) > ((HI_S32)u32OriImWidth - 1) ? ((HI_S32)u32OriImWidth - 1):( ps32ProposalMemPool[u32ProposalMemPoolIndex] ) )>0?( (ps32ProposalMemPool[u32ProposalMemPoolIndex])>((HI_S32)u32OriImWidth)? (u32OriImWidth - 1):( ps32ProposalMemPool[u32ProposalMemPoolIndex] ) ):0; + ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] = ( (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight - 1) ? ((HI_S32)u32OriImHeight - 1):( ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] ) )>0?( (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])>((HI_S32)u32OriImHeight)? (u32OriImHeight - 1):(ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] ) ):0; + ps32ProposalMemPool[u32ProposalMemPoolIndex + 2] = ( (ps32ProposalMemPool[u32ProposalMemPoolIndex + 2]) > ((HI_S32)u32OriImWidth - 1) ? ((HI_S32)u32OriImWidth - 1):( ps32ProposalMemPool[u32ProposalMemPoolIndex + 2] ) )>0?( (ps32ProposalMemPool[u32ProposalMemPoolIndex + 2])>((HI_S32)u32OriImWidth)? (u32OriImWidth - 1):( ps32ProposalMemPool[u32ProposalMemPoolIndex + 2] ) ):0; + ps32ProposalMemPool[u32ProposalMemPoolIndex + 3] = ( (ps32ProposalMemPool[u32ProposalMemPoolIndex + 3]) > ((HI_S32)u32OriImHeight - 1) ? ((HI_S32)u32OriImHeight - 1):( ps32ProposalMemPool[u32ProposalMemPoolIndex + 3] ) )>0?( (ps32ProposalMemPool[u32ProposalMemPoolIndex + 3])>((HI_S32)u32OriImHeight)? (u32OriImHeight - 1):(ps32ProposalMemPool[u32ProposalMemPoolIndex + 3] ) ):0; + } + + ps32ProposalTmp = ps32ProposalMemPool; + + (void)SVP_NNIE_NonRecursiveArgQuickSort( ps32ProposalTmp, 0, u32RoiCnt-1, pstStack,u32RoiCnt); + + (void)SVP_NNIE_NonMaxSuppression(ps32ProposalTmp, u32RoiCnt, u32NmsThresh, u32RoiCnt); + + ps32Score = (HI_S32*)ps32DstScore; + ps32Bbox = (HI_S32*)ps32DstBbox; + ps32RoiCnt = (HI_S32*)ps32ClassRoiNum; + + ps32Score += (HI_S32)(u32OffSet); + ps32Bbox += (HI_S32)(SAMPLE_SVP_NNIE_COORDI_NUM * u32OffSet); + + u32RoiOutCnt = 0; + for(i = 0; i < u32RoiCnt; i++) + { + u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i; + if( 0 == ps32ProposalMemPool[u32ProposalMemPoolIndex + 5] && ps32ProposalMemPool[u32ProposalMemPoolIndex + 4] > (HI_S32)pu32ConfThresh[j] ) //Suppression = 0; CONF_THRESH == 0.8 + { + ps32Score[u32RoiOutCnt] = ps32ProposalMemPool[u32ProposalMemPoolIndex + 4]; + ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM ] = ps32ProposalMemPool[u32ProposalMemPoolIndex]; + ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1 ] = ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]; + ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 2 ] = ps32ProposalMemPool[u32ProposalMemPoolIndex + 2]; + ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 3 ] = ps32ProposalMemPool[u32ProposalMemPoolIndex + 3]; + u32RoiOutCnt++; + } + if(u32RoiOutCnt >= u32RoiCnt)break; + } + ps32RoiCnt[j] = (HI_S32)u32RoiOutCnt; + u32OffSet += u32RoiOutCnt; + + } + return s32Ret; +} + + + +/***************************************************************************** +* Prototype : SVP_NNIE_Pvanet_GetResult +* Description : this function is used to get FasterRcnn result +* Input : HI_S32* ps32FcBbox [IN] Bbox for Roi +* HI_S32 *ps32FcScore [IN] Score for roi +* HI_S32 *ps32Proposals [IN] proposal +* HI_U32 u32RoiCnt [IN] Roi num +* HI_U32 *pu32ConfThresh [IN] each class confidence thresh +* HI_U32 u32NmsThresh [IN] Nms thresh +* HI_U32 u32MaxRoi [IN] max roi +* HI_U32 u32ClassNum [IN] class num +* HI_U32 u32OriImWidth [IN] input image width +* HI_U32 u32OriImHeight [IN] input image height +* HI_U32* pu32MemPool [IN] assist buffer +* HI_S32* ps32DstScore [OUT] result of score +* HI_S32* ps32DstRoi [OUT] result of Bbox +* HI_S32* ps32ClassRoiNum [OUT] result of the roi num of each classs +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Pvanet_GetResult(HI_S32*ps32FcBbox,HI_U32 u32BboxStride, + HI_S32*ps32FcScore, HI_U32 u32ScoreStride,HI_S32* ps32Proposal,HI_U32 u32RoiCnt, + HI_U32* pu32ConfThresh,HI_U32 u32NmsThresh,HI_U32 u32MaxRoi,HI_U32 u32ClassNum, + HI_U32 u32OriImWidth,HI_U32 u32OriImHeight,HI_U32* pu32MemPool,HI_S32* ps32DstScore, + HI_S32* ps32DstBbox,HI_S32* ps32ClassRoiNum) +{ + /************* define variables *****************/ + HI_U32 u32Size = 0; + HI_U32 u32ClsScoreChannels = 0; + HI_S32* ps32Proposals = NULL; + HI_U32 u32FcScoreWidth = 0; + HI_U32 u32FcBboxWidth = 0; + HI_FLOAT f32ProposalWidth = 0.0; + HI_FLOAT f32ProposalHeight = 0.0; + HI_FLOAT f32ProposalCenterX = 0.0; + HI_FLOAT f32ProposalCenterY = 0.0; + HI_FLOAT f32PredW = 0.0; + HI_FLOAT f32PredH = 0.0; + HI_FLOAT f32PredCenterX = 0.0; + HI_FLOAT f32PredCenterY = 0.0; + HI_FLOAT* pf32FcScoresMemPool = NULL; + HI_S32* ps32ProposalMemPool = NULL; + HI_S32* ps32ProposalTmp = NULL; + HI_U32 u32FcBboxIndex = 0; + HI_U32 u32ProposalMemPoolIndex = 0; + HI_FLOAT* pf32Ptr = NULL; + HI_S32* ps32Ptr = NULL; + HI_S32* ps32Score = NULL; + HI_S32* ps32Bbox = NULL; + HI_S32* ps32RoiCnt = NULL; + HI_U32 u32RoiOutCnt = 0; + HI_U32 u32SrcIndex = 0; + HI_U32 u32DstIndex = 0; + HI_U32 i = 0; + HI_U32 j = 0; + HI_U32 k = 0; + SAMPLE_SVP_NNIE_STACK_S* pstStack=NULL; + HI_S32 s32Ret = HI_SUCCESS; + HI_U32 u32OffSet = 0; + + /******************* Get or calculate parameters **********************/ + u32ClsScoreChannels = u32ClassNum; /*channel num is equal to class size, cls_score class*/ + u32FcScoreWidth = u32ScoreStride / sizeof(HI_U32); + u32FcBboxWidth = u32BboxStride / sizeof(HI_U32); + + /*************** Get Start Pointer of MemPool ******************/ + pf32FcScoresMemPool = (HI_FLOAT*)pu32MemPool; + pf32Ptr = pf32FcScoresMemPool; + u32Size = u32MaxRoi * u32ClsScoreChannels; + pf32Ptr += u32Size; + + ps32ProposalMemPool = (HI_S32*)pf32Ptr; + ps32Ptr = ps32ProposalMemPool; + u32Size = u32MaxRoi * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH ; + ps32Ptr += u32Size; + pstStack = (SAMPLE_SVP_NNIE_STACK_S* )ps32Ptr; + + u32DstIndex = 0; + + for( i = 0; i < u32RoiCnt; i++ ) + { + for( k = 0; k < u32ClsScoreChannels; k++ ) + { + u32SrcIndex = i * u32FcScoreWidth + k; + pf32FcScoresMemPool[u32DstIndex++] = (HI_FLOAT)((HI_S32)ps32FcScore[u32SrcIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE; + } + } + ps32Proposals = (HI_S32*)ps32Proposal; + + /************** bbox tranform ************/ + for(j = 0; j < u32ClsScoreChannels; j++) + { + for(i = 0; i < u32RoiCnt; i++) + { + f32ProposalWidth = (HI_FLOAT)(ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i + 2] - ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i] + 1); + f32ProposalHeight = (HI_FLOAT)(ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i + 3] - ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i + 1] + 1); + f32ProposalCenterX = (HI_FLOAT)(ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i] + SAMPLE_SVP_NNIE_HALF * f32ProposalWidth); + f32ProposalCenterY = (HI_FLOAT)(ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i + 1] + SAMPLE_SVP_NNIE_HALF * f32ProposalHeight); + + u32FcBboxIndex = u32FcBboxWidth * i + SAMPLE_SVP_NNIE_COORDI_NUM * j; + f32PredCenterX = ((HI_FLOAT)ps32FcBbox[u32FcBboxIndex]/SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalWidth + f32ProposalCenterX; + f32PredCenterY = ((HI_FLOAT)ps32FcBbox[u32FcBboxIndex + 1]/SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalHeight + f32ProposalCenterY; + f32PredW = f32ProposalWidth * SVP_NNIE_QuickExp((HI_S32)( ps32FcBbox[u32FcBboxIndex+2] )); + f32PredH = f32ProposalHeight * SVP_NNIE_QuickExp((HI_S32)( ps32FcBbox[u32FcBboxIndex+3] )); + + u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i; + ps32ProposalMemPool[u32ProposalMemPoolIndex] = (HI_S32)(f32PredCenterX - SAMPLE_SVP_NNIE_HALF * f32PredW); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] = (HI_S32)(f32PredCenterY - SAMPLE_SVP_NNIE_HALF * f32PredH); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 2] = (HI_S32)(f32PredCenterX + SAMPLE_SVP_NNIE_HALF * f32PredW); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 3] = (HI_S32)(f32PredCenterY + SAMPLE_SVP_NNIE_HALF * f32PredH); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 4] = (HI_S32)( pf32FcScoresMemPool[u32ClsScoreChannels*i+j] * SAMPLE_SVP_NNIE_QUANT_BASE ); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 5] = 0; /* suprressed flag */ + + } + + /* clip bbox */ + for(i = 0; i < u32RoiCnt; i++) + { + u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i; + ps32ProposalMemPool[u32ProposalMemPoolIndex] = ( (ps32ProposalMemPool[u32ProposalMemPoolIndex]) > ((HI_S32)u32OriImWidth - 1) ? ((HI_S32)u32OriImWidth - 1):( ps32ProposalMemPool[u32ProposalMemPoolIndex] ) )>0?( (ps32ProposalMemPool[u32ProposalMemPoolIndex])>((HI_S32)u32OriImWidth)? (u32OriImWidth - 1):( ps32ProposalMemPool[u32ProposalMemPoolIndex] ) ):0; + ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] = ( (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight - 1) ? ((HI_S32)u32OriImHeight - 1):( ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] ) )>0?( (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])>((HI_S32)u32OriImHeight)? (u32OriImHeight - 1):(ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] ) ):0; + ps32ProposalMemPool[u32ProposalMemPoolIndex + 2] = ( (ps32ProposalMemPool[u32ProposalMemPoolIndex + 2]) > ((HI_S32)u32OriImWidth - 1) ? ((HI_S32)u32OriImWidth - 1):( ps32ProposalMemPool[u32ProposalMemPoolIndex + 2] ) )>0?( (ps32ProposalMemPool[u32ProposalMemPoolIndex + 2])>((HI_S32)u32OriImWidth)? (u32OriImWidth - 1):( ps32ProposalMemPool[u32ProposalMemPoolIndex + 2] ) ):0; + ps32ProposalMemPool[u32ProposalMemPoolIndex + 3] = ( (ps32ProposalMemPool[u32ProposalMemPoolIndex + 3]) > ((HI_S32)u32OriImHeight - 1) ? ((HI_S32)u32OriImHeight - 1):( ps32ProposalMemPool[u32ProposalMemPoolIndex + 3] ) )>0?( (ps32ProposalMemPool[u32ProposalMemPoolIndex + 3])>((HI_S32)u32OriImHeight)? (u32OriImHeight - 1):(ps32ProposalMemPool[u32ProposalMemPoolIndex + 3] ) ):0; + } + + ps32ProposalTmp = ps32ProposalMemPool; + + (void)SVP_NNIE_NonRecursiveArgQuickSort( ps32ProposalTmp, 0, u32RoiCnt-1, pstStack,u32RoiCnt); + + (void)SVP_NNIE_NonMaxSuppression(ps32ProposalTmp, u32RoiCnt, u32NmsThresh, u32RoiCnt); + + ps32Score = (HI_S32*)ps32DstScore; + ps32Bbox = (HI_S32*)ps32DstBbox; + ps32RoiCnt = (HI_S32*)ps32ClassRoiNum; + + ps32Score += (HI_S32)(u32OffSet); + ps32Bbox += (HI_S32)(SAMPLE_SVP_NNIE_COORDI_NUM * u32OffSet); + + u32RoiOutCnt = 0; + for(i = 0; i < u32RoiCnt; i++) + { + u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i; + if( 0 == ps32ProposalMemPool[u32ProposalMemPoolIndex + 5] && ps32ProposalMemPool[u32ProposalMemPoolIndex + 4] > (HI_S32)pu32ConfThresh[j] ) //Suppression = 0; CONF_THRESH == 0.8 + { + ps32Score[u32RoiOutCnt] = ps32ProposalMemPool[u32ProposalMemPoolIndex + 4]; + ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM ] = ps32ProposalMemPool[u32ProposalMemPoolIndex]; + ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1 ] = ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]; + ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 2 ] = ps32ProposalMemPool[u32ProposalMemPoolIndex + 2]; + ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 3 ] = ps32ProposalMemPool[u32ProposalMemPoolIndex + 3]; + u32RoiOutCnt++; + } + if(u32RoiOutCnt >= u32RoiCnt)break; + } + ps32RoiCnt[j] = (HI_S32)u32RoiOutCnt; + u32OffSet += u32RoiOutCnt; + + } + return s32Ret; +} + + + +/***************************************************************************** +* Prototype : SVP_NNIE_Rfcn_GetResult +* Description : this function is used to get RFCN result +* Input : HI_S32* ps32FcBbox [IN] Bbox for Roi +* HI_U32 u32FcBboxStride [IN] Bbox stride +* HI_S32 *ps32FcScore [IN] Score for roi +* HI_U32 u32FcScoreStride [IN] Score stride +* HI_S32 *ps32Proposals [IN] proposal +* HI_U32 u32RoiCnt [IN] Roi num +* HI_U32 *pu32ConfThresh [IN] each class confidence thresh +* HI_U32 u32MaxRoi [IN] max roi +* HI_U32 u32ClassNum [IN] class num +* HI_U32 u32OriImWidth [IN] input image width +* HI_U32 u32OriImHeight [IN] input image height +* HI_U32 u32NmsThresh [IN] num thresh +* HI_U32* pu32MemPool [IN] assist buffer +* HI_S32* ps32DstScore [OUT]result of score +* HI_S32* ps32DstRoi [OUT]result of Bbox +* HI_S32* ps32ClassRoiNum [OUT]result of the roi num of each classs +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Rfcn_GetResult(HI_S32 *ps32FcScore, + HI_U32 u32FcScoreStride,HI_S32* ps32FcBbox,HI_U32 u32FcBboxStride, + HI_S32 *ps32Proposals, HI_U32 u32RoiCnt, HI_U32 *pu32ConfThresh, + HI_U32 u32MaxRoi,HI_U32 u32ClassNum,HI_U32 u32OriImWidth,HI_U32 u32OriImHeight, + HI_U32 u32NmsThresh, HI_U32* pu32MemPool,HI_S32 *ps32DstScores, HI_S32 *ps32DstRoi, + HI_S32 *ps32ClassRoiNum) +{ + /************* define variables *****************/ + HI_U32 u32Size = 0; + HI_U32 u32ClsScoreChannels = 0; + HI_U32 u32FcScoreWidth = 0; + HI_FLOAT f32ProposalWidth = 0.0; + HI_FLOAT f32ProposalHeight = 0.0; + HI_FLOAT f32ProposalCenterX = 0.0; + HI_FLOAT f32ProposalCenterY = 0.0; + HI_FLOAT f32PredW = 0.0; + HI_FLOAT f32PredH = 0.0; + HI_FLOAT f32PredCenterX = 0.0; + HI_FLOAT f32PredCenterY = 0.0; + HI_FLOAT* pf32FcScoresMemPool = NULL; + HI_S32* ps32FcBboxMemPool = NULL; + HI_S32* ps32ProposalMemPool = NULL; + HI_S32* ps32ProposalTmp = NULL; + HI_U32 u32FcBboxIndex = 0; + HI_U32 u32ProposalMemPoolIndex = 0; + HI_FLOAT* pf32Ptr = NULL; + HI_S32* ps32Ptr = NULL; + HI_S32* ps32DstScore = NULL; + HI_S32* ps32DstBbox = NULL; + HI_U32 u32RoiOutCnt = 0; + HI_U32 u32SrcIndex = 0; + HI_U32 u32DstIndex = 0; + HI_U32 i = 0; + HI_U32 j = 0; + HI_U32 u32OffSet = 0; + SAMPLE_SVP_NNIE_STACK_S* pstStack = NULL; + HI_S32 s32Ret = HI_SUCCESS; + + /******************* Get or calculate parameters **********************/ + u32ClsScoreChannels = u32ClassNum; /*channel num is equal to class size, cls_score class*/ + u32FcScoreWidth = u32ClsScoreChannels; + + /*************** Get Start Pointer of MemPool ******************/ + pf32FcScoresMemPool = (HI_FLOAT*)(pu32MemPool); + pf32Ptr = pf32FcScoresMemPool; + u32Size = u32MaxRoi * u32ClsScoreChannels; + pf32Ptr += u32Size; + + ps32FcBboxMemPool = (HI_S32*)pf32Ptr; + ps32Ptr = (HI_S32*)pf32Ptr; + u32Size = u32MaxRoi * SAMPLE_SVP_NNIE_COORDI_NUM; + ps32Ptr += u32Size; + + ps32ProposalMemPool = (HI_S32*)ps32Ptr; + ps32Ptr = ps32ProposalMemPool; + u32Size = u32MaxRoi * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH; + ps32Ptr += u32Size; + pstStack = (SAMPLE_SVP_NNIE_STACK_S*)ps32Ptr; + + // prepare input data + for (i = 0; i < u32RoiCnt; i++) + { + for (j = 0; j < u32ClsScoreChannels; j++) + { + u32DstIndex = u32FcScoreWidth * i + j; + u32SrcIndex = u32FcScoreStride/sizeof(HI_U32) * i + j; + pf32FcScoresMemPool[u32DstIndex] = (HI_FLOAT)(ps32FcScore[u32SrcIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE; + } + } + + for (i = 0; i < u32RoiCnt; i++) + { + for (j = 0; j < SAMPLE_SVP_NNIE_COORDI_NUM; j++) + { + u32SrcIndex = u32FcBboxStride/sizeof(HI_U32) * i + SAMPLE_SVP_NNIE_COORDI_NUM + j; + u32DstIndex = SAMPLE_SVP_NNIE_COORDI_NUM * i + j; + ps32FcBboxMemPool[u32DstIndex] = ps32FcBbox[u32SrcIndex]; + } + } + /************** bbox tranform ************ + change the fc output to Proposal temp MemPool. + Each Line of the Proposal has 6 bits. + The Format of the Proposal is: + 0-3: The four coordinate of the bbox, x1,y1,x2, y2 + 4: The Confidence Score of the bbox + 5: The suprressed flag + ******************************************/ + for (j = 0; j < u32ClsScoreChannels; j++) + { + for (i = 0; i < u32RoiCnt; i++) + { + f32ProposalWidth = ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i + 2] - ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i] + 1; + f32ProposalHeight = ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i + 3] - ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i + 1] + 1; + f32ProposalCenterX = ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i] + 0.5 * f32ProposalWidth; + f32ProposalCenterY = ps32Proposals[SAMPLE_SVP_NNIE_COORDI_NUM*i + 1] + 0.5 * f32ProposalHeight; + + u32FcBboxIndex = SAMPLE_SVP_NNIE_COORDI_NUM * i; + f32PredCenterX = ((HI_FLOAT)ps32FcBboxMemPool[u32FcBboxIndex] / 4096) * f32ProposalWidth + f32ProposalCenterX; + f32PredCenterY = ((HI_FLOAT)ps32FcBboxMemPool[u32FcBboxIndex + 1] / 4096) * f32ProposalHeight + f32ProposalCenterY; + f32PredW = f32ProposalWidth * SVP_NNIE_QuickExp(ps32FcBboxMemPool[u32FcBboxIndex + 2]); + f32PredH = f32ProposalHeight * SVP_NNIE_QuickExp(ps32FcBboxMemPool[u32FcBboxIndex + 3]); + + u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i; + ps32ProposalMemPool[u32ProposalMemPoolIndex] = (HI_S32)(f32PredCenterX - 0.5 * f32PredW); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] = (HI_S32)(f32PredCenterY - 0.5 * f32PredH); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 2] = (HI_S32)(f32PredCenterX + 0.5 * f32PredW); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 3] = (HI_S32)(f32PredCenterY + 0.5 * f32PredH); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 4] = (HI_S32)(pf32FcScoresMemPool[u32ClsScoreChannels*i + j] * 4096); + ps32ProposalMemPool[u32ProposalMemPoolIndex + 5] = 0; /* suprressed flag */ + } + + /* clip bbox */ + for (i = 0; i < u32RoiCnt; i++) + { + u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i; + ps32ProposalMemPool[u32ProposalMemPoolIndex] = ((ps32ProposalMemPool[u32ProposalMemPoolIndex]) >((HI_S32)u32OriImWidth - 1) ? ((HI_S32)u32OriImWidth - 1) : (ps32ProposalMemPool[u32ProposalMemPoolIndex]))>0 ? ((ps32ProposalMemPool[u32ProposalMemPoolIndex])>((HI_S32)u32OriImWidth) ? (u32OriImWidth - 1) : (ps32ProposalMemPool[u32ProposalMemPoolIndex])) : 0; + ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] = ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight - 1) ? ((HI_S32)u32OriImHeight - 1) : (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]))>0 ? ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])>((HI_S32)u32OriImHeight) ? (u32OriImHeight - 1) : (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])) : 0; + ps32ProposalMemPool[u32ProposalMemPoolIndex + 2] = ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 2]) > ((HI_S32)u32OriImWidth - 1) ? ((HI_S32)u32OriImWidth - 1) : (ps32ProposalMemPool[u32ProposalMemPoolIndex + 2]))>0 ? ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 2])>((HI_S32)u32OriImWidth) ? (u32OriImWidth - 1) : (ps32ProposalMemPool[u32ProposalMemPoolIndex + 2])) : 0; + ps32ProposalMemPool[u32ProposalMemPoolIndex + 3] = ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 3]) > ((HI_S32)u32OriImHeight - 1) ? ((HI_S32)u32OriImHeight - 1) : (ps32ProposalMemPool[u32ProposalMemPoolIndex + 3]))>0 ? ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 3])>((HI_S32)u32OriImHeight) ? (u32OriImHeight - 1) : (ps32ProposalMemPool[u32ProposalMemPoolIndex + 3])) : 0; + } + + ps32ProposalTmp = ps32ProposalMemPool; + s32Ret = SVP_NNIE_NonRecursiveArgQuickSort(ps32ProposalTmp, 0, u32RoiCnt - 1, pstStack,u32RoiCnt); + s32Ret = SVP_NNIE_NonMaxSuppression(ps32ProposalTmp, u32RoiCnt, u32NmsThresh, u32RoiCnt); + u32RoiOutCnt = 0; + + ps32DstScore = (HI_S32*)ps32DstScores; + ps32DstBbox = (HI_S32*)ps32DstRoi; + + ps32DstScore += (HI_S32)u32OffSet; + ps32DstBbox += (HI_S32)(SAMPLE_SVP_NNIE_COORDI_NUM * u32OffSet); + for (i = 0; i < u32RoiCnt; i++) + { + u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i; + if (0 == ps32ProposalMemPool[u32ProposalMemPoolIndex + 5] && ps32ProposalMemPool[u32ProposalMemPoolIndex + 4] >(HI_S32)pu32ConfThresh[j]) //Suppression = 0; CONF_THRESH == 0.8 + { + ps32DstScore[u32RoiOutCnt] = ps32ProposalMemPool[u32ProposalMemPoolIndex + 4]; + ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM] = ps32ProposalMemPool[u32ProposalMemPoolIndex]; + ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1] = ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]; + ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 2] = ps32ProposalMemPool[u32ProposalMemPoolIndex + 2]; + ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 3] = ps32ProposalMemPool[u32ProposalMemPoolIndex + 3]; + u32RoiOutCnt++; + } + if (u32RoiOutCnt >= u32RoiCnt) + { + break; + } + } + ps32ClassRoiNum[j] = (HI_S32)u32RoiOutCnt; + u32OffSet = u32OffSet + u32RoiOutCnt; + } + + return s32Ret; +} + +/***************************************************************************** +* Prototype : SVP_NNIE_Ssd_PriorBoxForward +* Description : this function is used to get SSD priorbox +* Input : HI_U32 u32PriorBoxWidth [IN] prior box width +* HI_U32 u32PriorBoxHeight [IN] prior box height +* HI_U32 u32OriImWidth [IN] input image width +* HI_U32 u32OriImHeight [IN] input image height +* HI_U32 f32PriorBoxMinSize [IN] prior box min size +* HI_U32 u32MinSizeNum [IN] min size num +* HI_U32 f32PriorBoxMaxSize [IN] prior box max size +* HI_U32 u32MaxSizeNum [IN] max size num +* HI_BOOL bFlip [IN] whether do Flip +* HI_BOOL bClip [IN] whether do Clip +* HI_U32 u32InputAspectRatioNum [IN] aspect ratio num +* HI_FLOAT af32PriorBoxAspectRatio[] [IN] aspect ratio value +* HI_FLOAT f32PriorBoxStepWidth [IN] prior box step width +* HI_FLOAT f32PriorBoxStepHeight [IN] prior box step height +* HI_FLOAT f32Offset [IN] offset value +* HI_S32 as32PriorBoxVar[] [IN] prior box variance +* HI_S32* ps32PriorboxOutputData [OUT] output reslut +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Ssd_PriorBoxForward(HI_U32 u32PriorBoxWidth, + HI_U32 u32PriorBoxHeight, HI_U32 u32OriImWidth, HI_U32 u32OriImHeight, + HI_FLOAT* pf32PriorBoxMinSize, HI_U32 u32MinSizeNum, HI_FLOAT* pf32PriorBoxMaxSize, + HI_U32 u32MaxSizeNum, HI_BOOL bFlip, HI_BOOL bClip, HI_U32 u32InputAspectRatioNum, + HI_FLOAT af32PriorBoxAspectRatio[],HI_FLOAT f32PriorBoxStepWidth, + HI_FLOAT f32PriorBoxStepHeight,HI_FLOAT f32Offset,HI_S32 as32PriorBoxVar[], + HI_S32* ps32PriorboxOutputData) +{ + HI_U32 u32AspectRatioNum = 0; + HI_U32 u32Index = 0; + HI_FLOAT af32AspectRatio[SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM] = { 0 }; + HI_U32 u32NumPrior = 0; + HI_FLOAT f32CenterX = 0; + HI_FLOAT f32CenterY = 0; + HI_FLOAT f32BoxHeight = 0; + HI_FLOAT f32BoxWidth = 0; + HI_FLOAT f32MaxBoxWidth = 0; + HI_U32 i = 0; + HI_U32 j = 0; + HI_U32 n = 0; + HI_U32 h = 0; + HI_U32 w = 0; + SAMPLE_SVP_CHECK_EXPR_RET((HI_TRUE == bFlip && u32InputAspectRatioNum > + (SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM-1)/2),HI_INVALID_VALUE,SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,when bFlip is true, u32InputAspectRatioNum(%d) can't be greater than %d!\n", + u32InputAspectRatioNum, (SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM-1)/2); + SAMPLE_SVP_CHECK_EXPR_RET((HI_FALSE == bFlip && u32InputAspectRatioNum > + (SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM-1)),HI_INVALID_VALUE,SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,when bFlip is false, u32InputAspectRatioNum(%d) can't be greater than %d!\n", + u32InputAspectRatioNum, (SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM-1)); + + // generate aspect_ratios + u32AspectRatioNum = 0; + af32AspectRatio[0] = 1; + u32AspectRatioNum++; + for (i = 0; i < u32InputAspectRatioNum; i++) + { + af32AspectRatio[u32AspectRatioNum++] = af32PriorBoxAspectRatio[i]; + if (bFlip) + { + af32AspectRatio[u32AspectRatioNum++] = 1.0f / af32PriorBoxAspectRatio[i]; + } + } + u32NumPrior = u32MinSizeNum * u32AspectRatioNum + u32MaxSizeNum; + + u32Index = 0; + for (h = 0; h < u32PriorBoxHeight; h++) + { + for (w = 0; w < u32PriorBoxWidth; w++) + { + f32CenterX = (w + f32Offset) * f32PriorBoxStepWidth; + f32CenterY = (h + f32Offset) * f32PriorBoxStepHeight; + for (n = 0; n < u32MinSizeNum; n++) + { + /*** first prior ***/ + f32BoxHeight = pf32PriorBoxMinSize[n]; + f32BoxWidth = pf32PriorBoxMinSize[n]; + ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX - f32BoxWidth * SAMPLE_SVP_NNIE_HALF); + ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY - f32BoxHeight * SAMPLE_SVP_NNIE_HALF); + ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX + f32BoxWidth * SAMPLE_SVP_NNIE_HALF); + ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY + f32BoxHeight * SAMPLE_SVP_NNIE_HALF); + /*** second prior ***/ + if(u32MaxSizeNum>0) + { + f32MaxBoxWidth = sqrt(pf32PriorBoxMinSize[n] * pf32PriorBoxMaxSize[n]); + f32BoxHeight = f32MaxBoxWidth; + f32BoxWidth = f32MaxBoxWidth; + ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX - f32BoxWidth * SAMPLE_SVP_NNIE_HALF); + ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY - f32BoxHeight * SAMPLE_SVP_NNIE_HALF); + ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX + f32BoxWidth * SAMPLE_SVP_NNIE_HALF); + ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY + f32BoxHeight * SAMPLE_SVP_NNIE_HALF); + } + /**** rest of priors, skip AspectRatio == 1 ****/ + for (i = 1; i < u32AspectRatioNum; i++) + { + f32BoxWidth = (HI_FLOAT)(pf32PriorBoxMinSize[n] * sqrt( af32AspectRatio[i] )); + f32BoxHeight = (HI_FLOAT)(pf32PriorBoxMinSize[n]/sqrt( af32AspectRatio[i] )); + ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX - f32BoxWidth * SAMPLE_SVP_NNIE_HALF); + ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY - f32BoxHeight * SAMPLE_SVP_NNIE_HALF); + ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX + f32BoxWidth * SAMPLE_SVP_NNIE_HALF); + ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY + f32BoxHeight * SAMPLE_SVP_NNIE_HALF); + } + } + } + } + /************ clip the priors' coordidates, within [0, u32ImgWidth] & [0, u32ImgHeight] *************/ + if (bClip) + { + for (i = 0; i < (HI_U32)(u32PriorBoxWidth * u32PriorBoxHeight * SAMPLE_SVP_NNIE_COORDI_NUM*u32NumPrior / 2); i++) + { + ps32PriorboxOutputData[2 * i] = SAMPLE_SVP_NNIE_MIN((HI_U32)SAMPLE_SVP_NNIE_MAX(ps32PriorboxOutputData[2 * i], 0), u32OriImWidth); + ps32PriorboxOutputData[2 * i + 1] = SAMPLE_SVP_NNIE_MIN((HI_U32)SAMPLE_SVP_NNIE_MAX(ps32PriorboxOutputData[2 * i + 1], 0), u32OriImHeight); + } + } + /*********************** get var **********************/ + for (h = 0; h < u32PriorBoxHeight; h++) + { + for (w = 0; w < u32PriorBoxWidth; w++) + { + for (i = 0; i < u32NumPrior; i++) + { + for (j = 0; j < SAMPLE_SVP_NNIE_COORDI_NUM; j++) + { + ps32PriorboxOutputData[u32Index++] = (HI_S32)as32PriorBoxVar[j]; + } + } + } + } + return HI_SUCCESS; +} + +/***************************************************************************** +* Prototype : SVP_NNIE_Ssd_SoftmaxForward +* Description : this function is used to do SSD softmax +* Input : HI_U32 u32SoftMaxInHeight [IN] softmax input height +* HI_U32 au32SoftMaxInChn[] [IN] softmax input channel +* HI_U32 u32ConcatNum [IN] concat num +* HI_U32 au32ConvStride[] [IN] conv stride +* HI_U32 u32SoftMaxOutWidth [IN] softmax output width +* HI_U32 u32SoftMaxOutHeight [IN] softmax output height +* HI_U32 u32SoftMaxOutChn [IN] softmax output channel +* HI_S32* aps32SoftMaxInputData[] [IN] softmax input data +* HI_S32* ps32SoftMaxOutputData [OUT]softmax output data +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Ssd_SoftmaxForward(HI_U32 u32SoftMaxInHeight, + HI_U32 au32SoftMaxInChn[], HI_U32 u32ConcatNum, HI_U32 au32ConvStride[], + HI_U32 au32SoftMaxWidth[],HI_S32* aps32SoftMaxInputData[], HI_S32* ps32SoftMaxOutputData) +{ + HI_S32* ps32InputData = NULL; + HI_S32* ps32OutputTmp = NULL; + HI_U32 u32OuterNum = 0; + HI_U32 u32InnerNum = 0; + HI_U32 u32InputChannel = 0; + HI_U32 i = 0; + HI_U32 u32ConcatCnt = 0; + HI_S32 s32Ret = 0; + HI_U32 u32Stride = 0; + HI_U32 u32Skip = 0; + HI_U32 u32Left = 0; + ps32OutputTmp = ps32SoftMaxOutputData; + for (u32ConcatCnt = 0; u32ConcatCnt < u32ConcatNum; u32ConcatCnt++) + { + ps32InputData = aps32SoftMaxInputData[u32ConcatCnt]; + u32Stride = au32ConvStride[u32ConcatCnt]; + u32InputChannel = au32SoftMaxInChn[u32ConcatCnt]; + u32OuterNum = u32InputChannel / u32SoftMaxInHeight; + u32InnerNum = u32SoftMaxInHeight; + u32Skip = au32SoftMaxWidth[u32ConcatCnt] / u32InnerNum; + u32Left = u32Stride - au32SoftMaxWidth[u32ConcatCnt]; + for (i = 0; i < u32OuterNum; i++) + { + s32Ret = SVP_NNIE_SSD_SoftMax(ps32InputData, (HI_S32)u32InnerNum,ps32OutputTmp); + if ((i + 1) % u32Skip == 0) + { + ps32InputData += u32Left; + } + ps32InputData += u32InnerNum; + ps32OutputTmp += u32InnerNum; + } + } + return s32Ret; +} + +/***************************************************************************** +* Prototype : SVP_NNIE_Ssd_DetectionOutForward +* Description : this function is used to get detection result of SSD +* Input : HI_U32 u32ConcatNum [IN] SSD concat num +* HI_U32 u32ConfThresh [IN] confidence thresh +* HI_U32 u32ClassNum [IN] class num +* HI_U32 u32TopK [IN] Topk value +* HI_U32 u32KeepTopK [IN] KeepTopK value +* HI_U32 u32NmsThresh [IN] NMS thresh +* HI_U32 au32DetectInputChn[] [IN] detection input channel +* HI_S32* aps32AllLocPreds[] [IN] Location prediction +* HI_S32* aps32AllPriorBoxes[] [IN] prior box +* HI_S32* ps32ConfScores [IN] confidence score +* HI_S32* ps32AssistMemPool [IN] assist buffer +* HI_S32* ps32DstScoreSrc [OUT] result of score +* HI_S32* ps32DstBboxSrc [OUT] result of Bbox +* HI_S32* ps32RoiOutCntSrc [OUT] result of the roi num of each class +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Ssd_DetectionOutForward(HI_U32 u32ConcatNum, + HI_U32 u32ConfThresh,HI_U32 u32ClassNum, HI_U32 u32TopK, HI_U32 u32KeepTopK, HI_U32 u32NmsThresh, + HI_U32 au32DetectInputChn[], HI_S32* aps32AllLocPreds[], HI_S32* aps32AllPriorBoxes[], + HI_S32* ps32ConfScores, HI_S32* ps32AssistMemPool, HI_S32* ps32DstScoreSrc, + HI_S32* ps32DstBboxSrc, HI_S32* ps32RoiOutCntSrc) +{ + /************* check input parameters ****************/ + /******** define variables **********/ + HI_S32* ps32LocPreds = NULL; + HI_S32* ps32PriorBoxes = NULL; + HI_S32* ps32PriorVar = NULL; + HI_S32* ps32AllDecodeBoxes = NULL; + HI_S32* ps32DstScore = NULL; + HI_S32* ps32DstBbox = NULL; + HI_S32* ps32ClassRoiNum = NULL; + HI_U32 u32RoiOutCnt = 0; + HI_S32* ps32SingleProposal = NULL; + HI_S32* ps32AfterTopK = NULL; + SAMPLE_SVP_NNIE_STACK_S* pstStack = NULL; + HI_U32 u32PriorNum = 0; + HI_U32 u32NumPredsPerClass = 0; + HI_FLOAT f32PriorWidth = 0; + HI_FLOAT f32PriorHeight = 0; + HI_FLOAT f32PriorCenterX = 0; + HI_FLOAT f32PriorCenterY = 0; + HI_FLOAT f32DecodeBoxCenterX = 0; + HI_FLOAT f32DecodeBoxCenterY = 0; + HI_FLOAT f32DecodeBoxWidth = 0; + HI_FLOAT f32DecodeBoxHeight = 0; + HI_U32 u32SrcIdx = 0; + HI_U32 u32AfterFilter = 0; + HI_U32 u32AfterTopK = 0; + HI_U32 u32KeepCnt = 0; + HI_U32 i = 0; + HI_U32 j = 0; + HI_U32 u32Offset = 0; + HI_S32 s32Ret = HI_SUCCESS; + u32PriorNum = 0; + for (i = 0; i < u32ConcatNum; i++) + { + u32PriorNum += au32DetectInputChn[i] / SAMPLE_SVP_NNIE_COORDI_NUM; + } + //prepare for Assist MemPool + ps32AllDecodeBoxes = ps32AssistMemPool; + ps32SingleProposal = ps32AllDecodeBoxes + u32PriorNum * SAMPLE_SVP_NNIE_COORDI_NUM; + ps32AfterTopK = ps32SingleProposal + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32PriorNum; + pstStack = (SAMPLE_SVP_NNIE_STACK_S*)(ps32AfterTopK + u32PriorNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH); + u32SrcIdx = 0; + for (i = 0; i < u32ConcatNum; i++) + { + /********** get loc predictions ************/ + ps32LocPreds = aps32AllLocPreds[i]; + u32NumPredsPerClass = au32DetectInputChn[i] / SAMPLE_SVP_NNIE_COORDI_NUM; + /********** get Prior Bboxes ************/ + ps32PriorBoxes = aps32AllPriorBoxes[i]; + ps32PriorVar = ps32PriorBoxes + u32NumPredsPerClass*SAMPLE_SVP_NNIE_COORDI_NUM; + for (j = 0; j < u32NumPredsPerClass; j++) + { + //printf("ps32PriorBoxes start***************\n"); + f32PriorWidth = (HI_FLOAT)(ps32PriorBoxes[j*SAMPLE_SVP_NNIE_COORDI_NUM+2] - ps32PriorBoxes[j*SAMPLE_SVP_NNIE_COORDI_NUM]); + f32PriorHeight = (HI_FLOAT)(ps32PriorBoxes[j*SAMPLE_SVP_NNIE_COORDI_NUM+3] - ps32PriorBoxes[j*SAMPLE_SVP_NNIE_COORDI_NUM + 1]); + f32PriorCenterX = (ps32PriorBoxes[j*SAMPLE_SVP_NNIE_COORDI_NUM+2] + ps32PriorBoxes[j*SAMPLE_SVP_NNIE_COORDI_NUM])*SAMPLE_SVP_NNIE_HALF; + f32PriorCenterY = (ps32PriorBoxes[j*SAMPLE_SVP_NNIE_COORDI_NUM+3] + ps32PriorBoxes[j*SAMPLE_SVP_NNIE_COORDI_NUM+1])*SAMPLE_SVP_NNIE_HALF; + + f32DecodeBoxCenterX = ((HI_FLOAT)ps32PriorVar[j*SAMPLE_SVP_NNIE_COORDI_NUM]/SAMPLE_SVP_NNIE_QUANT_BASE)* + ((HI_FLOAT)ps32LocPreds[j*SAMPLE_SVP_NNIE_COORDI_NUM]/SAMPLE_SVP_NNIE_QUANT_BASE)*f32PriorWidth+f32PriorCenterX; + + f32DecodeBoxCenterY = ((HI_FLOAT)ps32PriorVar[j*SAMPLE_SVP_NNIE_COORDI_NUM+1]/SAMPLE_SVP_NNIE_QUANT_BASE)* + ((HI_FLOAT)ps32LocPreds[j*SAMPLE_SVP_NNIE_COORDI_NUM+1]/SAMPLE_SVP_NNIE_QUANT_BASE)*f32PriorHeight+f32PriorCenterY; + + f32DecodeBoxWidth = exp(((HI_FLOAT)ps32PriorVar[j*SAMPLE_SVP_NNIE_COORDI_NUM+2]/SAMPLE_SVP_NNIE_QUANT_BASE)* + ((HI_FLOAT)ps32LocPreds[j*SAMPLE_SVP_NNIE_COORDI_NUM+2]/SAMPLE_SVP_NNIE_QUANT_BASE))*f32PriorWidth; + + f32DecodeBoxHeight = exp(((HI_FLOAT)ps32PriorVar[j*SAMPLE_SVP_NNIE_COORDI_NUM+3]/SAMPLE_SVP_NNIE_QUANT_BASE)* + ((HI_FLOAT)ps32LocPreds[j*SAMPLE_SVP_NNIE_COORDI_NUM+3]/SAMPLE_SVP_NNIE_QUANT_BASE))*f32PriorHeight; + + //printf("ps32PriorBoxes end***************\n"); + + ps32AllDecodeBoxes[u32SrcIdx++] = (HI_S32)(f32DecodeBoxCenterX - f32DecodeBoxWidth * SAMPLE_SVP_NNIE_HALF); + ps32AllDecodeBoxes[u32SrcIdx++] = (HI_S32)(f32DecodeBoxCenterY - f32DecodeBoxHeight * SAMPLE_SVP_NNIE_HALF); + ps32AllDecodeBoxes[u32SrcIdx++] = (HI_S32)(f32DecodeBoxCenterX + f32DecodeBoxWidth * SAMPLE_SVP_NNIE_HALF); + ps32AllDecodeBoxes[u32SrcIdx++] = (HI_S32)(f32DecodeBoxCenterY + f32DecodeBoxHeight * SAMPLE_SVP_NNIE_HALF); + } + } + /********** do NMS for each class *************/ + u32AfterTopK = 0; + for (i = 0; i < u32ClassNum; i++) + { + for (j = 0; j < u32PriorNum; j++) + { + ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH] = ps32AllDecodeBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM]; + ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 1] = ps32AllDecodeBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1]; + ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 2] = ps32AllDecodeBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2]; + ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 3] = ps32AllDecodeBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3]; + ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4] = ps32ConfScores[j*u32ClassNum + i]; + ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 5] = 0; + } + s32Ret = SVP_NNIE_NonRecursiveArgQuickSort(ps32SingleProposal, 0, u32PriorNum - 1, pstStack,u32TopK); + u32AfterFilter = (u32PriorNum < u32TopK) ? u32PriorNum : u32TopK; + s32Ret = SVP_NNIE_NonMaxSuppression(ps32SingleProposal, u32AfterFilter, u32NmsThresh, u32AfterFilter); + u32RoiOutCnt = 0; + ps32DstScore = (HI_S32*)ps32DstScoreSrc; + ps32DstBbox = (HI_S32*)ps32DstBboxSrc; + ps32ClassRoiNum = (HI_S32*)ps32RoiOutCntSrc; + ps32DstScore += (HI_S32)u32AfterTopK; + ps32DstBbox += (HI_S32)(u32AfterTopK * SAMPLE_SVP_NNIE_COORDI_NUM); + for (j = 0; j < u32TopK; j++) + { + if (ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 5] == 0 && + ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4] > (HI_S32)u32ConfThresh) + { + ps32DstScore[u32RoiOutCnt] = ps32SingleProposal[j * 6 + 4]; + ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM] = ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH]; + ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1] = ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 1]; + ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 2] = ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 2]; + ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 3] = ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 3]; + u32RoiOutCnt++; + } + } + ps32ClassRoiNum[i] = (HI_S32)u32RoiOutCnt; + u32AfterTopK += u32RoiOutCnt; + } + + u32KeepCnt = 0; + u32Offset = 0; + if (u32AfterTopK > u32KeepTopK) + { + u32Offset = ps32ClassRoiNum[0]; + for (i = 1; i < u32ClassNum; i++) + { + ps32DstScore = (HI_S32*)ps32DstScoreSrc; + ps32DstBbox = (HI_S32*)ps32DstBboxSrc; + ps32ClassRoiNum = (HI_S32*)ps32RoiOutCntSrc; + ps32DstScore += (HI_S32)(u32Offset); + ps32DstBbox += (HI_S32)(u32Offset * SAMPLE_SVP_NNIE_COORDI_NUM); + for (j = 0; j < (HI_U32)ps32ClassRoiNum[i]; j++) + { + ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH] = ps32DstBbox[j * SAMPLE_SVP_NNIE_COORDI_NUM]; + ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 1] = ps32DstBbox[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1]; + ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 2] = ps32DstBbox[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2]; + ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 3] = ps32DstBbox[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3]; + ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4] = ps32DstScore[j]; + ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 5] = i; + u32KeepCnt++; + } + u32Offset = u32Offset + ps32ClassRoiNum[i]; + } + s32Ret = SVP_NNIE_NonRecursiveArgQuickSort(ps32AfterTopK, 0, u32KeepCnt - 1, pstStack,u32KeepCnt); + + u32Offset = 0; + u32Offset = ps32ClassRoiNum[0]; + for (i = 1; i < u32ClassNum; i++) + { + u32RoiOutCnt = 0; + ps32DstScore = (HI_S32*)ps32DstScoreSrc; + ps32DstBbox = (HI_S32*)ps32DstBboxSrc; + ps32ClassRoiNum = (HI_S32*)ps32RoiOutCntSrc; + ps32DstScore += (HI_S32)(u32Offset); + ps32DstBbox += (HI_S32)(u32Offset * SAMPLE_SVP_NNIE_COORDI_NUM); + for (j = 0; j < u32KeepTopK; j++) + { + if (ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 5] == i) + { + ps32DstScore[u32RoiOutCnt] = ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4]; + ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM] = ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH]; + ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1] = ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 1]; + ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 2] = ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 2]; + ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 3] = ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 3]; + u32RoiOutCnt++; + } + } + ps32ClassRoiNum[i] = (HI_S32)u32RoiOutCnt; + u32Offset += u32RoiOutCnt; + } + } + return s32Ret; +} + +/***************************************************************************** +* Prototype : SVP_NNIE_Yolov1_Iou +* Description : this function is used to calculate IOU +* Input : HI_FLOAT *pf32Bbox [IN] pointer to the Bbox memery +* HI_U32 u32Idx1 [IN] first Bbox index +* HI_U32 u32Idx2 [IN] second Bbox index +* +* +* Output : +* Return Value : HI_S32, the result of IOU. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Yolov1_Iou(HI_FLOAT *pf32Bbox, HI_U32 u32Idx1, HI_U32 u32Idx2) +{ + HI_FLOAT f32WidthDis = 0.0f, f32HeightDis = 0.0f; + HI_FLOAT f32Intersection = 0.0f; + HI_FLOAT f32Iou = 0.0f; + f32WidthDis = SAMPLE_SVP_NNIE_MIN(pf32Bbox[u32Idx1*SAMPLE_SVP_NNIE_COORDI_NUM] + + 0.5f*pf32Bbox[u32Idx1*SAMPLE_SVP_NNIE_COORDI_NUM+2], + pf32Bbox[u32Idx2*SAMPLE_SVP_NNIE_COORDI_NUM] + + 0.5f*pf32Bbox[u32Idx2*SAMPLE_SVP_NNIE_COORDI_NUM+2]) - + SAMPLE_SVP_NNIE_MAX(pf32Bbox[u32Idx1*SAMPLE_SVP_NNIE_COORDI_NUM] - + 0.5f*pf32Bbox[u32Idx1*SAMPLE_SVP_NNIE_COORDI_NUM+2], + pf32Bbox[u32Idx2*SAMPLE_SVP_NNIE_COORDI_NUM] - + 0.5f*pf32Bbox[u32Idx2*SAMPLE_SVP_NNIE_COORDI_NUM+2]); + + f32HeightDis = SAMPLE_SVP_NNIE_MIN(pf32Bbox[u32Idx1*SAMPLE_SVP_NNIE_COORDI_NUM+1] + + 0.5f*pf32Bbox[u32Idx1*SAMPLE_SVP_NNIE_COORDI_NUM+3], + pf32Bbox[u32Idx2*SAMPLE_SVP_NNIE_COORDI_NUM+1] + + 0.5f*pf32Bbox[u32Idx2*SAMPLE_SVP_NNIE_COORDI_NUM+3]) - + SAMPLE_SVP_NNIE_MAX(pf32Bbox[u32Idx1*SAMPLE_SVP_NNIE_COORDI_NUM+1] - + 0.5f*pf32Bbox[u32Idx1*SAMPLE_SVP_NNIE_COORDI_NUM+3], + pf32Bbox[u32Idx2*SAMPLE_SVP_NNIE_COORDI_NUM+1] - + 0.5f*pf32Bbox[u32Idx2*SAMPLE_SVP_NNIE_COORDI_NUM+3]); + + if (f32WidthDis < 0 || f32HeightDis < 0) + { + f32Intersection = 0; + } + else + { + f32Intersection = f32WidthDis*f32HeightDis; + } + f32Iou = f32Intersection / (pf32Bbox[u32Idx1*SAMPLE_SVP_NNIE_COORDI_NUM+2]* + pf32Bbox[u32Idx1*SAMPLE_SVP_NNIE_COORDI_NUM+3] + + pf32Bbox[u32Idx2*SAMPLE_SVP_NNIE_COORDI_NUM+2] * + pf32Bbox[u32Idx2*SAMPLE_SVP_NNIE_COORDI_NUM+3] - f32Intersection); + + return (HI_S32)(f32Iou*SAMPLE_SVP_NNIE_QUANT_BASE); +} + +/***************************************************************************** +* Prototype : SVP_NNIE_Yolov1_Argswap +* Description : this function is used to exchange data +* Input : HI_S32* ps32Src1 [IN] first input array +* HI_S32* ps32Src2 [IN] second input array +* HI_U32 u32ArraySize [IN] array size +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static void SVP_NNIE_Yolov1_Argswap(HI_S32* ps32Src1, HI_S32* ps32Src2, + HI_U32 u32ArraySize) +{ + HI_U32 i = 0; + HI_S32 s32Tmp = 0; + for( i = 0; i < u32ArraySize; i++ ) + { + s32Tmp = ps32Src1[i]; + ps32Src1[i] = ps32Src2[i]; + ps32Src2[i] = s32Tmp; + } +} + +/***************************************************************************** +* Prototype : SVP_NNIE_Yolov1_NonRecursiveArgQuickSort +* Description : this function is used to do quick sort +* Input : HI_S32* ps32Array [IN] the array need to be sorted +* HI_S32 s32Low [IN] the start position of quick sort +* HI_S32 s32High [IN] the end position of quick sort +* HI_U32 u32ArraySize [IN] the element size of input array +* HI_U32 u32ScoreIdx [IN] the score index in array element +* SAMPLE_SVP_NNIE_STACK_S *pstStack [IN] the buffer used to store start positions and end positions +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Yolo_NonRecursiveArgQuickSort(HI_S32* ps32Array, + HI_S32 s32Low, HI_S32 s32High, HI_U32 u32ArraySize,HI_U32 u32ScoreIdx, + SAMPLE_SVP_NNIE_STACK_S *pstStack) +{ + HI_S32 i = s32Low; + HI_S32 j = s32High; + HI_S32 s32Top = 0; + HI_S32 s32KeyConfidence = ps32Array[u32ArraySize * s32Low + u32ScoreIdx]; + pstStack[s32Top].s32Min = s32Low; + pstStack[s32Top].s32Max = s32High; + + while(s32Top > -1) + { + s32Low = pstStack[s32Top].s32Min; + s32High = pstStack[s32Top].s32Max; + i = s32Low; + j = s32High; + s32Top--; + + s32KeyConfidence = ps32Array[u32ArraySize * s32Low + u32ScoreIdx]; + + while(i < j) + { + while((i < j) && (s32KeyConfidence > ps32Array[j * u32ArraySize + u32ScoreIdx])) + { + j--; + } + if(i < j) + { + SVP_NNIE_Yolov1_Argswap(&ps32Array[i*u32ArraySize], &ps32Array[j*u32ArraySize],u32ArraySize); + i++; + } + + while((i < j) && (s32KeyConfidence < ps32Array[i*u32ArraySize + u32ScoreIdx])) + { + i++; + } + if(i < j) + { + SVP_NNIE_Yolov1_Argswap(&ps32Array[i*u32ArraySize], &ps32Array[j*u32ArraySize],u32ArraySize); + j--; + } + } + + if(s32Low < i-1) + { + s32Top++; + pstStack[s32Top].s32Min = s32Low; + pstStack[s32Top].s32Max = i-1; + } + + if(s32High > i+1) + { + s32Top++; + pstStack[s32Top].s32Min = i+1; + pstStack[s32Top].s32Max = s32High; + } + } + return HI_SUCCESS; +} + + +/***************************************************************************** +* Prototype : SVP_NNIE_Yolov1_Nms +* Description : this function is used to do NMS +* Input : HI_S32* ps32Score [IN] class score of each bbox +* HI_FLOAT* pf32Bbox [IN] pointer to the Bbox memeory +* HI_U32 u32ConfThresh [IN] confidence thresh +* HI_U32 u32NmsThresh [IN] NMS thresh +* HI_U32* pu32TmpBuf [IN] assist buffer +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Yolov1_Nms(HI_S32* ps32Score, HI_FLOAT* pf32Bbox, + HI_U32 u32BboxNum,HI_U32 u32ConfThresh,HI_U32 u32NmsThresh,HI_U32* pu32TmpBuf) +{ + HI_U32 i = 0, j = 0; + HI_U32 u32Idx1 = 0, u32Idx2 = 0; + SAMPLE_SVP_NNIE_YOLOV1_SCORE_S *pstScore = (SAMPLE_SVP_NNIE_YOLOV1_SCORE_S*)pu32TmpBuf; + SAMPLE_SVP_NNIE_STACK_S* pstAssitBuf = (SAMPLE_SVP_NNIE_STACK_S*)((HI_U8*)pu32TmpBuf+ + u32BboxNum*sizeof(SAMPLE_SVP_NNIE_YOLOV1_SCORE_S)); + for (i = 0; i < u32BboxNum; i++) + { + if (ps32Score[i] < (HI_S32)u32ConfThresh) + { + ps32Score[i] = 0; + } + } + + for (i = 0; i < u32BboxNum; ++i) + { + pstScore[i].u32Idx = i; + pstScore[i].s32Score= (ps32Score[i]); + } + + /*quick sort*/ + (void)SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32*)pstScore,0,u32BboxNum-1, + sizeof(SAMPLE_SVP_NNIE_YOLOV1_SCORE_S)/sizeof(HI_U32),1,pstAssitBuf); + + /*NMS*/ + for (i = 0; i < u32BboxNum; i++) + { + u32Idx1 = pstScore[i].u32Idx; + if (0 == pstScore[i].s32Score) + { + continue; + } + for (j = i + 1; j < u32BboxNum; j++) + { + u32Idx2 = pstScore[j].u32Idx; + if (0 == pstScore[j].s32Score) + { + continue; + } + if (SVP_NNIE_Yolov1_Iou(pf32Bbox, u32Idx1, u32Idx2) > (HI_S32)u32NmsThresh) + { + pstScore[j].s32Score = 0; + ps32Score[pstScore[j].u32Idx] = 0; + } + } + } + + return HI_SUCCESS; +} + + +/***************************************************************************** +* Prototype : SVP_NNIE_Yolov1_ConvertPosition +* Description : this function is used to do convert position coordinates +* Input : HI_FLOAT* pf32Bbox [IN] pointer to the Bbox memeory +* HI_U32 u32OriImgWidth [IN] input image width +* HI_U32 u32OriImagHeight [IN] input image height +* HI_FLOAT af32Roi[] [OUT] converted position coordinates +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +static void SVP_NNIE_Yolov1_ConvertPosition(HI_FLOAT*pf32Bbox, + HI_U32 u32OriImgWidth, HI_U32 u32OriImagHeight, HI_FLOAT af32Roi[]) +{ + HI_FLOAT f32Xmin, f32Ymin, f32Xmax, f32Ymax; + f32Xmin = *pf32Bbox - *(pf32Bbox+2) * SAMPLE_SVP_NNIE_HALF; + f32Xmin = f32Xmin > 0 ? f32Xmin : 0; + f32Ymin = *(pf32Bbox+1) - *(pf32Bbox+3) * SAMPLE_SVP_NNIE_HALF; + f32Ymin = f32Ymin > 0 ? f32Ymin : 0; + f32Xmax = *pf32Bbox + *(pf32Bbox+2) * SAMPLE_SVP_NNIE_HALF; + f32Xmax = f32Xmax > u32OriImgWidth ? u32OriImgWidth : f32Xmax; + f32Ymax = *(pf32Bbox+1) + *(pf32Bbox+3) * SAMPLE_SVP_NNIE_HALF; + f32Ymax = f32Ymax > u32OriImagHeight ? u32OriImagHeight : f32Ymax; + + af32Roi[0] = f32Xmin; + af32Roi[1] = f32Ymin; + af32Roi[2] = f32Xmax; + af32Roi[3] = f32Ymax; +} + +/***************************************************************************** +* Prototype : SVP_NNIE_Yolov1_Detection +* Description : Yolov1 detection +* Input : HI_S32* ps32Score [IN] bbox each class score +* HI_FLOAT* pf32Bbox [IN] bbox +* HI_U32 u32ClassNum [IN] Class num +* HI_U32 u32GridNum [IN] grid num +* HI_U32 u32BboxNum [IN] bbox num +* HI_U32 u32ConfThresh [IN] confidence thresh +* HI_U32 u32NmsThresh [IN] Nms thresh +* HI_U32 u32OriImgWidth [IN] input image width +* HI_U32 u32OriImgHeight [IN] input image height +* HI_U32* pu32MemPool [IN] assist buffer +* HI_S32 *ps32DstScores [OUT] dst score of ROI +* HI_S32 *ps32DstRoi [OUT] dst Roi +* HI_S32 *ps32ClassRoiNum[OUT] dst roi num of each class +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-14 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Yolov1_Detection(HI_S32* ps32Score, HI_FLOAT* pf32Bbox, + HI_U32 u32ClassNum,HI_U32 u32GridNum,HI_U32 u32BboxNum,HI_U32 u32ConfThresh, + HI_U32 u32NmsThresh,HI_U32 u32OriImgWidth, HI_U32 u32OriImgHeight, + HI_U32* pu32MemPool,HI_S32 *ps32DstScores, HI_S32 *ps32DstRoi, + HI_S32 *ps32ClassRoiNum) +{ + HI_U32 i = 0, j = 0; + HI_U32 u32Idx = 0; + HI_U32 u32RoiNum = 0; + HI_S32* ps32EachClassScore = NULL; + HI_FLOAT af32Roi[SAMPLE_SVP_NNIE_COORDI_NUM] = {0.0f}; + SAMPLE_SVP_NNIE_YOLOV1_SCORE_S *pstScore = NULL; + *(ps32ClassRoiNum++) = 0; + for (i = 0; i < u32ClassNum; i++) + { + ps32EachClassScore = ps32Score+u32BboxNum*i; + (void)SVP_NNIE_Yolov1_Nms(ps32EachClassScore, pf32Bbox, u32BboxNum, u32ConfThresh, + u32NmsThresh,pu32MemPool); + + pstScore = (SAMPLE_SVP_NNIE_YOLOV1_SCORE_S *)pu32MemPool; + u32RoiNum = 0; + for(j = 0; j < u32BboxNum; j++) + { + if(pstScore[j].s32Score!=0) + { + u32RoiNum++; + *(ps32DstScores++)=pstScore[j].s32Score; + u32Idx = pstScore[j].u32Idx; + (void)SVP_NNIE_Yolov1_ConvertPosition((pf32Bbox+u32Idx*SAMPLE_SVP_NNIE_COORDI_NUM), + u32OriImgWidth,u32OriImgHeight,af32Roi); + *(ps32DstRoi++) = (HI_S32)af32Roi[0]; + *(ps32DstRoi++) = (HI_S32)af32Roi[1]; + *(ps32DstRoi++) = (HI_S32)af32Roi[2]; + *(ps32DstRoi++) = (HI_S32)af32Roi[3]; + } + else + { + continue; + } + } + *(ps32ClassRoiNum++) = u32RoiNum; + } + return HI_SUCCESS; +} + +/***************************************************************************** +* Prototype : SVP_NNIE_Yolov2_Iou +* Description : Yolov2 IOU +* Input : SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox1 [IN] first bbox +* SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox2 [IN] second bbox +* HI_U32 u32ClassNum [IN] Class num +* HI_U32 u32GridNum [IN] grid num +* HI_U32 u32BboxNum [IN] bbox num +* HI_U32 u32ConfThresh [IN] confidence thresh +* HI_U32 u32NmsThresh [IN] Nms thresh +* HI_U32 u32OriImgWidth [IN] input image width +* HI_U32 u32OriImgHeight [IN] input image height +* HI_U32* pu32MemPool [IN] assist buffer +* HI_S32 *ps32DstScores [OUT] dst score of ROI +* HI_S32 *ps32DstRoi [OUT] dst Roi +* HI_S32 *ps32ClassRoiNum[OUT] dst roi num of each class +* +* Output : +* Return Value : HI_DOUBLE: IOU result +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-14 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_DOUBLE SVP_NNIE_Yolov2_Iou(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox1, + SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox2) +{ + HI_FLOAT f32InterWidth = 0.0; + HI_FLOAT f32InterHeight = 0.0; + HI_DOUBLE f64InterArea = 0.0; + HI_DOUBLE f64Box1Area = 0.0; + HI_DOUBLE f64Box2Area = 0.0; + HI_DOUBLE f64UnionArea = 0.0; + + f32InterWidth = SAMPLE_SVP_NNIE_MIN(pstBbox1->f32Xmax, pstBbox2->f32Xmax) - SAMPLE_SVP_NNIE_MAX(pstBbox1->f32Xmin,pstBbox2->f32Xmin); + f32InterHeight = SAMPLE_SVP_NNIE_MIN(pstBbox1->f32Ymax, pstBbox2->f32Ymax) - SAMPLE_SVP_NNIE_MAX(pstBbox1->f32Ymin,pstBbox2->f32Ymin); + + if(f32InterWidth <= 0 || f32InterHeight <= 0) return 0; + + f64InterArea = f32InterWidth * f32InterHeight; + f64Box1Area = (pstBbox1->f32Xmax - pstBbox1->f32Xmin)* (pstBbox1->f32Ymax - pstBbox1->f32Ymin); + f64Box2Area = (pstBbox2->f32Xmax - pstBbox2->f32Xmin)* (pstBbox2->f32Ymax - pstBbox2->f32Ymin); + f64UnionArea = f64Box1Area + f64Box2Area - f64InterArea; + + return f64InterArea/f64UnionArea; +} + + +/***************************************************************************** +* Prototype : SVP_NNIE_Yolov2_NonMaxSuppression +* Description : Yolov2 NonMaxSuppression function +* Input : SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox [IN] input bbox +* HI_U32 u32BoxNum [IN] Bbox num +* HI_U32 u32ClassNum [IN] Class num +* HI_U32 u32NmsThresh [IN] NMS thresh +* HI_U32 u32BboxNum [IN] bbox num +* HI_U32 u32MaxRoiNum [IN] max roi num +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-14 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Yolov2_NonMaxSuppression( SAMPLE_SVP_NNIE_YOLOV2_BBOX_S* pstBbox, + HI_U32 u32BboxNum, HI_U32 u32NmsThresh,HI_U32 u32MaxRoiNum) +{ + HI_U32 i,j; + HI_U32 u32Num = 0; + HI_DOUBLE f64Iou = 0.0; + + for (i = 0; i < u32BboxNum && u32Num < u32MaxRoiNum; i++) + { + if(pstBbox[i].u32Mask == 0 ) + { + u32Num++; + for(j= i+1;j< u32BboxNum; j++) + { + if( pstBbox[j].u32Mask == 0 ) + { + f64Iou = SVP_NNIE_Yolov2_Iou(&pstBbox[i],&pstBbox[j]); + if(f64Iou >= (HI_DOUBLE)u32NmsThresh/SAMPLE_SVP_NNIE_QUANT_BASE) + { + pstBbox[j].u32Mask = 1; + } + } + } + } + } + + return HI_SUCCESS; +} + +/***************************************************************************** +* Prototype : SVP_NNIE_GetMaxVal +* Description : get max score value +* Input : HI_FLOAT *pf32Val [IN] input score +* HI_U32 u32Num [IN] score num +* HI_U32 * pu32MaxValueIndex [OUT] the class index of max score +* +* Output : +* Return Value : HI_FLOAT: max score. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-14 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_FLOAT SVP_NNIE_GetMaxVal(HI_FLOAT *pf32Val,HI_U32 u32Num, + HI_U32 * pu32MaxValueIndex) +{ + HI_U32 i = 0; + HI_FLOAT f32MaxTmp = 0; + + f32MaxTmp = pf32Val[0]; + *pu32MaxValueIndex = 0; + for(i = 1;i < u32Num;i++) + { + if(pf32Val[i] > f32MaxTmp) + { + f32MaxTmp = pf32Val[i]; + *pu32MaxValueIndex = i; + } + } + + return f32MaxTmp; +} + +/***************************************************************************** +* Prototype : SVP_NNIE_Yolov2_GetResult +* Description : Yolov2 GetResult function +* Input : HI_S32 *ps32InputData [IN] pointer to the input data memory +* HI_U32 u32GridNumWidth [IN] Grid num in width direction +* HI_U32 u32GridNumHeight [IN] Grid num in height direction +* HI_U32 u32EachGridBbox [IN] Bbox num of each gird +* HI_U32 u32ClassNum [IN] class num +* HI_U32 u32SrcWidth [IN] input image width +* HI_U32 u32SrcHeight [IN] input image height +* HI_U32 u32MaxRoiNum [IN] Max output roi num +* HI_U32 u32NmsThresh [IN] NMS thresh +* HI_U32* pu32TmpBuf [IN] assist buffer +* HI_S32 *ps32DstScores [OUT] dst score +* HI_S32 *ps32DstRoi [OUT] dst roi +* HI_S32 *ps32ClassRoiNum [OUT] class roi num +* +* Output : +* Return Value : HI_FLOAT: max score value. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-14 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Yolov2_GetResult(HI_S32 *ps32InputData,HI_U32 u32GridNumWidth, + HI_U32 u32GridNumHeight,HI_U32 u32EachGridBbox,HI_U32 u32ClassNum,HI_U32 u32SrcWidth, + HI_U32 u32SrcHeight,HI_U32 u32MaxRoiNum,HI_U32 u32NmsThresh,HI_U32 u32ConfThresh, + HI_FLOAT af32Bias[],HI_U32* pu32TmpBuf,HI_S32 *ps32DstScores, HI_S32 *ps32DstRoi, + HI_S32 *ps32ClassRoiNum) +{ + HI_U32 u32GridNum = u32GridNumWidth*u32GridNumHeight; + HI_U32 u32ParaNum = (SAMPLE_SVP_NNIE_COORDI_NUM+1+u32ClassNum); + HI_U32 u32TotalBboxNum = u32GridNum*u32EachGridBbox; + HI_U32 u32CStep = u32GridNum; + HI_U32 u32HStep = u32GridNumWidth; + HI_U32 u32BoxsNum = 0; + HI_FLOAT *pf32BoxTmp = NULL; + HI_FLOAT *f32InputData = NULL; + HI_FLOAT f32ObjScore = 0.0; + HI_FLOAT f32MaxScore = 0.0; + HI_S32 s32Score = 0; + HI_U32 u32MaxValueIndex = 0; + HI_U32 h = 0,w = 0,n = 0; + HI_U32 c = 0,k = 0,i=0; + HI_U32 u32Index= 0; + HI_FLOAT x,y,f32Width,f32Height; + HI_U32 u32AssistBuffSize = u32TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_STACK_S); + HI_U32 u32BoxBuffSize = u32TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S); + HI_U32 u32BoxResultNum = 0; + SAMPLE_SVP_NNIE_STACK_S *pstAssistStack = NULL; + SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBox = NULL; + + /*store float type data*/ + f32InputData = (HI_FLOAT*)pu32TmpBuf; + /*assist buffer for sort*/ + pstAssistStack = (SAMPLE_SVP_NNIE_STACK_S*)(f32InputData+u32TotalBboxNum*u32ParaNum); + /*assit box buffer*/ + pstBox = (SAMPLE_SVP_NNIE_YOLOV2_BBOX_S*)((HI_U8*)pstAssistStack+u32AssistBuffSize); + /*box tmp buffer*/ + pf32BoxTmp = (HI_FLOAT*)((HI_U8*)pstBox + u32BoxBuffSize); + + for(i = 0;i < u32TotalBboxNum*u32ParaNum;i++) + { + f32InputData[i] = (HI_FLOAT)(ps32InputData[i])/SAMPLE_SVP_NNIE_QUANT_BASE; + } + + //permute + for(h = 0; h< u32GridNumHeight;h++) + { + for(w = 0;w < u32GridNumWidth;w++) + { + for(c = 0;c < u32EachGridBbox*u32ParaNum;c++) + { + pf32BoxTmp[n++] = f32InputData[c*u32CStep + h*u32HStep + w]; + } + } + } + + for(n = 0;n < u32GridNum ;n++) + { + //Grid + w = n % u32GridNumWidth; + h = n / u32GridNumWidth; + for(k = 0;k u32ConfThresh) + { + pstBox[u32BoxsNum].f32Xmin = (HI_FLOAT)(x - f32Width* SAMPLE_SVP_NNIE_HALF); + pstBox[u32BoxsNum].f32Xmax = (HI_FLOAT)(x + f32Width* SAMPLE_SVP_NNIE_HALF); + pstBox[u32BoxsNum].f32Ymin = (HI_FLOAT)(y - f32Height* SAMPLE_SVP_NNIE_HALF); + pstBox[u32BoxsNum].f32Ymax = (HI_FLOAT)(y + f32Height* SAMPLE_SVP_NNIE_HALF); + pstBox[u32BoxsNum].s32ClsScore = s32Score; + pstBox[u32BoxsNum].u32ClassIdx = u32MaxValueIndex+1; + pstBox[u32BoxsNum].u32Mask = 0; + u32BoxsNum++; + } + } + } + //quick_sort + if(u32BoxsNum > 1) + { + SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32*)pstBox,0,u32BoxsNum-1,sizeof(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S)/sizeof(HI_S32), + 4,pstAssistStack); + } + //Nms + SVP_NNIE_Yolov2_NonMaxSuppression(pstBox,u32BoxsNum,u32NmsThresh,u32MaxRoiNum); + //Get the result + memset((void*)ps32ClassRoiNum,0,(u32ClassNum+1)*sizeof(HI_U32)); + for(i = 1; i < u32ClassNum+1; i++) + { + for(n = 0;n < u32BoxsNum && u32BoxResultNum < u32MaxRoiNum; n++) + { + if(0 == pstBox[n].u32Mask && i == pstBox[n].u32ClassIdx) + { + *(ps32DstRoi++) = (HI_S32)SAMPLE_SVP_NNIE_MAX(pstBox[n].f32Xmin * u32SrcWidth , 0); + *(ps32DstRoi++) = (HI_S32)SAMPLE_SVP_NNIE_MAX(pstBox[n].f32Ymin * u32SrcHeight ,0); + *(ps32DstRoi++) = (HI_S32)SAMPLE_SVP_NNIE_MIN(pstBox[n].f32Xmax * u32SrcWidth , u32SrcWidth); + *(ps32DstRoi++) = (HI_S32)SAMPLE_SVP_NNIE_MIN(pstBox[n].f32Ymax * u32SrcHeight , u32SrcHeight); + *(ps32DstScores++) = pstBox[n].s32ClsScore; + *(ps32ClassRoiNum+pstBox[n].u32ClassIdx)=*(ps32ClassRoiNum+pstBox[n].u32ClassIdx)+1; + u32BoxResultNum++; + } + } + } + return HI_SUCCESS; +} + + +/***************************************************************************** +* Prototype : SVP_NNIE_Yolov3_GetResult +* Description : Yolov3 GetResult function +* Input : HI_S32 **pps32InputData [IN] pointer to the input data +* HI_U32 au32GridNumWidth[] [IN] Grid num in width direction +* HI_U32 au32GridNumHeight[] [IN] Grid num in height direction +* HI_U32 au32Stride[] [IN] stride of input data +* HI_U32 u32EachGridBbox [IN] Bbox num of each gird +* HI_U32 u32ClassNum [IN] class num +* HI_U32 u32SrcWidth [IN] input image width +* HI_U32 u32SrcHeight [IN] input image height +* HI_U32 u32MaxRoiNum [IN] Max output roi num +* HI_U32 u32NmsThresh [IN] NMS thresh +* HI_U32 u32ConfThresh [IN] conf thresh +* HI_U32 af32Bias[][] [IN] bias +* HI_U32* pu32TmpBuf [IN] assist buffer +* HI_S32 *ps32DstScores [OUT] dst score +* HI_S32 *ps32DstRoi [OUT] dst roi +* HI_S32 *ps32ClassRoiNum [OUT] class roi num +* +* Output : +* Return Value : HI_FLOAT: max score value. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-14 +* Author : +* Modification : Create +* +*****************************************************************************/ +static HI_S32 SVP_NNIE_Yolov3_GetResult(HI_U64 au64InputBlobAddr[],HI_U32 au32GridNumWidth[], + HI_U32 au32GridNumHeight[],HI_U32 au32Stride[],HI_U32 u32EachGridBbox,HI_U32 u32ClassNum,HI_U32 u32SrcWidth, + HI_U32 u32SrcHeight,HI_U32 u32MaxRoiNum,HI_U32 u32NmsThresh,HI_U32 u32ConfThresh, + HI_FLOAT af32Bias[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM], + HI_S32* ps32TmpBuf,HI_S32 *ps32DstScore, HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum) +{ + HI_S32 *ps32InputBlob = NULL; + HI_FLOAT *pf32Permute = NULL; + SAMPLE_SVP_NNIE_YOLOV3_BBOX_S *pstBbox = NULL; + HI_S32 *ps32AssistBuf = NULL; + HI_U32 u32TotalBboxNum = 0; + HI_U32 u32ChnOffset = 0; + HI_U32 u32HeightOffset = 0; + HI_U32 u32BboxNum = 0; + HI_U32 u32GridXIdx; + HI_U32 u32GridYIdx; + HI_U32 u32Offset; + HI_FLOAT f32StartX; + HI_FLOAT f32StartY; + HI_FLOAT f32Width; + HI_FLOAT f32Height; + HI_FLOAT f32ObjScore; + HI_U32 u32MaxValueIndex = 0; + HI_FLOAT f32MaxScore; + HI_S32 s32ClassScore; + HI_U32 u32ClassRoiNum; + HI_U32 i = 0, j = 0, k = 0, c = 0, h = 0, w = 0; + HI_U32 u32BlobSize = 0; + HI_U32 u32MaxBlobSize = 0; + + for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) + { + u32BlobSize = au32GridNumWidth[i]*au32GridNumHeight[i]*sizeof(HI_U32)* + SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM*u32EachGridBbox; + if(u32MaxBlobSize < u32BlobSize) + { + u32MaxBlobSize = u32BlobSize; + } + } + + for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) + { + u32TotalBboxNum += au32GridNumWidth[i]*au32GridNumHeight[i]*u32EachGridBbox; + } + + //get each tmpbuf addr + pf32Permute = (HI_FLOAT*)ps32TmpBuf; + pstBbox = (SAMPLE_SVP_NNIE_YOLOV3_BBOX_S*)(pf32Permute+u32MaxBlobSize/sizeof(HI_S32)); + ps32AssistBuf = (HI_S32*)(pstBbox+u32TotalBboxNum); + + for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) + { + //permute + u32Offset = 0; + ps32InputBlob = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, au64InputBlobAddr[i]); + u32ChnOffset = au32GridNumHeight[i]*au32Stride[i]/sizeof(HI_S32); + u32HeightOffset = au32Stride[i]/sizeof(HI_S32); + for (h = 0; h < au32GridNumHeight[i]; h++) + { + for (w = 0; w < au32GridNumWidth[i]; w++) + { + for (c = 0; c < SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM*u32EachGridBbox; c++) + { + pf32Permute[u32Offset++] = (HI_FLOAT)(ps32InputBlob[c*u32ChnOffset+h*u32HeightOffset+w]) / SAMPLE_SVP_NNIE_QUANT_BASE; + } + } + } + + //decode bbox and calculate score + for(j = 0; j < au32GridNumWidth[i]*au32GridNumHeight[i]; j++) + { + u32GridXIdx = j % au32GridNumWidth[i]; + u32GridYIdx = j / au32GridNumWidth[i]; + for (k = 0; k < u32EachGridBbox; k++) + { + u32MaxValueIndex = 0; + u32Offset = (j * u32EachGridBbox + k) * SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM; + //decode bbox + f32StartX = ((HI_FLOAT)u32GridXIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 0])) / au32GridNumWidth[i]; + f32StartY = ((HI_FLOAT)u32GridYIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 1])) / au32GridNumHeight[i]; + f32Width = (HI_FLOAT)(exp(pf32Permute[u32Offset + 2]) * af32Bias[i][2*k]) / u32SrcWidth; + f32Height = (HI_FLOAT)(exp(pf32Permute[u32Offset + 3]) * af32Bias[i][2*k + 1]) / u32SrcHeight; + + //calculate score + (void)SVP_NNIE_Sigmoid(&pf32Permute[u32Offset + 4], (u32ClassNum+1)); + f32ObjScore = pf32Permute[u32Offset + 4]; + f32MaxScore = SVP_NNIE_GetMaxVal(&pf32Permute[u32Offset + 5], u32ClassNum, &u32MaxValueIndex); + s32ClassScore = (HI_S32)(f32MaxScore * f32ObjScore*SAMPLE_SVP_NNIE_QUANT_BASE); + + //filter low score roi + if (s32ClassScore > u32ConfThresh) + { + pstBbox[u32BboxNum].f32Xmin= (HI_FLOAT)(f32StartX - f32Width * 0.5f); + pstBbox[u32BboxNum].f32Ymin= (HI_FLOAT)(f32StartY - f32Height * 0.5f); + pstBbox[u32BboxNum].f32Xmax= (HI_FLOAT)(f32StartX + f32Width * 0.5f); + pstBbox[u32BboxNum].f32Ymax= (HI_FLOAT)(f32StartY + f32Height * 0.5f); + pstBbox[u32BboxNum].s32ClsScore = s32ClassScore; + pstBbox[u32BboxNum].u32Mask= 0; + pstBbox[u32BboxNum].u32ClassIdx = (HI_S32)(u32MaxValueIndex+1); + u32BboxNum++; + } + } + } + } + + //quick sort + (void)SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32*)pstBbox, 0, u32BboxNum - 1, + sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S)/sizeof(HI_U32),4,(SAMPLE_SVP_NNIE_STACK_S*)ps32AssistBuf); + + //Yolov3 and Yolov2 have the same Nms operation + (void)SVP_NNIE_Yolov2_NonMaxSuppression(pstBbox, u32BboxNum, u32NmsThresh, sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S)/sizeof(HI_U32)); + + //Get result + for (i = 1; i < u32ClassNum; i++) + { + u32ClassRoiNum = 0; + for(j = 0; j < u32BboxNum; j++) + { + if ((0 == pstBbox[j].u32Mask) && (i == pstBbox[j].u32ClassIdx) && (u32ClassRoiNum < u32MaxRoiNum)) + { + *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Xmin*u32SrcWidth), 0); + *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Ymin*u32SrcHeight), 0); + *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Xmax*u32SrcWidth), u32SrcWidth); + *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Ymax*u32SrcHeight), u32SrcHeight); + *(ps32DstScore++) = pstBbox[j].s32ClsScore; + u32ClassRoiNum++; + } + } + *(ps32ClassRoiNum+i) = u32ClassRoiNum; + } + + return HI_SUCCESS; +} + + +/***************************************************************************** +* Prototype : SAMPLE_COMM_SVP_NNIE_CnnGetTopN +* Description : Cnn GetTopN +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to Cnn NNIE parameter +* SAMPLE_SVP_NNIE_CNN_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to Cnn software parameter +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-14 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_S32 SAMPLE_SVP_NNIE_Cnn_GetTopN(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_CNN_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_S32 s32Ret = HI_SUCCESS; + s32Ret = SVP_NNIE_Cnn_GetTopN( + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstNnieParam->astSegData[0].astDst[0].u64VirAddr), + pstNnieParam->astSegData[0].astDst[0].u32Stride, + pstNnieParam->astSegData[0].astDst[0].unShape.stWhc.u32Width, + pstNnieParam->astSegData[0].astDst[0].u32Num, + pstSoftwareParam->u32TopN, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stAssistBuf.u64VirAddr), + pstSoftwareParam->stGetTopN.u32Stride, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stGetTopN.u64VirAddr)); + SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,SVP_NNIE_Cnn_GetTopN failed!\n"); + return s32Ret; +} + + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_RpnTmpBufSize +* Description : this function is used to get RPN func's assist buffer size +* Input : HI_U32 u32NumRatioAnchors [IN] ratio anchor num +* HI_U32 u32NumScaleAnchors [IN] scale anchor num +* HI_U32 u32ConvHeight [IN] convolution height +* HI_U32 u32ConvWidth [IN] convolution width +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +****************************************************************************/ +HI_U32 SAMPLE_SVP_NNIE_RpnTmpBufSize(HI_U32 u32NumRatioAnchors, + HI_U32 u32NumScaleAnchors, HI_U32 u32ConvHeight, HI_U32 u32ConvWidth) +{ + HI_U32 u32AnchorsNum = u32NumRatioAnchors * u32NumScaleAnchors * u32ConvHeight * u32ConvWidth; + HI_U32 u32AnchorsSize = sizeof(HI_U32) * SAMPLE_SVP_NNIE_COORDI_NUM * u32AnchorsNum; + HI_U32 u32BboxDeltaSize = u32AnchorsSize; + HI_U32 u32ProposalSize = sizeof(HI_U32) * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32AnchorsNum; + HI_U32 u32RatioAnchorsSize = sizeof(HI_FLOAT) * u32NumRatioAnchors * SAMPLE_SVP_NNIE_COORDI_NUM; + HI_U32 u32ScaleAnchorsSize = sizeof(HI_FLOAT) * u32NumRatioAnchors * u32NumScaleAnchors * SAMPLE_SVP_NNIE_COORDI_NUM; + HI_U32 u32ScoreSize = sizeof(HI_FLOAT) * u32AnchorsNum * 2; + HI_U32 u32StackSize = sizeof( SAMPLE_SVP_NNIE_STACK_S ) * u32AnchorsNum; + HI_U32 u32TotalSize = u32AnchorsSize + u32BboxDeltaSize + u32ProposalSize + u32RatioAnchorsSize + u32ScaleAnchorsSize + u32ScoreSize + u32StackSize; + return u32TotalSize; +} + + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_FasterRcnn_Rpn +* Description : this function is used to do RPN +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to FasterRcnn NNIE parameter +* SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to FasterRcnn software parameter +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_S32 SAMPLE_SVP_NNIE_FasterRcnn_Rpn(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_S32 s32Ret = HI_SUCCESS; + s32Ret = SVP_NNIE_Rpn(pstSoftwareParam->aps32Conv,pstSoftwareParam->u32NumRatioAnchors, + pstSoftwareParam->u32NumScaleAnchors,pstSoftwareParam->au32Scales, + pstSoftwareParam->au32Ratios,pstSoftwareParam->u32OriImHeight, + pstSoftwareParam->u32OriImWidth,pstSoftwareParam->au32ConvHeight, + pstSoftwareParam->au32ConvWidth,pstSoftwareParam->au32ConvChannel, + pstSoftwareParam->u32ConvStride,pstSoftwareParam->u32MaxRoiNum, + pstSoftwareParam->u32MinSize,pstSoftwareParam->u32SpatialScale, + pstSoftwareParam->u32NmsThresh,pstSoftwareParam->u32FilterThresh, + pstSoftwareParam->u32NumBeforeNms, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32,pstSoftwareParam->stRpnTmpBuf.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stRpnBbox.u64VirAddr), + &pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height); + SAMPLE_COMM_SVP_FlushCache(pstSoftwareParam->stRpnBbox.u64PhyAddr, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID,pstSoftwareParam->stRpnBbox.u64VirAddr), + pstSoftwareParam->stRpnBbox.u32Num* + pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Chn* + pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height* + pstSoftwareParam->stRpnBbox.u32Stride); + SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,SVP_NNIE_Rpn failed!\n"); + return s32Ret; +} + + + + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Pvanet_Rpn +* Description : this function is used to do RPN +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to FasterRcnn NNIE parameter +* SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to FasterRcnn software parameter +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_S32 SAMPLE_SVP_NNIE_Pvanet_Rpn(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_S32 s32Ret = HI_SUCCESS; + SAMPLE_SVP_NIE_PERF_STAT_DEF_VAR() + + SAMPLE_SVP_NNIE_PERF_STAT_RPN_CLREAR() + SAMPLE_SVP_NNIE_PERF_STAT_BEGIN() + s32Ret = SVP_NNIE_Rpn(pstSoftwareParam->aps32Conv,pstSoftwareParam->u32NumRatioAnchors, + pstSoftwareParam->u32NumScaleAnchors,pstSoftwareParam->au32Scales, + pstSoftwareParam->au32Ratios,pstSoftwareParam->u32OriImHeight, + pstSoftwareParam->u32OriImWidth,pstSoftwareParam->au32ConvHeight, + pstSoftwareParam->au32ConvWidth,pstSoftwareParam->au32ConvChannel, + pstSoftwareParam->u32ConvStride,pstSoftwareParam->u32MaxRoiNum, + pstSoftwareParam->u32MinSize,pstSoftwareParam->u32SpatialScale, + pstSoftwareParam->u32NmsThresh,pstSoftwareParam->u32FilterThresh, + pstSoftwareParam->u32NumBeforeNms,SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32,pstSoftwareParam->stRpnTmpBuf.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stRpnBbox.u64VirAddr), + &pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height); + + SAMPLE_SVP_NNIE_PERF_STAT_END() + SAMPLE_SVP_NNIE_PERF_STAT_RPN_OP_TIME() + + SAMPLE_SVP_NNIE_PERF_STAT_BEGIN() + SAMPLE_COMM_SVP_FlushCache(pstSoftwareParam->stRpnBbox.u64PhyAddr, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstSoftwareParam->stRpnBbox.u64VirAddr), + pstSoftwareParam->stRpnBbox.u32Num* + pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Chn* + pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height* + pstSoftwareParam->stRpnBbox.u32Stride); + SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,SVP_NNIE_Rpn failed!\n"); + SAMPLE_SVP_NNIE_PERF_STAT_END() + SAMPLE_SVP_NNIE_PERF_STAT_RPN_AFTER_DST_FLUSH_TIME() + return s32Ret; +} + + + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_FasterRcnn_GetResultTmpBufSize +* Description : this function is used to get tmp buffer size for FasterRcnn_GetResult func +* Input : HI_U32 u32MaxRoiNum [IN] max roi num +* HI_U32 u32ClassNum [IN] class num +* +* +* +* +* Output : +* Return Value : HI_U32: tmp buffer size +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_U32 SAMPLE_SVP_NNIE_FasterRcnn_GetResultTmpBufSize(HI_U32 u32MaxRoiNum, HI_U32 u32ClassNum) +{ + HI_U32 u32ScoreSize = sizeof(HI_FLOAT) * u32MaxRoiNum * u32ClassNum; + HI_U32 u32ProposalSize = sizeof(HI_U32) * u32MaxRoiNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH; + HI_U32 u32StackSize = sizeof(SAMPLE_SVP_NNIE_STACK_S) * u32MaxRoiNum; + HI_U32 u32TotalSize = u32ScoreSize + u32ProposalSize + u32StackSize; + return u32TotalSize; +} + + + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Pvanet_GetResultTmpBufSize +* Description : this function is used to get tmp buffer size for FasterRcnn_GetResult func +* Input : HI_U32 u32MaxRoiNum [IN] max roi num +* HI_U32 u32ClassNum [IN] class num +* +* +* +* +* Output : +* Return Value : HI_U32: tmp buffer size +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_U32 SAMPLE_SVP_NNIE_Pvanet_GetResultTmpBufSize(HI_U32 u32MaxRoiNum, HI_U32 u32ClassNum) +{ + HI_U32 u32ScoreSize = sizeof(HI_FLOAT) * u32MaxRoiNum * u32ClassNum; + HI_U32 u32ProposalSize = sizeof(HI_U32) * u32MaxRoiNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH; + HI_U32 u32StackSize = sizeof(SAMPLE_SVP_NNIE_STACK_S) * u32MaxRoiNum; + HI_U32 u32TotalSize = u32ScoreSize + u32ProposalSize + u32StackSize; + return u32TotalSize; +} + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_FasterRcnn_GetResult +* Description : this function is used to get FasterRcnn result +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to FasterRcnn NNIE parameter +* SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to FasterRcnn software parameter +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_S32 SAMPLE_SVP_NNIE_FasterRcnn_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_S32 s32Ret = HI_SUCCESS; + HI_U32 i = 0; + HI_S32* ps32Proposal = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stRpnBbox.u64VirAddr); + SAMPLE_SVP_CHECK_EXPR_RET(0 == pstSoftwareParam->stRpnBbox.u64VirAddr,HI_INVALID_VALUE, + SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,pstSoftwareParam->stRpnBbox.u64VirAddr can't be 0!\n"); + + for(i = 0; i < pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height; i++) + { + *(ps32Proposal+SAMPLE_SVP_NNIE_COORDI_NUM*i) /= SAMPLE_SVP_NNIE_QUANT_BASE; + *(ps32Proposal+SAMPLE_SVP_NNIE_COORDI_NUM*i+1) /= SAMPLE_SVP_NNIE_QUANT_BASE; + *(ps32Proposal+SAMPLE_SVP_NNIE_COORDI_NUM*i+2) /= SAMPLE_SVP_NNIE_QUANT_BASE; + *(ps32Proposal+SAMPLE_SVP_NNIE_COORDI_NUM*i+3) /= SAMPLE_SVP_NNIE_QUANT_BASE; + } + s32Ret = SVP_NNIE_FasterRcnn_GetResult( + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstNnieParam->astSegData[1].astDst[0].u64VirAddr), + pstNnieParam->astSegData[1].astDst[0].u32Stride, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstNnieParam->astSegData[1].astDst[1].u64VirAddr), + pstNnieParam->astSegData[1].astDst[1].u32Stride, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stRpnBbox.u64VirAddr), + pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height, + pstSoftwareParam->au32ConfThresh,pstSoftwareParam->u32ValidNmsThresh, + pstSoftwareParam->u32MaxRoiNum,pstSoftwareParam->u32ClassNum, + pstSoftwareParam->u32OriImWidth,pstSoftwareParam->u32OriImHeight, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32,pstSoftwareParam->stGetResultTmpBuf.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstScore.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstRoi.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stClassRoiNum.u64VirAddr)); + + return s32Ret; +} +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Pvanet_GetResult +* Description : this function is used to get FasterRcnn result +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to FasterRcnn NNIE parameter +* SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to FasterRcnn software parameter +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_S32 SAMPLE_SVP_NNIE_Pvanet_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_S32 s32Ret = HI_SUCCESS; + HI_U32 i = 0; + HI_S32* ps32Proposal = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stRpnBbox.u64VirAddr); + SAMPLE_SVP_CHECK_EXPR_RET(0 == pstSoftwareParam->stRpnBbox.u64VirAddr,HI_INVALID_VALUE, + SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,pstSoftwareParam->stRpnBbox.u64VirAddr can't be 0!\n"); + + for(i = 0; i < pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height; i++) + { + *(ps32Proposal+SAMPLE_SVP_NNIE_COORDI_NUM*i) /= SAMPLE_SVP_NNIE_QUANT_BASE; + *(ps32Proposal+SAMPLE_SVP_NNIE_COORDI_NUM*i+1) /= SAMPLE_SVP_NNIE_QUANT_BASE; + *(ps32Proposal+SAMPLE_SVP_NNIE_COORDI_NUM*i+2) /= SAMPLE_SVP_NNIE_QUANT_BASE; + *(ps32Proposal+SAMPLE_SVP_NNIE_COORDI_NUM*i+3) /= SAMPLE_SVP_NNIE_QUANT_BASE; + } + s32Ret = SVP_NNIE_Pvanet_GetResult( + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstNnieParam->astSegData[1].astDst[0].u64VirAddr), + pstNnieParam->astSegData[1].astDst[0].u32Stride, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstNnieParam->astSegData[1].astDst[1].u64VirAddr), + pstNnieParam->astSegData[1].astDst[1].u32Stride, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stRpnBbox.u64VirAddr), + pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height, + pstSoftwareParam->au32ConfThresh,pstSoftwareParam->u32ValidNmsThresh, + pstSoftwareParam->u32MaxRoiNum,pstSoftwareParam->u32ClassNum, + pstSoftwareParam->u32OriImWidth,pstSoftwareParam->u32OriImHeight, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32,pstSoftwareParam->stGetResultTmpBuf.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstScore.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstRoi.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stClassRoiNum.u64VirAddr)); + + return s32Ret; +} + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Rfcn_GetResultTmpBuf +* Description : this function is used to get tmp buffer size for RFCN_GetResult func +* Input : HI_U32 u32MaxRoiNum [IN] Max Roi num +* HI_U32 u32ClassNum [IN] class num +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_U32 SAMPLE_SVP_NNIE_Rfcn_GetResultTmpBuf(HI_U32 u32MaxRoiNum, HI_U32 u32ClassNum) +{ + HI_U32 u32ScoreSize = sizeof(HI_FLOAT) * u32MaxRoiNum * u32ClassNum; + HI_U32 u32ProposalSize = sizeof(HI_U32) * u32MaxRoiNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH; + HI_U32 u32BboxSize = sizeof(HI_U32) * u32MaxRoiNum * SAMPLE_SVP_NNIE_COORDI_NUM; + HI_U32 u32StackSize = sizeof(SAMPLE_SVP_NNIE_STACK_S) * u32MaxRoiNum; + HI_U32 u32TotalSize = u32ScoreSize + u32ProposalSize + u32BboxSize+u32StackSize; + return u32TotalSize; +} + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Rfcn_Rpn +* Description : this function is used to do rpn +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to RFCN NNIE parameter +* SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to RFCN software parameter +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_S32 SAMPLE_SVP_NNIE_Rfcn_Rpn(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_S32 s32Ret = HI_SUCCESS; + SAMPLE_SVP_NIE_PERF_STAT_DEF_VAR() + + SAMPLE_SVP_NNIE_PERF_STAT_RPN_CLREAR() + SAMPLE_SVP_NNIE_PERF_STAT_BEGIN() + s32Ret = SVP_NNIE_Rpn(pstSoftwareParam->aps32Conv,pstSoftwareParam->u32NumRatioAnchors, + pstSoftwareParam->u32NumScaleAnchors,pstSoftwareParam->au32Scales, + pstSoftwareParam->au32Ratios,pstSoftwareParam->u32OriImHeight, + pstSoftwareParam->u32OriImWidth,pstSoftwareParam->au32ConvHeight, + pstSoftwareParam->au32ConvWidth,pstSoftwareParam->au32ConvChannel, + pstSoftwareParam->u32ConvStride,pstSoftwareParam->u32MaxRoiNum, + pstSoftwareParam->u32MinSize,pstSoftwareParam->u32SpatialScale, + pstSoftwareParam->u32NmsThresh,pstSoftwareParam->u32FilterThresh, + pstSoftwareParam->u32NumBeforeNms, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32,pstSoftwareParam->stRpnTmpBuf.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stRpnBbox.u64VirAddr), + &pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height); + + SAMPLE_SVP_NNIE_PERF_STAT_END() + SAMPLE_SVP_NNIE_PERF_STAT_RPN_OP_TIME() + + SAMPLE_SVP_NNIE_PERF_STAT_BEGIN() + SAMPLE_COMM_SVP_FlushCache(pstSoftwareParam->stRpnBbox.u64PhyAddr, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID,pstSoftwareParam->stRpnBbox.u64VirAddr), + pstSoftwareParam->stRpnBbox.u32Num* + pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Chn* + pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height* + pstSoftwareParam->stRpnBbox.u32Stride); + SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,SVP_NNIE_Rpn failed!\n"); + + SAMPLE_SVP_NNIE_PERF_STAT_END() + SAMPLE_SVP_NNIE_PERF_STAT_RPN_AFTER_DST_FLUSH_TIME() + return s32Ret; +} + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Rfcn_GetResult +* Description : this function is used to Get RFCN Result +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to RFCN NNIE parameter +* SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to RFCN software parameter +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_S32 SAMPLE_SVP_NNIE_Rfcn_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_S32 s32Ret = HI_SUCCESS; + HI_U32 i = 0; + HI_S32* ps32Proposal = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stRpnBbox.u64VirAddr); + + SAMPLE_SVP_CHECK_EXPR_RET(0 == pstSoftwareParam->stRpnBbox.u64VirAddr,HI_INVALID_VALUE, + SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,pstSoftwareParam->stRpnBbox.u64VirAddr can't be 0!\n"); + for(i = 0; i < pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height; i++) + { + *(ps32Proposal+SAMPLE_SVP_NNIE_COORDI_NUM*i) /= SAMPLE_SVP_NNIE_QUANT_BASE; + *(ps32Proposal+SAMPLE_SVP_NNIE_COORDI_NUM*i+1) /= SAMPLE_SVP_NNIE_QUANT_BASE; + *(ps32Proposal+SAMPLE_SVP_NNIE_COORDI_NUM*i+2) /= SAMPLE_SVP_NNIE_QUANT_BASE; + *(ps32Proposal+SAMPLE_SVP_NNIE_COORDI_NUM*i+3) /= SAMPLE_SVP_NNIE_QUANT_BASE; + } + s32Ret = SVP_NNIE_Rfcn_GetResult( + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstNnieParam->astSegData[1].astDst[0].u64VirAddr), + pstNnieParam->astSegData[1].astDst[0].u32Stride, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstNnieParam->astSegData[2].astDst[0].u64VirAddr), + pstNnieParam->astSegData[2].astDst[0].u32Stride, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stRpnBbox.u64VirAddr), + pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height, + pstSoftwareParam->au32ConfThresh,pstSoftwareParam->u32MaxRoiNum, + pstSoftwareParam->u32ClassNum,pstSoftwareParam->u32OriImWidth, + pstSoftwareParam->u32OriImHeight,pstSoftwareParam->u32ValidNmsThresh, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32,pstSoftwareParam->stGetResultTmpBuf.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstScore.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstRoi.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stClassRoiNum.u64VirAddr)); + SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,SVP_NNIE_Rfcn_GetResult failed!\n"); + return s32Ret; +} + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Ssd_GetResultTmpBuf +* Description : this function is used to Get SSD GetResult tmp buffer size +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to SSD NNIE parameter +* SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to SSD software parameter +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_U32 SAMPLE_SVP_NNIE_Ssd_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_U32 u32PriorBoxSize = 0; + HI_U32 u32SoftMaxSize = 0; + HI_U32 u32DetectionSize = 0; + HI_U32 u32TotalSize = 0; + HI_U32 u32PriorNum = 0; + HI_U32 i = 0; + + /*priorbox size*/ + for(i = 0; i < pstNnieParam->pstModel->astSeg[0].u16DstNum/2; i++) + { + u32PriorBoxSize += pstSoftwareParam->au32PriorBoxHeight[i]*pstSoftwareParam->au32PriorBoxWidth[i]* + SAMPLE_SVP_NNIE_COORDI_NUM*2*(pstSoftwareParam->u32MaxSizeNum+pstSoftwareParam->u32MinSizeNum+ + pstSoftwareParam->au32InputAspectRatioNum[i]*2*pstSoftwareParam->u32MinSizeNum)*sizeof(HI_U32); + } + pstSoftwareParam->stPriorBoxTmpBuf.u32Size = u32PriorBoxSize; + u32TotalSize+=u32PriorBoxSize; + + /*softmax size*/ + for(i = 0; i < pstSoftwareParam->u32ConcatNum; i++) + { + u32SoftMaxSize += pstSoftwareParam->au32SoftMaxInChn[i]*sizeof(HI_U32); + } + pstSoftwareParam->stSoftMaxTmpBuf.u32Size = u32SoftMaxSize; + u32TotalSize+=u32SoftMaxSize; + + /*detection size*/ + for(i = 0; i < pstSoftwareParam->u32ConcatNum; i++) + { + u32PriorNum+=pstSoftwareParam->au32DetectInputChn[i]/SAMPLE_SVP_NNIE_COORDI_NUM; + } + u32DetectionSize+=u32PriorNum*SAMPLE_SVP_NNIE_COORDI_NUM*sizeof(HI_U32); + u32DetectionSize+=u32PriorNum*SAMPLE_SVP_NNIE_PROPOSAL_WIDTH*sizeof(HI_U32)*2; + u32DetectionSize+=u32PriorNum*2*sizeof(HI_U32); + pstSoftwareParam->stGetResultTmpBuf.u32Size = u32DetectionSize; + u32TotalSize+=u32DetectionSize; + + return u32TotalSize; +} + + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Ssd_GetResult +* Description : this function is used to Get SSD result +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to SSD NNIE parameter +* SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to SSD software parameter +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_S32 SAMPLE_SVP_NNIE_Ssd_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_S32* aps32PermuteResult[SAMPLE_SVP_NNIE_SSD_REPORT_NODE_NUM]; + HI_S32* aps32PriorboxOutputData[SAMPLE_SVP_NNIE_SSD_PRIORBOX_NUM]; + HI_S32* aps32SoftMaxInputData[SAMPLE_SVP_NNIE_SSD_SOFTMAX_NUM]; + HI_S32* aps32DetectionLocData[SAMPLE_SVP_NNIE_SSD_SOFTMAX_NUM]; + HI_S32* ps32SoftMaxOutputData = NULL; + HI_S32* ps32DetectionOutTmpBuf = NULL; + HI_U32 au32SoftMaxWidth[SAMPLE_SVP_NNIE_SSD_SOFTMAX_NUM]; + HI_U32 u32Size = 0; + HI_S32 s32Ret = HI_SUCCESS; + HI_U32 i = 0; + + /*get permut result*/ + for(i = 0; i < SAMPLE_SVP_NNIE_SSD_REPORT_NODE_NUM; i++) + { + aps32PermuteResult[i] = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstNnieParam->astSegData[0].astDst[i].u64VirAddr); + } + + /*priorbox*/ + aps32PriorboxOutputData[0] = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stPriorBoxTmpBuf.u64VirAddr); + for (i = 1; i < SAMPLE_SVP_NNIE_SSD_PRIORBOX_NUM; i++) + { + u32Size = pstSoftwareParam->au32PriorBoxHeight[i-1]*pstSoftwareParam->au32PriorBoxWidth[i-1]* + SAMPLE_SVP_NNIE_COORDI_NUM*2*(pstSoftwareParam->u32MaxSizeNum+pstSoftwareParam->u32MinSizeNum+ + pstSoftwareParam->au32InputAspectRatioNum[i-1]*2*pstSoftwareParam->u32MinSizeNum); + aps32PriorboxOutputData[i] = aps32PriorboxOutputData[i - 1] + u32Size; + } + + for (i = 0; i < SAMPLE_SVP_NNIE_SSD_PRIORBOX_NUM; i++) + { + s32Ret = SVP_NNIE_Ssd_PriorBoxForward(pstSoftwareParam->au32PriorBoxWidth[i], + pstSoftwareParam->au32PriorBoxHeight[i], pstSoftwareParam->u32OriImWidth, + pstSoftwareParam->u32OriImHeight, pstSoftwareParam->af32PriorBoxMinSize[i], + pstSoftwareParam->u32MinSizeNum,pstSoftwareParam->af32PriorBoxMaxSize[i], + pstSoftwareParam->u32MaxSizeNum, pstSoftwareParam->bFlip, pstSoftwareParam->bClip, + pstSoftwareParam->au32InputAspectRatioNum[i],pstSoftwareParam->af32PriorBoxAspectRatio[i], + pstSoftwareParam->af32PriorBoxStepWidth[i],pstSoftwareParam->af32PriorBoxStepHeight[i], + pstSoftwareParam->f32Offset,pstSoftwareParam->as32PriorBoxVar, + aps32PriorboxOutputData[i]); + SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR, + "Error,SVP_NNIE_Ssd_PriorBoxForward failed!\n"); + } + + /*softmax*/ + ps32SoftMaxOutputData = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stSoftMaxTmpBuf.u64VirAddr); + for(i = 0; i < SAMPLE_SVP_NNIE_SSD_SOFTMAX_NUM; i++) + { + aps32SoftMaxInputData[i] = aps32PermuteResult[i*2+1]; + au32SoftMaxWidth[i] = pstSoftwareParam->au32ConvChannel[i*2+1]; + } + + (void)SVP_NNIE_Ssd_SoftmaxForward(pstSoftwareParam->u32SoftMaxInHeight, + pstSoftwareParam->au32SoftMaxInChn, pstSoftwareParam->u32ConcatNum, + pstSoftwareParam->au32ConvStride, au32SoftMaxWidth, + aps32SoftMaxInputData, ps32SoftMaxOutputData); + + /*detection*/ + ps32DetectionOutTmpBuf = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stGetResultTmpBuf.u64VirAddr); + for(i = 0; i < SAMPLE_SVP_NNIE_SSD_PRIORBOX_NUM; i++) + { + aps32DetectionLocData[i] = aps32PermuteResult[i*2]; + } + + (void)SVP_NNIE_Ssd_DetectionOutForward(pstSoftwareParam->u32ConcatNum, + pstSoftwareParam->u32ConfThresh,pstSoftwareParam->u32ClassNum, pstSoftwareParam->u32TopK, + pstSoftwareParam->u32KeepTopK, pstSoftwareParam->u32NmsThresh,pstSoftwareParam->au32DetectInputChn, + aps32DetectionLocData, aps32PriorboxOutputData, ps32SoftMaxOutputData, + ps32DetectionOutTmpBuf, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstScore.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstRoi.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stClassRoiNum.u64VirAddr)); + + return s32Ret; +} + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Yolov1_GetResultTmpBuf +* Description : this function is used to Get Yolov1 GetResult tmp buffer size +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to YOLOV1 NNIE parameter +* SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to YOLOV1 software parameter +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_U32 SAMPLE_SVP_NNIE_Yolov1_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_U32 u32TotalGridNum = pstSoftwareParam->u32GridNumHeight*pstSoftwareParam->u32GridNumWidth; + HI_U32 u32ClassNum = pstSoftwareParam->u32ClassNum; + HI_U32 u32EachGridBboxNum = pstSoftwareParam->u32BboxNumEachGrid; + HI_U32 u32TotalBboxNum = u32TotalGridNum*u32EachGridBboxNum; + HI_U32 u32TransSize = (u32ClassNum+u32EachGridBboxNum*(SAMPLE_SVP_NNIE_COORDI_NUM+1))* + u32TotalGridNum*sizeof(HI_U32); + HI_U32 u32Probsize = u32ClassNum*u32TotalBboxNum*sizeof(HI_U32); + HI_U32 u32ScoreSize = u32TotalBboxNum*sizeof(SAMPLE_SVP_NNIE_YOLOV1_SCORE_S); + HI_U32 u32StackSize = u32TotalBboxNum*sizeof(SAMPLE_SVP_NNIE_STACK_S); + HI_U32 u32TotalSize = u32TransSize+u32Probsize+u32ScoreSize+u32StackSize; + return u32TotalSize; +} + + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Yolov1_GetResult +* Description : this function is used to Get Yolov1 result +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to YOLOV1 NNIE parameter +* SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to YOLOV1 software parameter +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_S32 SAMPLE_SVP_NNIE_Yolov1_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_FLOAT *pf32ClassProb = NULL; + HI_FLOAT *pf32Confidence = NULL; + HI_FLOAT *pf32Bbox = NULL; + HI_S32 *ps32Score = NULL; + HI_U32 *pu32AssistBuf = NULL; + HI_U32 u32Offset = 0; + HI_U32 u32Index = 0; + HI_U32 u32GridNum = pstSoftwareParam->u32GridNumHeight*pstSoftwareParam->u32GridNumWidth; + HI_U32 i = 0, j = 0, k = 0; + HI_U8* pu8Tmp = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U8,pstSoftwareParam->stGetResultTmpBuf.u64VirAddr); + HI_FLOAT f32Score = 0.0f; + u32Offset = u32GridNum*(pstSoftwareParam->u32BboxNumEachGrid*5+pstSoftwareParam->u32ClassNum); + pf32ClassProb = (HI_FLOAT*)pu8Tmp; + pf32Confidence = pf32ClassProb + u32GridNum*pstSoftwareParam->u32ClassNum; + pf32Bbox = pf32Confidence + u32GridNum*pstSoftwareParam->u32BboxNumEachGrid; + + ps32Score = (HI_S32*)(pf32ClassProb+u32Offset); + pu32AssistBuf = (HI_U32*)(ps32Score+u32GridNum*pstSoftwareParam->u32BboxNumEachGrid* + pstSoftwareParam->u32ClassNum); + + for(i = 0; i < u32Offset; i++) + { + ((HI_FLOAT*)pu8Tmp)[i] = (SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstNnieParam->astSegData[0].astDst[0].u64VirAddr))[i] / ((HI_FLOAT)SAMPLE_SVP_NNIE_QUANT_BASE); + } + for(i = 0; i < u32GridNum; i++) + { + for(j = 0; j < pstSoftwareParam->u32BboxNumEachGrid; j++) + { + for(k = 0; k < pstSoftwareParam->u32ClassNum; k++) + { + u32Offset = k*u32GridNum*pstSoftwareParam->u32BboxNumEachGrid; + f32Score = *(pf32ClassProb+i*pstSoftwareParam->u32ClassNum+k)**(pf32Confidence+i*pstSoftwareParam->u32BboxNumEachGrid+j); + *(ps32Score+u32Offset+u32Index) = (HI_S32)(f32Score*SAMPLE_SVP_NNIE_QUANT_BASE); + } + u32Index++; + } + } + + for(i= 0; i < u32GridNum; i++) + { + for(j = 0; j < pstSoftwareParam->u32BboxNumEachGrid; j++) + { + pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + 0] = + (pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + 0] + + i % pstSoftwareParam->u32GridNumWidth) / pstSoftwareParam->u32GridNumWidth * pstSoftwareParam->u32OriImWidth; + pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + 1] = + (pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + 1] + + i / pstSoftwareParam->u32GridNumWidth) / pstSoftwareParam->u32GridNumHeight * pstSoftwareParam->u32OriImHeight; + pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + 2] = + pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + 2] * + pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + 2] * pstSoftwareParam->u32OriImWidth; + pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + 3] = + pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + 3] * + pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + 3] * pstSoftwareParam->u32OriImHeight; + } + } + + (void)SVP_NNIE_Yolov1_Detection(ps32Score, pf32Bbox, + pstSoftwareParam->u32ClassNum,u32GridNum,u32GridNum*pstSoftwareParam->u32BboxNumEachGrid, + pstSoftwareParam->u32ConfThresh,pstSoftwareParam->u32NmsThresh, + pstSoftwareParam->u32OriImWidth,pstSoftwareParam->u32OriImHeight,pu32AssistBuf, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstScore.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstRoi.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stClassRoiNum.u64VirAddr)); + return HI_SUCCESS; +} + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Yolov2_GetResultTmpBuf +* Description : this function is used to Get Yolov2 GetResult tmp buffer size +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to YOLOV2 NNIE parameter +* SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to YOLOV2 software parameter +* +* +* +* +* Output : +* Return Value : HI_U32: tmp buffer size. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_U32 SAMPLE_SVP_NNIE_Yolov2_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_U32 u32TotalGridNum = pstSoftwareParam->u32GridNumHeight*pstSoftwareParam->u32GridNumWidth; + HI_U32 u32ParaLength = pstSoftwareParam->u32BboxNumEachGrid*(SAMPLE_SVP_NNIE_COORDI_NUM+1+pstSoftwareParam->u32ClassNum); + HI_U32 u32TotalBboxNum = u32TotalGridNum*pstSoftwareParam->u32BboxNumEachGrid; + HI_U32 u32TransSize = u32TotalGridNum*u32ParaLength*sizeof(HI_U32); + HI_U32 u32BboxAssistBufSize = u32TotalBboxNum*sizeof(SAMPLE_SVP_NNIE_STACK_S); + HI_U32 u32BboxBufSize = u32TotalBboxNum*sizeof(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S); + HI_U32 u32BboxTmpBufSize = u32TotalGridNum*u32ParaLength*sizeof(HI_FLOAT); + HI_U32 u32TotalSize = u32TransSize+u32BboxAssistBufSize+u32BboxBufSize+u32BboxTmpBufSize; + return u32TotalSize; +} + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Yolov2_GetResult +* Description : this function is used to Get Yolov2 result +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to YOLOV2 NNIE parameter +* SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to YOLOV2 software parameter +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_S32 SAMPLE_SVP_NNIE_Yolov2_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + return SVP_NNIE_Yolov2_GetResult( + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstNnieParam->astSegData[0].astDst[0].u64VirAddr), + pstSoftwareParam->u32GridNumWidth, + pstSoftwareParam->u32GridNumHeight, + pstSoftwareParam->u32BboxNumEachGrid,pstSoftwareParam->u32ClassNum, + pstSoftwareParam->u32OriImWidth, + pstSoftwareParam->u32OriImHeight, + pstSoftwareParam->u32MaxRoiNum,pstSoftwareParam->u32NmsThresh, + pstSoftwareParam->u32ConfThresh,pstSoftwareParam->af32Bias, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32,pstSoftwareParam->stGetResultTmpBuf.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstScore.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstRoi.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stClassRoiNum.u64VirAddr)); +} + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf +* Description : this function is used to Get Yolov3 GetResult tmp buffer size +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to YOLOV3 NNIE parameter +* SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to YOLOV3 software parameter +* +* +* +* +* Output : +* Return Value : HI_U32: tmp buffer size. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_U32 SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_U32 u32TotalSize = 0; + HI_U32 u32AssistStackSize = 0; + HI_U32 u32TotalBboxNum = 0; + HI_U32 u32TotalBboxSize = 0; + HI_U32 u32DstBlobSize = 0; + HI_U32 u32MaxBlobSize = 0; + HI_U32 i = 0; + + for(i = 0; i < pstNnieParam->pstModel->astSeg[0].u16DstNum; i++) + { + u32DstBlobSize = pstNnieParam->pstModel->astSeg[0].astDstNode[i].unShape.stWhc.u32Width*sizeof(HI_U32)* + pstNnieParam->pstModel->astSeg[0].astDstNode[i].unShape.stWhc.u32Height* + pstNnieParam->pstModel->astSeg[0].astDstNode[i].unShape.stWhc.u32Chn; + if(u32MaxBlobSize < u32DstBlobSize) + { + u32MaxBlobSize = u32DstBlobSize; + } + u32TotalBboxNum += pstSoftwareParam->au32GridNumWidth[i]*pstSoftwareParam->au32GridNumHeight[i]* + pstSoftwareParam->u32BboxNumEachGrid; + } + u32AssistStackSize = u32TotalBboxNum*sizeof(SAMPLE_SVP_NNIE_STACK_S); + u32TotalBboxSize = u32TotalBboxNum*sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S); + u32TotalSize += (u32MaxBlobSize+u32AssistStackSize+u32TotalBboxSize); + + return u32TotalSize; +} + +/***************************************************************************** +* Prototype : SAMPLE_SVP_NNIE_Yolov3_GetResult +* Description : this function is used to Get Yolov3 result +* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to YOLOV3 NNIE parameter +* SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to YOLOV3 software parameter +* +* +* +* +* Output : +* Return Value : HI_SUCCESS: Success;Error codes: Failure. +* Spec : +* Calls : +* Called By : +* History: +* +* 1. Date : 2017-11-10 +* Author : +* Modification : Create +* +*****************************************************************************/ +HI_S32 SAMPLE_SVP_NNIE_Yolov3_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftwareParam) +{ + HI_U32 i = 0; + HI_U64 au64InputBlobAddr[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0}; + HI_U32 au32Stride[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0}; + + for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) + { + au64InputBlobAddr[i] = pstNnieParam->astSegData[0].astDst[i].u64VirAddr; + au32Stride[i] = pstNnieParam->astSegData[0].astDst[i].u32Stride; + } + return SVP_NNIE_Yolov3_GetResult(au64InputBlobAddr,pstSoftwareParam->au32GridNumWidth, + pstSoftwareParam->au32GridNumHeight,au32Stride,pstSoftwareParam->u32BboxNumEachGrid, + pstSoftwareParam->u32ClassNum,pstSoftwareParam->u32OriImWidth, + pstSoftwareParam->u32OriImWidth,pstSoftwareParam->u32MaxRoiNum,pstSoftwareParam->u32NmsThresh, + pstSoftwareParam->u32ConfThresh,pstSoftwareParam->af32Bias, + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stGetResultTmpBuf.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstScore.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stDstRoi.u64VirAddr), + SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstSoftwareParam->stClassRoiNum.u64VirAddr)); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/nnie/sample_svp_nnie_software.h b/src/nnie/sample_svp_nnie_software.h new file mode 100644 index 0000000..2f2ed67 --- /dev/null +++ b/src/nnie/sample_svp_nnie_software.h @@ -0,0 +1,134 @@ +#ifndef _SAMPLE_SVP_USER_KERNEL_H_ +#define _SAMPLE_SVP_USER_KERNEL_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "hi_comm_svp.h" +#include "hi_nnie.h" +#include "mpi_nnie.h" +#include "sample_comm_svp.h" +#include "sample_comm_nnie.h" +#include +#include +#include + + +#define SAMPLE_SVP_NNIE_ADDR_ALIGN_16 16 /*16 byte alignment*/ +#define SAMPLE_SVP_NNIE_MAX(a,b) (((a) > (b)) ? (a) : (b)) +#define SAMPLE_SVP_NNIE_MIN(a,b) (((a) < (b)) ? (a) : (b)) +#define SAMPLE_SVP_NNIE_SIGMOID(x) (HI_FLOAT)(1.0f/(1+exp(-x))) + +#define SAMPLE_SVP_NNIE_COORDI_NUM 4 /*coordinate numbers*/ +#define SAMPLE_SVP_NNIE_PROPOSAL_WIDTH 6 /*the number of proposal values*/ +#define SAMPLE_SVP_NNIE_QUANT_BASE 4096 /*the base value*/ +#define SAMPLE_SVP_NNIE_SCORE_NUM 2 /*the num of RPN scores*/ +#define SAMPLE_SVP_NNIE_HALF 0.5f /*the half value*/ +#define SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM 3 /*yolov3 report blob num*/ +#define SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM 6 /*yolov3 bias num of each grid*/ +#define SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM 85 /*yolov3 inference result num of each bbox*/ + +/*CNN GetTopN unit*/ +typedef struct hiSAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S +{ + HI_U32 u32ClassId; + HI_U32 u32Confidence; +}SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S; + +/*stack for sort*/ +typedef struct hiSAMPLE_SVP_NNIE_STACK +{ + HI_S32 s32Min; + HI_S32 s32Max; +}SAMPLE_SVP_NNIE_STACK_S; + +/*stack for sort*/ +typedef struct hiSAMPLE_SVP_NNIE_YOLOV1_SCORE +{ + HI_U32 u32Idx; + HI_S32 s32Score; +}SAMPLE_SVP_NNIE_YOLOV1_SCORE_S; + +typedef struct hiSAMPLE_SVP_NNIE_YOLOV2_BBOX +{ + HI_FLOAT f32Xmin; + HI_FLOAT f32Xmax; + HI_FLOAT f32Ymin; + HI_FLOAT f32Ymax; + HI_S32 s32ClsScore; + HI_U32 u32ClassIdx; + HI_U32 u32Mask; +}SAMPLE_SVP_NNIE_YOLOV2_BBOX_S; + +typedef SAMPLE_SVP_NNIE_YOLOV2_BBOX_S SAMPLE_SVP_NNIE_YOLOV3_BBOX_S; + +/*CNN*/ +HI_S32 SAMPLE_SVP_NNIE_Cnn_GetTopN(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_CNN_SOFTWARE_PARAM_S* pstSoftwareParam); + +/*FasterRcnn*/ +HI_U32 SAMPLE_SVP_NNIE_RpnTmpBufSize(HI_U32 u32RatioAnchorsNum, + HI_U32 u32ScaleAnchorsNum, HI_U32 u32ConvHeight, HI_U32 u32ConvWidth); + +HI_U32 SAMPLE_SVP_NNIE_FasterRcnn_GetResultTmpBufSize( + HI_U32 u32MaxRoiNum, HI_U32 u32ClassNum); + +HI_S32 SAMPLE_SVP_NNIE_FasterRcnn_Rpn(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S* pstSoftwareParam); + +HI_S32 SAMPLE_SVP_NNIE_FasterRcnn_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S* pstSoftwareParam); + + +/*Pvanet*/ +HI_U32 SAMPLE_SVP_NNIE_Pvanet_GetResultTmpBufSize( + HI_U32 u32MaxRoiNum, HI_U32 u32ClassNum); + +HI_S32 SAMPLE_SVP_NNIE_Pvanet_Rpn(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S* pstSoftwareParam); + +HI_S32 SAMPLE_SVP_NNIE_Pvanet_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S* pstSoftwareParam); + +/*RFCN*/ +HI_U32 SAMPLE_SVP_NNIE_Rfcn_GetResultTmpBuf(HI_U32 u32MaxRoiNum, HI_U32 u32ClassNum); + +HI_S32 SAMPLE_SVP_NNIE_Rfcn_Rpn(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S* pstSoftwareParam); + +HI_S32 SAMPLE_SVP_NNIE_Rfcn_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S* pstSoftwareParam); + +/*SSD*/ +HI_U32 SAMPLE_SVP_NNIE_Ssd_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S* pstSoftwareParam); + +HI_S32 SAMPLE_SVP_NNIE_Ssd_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S* pstSoftwareParam); + +/*YOLOV1*/ +HI_U32 SAMPLE_SVP_NNIE_Yolov1_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S* pstSoftwareParam); + +HI_S32 SAMPLE_SVP_NNIE_Yolov1_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S* pstSoftwareParam); + +/*YOLOV2*/ +HI_U32 SAMPLE_SVP_NNIE_Yolov2_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S* pstSoftwareParam); + +HI_S32 SAMPLE_SVP_NNIE_Yolov2_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S* pstSoftwareParam); + +/*YOLOV3*/ +HI_U32 SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftwareParam); + +HI_S32 SAMPLE_SVP_NNIE_Yolov3_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam, + SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftwareParam); +#ifdef __cplusplus +} +#endif + +#endif /* _SAMPLE_SVP_USER_KERNEL_H_ */ \ No newline at end of file diff --git a/xmake.lua b/xmake.lua index d451cb7..2639879 100644 --- a/xmake.lua +++ b/xmake.lua @@ -12,11 +12,18 @@ target("ISP") add_deps("sample_common") add_packages("zlog") +target("NNIE") + set_kind("static") + add_files("src/nnie/*.c") + add_includedirs("src") + add_deps("sample_common","sample_svp") + add_packages("zlog") + target("CatFeeder") set_kind("binary") add_includedirs("src") add_files("src/*.cpp") - add_deps("hi_library", "ISP") + add_deps("hi_library", "ISP", "NNIE") add_packages("zlog") add_links("stdc++fs") after_build(function (target) @@ -28,73 +35,3 @@ if is_mode("debug") then set_symbols("debug") set_optimize("none") end - --- --- If you want to known more usage about xmake, please see https://xmake.io --- --- ## FAQ --- --- You can enter the project directory firstly before building project. --- --- $ cd projectdir --- --- 1. How to build project? --- --- $ xmake --- --- 2. How to configure project? --- --- $ xmake f -p [macosx|linux|iphoneos ..] -a [x86_64|i386|arm64 ..] -m [debug|release] --- --- 3. Where is the build output directory? --- --- The default output directory is `./build` and you can configure the output directory. --- --- $ xmake f -o outputdir --- $ xmake --- --- 4. How to run and debug target after building project? --- --- $ xmake run [targetname] --- $ xmake run -d [targetname] --- --- 5. How to install target to the system directory or other output directory? --- --- $ xmake install --- $ xmake install -o installdir --- --- 6. Add some frequently-used compilation flags in xmake.lua --- --- @code --- -- add debug and release modes --- add_rules("mode.debug", "mode.release") --- --- -- add macro definition --- add_defines("NDEBUG", "_GNU_SOURCE=1") --- --- -- set warning all as error --- set_warnings("all", "error") --- --- -- set language: c99, c++11 --- set_languages("c99", "c++11") --- --- -- set optimization: none, faster, fastest, smallest --- set_optimize("fastest") --- --- -- add include search directories --- add_includedirs("/usr/include", "/usr/local/include") --- --- -- add link libraries and search directories --- add_links("tbox") --- add_linkdirs("/usr/local/lib", "/usr/lib") --- --- -- add system link libraries --- add_syslinks("z", "pthread") --- --- -- add compilation and link flags --- add_cxflags("-stdnolib", "-fno-strict-aliasing") --- add_ldflags("-L/usr/local/lib", "-lpthread", {force = true}) --- --- @endcode --- -