update cmakelists.txt and change ctdetforward_kernel

Youngon · Dec 27, 2019 · 543b118 · 543b118
1 parent b1f47f9
commit 543b118
Show file tree

Hide file tree

Showing 24 changed files with 169 additions and 154 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,47 +1,14 @@
 cmake_minimum_required(VERSION 3.5)
 project(ctdet_trt)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/lib)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/lib)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
 
-set(CMAKE_BUILD_TYPE Debug)
-set(GPU_ARCHS 61)  ## config your GPU_ARCHS
+set(CMAKE_BUILD_TYPE Release)
+set(GPU_ARCHS 61)  ## config your GPU_ARCHS. See [here](https://developer.nvidia.com/cuda-gpus) to find the maximum compute capability your specific GPU supports.
 set(TENSORRT_ROOT /usr/local/TensorRT-5.0.2.6)
-add_subdirectory(onnx-tensorrt)
-
-find_package(CUDA REQUIRED)
-
-find_path(TENSORRT_INCLUDE_DIR NvInfer.h
-        HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR}
-        PATH_SUFFIXES include/)
-message(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")
-find_library(TENSORRT_LIBRARY_INFER nvinfer
-        HINTS ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR}
-        PATH_SUFFIXES lib lib64 lib/x64)
-set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER}
-         nvonnxparser nvonnxparser_runtime)
-
-message(STATUS ${TENSORRT_LIBRARY})
-find_package(OpenCV REQUIRED)
-link_directories(${OpenCV_LIBRARIES_DIRS})
-
-include_directories(include ${OpenCV_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS}
-        ${TENSORRT_INCLUDE_DIR} ${CUDNN_INCLUDE_DIR} ${CUDNN_INCLUDE_DIR} ${TENSORRT_INCLUDE_DIR} onnx-tensorrt )
-
-file(GLOB CPP_SRC src/*.cpp)
-file(GLOB CU_SRC src/*.cu)
-
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast")
 
-list(APPEND CUDA_NVCC_FLAGS "-D_FORCE_INLINES -Xcompiler -fPIC")
-
-
-cuda_add_library(ctdet SHARED src/python_api.cpp ${CPP_SRC} ${CU_SRC})
-target_include_directories(ctdet PUBLIC ${CUDA_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDNN_INCLUDE_DIR})
-target_link_libraries(ctdet ${TENSORRT_LIBRARY} ${OpenCV_LIBS})
-
-
-cuda_add_executable(buildEngine example/buildEngine.cpp ${CPP_SRC} ${CU_SRC})
-target_link_libraries(buildEngine ${OpenCV_LIBS} ${TENSORRT_LIBRARY})
-
-cuda_add_executable(runDet example/runDet.cpp ${CPP_SRC} ${CU_SRC})
-target_link_libraries(runDet ${OpenCV_LIBS} ${TENSORRT_LIBRARY})
+## build
+add_subdirectory(onnx-tensorrt)
+add_subdirectory(src)
+add_subdirectory(example)
diff --git a/README.md b/README.md
@@ -20,7 +20,7 @@
 | [resdcn101](https://github.com/xingyizhou/CenterNet/blob/master/src/lib/models/networks/resnet_dcn.py)| 512x512    | gtx 1070 |float32    |    20.9ms    |
 | [resdcn18](https://github.com/xingyizhou/CenterNet/blob/master/src/lib/models/networks/resnet_dcn.py)| 512x512    | gtx 1070 |float32    |    5.81ms    |
 | [resdcn18](https://github.com/xingyizhou/CenterNet/blob/master/src/lib/models/networks/resnet_dcn.py)| 512x512    | gtx 1070 |int8    |    3.63ms    |
-1. support Deform Conv v2.  
+1. support Deform Conv v2.
 2. no nms.
 3. support fp32 fp16 int8 mode.
 
@@ -30,21 +30,22 @@
 |---|---|---|---|---|---|---|---|---|
 |ctdet_coco_dla_2x|gtx 1070|float32|0.365/0.374|0.543|0.390|0.164|0.398|0.536|
 |ctdet_coco_dlav0_1x|gtx 1070|float32|0.324/--|0.511|0.343|0.140|0.350|0.476|
-|ctdet_coco_dlav0_1x|gtx 1070|int8|0.293/--|0.465|0.309|0.123|0.317|0.443|
+|ctdet_coco_dlav0_1x|gtx 1070|int8|0.295/--|0.468|0.311|0.123|0.318|0.446|
 |ctdet_coco_resdcn101|gtx 1070|float32|0.332/0.346|0.516|0.349|0.115|0.367|0.531|
 |ctdet_coco_resdcn18|gtx 1070|float32|0.277/0.281|0.448|0.286|0.083|0.290|0.454|
-|ctdet_coco_resdcn18|gtx 1070|int8|0.238/0.281|0.394|0.246|0.062|0.254|0.402|
+|ctdet_coco_resdcn18|gtx 1070|int8|0.242/0.281|0.401|0.250|0.061|0.255|0.409|
 
 #### notes
  * cocoval2017 test AP with no augmentation.
  * input_size = 512x512
  * thresh = 0.01
  * maxpool kernel_size = 3
- * calib_img_list.txt : random sample 700 images from COCO2017/val2017
+ * calib_img_list.txt : random sample 200 images from COCO2017/val2017
 
 ### Environments
 1. gtx 1070
 ```
+pytorch 1.0-1.1
 ubuntu 1604
 TensorRT 5.0
 onnx-tensorrt v5.0
@@ -68,8 +69,8 @@ cd build && cmake .. && make
 cd ..
 
 ##ctdet | config include/ctdetConfig.h 
-## int 8
-./buildEngine -i model/ctdet_coco_dla_2x.onnx -o model/ctdet_coco_dla_2x.engine -m 2 -c calib_img_list.txt
+## float32
+./buildEngine -i model/ctdet_coco_dla_2x.onnx -o model/ctdet_coco_dla_2x.engine 
 ./runDet -e model/ctdet_coco_dla_2x.engine -i test.jpg -c test.h264
 
 ##cthelmet   | config include/ctdetConfig.h
@@ -85,7 +86,7 @@ cd ..
 ./runDet -e model/centerface.engine -i test.jpg -c test.h264
 
 ## run eval_coco.py | config your COCO dataset and ctdet_coco engine 
-python3 eval_coco.py
+python3 eval_coco.py model/ctdet_coco_dla_2x.engine
 ```
 
 ### Related projects

diff --git a/eval_coco.py b/eval_coco.py
@@ -5,6 +5,7 @@
 from pycocotools.cocoeval import COCOeval
 import pycocotools.coco as coco
 from tqdm import tqdm
+import sys
 
 class BOX(Structure):
     _fields_ = [("x1", c_float),
@@ -130,7 +131,7 @@ def top_k(pred,K=100):
 data = coco.COCO(coco_val_ann)
 
 set_device(0)
-net = init_net(b'/home/cao/CLionProjects/ctdet_trt/model/ctdet_coco_resdcn18_int8.engine')
+net = init_net(bytes(sys.argv[1],encoding = "utf8"))
 detections = []
 for img_id in tqdm(data.getImgIds()):
     img_name = os.path.join(coco_val_dir,data.loadImgs(ids=[img_id])[0]['file_name']).strip()

diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 3.5)  # Build script for the two example programs: buildEngine and runDet.
+project(example)
+# Append -std=c++11, -Wall and -Ofast to the C++ flags used in this directory.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast")
+include_directories(../include
+                    ../onnx-tensorrt)  # header search paths: ../include and the sibling onnx-tensorrt directory
+# buildEngine: built from buildEngine.cpp and linked against the ctdet library (defined in ../src).
+add_executable(buildEngine buildEngine.cpp)
+target_link_libraries(buildEngine ctdet)
+# runDet: built from runDet.cpp and linked against the same ctdet library.
+add_executable(runDet runDet.cpp)
+target_link_libraries(runDet ctdet)
diff --git a/example/buildEngine.cpp b/example/buildEngine.cpp
@@ -2,11 +2,12 @@
 // Created by cao on 19-10-26.
 //
 
-#include <ctdetNet.h>
+
 #include <argparse.h>
 #include <string>
-#include <utils.h>
 #include <iostream>
+#include "ctdetNet.h"
+#include "utils.h"
 
 int main(int argc, const char** argv)
 {

diff --git a/example/runDet.cpp b/example/runDet.cpp
@@ -6,9 +6,10 @@
 #include <argparse.h>
 #include <string>
 #include <iostream>
-#include <ctdetNet.h>
-#include <utils.h>
 #include <memory>
+#include "ctdetNet.h"
+#include "utils.h"
+
 
 int main(int argc, const char** argv){
     optparse::OptionParser parser;

diff --git a/include/ctdetConfig.h b/include/ctdetConfig.h
@@ -7,7 +7,7 @@
 
 namespace ctdet{
 
-    constexpr static float visThresh = 0.3;
+    constexpr static float visThresh = 0.01;
     constexpr static int kernelSize = 3 ;  /// nms maxpool size
 
 

diff --git a/include/ctdetLayer.h b/include/ctdetLayer.h
@@ -5,9 +5,8 @@
 #ifndef CTDET_TRT_CTDETLAYER_H
 #define CTDET_TRT_CTDETLAYER_H
 
-#include <utils.h>
-extern "C" void CTdetforward_gpu(const float *hm, const float *reg,const float *wh ,float *output,
+void CTdetforward_gpu(const float *hm, const float *reg,const float *wh ,float *output,
                       const int w,const int h,const int classes,const int kernerl_size,const float visthresh  );
-extern "C" void CTfaceforward_gpu(const float *hm, const float *wh,const float *reg,const float* landmarks,float *output,
+void CTfaceforward_gpu(const float *hm, const float *wh,const float *reg,const float* landmarks,float *output,
                        const int w,const int h,const int classes,const int kernerl_size, const float visthresh );
 #endif //CTDET_TRT_CTDETLAYER_H
diff --git a/include/ctdetNet.h b/include/ctdetNet.h
@@ -11,8 +11,8 @@
 #include <numeric>
 #include "NvInferPlugin.h"
 #include "NvOnnxParser.h"
-#include <ctdetConfig.h>
-#include <utils.h>
+#include "ctdetConfig.h"
+#include "utils.h"
 #include "NvOnnxParserRuntime.h"
 
 namespace ctdet

diff --git a/include/entroyCalibrator.h b/include/entroyCalibrator.h
@@ -5,7 +5,7 @@
 #ifndef CTDET_TRT_ENTROYCALIBRATOR_H
 #define CTDET_TRT_ENTROYCALIBRATOR_H
 
-#include <NvInfer.h>
+#include "NvInfer.h"
 #include <vector>
 #include <string>
 namespace nvinfer1 {

diff --git a/include/python_api.h b/include/python_api.h
@@ -4,8 +4,8 @@
 
 #ifndef CTDET_TRT_PYTHON_API_H
 #define CTDET_TRT_PYTHON_API_H
-#include <utils.h>
-#include <ctdetConfig.h>
+#include "utils.h"
+#include "ctdetConfig.h"
 typedef struct
 {
     int num;
@@ -18,4 +18,4 @@ extern "C" void setDevice(int id);
 extern "C" void freeResult(detResult *p);
 extern "C" void freeNet(void * p);
 
-#endif //CTDET_TRT_PYTHON_API_H
+#endif //CTDET_TRT_PYTHON_API_H
diff --git a/include/utils.h b/include/utils.h
@@ -9,15 +9,15 @@
 #include <iostream>
 #include <iomanip>
 #include <algorithm>
-#include <cublas_v2.h>
-#include <cudnn.h>
 #include <assert.h>
 #include "NvInfer.h"
-#include <opencv2/opencv.hpp>
-
+#include "opencv2/opencv.hpp"
+#include "cuda.h"
+#include "cuda_runtime.h"
+#include "numeric"
 
 #ifndef BLOCK
-#define BLOCK 512
+#define BLOCK 1024
 #endif
 #ifndef CUDA_CHECK
 #define CUDA_CHECK(callstr)                                                                    \
@@ -51,7 +51,6 @@ class Profiler : public nvinfer1::IProfiler
             totalTime += elem.second.time;
             maxLayerNameLength = std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
         }
-
 //        auto old_settings = std::cout.flags();
 //        auto old_precision = std::cout.precision();
 //        // Output header
@@ -162,8 +161,6 @@ struct Detection{
     landmarks marks[5];
 };
 
-
-extern dim3 cudaGridSize(uint n);
 extern std::vector<float> prepareImage(cv::Mat& img, const bool& forwardFace);
 extern void postProcess(std::vector<Detection> & result,const cv::Mat& img, const bool& forwardFace);
 extern void postProcess(std::vector<Detection> & result,const int &img_w ,const int& img_h, const bool& forwardFace);

diff --git a/model/centerface.engine b/model/centerface.engine
diff --git a/model/centerface.enigne b/model/centerface.enigne
diff --git a/model/centerface.onnx b/model/centerface.onnx
diff --git a/model/ctdet_helmet.engine b/model/ctdet_helmet.engine
diff --git a/onnx-tensorrt/CMakeLists.txt b/onnx-tensorrt/CMakeLists.txt
@@ -241,7 +241,7 @@ else()
   add_library(nvonnxparser_plugin STATIC ${PLUGIN_SOURCES})
 endif()
 target_include_directories(nvonnxparser_plugin PUBLIC ${CUDA_INCLUDE_DIRS} ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDNN_INCLUDE_DIR})
-target_link_libraries(nvonnxparser_plugin ${TENSORRT_LIBRARY})
+target_link_libraries(nvonnxparser_plugin ${TENSORRT_LIBRARY} cuda cudart cublas)
 
 # --------------------------------
 # Importer library
@@ -255,9 +255,9 @@ target_link_libraries(nvonnxparser PUBLIC onnx_proto nvonnxparser_plugin ${PROTO
 #  LINK_DEPENDS ${PARSER_LINKER_SCRIPT}
 #  LINK_FLAGS "-Wl,--version-script=${PARSER_LINKER_SCRIPT}"
 #)
-add_library(nvonnxparser_static STATIC ${IMPORTER_SOURCES})
-target_include_directories(nvonnxparser_static PUBLIC ${CUDA_INCLUDE_DIRS} ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDNN_INCLUDE_DIR})
-target_link_libraries(nvonnxparser_static PUBLIC onnx_proto nvonnxparser_plugin ${PROTOBUF_LIBRARY} ${CUDNN_LIBRARY} ${TENSORRT_LIBRARY})
+#add_library(nvonnxparser_static STATIC ${IMPORTER_SOURCES})
+#target_include_directories(nvonnxparser_static PUBLIC ${CUDA_INCLUDE_DIRS} ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDNN_INCLUDE_DIR})
+#target_link_libraries(nvonnxparser_static PUBLIC onnx_proto nvonnxparser_plugin ${PROTOBUF_LIBRARY} ${CUDNN_LIBRARY} ${TENSORRT_LIBRARY})
 
 # --------------------------------
 # Runtime library
@@ -271,9 +271,9 @@ target_link_libraries(nvonnxparser_runtime PUBLIC nvonnxparser_plugin ${CUDNN_LI
 #  LINK_DEPENDS ${RUNTIME_LINKER_SCRIPT}
 #  LINK_FLAGS "-Wl,--version-script=${RUNTIME_LINKER_SCRIPT}"
 #)
-add_library(nvonnxparser_runtime_static STATIC ${RUNTIME_SOURCES})
-target_include_directories(nvonnxparser_runtime_static PUBLIC ${CUDA_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDNN_INCLUDE_DIR})
-target_link_libraries(nvonnxparser_runtime_static PUBLIC nvonnxparser_plugin ${CUDNN_LIBRARY} ${TENSORRT_LIBRARY})
+# add_library(nvonnxparser_runtime_static STATIC ${RUNTIME_SOURCES})
+# target_include_directories(nvonnxparser_runtime_static PUBLIC ${CUDA_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDNN_INCLUDE_DIR})
+# target_link_libraries(nvonnxparser_runtime_static PUBLIC nvonnxparser_plugin ${CUDNN_LIBRARY} ${TENSORRT_LIBRARY})
 
 # --------------------------------
 # Onnxifi library

diff --git a/readme/ctdet2onnx.md b/readme/ctdet2onnx.md
@@ -5,7 +5,7 @@ So I use DCNv2 from mmdetection.
         ```bash
         cp -r dcn lib/models/netowrks
         ```
-    * upgrade pytorch to 1.0+
+    * upgrade pytorch to 1.0-1.1
     * compile Deform Conv
         ```bash
         cd lib/models/netowrks/dcn
@@ -94,7 +94,7 @@ So I use DCNv2 from mmdetection.
         opt = opts().init()  ## change lib/opts.py add_argument('task', default='ctdet'....) to add_argument('--task', default='ctdet'....)
         opt.arch = 'dla_34'
         opt.heads = OrderedDict([('hm', 80), ('reg', 2), ('wh', 2)])
-        opt.head_conv = 256 if 'dla' in opt.arch else opt.head_conv=64
+        opt.head_conv = 256 if 'dla' in opt.arch else 64
         print(opt)
         model = create_model(opt.arch, opt.heads, opt.head_conv)
         model.forward = MethodType(forward[opt.arch.split('_')[0]], model)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -0,0 +1,34 @@
+cmake_minimum_required(VERSION 3.5)  # Build script for the ctdet shared library (C++ and CUDA sources in this directory).
+project(ctdet_trt)
+find_package(CUDA REQUIRED)  # FindCUDA module: supplies cuda_add_library and the CUDA_* variables used below
+# Locate the TensorRT header NvInfer.h and the nvinfer library; TENSORRT_ROOT and CUDA_TOOLKIT_ROOT_DIR are passed as search hints.
+find_path(TENSORRT_INCLUDE_DIR NvInfer.h
+        HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR}
+        PATH_SUFFIXES include/)
+message(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")
+find_library(TENSORRT_LIBRARY_INFER nvinfer
+        HINTS ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR}
+        PATH_SUFFIXES lib lib64 lib/x64)
+message(STATUS "Found TensorRT libs ${TENSORRT_LIBRARY_INFER}")
+# OpenCV is required; OpenCV_INCLUDE_DIRS and OpenCV_LIBS are consumed further down.
+find_package(OpenCV REQUIRED)
+link_directories(${OpenCV_LIBRARIES_DIRS})  # NOTE(review): OpenCV_LIBRARIES_DIRS is not set anywhere in this file; confirm OpenCV defines it, otherwise this adds no directory
+# Collect every .cpp and .cu file in this directory as the library's sources.
+file(GLOB CPP_SRC *.cpp)
+file(GLOB CU_SRC *.cu)
+# Host C++ flags, then extra nvcc flags (-Xcompiler forwards -fPIC to the host compiler).
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast")
+list(APPEND CUDA_NVCC_FLAGS "-D_FORCE_INLINES -Xcompiler -fPIC")
+# Header search paths: CUDA, TensorRT, OpenCV, ../include and the sibling onnx-tensorrt directory.
+include_directories(${CUDA_INCLUDE_DIRS}
+                    ${TENSORRT_INCLUDE_DIR}
+                    ${OpenCV_INCLUDE_DIRS}
+                    ../include
+                    ../onnx-tensorrt)
+# Build the shared library ctdet from the globbed sources and link it with nvinfer, OpenCV and the two ONNX parser libraries.
+cuda_add_library(ctdet SHARED ${CPP_SRC} ${CU_SRC})
+target_link_libraries(ctdet
+        ${TENSORRT_LIBRARY_INFER}
+        ${OpenCV_LIBS}
+        nvonnxparser
+        nvonnxparser_runtime)