diff --git a/yolov8/yolov8_trt10/include/block.h b/yolov8/yolov8_trt10/include/block.h index 1816e01b..4dd43e6b 100644 --- a/yolov8/yolov8_trt10/include/block.h +++ b/yolov8/yolov8_trt10/include/block.h @@ -7,29 +7,29 @@ std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file); -nvinfer1::IScaleLayer *addBatchNorm2d(nvinfer1::INetworkDefinition *network, - std::map<std::string, nvinfer1::Weights> weightMap, - nvinfer1::ITensor &input, std::string lname, float eps); +nvinfer1::IScaleLayer* addBatchNorm2d(nvinfer1::INetworkDefinition* network, + std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, + std::string lname, float eps); -nvinfer1::IElementWiseLayer *convBnSiLU(nvinfer1::INetworkDefinition *network, - std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor &input, +nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network, + std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int ch, int k, int s, int p, std::string lname); -nvinfer1::IElementWiseLayer *C2F(nvinfer1::INetworkDefinition *network, - std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor &input, int c1, +nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network, + std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname); -nvinfer1::IElementWiseLayer *C2(nvinfer1::INetworkDefinition *network, - std::map<std::string, nvinfer1::Weights> &weightMap, nvinfer1::ITensor &input, int c1, +nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network, + std::map<std::string, nvinfer1::Weights>& weightMap, nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname); -nvinfer1::IElementWiseLayer *SPPF(nvinfer1::INetworkDefinition *network, - std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor &input, int c1, +nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, + std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1, int c2, int k, std::string lname); -nvinfer1::IShuffleLayer *DFL(nvinfer1::INetworkDefinition *network, std::map<std::string, nvinfer1::Weights> weightMap, - nvinfer1::ITensor &input, int ch, int grid, int k, int s, int p, std::string lname); +nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap, + nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname); -nvinfer1::IPluginV2Layer *addYoLoLayer(nvinfer1::INetworkDefinition *network, - std::vector<nvinfer1::IConcatenationLayer*> dets, const int *px_arry, +nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition* network, + std::vector<nvinfer1::IConcatenationLayer*> dets, const int* px_arry, int px_arry_num, bool is_segmentation, bool is_pose); diff --git a/yolov8/yolov8_trt10/include/calibrator.h b/yolov8/yolov8_trt10/include/calibrator.h index 9bb60a7c..a324106d 100644 --- a/yolov8/yolov8_trt10/include/calibrator.h +++ b/yolov8/yolov8_trt10/include/calibrator.h @@ -11,17 +11,17 @@ //! \brief Implements Entropy calibrator 2. //! CalibrationAlgoType is kENTROPY_CALIBRATION_2. //!
-class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 -{ -public: - Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache = true); +class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 { + public: + Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, + const char* input_blob_name, bool read_cache = true); virtual ~Int8EntropyCalibrator2(); int getBatchSize() const TRT_NOEXCEPT override; bool getBatch(void* bindings[], const char* names[], int nbBindings) TRT_NOEXCEPT override; const void* readCalibrationCache(size_t& length) TRT_NOEXCEPT override; void writeCalibrationCache(const void* cache, size_t length) TRT_NOEXCEPT override; -private: + private: int batchsize_; int input_w_; int input_h_; @@ -36,4 +36,4 @@ class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 std::vector calib_cache_; }; -#endif // ENTROPY_CALIBRATOR_H +#endif // ENTROPY_CALIBRATOR_H diff --git a/yolov8/yolov8_trt10/include/config.h b/yolov8/yolov8_trt10/include/config.h index e9c70c1f..d3137f11 100644 --- a/yolov8/yolov8_trt10/include/config.h +++ b/yolov8/yolov8_trt10/include/config.h @@ -4,7 +4,7 @@ const static char* kInputTensorName = "images"; const static char* kOutputTensorName = "output"; -const static char *kProtoTensorName = "proto"; +const static char* kProtoTensorName = "proto"; const static int kNumClass = 80; const static int kPoseNumClass = 1; const static int kNumberOfPoints = 17; // number of keypoints total diff --git a/yolov8/yolov8_trt10/include/cuda_utils.h b/yolov8/yolov8_trt10/include/cuda_utils.h index 8fbd3199..35d50d84 100644 --- a/yolov8/yolov8_trt10/include/cuda_utils.h +++ b/yolov8/yolov8_trt10/include/cuda_utils.h @@ -4,15 +4,14 @@ #include #ifndef CUDA_CHECK -#define CUDA_CHECK(callstr)\ - {\ - cudaError_t error_code = callstr;\ - if (error_code != cudaSuccess) {\ - std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__;\ - assert(0);\ - }\ +#define CUDA_CHECK(callstr) \ + { \ + cudaError_t error_code = callstr; \ + if (error_code != cudaSuccess) { \ + std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \ + assert(0); \ + } \ } #endif // CUDA_CHECK #endif // TRTX_CUDA_UTILS_H_ - diff --git a/yolov8/yolov8_trt10/include/logging.h b/yolov8/yolov8_trt10/include/logging.h index 6b79a8b9..3a25d975 100644 --- a/yolov8/yolov8_trt10/include/logging.h +++ b/yolov8/yolov8_trt10/include/logging.h @@ -17,7 +17,6 @@ #ifndef TENSORRT_LOGGING_H #define TENSORRT_LOGGING_H -#include "NvInferRuntimeCommon.h" #include #include #include @@ -25,33 +24,24 @@ #include #include #include +#include "NvInferRuntimeCommon.h" #include "macros.h" using Severity = nvinfer1::ILogger::Severity; -class LogStreamConsumerBuffer : public std::stringbuf -{ -public: +class LogStreamConsumerBuffer : public std::stringbuf { + public: LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog) - : mOutput(stream) - , mPrefix(prefix) - , mShouldLog(shouldLog) - { - } + : mOutput(stream), mPrefix(prefix), mShouldLog(shouldLog) {} - LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) - : mOutput(other.mOutput) - { - } + LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) : mOutput(other.mOutput) {} - ~LogStreamConsumerBuffer() - { + ~LogStreamConsumerBuffer() { // std::streambuf::pbase() gives a pointer to the 
beginning of the buffered part of the output sequence // std::streambuf::pptr() gives a pointer to the current position of the output sequence // if the pointer to the beginning is not equal to the pointer to the current position, // call putOutput() to log the output to the stream - if (pbase() != pptr()) - { + if (pbase() != pptr()) { putOutput(); } } @@ -59,16 +49,13 @@ class LogStreamConsumerBuffer : public std::stringbuf // synchronizes the stream buffer and returns 0 on success // synchronizing the stream buffer consists of inserting the buffer contents into the stream, // resetting the buffer and flushing the stream - virtual int sync() - { + virtual int sync() { putOutput(); return 0; } - void putOutput() - { - if (mShouldLog) - { + void putOutput() { + if (mShouldLog) { // prepend timestamp std::time_t timestamp = std::time(nullptr); tm* tm_local = std::localtime(&timestamp); @@ -89,12 +76,9 @@ class LogStreamConsumerBuffer : public std::stringbuf } } - void setShouldLog(bool shouldLog) - { - mShouldLog = shouldLog; - } + void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; } -private: + private: std::ostream& mOutput; std::string mPrefix; bool mShouldLog; @@ -104,15 +88,12 @@ class LogStreamConsumerBuffer //! \class LogStreamConsumerBase //! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer //! -class LogStreamConsumerBase -{ -public: +class LogStreamConsumerBase { + public: LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog) - : mBuffer(stream, prefix, shouldLog) - { - } + : mBuffer(stream, prefix, shouldLog) {} -protected: + protected: LogStreamConsumerBuffer mBuffer; }; @@ -125,49 +106,49 @@ class LogStreamConsumerBase //! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream. //! Please do not change the order of the parent classes. //! -class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream -{ -public: +class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream { + public: //! \brief Creates a LogStreamConsumer which logs messages with level severity. //! Reportable severity determines if the messages are severe enough to be logged.
LogStreamConsumer(Severity reportableSeverity, Severity severity) - : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity) - , std::ostream(&mBuffer) // links the stream buffer with the stream - , mShouldLog(severity <= reportableSeverity) - , mSeverity(severity) - { - } + : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity), + std::ostream(&mBuffer) // links the stream buffer with the stream + , + mShouldLog(severity <= reportableSeverity), + mSeverity(severity) {} LogStreamConsumer(LogStreamConsumer&& other) - : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog) - , std::ostream(&mBuffer) // links the stream buffer with the stream - , mShouldLog(other.mShouldLog) - , mSeverity(other.mSeverity) - { - } + : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog), + std::ostream(&mBuffer) // links the stream buffer with the stream + , + mShouldLog(other.mShouldLog), + mSeverity(other.mSeverity) {} - void setReportableSeverity(Severity reportableSeverity) - { + void setReportableSeverity(Severity reportableSeverity) { mShouldLog = mSeverity <= reportableSeverity; mBuffer.setShouldLog(mShouldLog); } -private: - static std::ostream& severityOstream(Severity severity) - { + private: + static std::ostream& severityOstream(Severity severity) { return severity >= Severity::kINFO ? std::cout : std::cerr; } - static std::string severityPrefix(Severity severity) - { - switch (severity) - { - case Severity::kINTERNAL_ERROR: return "[F] "; - case Severity::kERROR: return "[E] "; - case Severity::kWARNING: return "[W] "; - case Severity::kINFO: return "[I] "; - case Severity::kVERBOSE: return "[V] "; - default: assert(0); return ""; + static std::string severityPrefix(Severity severity) { + switch (severity) { + case Severity::kINTERNAL_ERROR: + return "[F] "; + case Severity::kERROR: + return "[E] "; + case Severity::kWARNING: + return "[W] "; + case Severity::kINFO: + return "[I] "; + case Severity::kVERBOSE: + return "[V] "; + default: + assert(0); + return ""; } } @@ -199,24 +180,19 @@ class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream //! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger //! object. -class Logger : public nvinfer1::ILogger -{ -public: - Logger(Severity severity = Severity::kWARNING) - : mReportableSeverity(severity) - { - } +class Logger : public nvinfer1::ILogger { + public: + Logger(Severity severity = Severity::kWARNING) : mReportableSeverity(severity) {} //! //! \enum TestResult //! \brief Represents the state of a given test //! - enum class TestResult - { - kRUNNING, //!< The test is running - kPASSED, //!< The test passed - kFAILED, //!< The test failed - kWAIVED //!< The test was waived + enum class TestResult { + kRUNNING, //!< The test is running + kPASSED, //!< The test passed + kFAILED, //!< The test failed + kWAIVED //!< The test was waived }; //! @@ -226,10 +202,7 @@ class Logger : public nvinfer1::ILogger //! TODO Once all samples are updated to use this method to register the logger with TensorRT, //! we can eliminate the inheritance of Logger from ILogger //! - nvinfer1::ILogger& getTRTLogger() - { - return *this; - } + nvinfer1::ILogger& getTRTLogger() { return *this; } //! //! 
\brief Implementation of the nvinfer1::ILogger::log() virtual method @@ -237,8 +210,7 @@ class Logger : public nvinfer1::ILogger //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the //! inheritance from nvinfer1::ILogger //! - void log(Severity severity, const char* msg) TRT_NOEXCEPT override - { + void log(Severity severity, const char* msg) TRT_NOEXCEPT override { LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl; } @@ -247,10 +219,7 @@ class Logger : public nvinfer1::ILogger //! //! \param severity The logger will only emit messages that have severity of this level or higher. //! - void setReportableSeverity(Severity severity) - { - mReportableSeverity = severity; - } + void setReportableSeverity(Severity severity) { mReportableSeverity = severity; } //! //! \brief Opaque handle that holds logging information for a particular test @@ -259,20 +228,15 @@ class Logger : public nvinfer1::ILogger //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used //! with Logger::reportTest{Start,End}(). //! - class TestAtom - { - public: + class TestAtom { + public: TestAtom(TestAtom&&) = default; - private: + private: friend class Logger; TestAtom(bool started, const std::string& name, const std::string& cmdline) - : mStarted(started) - , mName(name) - , mCmdline(cmdline) - { - } + : mStarted(started), mName(name), mCmdline(cmdline) {} bool mStarted; std::string mName; @@ -290,8 +254,7 @@ class Logger : public nvinfer1::ILogger // //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). //! - static TestAtom defineTest(const std::string& name, const std::string& cmdline) - { + static TestAtom defineTest(const std::string& name, const std::string& cmdline) { return TestAtom(false, name, cmdline); } @@ -304,8 +267,7 @@ class Logger : public nvinfer1::ILogger //! \param[in] argv The array of command-line arguments (given as C strings) //! //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). - static TestAtom defineTest(const std::string& name, int argc, char const* const* argv) - { + static TestAtom defineTest(const std::string& name, int argc, char const* const* argv) { auto cmdline = genCmdlineString(argc, argv); return defineTest(name, cmdline); } @@ -317,8 +279,7 @@ class Logger : public nvinfer1::ILogger //! //! \param[in] testAtom The handle to the test that has started //! - static void reportTestStart(TestAtom& testAtom) - { + static void reportTestStart(TestAtom& testAtom) { reportTestResult(testAtom, TestResult::kRUNNING); assert(!testAtom.mStarted); testAtom.mStarted = true; @@ -333,86 +294,85 @@ class Logger : public nvinfer1::ILogger //! \param[in] result The result of the test. Should be one of TestResult::kPASSED, //! TestResult::kFAILED, TestResult::kWAIVED //! 
- static void reportTestEnd(const TestAtom& testAtom, TestResult result) - { + static void reportTestEnd(const TestAtom& testAtom, TestResult result) { assert(result != TestResult::kRUNNING); assert(testAtom.mStarted); reportTestResult(testAtom, result); } - static int reportPass(const TestAtom& testAtom) - { + static int reportPass(const TestAtom& testAtom) { reportTestEnd(testAtom, TestResult::kPASSED); return EXIT_SUCCESS; } - static int reportFail(const TestAtom& testAtom) - { + static int reportFail(const TestAtom& testAtom) { reportTestEnd(testAtom, TestResult::kFAILED); return EXIT_FAILURE; } - static int reportWaive(const TestAtom& testAtom) - { + static int reportWaive(const TestAtom& testAtom) { reportTestEnd(testAtom, TestResult::kWAIVED); return EXIT_SUCCESS; } - static int reportTest(const TestAtom& testAtom, bool pass) - { + static int reportTest(const TestAtom& testAtom, bool pass) { return pass ? reportPass(testAtom) : reportFail(testAtom); } - Severity getReportableSeverity() const - { - return mReportableSeverity; - } + Severity getReportableSeverity() const { return mReportableSeverity; } -private: + private: //! //! \brief returns an appropriate string for prefixing a log message with the given severity //! - static const char* severityPrefix(Severity severity) - { - switch (severity) - { - case Severity::kINTERNAL_ERROR: return "[F] "; - case Severity::kERROR: return "[E] "; - case Severity::kWARNING: return "[W] "; - case Severity::kINFO: return "[I] "; - case Severity::kVERBOSE: return "[V] "; - default: assert(0); return ""; + static const char* severityPrefix(Severity severity) { + switch (severity) { + case Severity::kINTERNAL_ERROR: + return "[F] "; + case Severity::kERROR: + return "[E] "; + case Severity::kWARNING: + return "[W] "; + case Severity::kINFO: + return "[I] "; + case Severity::kVERBOSE: + return "[V] "; + default: + assert(0); + return ""; } } //! //! \brief returns an appropriate string for prefixing a test result message with the given result //! - static const char* testResultString(TestResult result) - { - switch (result) - { - case TestResult::kRUNNING: return "RUNNING"; - case TestResult::kPASSED: return "PASSED"; - case TestResult::kFAILED: return "FAILED"; - case TestResult::kWAIVED: return "WAIVED"; - default: assert(0); return ""; + static const char* testResultString(TestResult result) { + switch (result) { + case TestResult::kRUNNING: + return "RUNNING"; + case TestResult::kPASSED: + return "PASSED"; + case TestResult::kFAILED: + return "FAILED"; + case TestResult::kWAIVED: + return "WAIVED"; + default: + assert(0); + return ""; } } //! //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity //! - static std::ostream& severityOstream(Severity severity) - { + static std::ostream& severityOstream(Severity severity) { return severity >= Severity::kINFO ? std::cout : std::cerr; } //! //! \brief method that implements logging test results //! - static void reportTestResult(const TestAtom& testAtom, TestResult result) - { + static void reportTestResult(const TestAtom& testAtom, TestResult result) { severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # " << testAtom.mCmdline << std::endl; } @@ -420,11 +380,9 @@ class Logger : public nvinfer1::ILogger //! //! \brief generate a command line string from the given (argc, argv) values //! 
- static std::string genCmdlineString(int argc, char const* const* argv) - { + static std::string genCmdlineString(int argc, char const* const* argv) { std::stringstream ss; - for (int i = 0; i < argc; i++) - { + for (int i = 0; i < argc; i++) { if (i > 0) ss << " "; ss << argv[i]; @@ -435,8 +393,7 @@ class Logger : public nvinfer1::ILogger Severity mReportableSeverity; }; -namespace -{ +namespace { //! //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE @@ -445,8 +402,7 @@ namespace //! //! LOG_VERBOSE(logger) << "hello world" << std::endl; //! -inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) -{ +inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) { return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE); } @@ -457,8 +413,7 @@ inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) //! //! LOG_INFO(logger) << "hello world" << std::endl; //! -inline LogStreamConsumer LOG_INFO(const Logger& logger) -{ +inline LogStreamConsumer LOG_INFO(const Logger& logger) { return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO); } @@ -469,8 +424,7 @@ inline LogStreamConsumer LOG_INFO(const Logger& logger) //! //! LOG_WARN(logger) << "hello world" << std::endl; //! -inline LogStreamConsumer LOG_WARN(const Logger& logger) -{ +inline LogStreamConsumer LOG_WARN(const Logger& logger) { return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING); } @@ -481,8 +435,7 @@ inline LogStreamConsumer LOG_WARN(const Logger& logger) //! //! LOG_ERROR(logger) << "hello world" << std::endl; //! -inline LogStreamConsumer LOG_ERROR(const Logger& logger) -{ +inline LogStreamConsumer LOG_ERROR(const Logger& logger) { return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR); } @@ -494,11 +447,10 @@ inline LogStreamConsumer LOG_ERROR(const Logger& logger) //! //! LOG_FATAL(logger) << "hello world" << std::endl; //! 
-inline LogStreamConsumer LOG_FATAL(const Logger& logger) -{ +inline LogStreamConsumer LOG_FATAL(const Logger& logger) { return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR); } -} // anonymous namespace +} // anonymous namespace -#endif // TENSORRT_LOGGING_H +#endif // TENSORRT_LOGGING_H diff --git a/yolov8/yolov8_trt10/include/model.h b/yolov8/yolov8_trt10/include/model.h index ad865df2..4e3d23cc 100644 --- a/yolov8/yolov8_trt10/include/model.h +++ b/yolov8/yolov8_trt10/include/model.h @@ -4,29 +4,29 @@ #include #include "NvInfer.h" -nvinfer1::IHostMemory *buildEngineYolov8Cls(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, - nvinfer1::DataType dt, const std::string &wts_path, float &gd, float &gw); +nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw); -nvinfer1::IHostMemory *buildEngineYolov8Det(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, - nvinfer1::DataType dt, const std::string &wts_path, float &gd, float &gw, - int &max_channels); +nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels); -nvinfer1::IHostMemory *buildEngineYolov8DetP6(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, - nvinfer1::DataType dt, const std::string &wts_path, float &gd, float &gw, - int &max_channels); +nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels); -nvinfer1::IHostMemory *buildEngineYolov8DetP2(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, - nvinfer1::DataType dt, const std::string &wts_path, float &gd, float &gw, - int &max_channels); +nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels); -nvinfer1::IHostMemory *buildEngineYolov8Seg(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, - nvinfer1::DataType dt, const std::string &wts_path, float &gd, float &gw, - int &max_channels); +nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels); -nvinfer1::IHostMemory *buildEngineYolov8Pose(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, - nvinfer1::DataType dt, const std::string &wts_path, float &gd, float &gw, - int &max_channels); +nvinfer1::IHostMemory* buildEngineYolov8Pose(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels); -nvinfer1::IHostMemory *buildEngineYolov8PoseP6(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, - nvinfer1::DataType dt, const std::string &wts_path, float &gd, float &gw, - int &max_channels); \ No newline at end of file +nvinfer1::IHostMemory* buildEngineYolov8PoseP6(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels); diff --git a/yolov8/yolov8_trt10/include/preprocess.h b/yolov8/yolov8_trt10/include/preprocess.h 
index 10bead97..8e40f549 100644 --- a/yolov8/yolov8_trt10/include/preprocess.h +++ b/yolov8/yolov8_trt10/include/preprocess.h @@ -1,16 +1,16 @@ #pragma once +#include #include #include "NvInfer.h" #include "types.h" -#include - void cuda_preprocess_init(int max_image_size); void cuda_preprocess_destroy(); -void cuda_preprocess(uint8_t *src, int src_width, int src_height, float *dst, int dst_width, int dst_height, cudaStream_t stream); - -void cuda_batch_preprocess(std::vector<cv::Mat> &img_batch, float *dst, int dst_width, int dst_height, cudaStream_t stream); +void cuda_preprocess(uint8_t* src, int src_width, int src_height, float* dst, int dst_width, int dst_height, + cudaStream_t stream); +void cuda_batch_preprocess(std::vector<cv::Mat>& img_batch, float* dst, int dst_width, int dst_height, + cudaStream_t stream); diff --git a/yolov8/yolov8_trt10/include/utils.h b/yolov8/yolov8_trt10/include/utils.h index 610c8e28..0bddf546 100644 --- a/yolov8/yolov8_trt10/include/utils.h +++ b/yolov8/yolov8_trt10/include/utils.h @@ -1,12 +1,12 @@ #pragma once -#include #include -#include +#include +#include static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h) { int w, h, x, y; - float r_w = input_w / (img.cols*1.0); - float r_h = input_h / (img.rows*1.0); + float r_w = input_w / (img.cols * 1.0); + float r_h = input_h / (img.rows * 1.0); if (r_h > r_w) { w = input_w; h = r_w * img.rows; @@ -25,16 +25,15 @@ static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h) { return out; } -static inline int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) { - DIR *p_dir = opendir(p_dir_name); +static inline int read_files_in_dir(const char* p_dir_name, std::vector<std::string>& file_names) { + DIR* p_dir = opendir(p_dir_name); if (p_dir == nullptr) { return -1; } struct dirent* p_file = nullptr; while ((p_file = readdir(p_dir)) != nullptr) { - if (strcmp(p_file->d_name, ".") != 0 && - strcmp(p_file->d_name, "..") != 0) { + if (strcmp(p_file->d_name, ".") != 0 && strcmp(p_file->d_name, "..") != 0) { //std::string cur_file_name(p_dir_name); //cur_file_name += "/"; //cur_file_name += p_file->d_name; @@ -83,4 +82,3 @@ static inline int read_labels(const std::string labels_filename, std::unordered_ re return 0; } - diff --git a/yolov8/yolov8_trt10/src/block.cpp b/yolov8/yolov8_trt10/src/block.cpp index a92a03a9..5b1df961 100644 --- a/yolov8/yolov8_trt10/src/block.cpp +++ b/yolov8/yolov8_trt10/src/block.cpp @@ -4,9 +4,8 @@ #include #include #include "config.h" -#include "yololayer.h" #include "model.h" - +#include "yololayer.h" std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file) { std::cout << "Loading weights: " << file << std::endl; @@ -27,7 +26,7 @@ std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file) { input >> name >> std::dec >> size; wt.type = nvinfer1::DataType::kFLOAT; - uint32_t *val = reinterpret_cast<uint32_t*>(malloc(sizeof(val) * size)); + uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(val) * size)); for (uint32_t x = 0, y = size; x < y; x++) { input >> std::hex >> val[x]; } @@ -38,28 +37,28 @@ std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file) { return WeightMap; } -nvinfer1::IScaleLayer *addBatchNorm2d(nvinfer1::INetworkDefinition *network, - std::map<std::string, nvinfer1::Weights> weightMap, - nvinfer1::ITensor &input, std::string lname, float eps) { - float *gamma = (float *) weightMap[lname + ".weight"].values; - float *beta = (float *) weightMap[lname + ".bias"].values; - float *mean = (float *) weightMap[lname + ".running_mean"].values; - float *var = (float *) weightMap[lname + ".running_var"].values; +nvinfer1::IScaleLayer*
addBatchNorm2d(nvinfer1::INetworkDefinition* network, + std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, + std::string lname, float eps) { + float* gamma = (float*)weightMap[lname + ".weight"].values; + float* beta = (float*)weightMap[lname + ".bias"].values; + float* mean = (float*)weightMap[lname + ".running_mean"].values; + float* var = (float*)weightMap[lname + ".running_var"].values; int len = weightMap[lname + ".running_var"].count; - float *scval = reinterpret_cast<float*>(malloc(sizeof(float) * len)); + float* scval = reinterpret_cast<float*>(malloc(sizeof(float) * len)); for (int i = 0; i < len; i++) { scval[i] = gamma[i] / sqrt(var[i] + eps); } nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, scval, len}; - float *shval = reinterpret_cast<float*>(malloc(sizeof(float) * len)); + float* shval = reinterpret_cast<float*>(malloc(sizeof(float) * len)); for (int i = 0; i < len; i++) { shval[i] = beta[i] - mean[i] * gamma[i] / sqrt(var[i] + eps); } nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, shval, len}; - float *pval = reinterpret_cast<float*>(malloc(sizeof(float) * len)); + float* pval = reinterpret_cast<float*>(malloc(sizeof(float) * len)); for (int i = 0; i < len; i++) { pval[i] = 1.0; } @@ -67,164 +66,162 @@ nvinfer1::IScaleLayer *addBatchNorm2d(nvinfer1::INetworkDefinition *network, weightMap[lname + ".scale"] = scale; weightMap[lname + ".shift"] = shift; weightMap[lname + ".power"] = power; - nvinfer1::IScaleLayer *output = network->addScale(input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); + nvinfer1::IScaleLayer* output = network->addScale(input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); assert(output); return output; } -nvinfer1::IElementWiseLayer *convBnSiLU(nvinfer1::INetworkDefinition *network, - std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor &input, +nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network, + std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int ch, int k, int s, int p, std::string lname) { nvinfer1::Weights bias_empty{nvinfer1::DataType::kFLOAT, nullptr, 0}; - nvinfer1::IConvolutionLayer *conv = + nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(input, ch, nvinfer1::DimsHW{k, k}, weightMap[lname + ".conv.weight"], bias_empty); assert(conv); conv->setStrideNd(nvinfer1::DimsHW{s, s}); conv->setPaddingNd(nvinfer1::DimsHW{p, p}); - nvinfer1::IScaleLayer *bn = addBatchNorm2d(network, weightMap, *conv->getOutput(0), lname + ".bn", 1e-3); + nvinfer1::IScaleLayer* bn = addBatchNorm2d(network, weightMap, *conv->getOutput(0), lname + ".bn", 1e-3); - nvinfer1::IActivationLayer *sigmoid = network->addActivation(*bn->getOutput(0), nvinfer1::ActivationType::kSIGMOID); - nvinfer1::IElementWiseLayer *ew = + nvinfer1::IActivationLayer* sigmoid = network->addActivation(*bn->getOutput(0), nvinfer1::ActivationType::kSIGMOID); + nvinfer1::IElementWiseLayer* ew = network->addElementWise(*bn->getOutput(0), *sigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD); assert(ew); return ew; } -nvinfer1::ILayer *bottleneck(nvinfer1::INetworkDefinition *network, std::map<std::string, nvinfer1::Weights> weightMap, - nvinfer1::ITensor &input, int c1, int c2, bool shortcut, float e, std::string lname) { - nvinfer1::IElementWiseLayer *conv1 = convBnSiLU(network, weightMap, input, c2, 3, 1, 1, lname + ".cv1"); - nvinfer1::IElementWiseLayer *conv2 = +nvinfer1::ILayer* bottleneck(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap, + nvinfer1::ITensor& input, int c1, int c2, bool shortcut, float e, std::string lname) { + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, c2, 3, 1, 1,
lname + ".cv1"); + nvinfer1::IElementWiseLayer* conv2 = convBnSiLU(network, weightMap, *conv1->getOutput(0), c2, 3, 1, 1, lname + ".cv2"); if (shortcut && c1 == c2) { - nvinfer1::IElementWiseLayer *ew = + nvinfer1::IElementWiseLayer* ew = network->addElementWise(input, *conv2->getOutput(0), nvinfer1::ElementWiseOperation::kSUM); return ew; } return conv2; } -nvinfer1::IElementWiseLayer *C2F(nvinfer1::INetworkDefinition *network, - std::map weightMap, nvinfer1::ITensor &input, int c1, +nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network, + std::map weightMap, nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname) { - int c_ = (float) c2 * e; + int c_ = (float)c2 * e; - nvinfer1::IElementWiseLayer *conv1 = convBnSiLU(network, weightMap, input, 2 * c_, 1, 1, 0, lname + ".cv1"); + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, 2 * c_, 1, 1, 0, lname + ".cv1"); nvinfer1::Dims d = conv1->getOutput(0)->getDimensions(); - nvinfer1::ISliceLayer *split1 = + nvinfer1::ISliceLayer* split1 = network->addSlice(*conv1->getOutput(0), nvinfer1::Dims4{0, 0, 0, 0}, nvinfer1::Dims4{d.d[0], d.d[1] / 2, d.d[2], d.d[3]}, nvinfer1::Dims4{1, 1, 1, 1}); - nvinfer1::ISliceLayer *split2 = + nvinfer1::ISliceLayer* split2 = network->addSlice(*conv1->getOutput(0), nvinfer1::Dims4{0, d.d[1] / 2, 0, 0}, nvinfer1::Dims4{d.d[0], d.d[1] / 2, d.d[2], d.d[3]}, nvinfer1::Dims4{1, 1, 1, 1}); - nvinfer1::ITensor *inputTensor0[] = {split1->getOutput(0), split2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat = network->addConcatenation(inputTensor0, 2); - nvinfer1::ITensor *y1 = split2->getOutput(0); + nvinfer1::ITensor* inputTensor0[] = {split1->getOutput(0), split2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat = network->addConcatenation(inputTensor0, 2); + nvinfer1::ITensor* y1 = split2->getOutput(0); for (int i = 0; i < n; i++) { - auto *b = bottleneck(network, weightMap, *y1, c_, c_, shortcut, 1.0, lname + ".m." + std::to_string(i)); + auto* b = bottleneck(network, weightMap, *y1, c_, c_, shortcut, 1.0, lname + ".m." 
+ std::to_string(i)); y1 = b->getOutput(0); - nvinfer1::ITensor *inputTensors[] = {cat->getOutput(0), b->getOutput(0)}; + nvinfer1::ITensor* inputTensors[] = {cat->getOutput(0), b->getOutput(0)}; cat = network->addConcatenation(inputTensors, 2); } - nvinfer1::IElementWiseLayer *conv2 = + nvinfer1::IElementWiseLayer* conv2 = convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname + ".cv2"); return conv2; } -nvinfer1::IElementWiseLayer *C2(nvinfer1::INetworkDefinition *network, - std::map &weightMap, nvinfer1::ITensor &input, int c1, +nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network, + std::map& weightMap, nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname) { assert(network != nullptr); int hidden_channels = static_cast(c2 * e); // cv1 branch - nvinfer1::IElementWiseLayer *conv1 = + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, 2 * hidden_channels, 1, 1, 0, lname + ".cv1"); - nvinfer1::ITensor *cv1_out = conv1->getOutput(0); + nvinfer1::ITensor* cv1_out = conv1->getOutput(0); // Split the output of cv1 into two tensors nvinfer1::Dims dims = cv1_out->getDimensions(); - nvinfer1::ISliceLayer *split1 = - network->addSlice(*cv1_out, nvinfer1::Dims4{0, 0, 0, 0}, - nvinfer1::Dims4{dims.d[0], dims.d[1] / 2, dims.d[2], dims.d[3]}, - nvinfer1::Dims4{1, 1, 1, 1}); - nvinfer1::ISliceLayer *split2 = - network->addSlice(*cv1_out, nvinfer1::Dims4{0, dims.d[1] / 2, 0, 0}, - nvinfer1::Dims4{dims.d[0], dims.d[1] / 2, dims.d[2], dims.d[3]}, - nvinfer1::Dims4{1, 1, 1, 1}); + nvinfer1::ISliceLayer* split1 = network->addSlice(*cv1_out, nvinfer1::Dims4{0, 0, 0, 0}, + nvinfer1::Dims4{dims.d[0], dims.d[1] / 2, dims.d[2], dims.d[3]}, + nvinfer1::Dims4{1, 1, 1, 1}); + nvinfer1::ISliceLayer* split2 = network->addSlice(*cv1_out, nvinfer1::Dims4{0, dims.d[1] / 2, 0, 0}, + nvinfer1::Dims4{dims.d[0], dims.d[1] / 2, dims.d[2], dims.d[3]}, + nvinfer1::Dims4{1, 1, 1, 1}); // Create y1 bottleneck sequence - nvinfer1::ITensor *y1 = split1->getOutput(0); + nvinfer1::ITensor* y1 = split1->getOutput(0); for (int i = 0; i < n; ++i) { - auto *bottleneck_layer = bottleneck(network, weightMap, *y1, hidden_channels, hidden_channels, shortcut, 1.0, + auto* bottleneck_layer = bottleneck(network, weightMap, *y1, hidden_channels, hidden_channels, shortcut, 1.0, lname + ".m." 
+ std::to_string(i)); y1 = bottleneck_layer->getOutput(0); // update 'y1' to be the output of the current bottleneck } // Concatenate y1 with the second split of cv1 - nvinfer1::ITensor *concatInputs[2] = {y1, split2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat = network->addConcatenation(concatInputs, 2); + nvinfer1::ITensor* concatInputs[2] = {y1, split2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat = network->addConcatenation(concatInputs, 2); // cv2 to produce the final output - nvinfer1::IElementWiseLayer *conv2 = + nvinfer1::IElementWiseLayer* conv2 = convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname + ".cv2"); return conv2; } -nvinfer1::IElementWiseLayer *SPPF(nvinfer1::INetworkDefinition *network, - std::map weightMap, nvinfer1::ITensor &input, int c1, +nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, + std::map weightMap, nvinfer1::ITensor& input, int c1, int c2, int k, std::string lname) { int c_ = c1 / 2; - nvinfer1::IElementWiseLayer *conv1 = convBnSiLU(network, weightMap, input, c_, 1, 1, 0, lname + ".cv1"); - nvinfer1::IPoolingLayer *pool1 = + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, c_, 1, 1, 0, lname + ".cv1"); + nvinfer1::IPoolingLayer* pool1 = network->addPoolingNd(*conv1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{k, k}); pool1->setStrideNd(nvinfer1::DimsHW{1, 1}); pool1->setPaddingNd(nvinfer1::DimsHW{k / 2, k / 2}); - nvinfer1::IPoolingLayer *pool2 = + nvinfer1::IPoolingLayer* pool2 = network->addPoolingNd(*pool1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{k, k}); pool2->setStrideNd(nvinfer1::DimsHW{1, 1}); pool2->setPaddingNd(nvinfer1::DimsHW{k / 2, k / 2}); - nvinfer1::IPoolingLayer *pool3 = + nvinfer1::IPoolingLayer* pool3 = network->addPoolingNd(*pool2->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{k, k}); pool3->setStrideNd(nvinfer1::DimsHW{1, 1}); pool3->setPaddingNd(nvinfer1::DimsHW{k / 2, k / 2}); - nvinfer1::ITensor *inputTensors[] = {conv1->getOutput(0), pool1->getOutput(0), pool2->getOutput(0), + nvinfer1::ITensor* inputTensors[] = {conv1->getOutput(0), pool1->getOutput(0), pool2->getOutput(0), pool3->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat = network->addConcatenation(inputTensors, 4); - nvinfer1::IElementWiseLayer *conv2 = + nvinfer1::IConcatenationLayer* cat = network->addConcatenation(inputTensors, 4); + nvinfer1::IElementWiseLayer* conv2 = convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname + ".cv2"); return conv2; } -nvinfer1::IShuffleLayer *DFL(nvinfer1::INetworkDefinition *network, std::map weightMap, - nvinfer1::ITensor &input, int ch, int grid, int k, int s, int p, std::string lname) { +nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map weightMap, + nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname) { - nvinfer1::IShuffleLayer *shuffle1 = network->addShuffle(input); + nvinfer1::IShuffleLayer* shuffle1 = network->addShuffle(input); shuffle1->setReshapeDimensions(nvinfer1::Dims4{kBatchSize, 4, 16, grid}); shuffle1->setSecondTranspose(nvinfer1::Permutation{0, 2, 1, 3}); - nvinfer1::ISoftMaxLayer *softmax = network->addSoftMax(*shuffle1->getOutput(0)); + nvinfer1::ISoftMaxLayer* softmax = network->addSoftMax(*shuffle1->getOutput(0)); softmax->setAxes(1 << 1); nvinfer1::Weights bias_empty{nvinfer1::DataType::kFLOAT, nullptr, 0}; - nvinfer1::IConvolutionLayer *conv = + nvinfer1::IConvolutionLayer* conv = 
network->addConvolutionNd(*softmax->getOutput(0), 1, nvinfer1::DimsHW{1, 1}, weightMap[lname], bias_empty); conv->setStrideNd(nvinfer1::DimsHW{s, s}); conv->setPaddingNd(nvinfer1::DimsHW{p, p}); - nvinfer1::IShuffleLayer *shuffle2 = network->addShuffle(*conv->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle2 = network->addShuffle(*conv->getOutput(0)); shuffle2->setReshapeDimensions(nvinfer1::Dims3{kBatchSize, 4, grid}); return shuffle2; } -nvinfer1::IPluginV2Layer *addYoLoLayer(nvinfer1::INetworkDefinition *network, - std::vector<nvinfer1::IConcatenationLayer*> dets, const int *px_arry, +nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition* network, + std::vector<nvinfer1::IConcatenationLayer*> dets, const int* px_arry, int px_arry_num, bool is_segmentation, bool is_pose) { auto creator = getPluginRegistry()->getPluginCreator("YoloLayer_TRT", "1"); const int netinfo_count = 8; // Assuming the first 5 elements are for netinfo as per existing code. @@ -257,17 +254,17 @@ nvinfer1::IPluginV2Layer *addYoLoLayer(nvinfer1::INetworkDefinition *network, pluginFieldCollection.fields = &pluginField; // Create the plugin object using the PluginFieldCollection. - nvinfer1::IPluginV2 *pluginObject = creator->createPlugin("yololayer", &pluginFieldCollection); + nvinfer1::IPluginV2* pluginObject = creator->createPlugin("yololayer", &pluginFieldCollection); // We assume that the plugin is to be added onto the network. // Prepare input tensors for the YOLO Layer. - std::vector<nvinfer1::ITensor*> inputTensors; - for (auto det: dets) { + std::vector<nvinfer1::ITensor*> inputTensors; + for (auto det : dets) { inputTensors.push_back(det->getOutput(0)); // Assuming each IConcatenationLayer has one output tensor. } // Add the plugin to the network using the prepared input tensors. - nvinfer1::IPluginV2Layer *yoloLayer = network->addPluginV2(inputTensors.data(), inputTensors.size(), *pluginObject); + nvinfer1::IPluginV2Layer* yoloLayer = network->addPluginV2(inputTensors.data(), inputTensors.size(), *pluginObject); return yoloLayer; // Return the added YOLO layer.
} diff --git a/yolov8/yolov8_trt10/src/calibrator.cpp b/yolov8/yolov8_trt10/src/calibrator.cpp index 62027881..c457bd77 100644 --- a/yolov8/yolov8_trt10/src/calibrator.cpp +++ b/yolov8/yolov8_trt10/src/calibrator.cpp @@ -1,39 +1,36 @@ +#include "calibrator.h" +#include #include #include -#include #include -#include "calibrator.h" #include "cuda_utils.h" #include "utils.h" -Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, - const char* input_blob_name, bool read_cache) - : batchsize_(batchsize) - , input_w_(input_w) - , input_h_(input_h) - , img_idx_(0) - , img_dir_(img_dir) - , calib_table_name_(calib_table_name) - , input_blob_name_(input_blob_name) - , read_cache_(read_cache) -{ +Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, + const char* calib_table_name, const char* input_blob_name, + bool read_cache) + : batchsize_(batchsize), + input_w_(input_w), + input_h_(input_h), + img_idx_(0), + img_dir_(img_dir), + calib_table_name_(calib_table_name), + input_blob_name_(input_blob_name), + read_cache_(read_cache) { input_count_ = 3 * input_w * input_h * batchsize; CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float))); read_files_in_dir(img_dir, img_files_); } -Int8EntropyCalibrator2::~Int8EntropyCalibrator2() -{ +Int8EntropyCalibrator2::~Int8EntropyCalibrator2() { CUDA_CHECK(cudaFree(device_input_)); } -int Int8EntropyCalibrator2::getBatchSize() const TRT_NOEXCEPT -{ +int Int8EntropyCalibrator2::getBatchSize() const TRT_NOEXCEPT { return batchsize_; } -bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings) TRT_NOEXCEPT -{ +bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings) TRT_NOEXCEPT { if (img_idx_ + batchsize_ > (int)img_files_.size()) { return false; } @@ -42,7 +39,7 @@ bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int for (int i = img_idx_; i < img_idx_ + batchsize_; i++) { std::cout << img_files_[i] << " " << i << std::endl; cv::Mat temp = cv::imread(img_dir_ + "/" + img_files_[i]); - if (temp.empty()){ + if (temp.empty()) { std::cerr << "Fatal error: image cannot open!" << std::endl; return false; } @@ -50,31 +47,28 @@ bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int input_imgs_.push_back(pr_img); } img_idx_ += batchsize_; - cv::Mat blob = cv::dnn::blobFromImages(input_imgs_, 1.0 / 255.0, cv::Size(input_w_, input_h_), cv::Scalar(0, 0, 0), true, false); + cv::Mat blob = cv::dnn::blobFromImages(input_imgs_, 1.0 / 255.0, cv::Size(input_w_, input_h_), cv::Scalar(0, 0, 0), + true, false); CUDA_CHECK(cudaMemcpy(device_input_, blob.ptr(0), input_count_ * sizeof(float), cudaMemcpyHostToDevice)); assert(!strcmp(names[0], input_blob_name_)); bindings[0] = device_input_; return true; } -const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length) TRT_NOEXCEPT -{ +const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length) TRT_NOEXCEPT { std::cout << "reading calib cache: " << calib_table_name_ << std::endl; calib_cache_.clear(); std::ifstream input(calib_table_name_, std::ios::binary); input >> std::noskipws; - if (read_cache_ && input.good()) - { + if (read_cache_ && input.good()) { std::copy(std::istream_iterator(input), std::istream_iterator(), std::back_inserter(calib_cache_)); } length = calib_cache_.size(); return length ? 
calib_cache_.data() : nullptr; } -void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length) TRT_NOEXCEPT -{ +void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length) TRT_NOEXCEPT { std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl; std::ofstream output(calib_table_name_, std::ios::binary); output.write(reinterpret_cast(cache), length); } - diff --git a/yolov8/yolov8_trt10/src/model.cpp b/yolov8/yolov8_trt10/src/model.cpp index af3301c9..2ad7fb9b 100644 --- a/yolov8/yolov8_trt10/src/model.cpp +++ b/yolov8/yolov8_trt10/src/model.cpp @@ -20,49 +20,49 @@ static int get_depth(int x, float gd) { return std::max(r, 1); } -void calculateStrides(nvinfer1::IElementWiseLayer *conv_layers[], int size, int reference_size, int strides[]) { +void calculateStrides(nvinfer1::IElementWiseLayer* conv_layers[], int size, int reference_size, int strides[]) { for (int i = 0; i < size; ++i) { - nvinfer1::ILayer *layer = conv_layers[i]; + nvinfer1::ILayer* layer = conv_layers[i]; nvinfer1::Dims dims = layer->getOutput(0)->getDimensions(); int feature_map_size = dims.d[2]; strides[i] = reference_size / feature_map_size; } } -nvinfer1::IHostMemory *buildEngineYolov8Cls(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, - nvinfer1::DataType dt, const std::string &wts_path, float &gd, float &gw) { +nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw) { std::map weightMap = loadWeights(wts_path); -// nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); - nvinfer1::INetworkDefinition *network = builder->createNetworkV2( + // nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); + nvinfer1::INetworkDefinition* network = builder->createNetworkV2( 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)); int max_channels = 1280; // ****************************************** YOLOV8 INPUT ********************************************** - nvinfer1::ITensor *data = network->addInput(kInputTensorName, dt, - nvinfer1::Dims4{kBatchSize, 3, kClsInputH, kClsInputW}); + nvinfer1::ITensor* data = + network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kClsInputH, kClsInputW}); assert(data); // ***************************************** YOLOV8 BACKBONE ******************************************** - nvinfer1::IElementWiseLayer *conv0 = + nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); - nvinfer1::IElementWiseLayer *conv1 = + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); // C2 Block (11233) - nvinfer1::IElementWiseLayer *conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), + nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); - nvinfer1::IElementWiseLayer *conv3 = + nvinfer1::IElementWiseLayer* conv3 = convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); // C2 Block Sequence (22466) - nvinfer1::IElementWiseLayer *conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), + nvinfer1::IElementWiseLayer* 
conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); - nvinfer1::IElementWiseLayer *conv5 = + nvinfer1::IElementWiseLayer* conv5 = convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); // C2 Block Sequence (22466) - nvinfer1::IElementWiseLayer *conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), + nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); - nvinfer1::IElementWiseLayer *conv7 = + nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.7"); // C2 Block (11233) - nvinfer1::IElementWiseLayer *conv8 = + nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); @@ -74,7 +74,7 @@ nvinfer1::IHostMemory *buildEngineYolov8Cls(nvinfer1::IBuilder *builder, nvinfer conv_class->getOutput(0)->getDimensions(); // Obtain the dimensions of the output of conv_class assert(dims.nbDims == 4); // Make sure there are exactly 3 dimensions (channels, height, width) - nvinfer1::IPoolingLayer *pool2 = network->addPoolingNd(*conv_class->getOutput(0), nvinfer1::PoolingType::kAVERAGE, + nvinfer1::IPoolingLayer* pool2 = network->addPoolingNd(*conv_class->getOutput(0), nvinfer1::PoolingType::kAVERAGE, nvinfer1::DimsHW{dims.d[2], dims.d[3]}); assert(pool2); @@ -83,12 +83,11 @@ nvinfer1::IHostMemory *buildEngineYolov8Cls(nvinfer1::IBuilder *builder, nvinfer shuffle_0->setReshapeDimensions(nvinfer1::Dims2{kBatchSize, max_channels}); auto linear_weight = weightMap["model.9.linear.weight"]; auto constant_weight = network->addConstant(nvinfer1::Dims2{kClsNumClass, max_channels}, linear_weight); - auto constant_bias = network->addConstant(nvinfer1::Dims2{kBatchSize, kClsNumClass}, - weightMap["model.9.linear.bias"]); - auto linear_matrix_multipy = network->addMatrixMultiply(*shuffle_0->getOutput(0), - nvinfer1::MatrixOperation::kNONE, - *constant_weight->getOutput(0), - nvinfer1::MatrixOperation::kTRANSPOSE); + auto constant_bias = + network->addConstant(nvinfer1::Dims2{kBatchSize, kClsNumClass}, weightMap["model.9.linear.bias"]); + auto linear_matrix_multipy = + network->addMatrixMultiply(*shuffle_0->getOutput(0), nvinfer1::MatrixOperation::kNONE, + *constant_weight->getOutput(0), nvinfer1::MatrixOperation::kTRANSPOSE); auto yolo = network->addElementWise(*linear_matrix_multipy->getOutput(0), *constant_bias->getOutput(0), nvinfer1::ElementWiseOperation::kSUM); assert(yolo); @@ -107,107 +106,107 @@ nvinfer1::IHostMemory *buildEngineYolov8Cls(nvinfer1::IBuilder *builder, nvinfer std::cout << "Your platform supports int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl; assert(builder->platformHasFastInt8()); config->setFlag(nvinfer1::BuilderFlag::kINT8); - auto *calibrator = new Int8EntropyCalibrator2(1, kClsInputW, kClsInputH, kInputQuantizationFolder, + auto* calibrator = new Int8EntropyCalibrator2(1, kClsInputW, kClsInputH, kInputQuantizationFolder, "int8calib.table", kInputTensorName); config->setInt8Calibrator(calibrator); #endif // Begin building the engine; this may take a while std::cout << "Building engine, please wait for a while..." 
<< std::endl; - nvinfer1::IHostMemory *serialized_model = builder->buildSerializedNetwork(*network, *config); + nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config); std::cout << "Build engine successfully!" << std::endl; // Cleanup the network definition and allocated weights delete network; - for (auto &mem: weightMap) { - free((void *) (mem.second.values)); + for (auto& mem : weightMap) { + free((void*)(mem.second.values)); } return serialized_model; } -nvinfer1::IHostMemory *buildEngineYolov8Det(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, - nvinfer1::DataType dt, const std::string &wts_path, float &gd, float &gw, - int &max_channels) { +nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels) { std::map weightMap = loadWeights(wts_path); -// nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); - nvinfer1::INetworkDefinition *network = builder->createNetworkV2( + // nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); + nvinfer1::INetworkDefinition* network = builder->createNetworkV2( 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)); /******************************************************************************************************* ****************************************** YOLOV8 INPUT ********************************************** *******************************************************************************************************/ - nvinfer1::ITensor *data = network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kInputH, kInputW}); + nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kInputH, kInputW}); assert(data); /******************************************************************************************************* ***************************************** YOLOV8 BACKBONE ******************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer *conv0 = + nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); - nvinfer1::IElementWiseLayer *conv1 = + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); // 11233 - nvinfer1::IElementWiseLayer *conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), + nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); - nvinfer1::IElementWiseLayer *conv3 = + nvinfer1::IElementWiseLayer* conv3 = convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); // 22466 - nvinfer1::IElementWiseLayer *conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), + nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); - nvinfer1::IElementWiseLayer *conv5 = + nvinfer1::IElementWiseLayer* conv5 = convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 
2, 1, "model.5"); // 22466 - nvinfer1::IElementWiseLayer *conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), + nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); - nvinfer1::IElementWiseLayer *conv7 = + nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.7"); // 11233 - nvinfer1::IElementWiseLayer *conv8 = + nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); - nvinfer1::IElementWiseLayer *conv9 = + nvinfer1::IElementWiseLayer* conv9 = SPPF(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.9"); /******************************************************************************************************* ********************************************* YOLOV8 HEAD ******************************************** *******************************************************************************************************/ float scale[] = {1.0, 1.0, 2.0, 2.0}; - nvinfer1::IResizeLayer *upsample10 = network->addResize(*conv9->getOutput(0)); + nvinfer1::IResizeLayer* upsample10 = network->addResize(*conv9->getOutput(0)); assert(upsample10); upsample10->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample10->setScales(scale, 4); - nvinfer1::ITensor *inputTensor11[] = {upsample10->getOutput(0), conv6->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat11 = network->addConcatenation(inputTensor11, 2); + nvinfer1::ITensor* inputTensor11[] = {upsample10->getOutput(0), conv6->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat11 = network->addConcatenation(inputTensor11, 2); - nvinfer1::IElementWiseLayer *conv12 = + nvinfer1::IElementWiseLayer* conv12 = C2F(network, weightMap, *cat11->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.12"); - nvinfer1::IResizeLayer *upsample13 = network->addResize(*conv12->getOutput(0)); + nvinfer1::IResizeLayer* upsample13 = network->addResize(*conv12->getOutput(0)); assert(upsample13); upsample13->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample13->setScales(scale, 4); - nvinfer1::ITensor *inputTensor14[] = {upsample13->getOutput(0), conv4->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat14 = network->addConcatenation(inputTensor14, 2); + nvinfer1::ITensor* inputTensor14[] = {upsample13->getOutput(0), conv4->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat14 = network->addConcatenation(inputTensor14, 2); - nvinfer1::IElementWiseLayer *conv15 = + nvinfer1::IElementWiseLayer* conv15 = C2F(network, weightMap, *cat14->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.15"); - nvinfer1::IElementWiseLayer *conv16 = convBnSiLU(network, weightMap, *conv15->getOutput(0), + nvinfer1::IElementWiseLayer* conv16 = convBnSiLU(network, weightMap, *conv15->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.16"); - nvinfer1::ITensor *inputTensor17[] = {conv16->getOutput(0), conv12->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat17 = network->addConcatenation(inputTensor17, 2); - nvinfer1::IElementWiseLayer *conv18 = + nvinfer1::ITensor* 
inputTensor17[] = {conv16->getOutput(0), conv12->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat17 = network->addConcatenation(inputTensor17, 2); + nvinfer1::IElementWiseLayer* conv18 = C2F(network, weightMap, *cat17->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.18"); - nvinfer1::IElementWiseLayer *conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), + nvinfer1::IElementWiseLayer* conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.19"); - nvinfer1::ITensor *inputTensor20[] = {conv19->getOutput(0), conv9->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat20 = network->addConcatenation(inputTensor20, 2); - nvinfer1::IElementWiseLayer *conv21 = + nvinfer1::ITensor* inputTensor20[] = {conv19->getOutput(0), conv9->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat20 = network->addConcatenation(inputTensor20, 2); + nvinfer1::IElementWiseLayer* conv21 = C2F(network, weightMap, *cat20->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.21"); @@ -218,142 +217,130 @@ nvinfer1::IHostMemory *buildEngineYolov8Det(nvinfer1::IBuilder *builder, nvinfer int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels); // output0 - nvinfer1::IElementWiseLayer *conv22_cv2_0_0 = + nvinfer1::IElementWiseLayer* conv22_cv2_0_0 = convBnSiLU(network, weightMap, *conv15->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.0"); - nvinfer1::IElementWiseLayer *conv22_cv2_0_1 = + nvinfer1::IElementWiseLayer* conv22_cv2_0_1 = convBnSiLU(network, weightMap, *conv22_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.1"); - nvinfer1::IConvolutionLayer *conv22_cv2_0_2 = + nvinfer1::IConvolutionLayer* conv22_cv2_0_2 = network->addConvolutionNd(*conv22_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.0.2.weight"], weightMap["model.22.cv2.0.2.bias"]); conv22_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv22_cv3_0_0 = + nvinfer1::IElementWiseLayer* conv22_cv3_0_0 = convBnSiLU(network, weightMap, *conv15->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.0.0"); - nvinfer1::IElementWiseLayer *conv22_cv3_0_1 = - convBnSiLU(network, weightMap, *conv22_cv3_0_0->getOutput(0), - base_out_channel, 3, 1, 1, "model.22.cv3.0.1"); - nvinfer1::IConvolutionLayer *conv22_cv3_0_2 = + nvinfer1::IElementWiseLayer* conv22_cv3_0_1 = convBnSiLU(network, weightMap, *conv22_cv3_0_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.22.cv3.0.1"); + nvinfer1::IConvolutionLayer* conv22_cv3_0_2 = network->addConvolutionNd(*conv22_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.0.2.weight"], weightMap["model.22.cv3.0.2.bias"]); conv22_cv3_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv3_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor22_0[] = {conv22_cv2_0_2->getOutput(0), conv22_cv3_0_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_0 = network->addConcatenation(inputTensor22_0, 2); + nvinfer1::ITensor* inputTensor22_0[] = {conv22_cv2_0_2->getOutput(0), conv22_cv3_0_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_0 = network->addConcatenation(inputTensor22_0, 2); // output1 - nvinfer1::IElementWiseLayer *conv22_cv2_1_0 = + nvinfer1::IElementWiseLayer* conv22_cv2_1_0 = 
convBnSiLU(network, weightMap, *conv18->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.0"); - nvinfer1::IElementWiseLayer *conv22_cv2_1_1 = + nvinfer1::IElementWiseLayer* conv22_cv2_1_1 = convBnSiLU(network, weightMap, *conv22_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.1"); - nvinfer1::IConvolutionLayer *conv22_cv2_1_2 = + nvinfer1::IConvolutionLayer* conv22_cv2_1_2 = network->addConvolutionNd(*conv22_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.1.2.weight"], weightMap["model.22.cv2.1.2.bias"]); conv22_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv22_cv3_1_0 = + nvinfer1::IElementWiseLayer* conv22_cv3_1_0 = convBnSiLU(network, weightMap, *conv18->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.1.0"); - nvinfer1::IElementWiseLayer *conv22_cv3_1_1 = - convBnSiLU(network, weightMap, *conv22_cv3_1_0->getOutput(0), - base_out_channel, 3, 1, 1, "model.22.cv3.1.1"); - nvinfer1::IConvolutionLayer *conv22_cv3_1_2 = + nvinfer1::IElementWiseLayer* conv22_cv3_1_1 = convBnSiLU(network, weightMap, *conv22_cv3_1_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.22.cv3.1.1"); + nvinfer1::IConvolutionLayer* conv22_cv3_1_2 = network->addConvolutionNd(*conv22_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.1.2.weight"], weightMap["model.22.cv3.1.2.bias"]); conv22_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor22_1[] = {conv22_cv2_1_2->getOutput(0), conv22_cv3_1_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_1 = network->addConcatenation(inputTensor22_1, 2); + nvinfer1::ITensor* inputTensor22_1[] = {conv22_cv2_1_2->getOutput(0), conv22_cv3_1_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_1 = network->addConcatenation(inputTensor22_1, 2); // output2 - nvinfer1::IElementWiseLayer *conv22_cv2_2_0 = + nvinfer1::IElementWiseLayer* conv22_cv2_2_0 = convBnSiLU(network, weightMap, *conv21->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.0"); - nvinfer1::IElementWiseLayer *conv22_cv2_2_1 = + nvinfer1::IElementWiseLayer* conv22_cv2_2_1 = convBnSiLU(network, weightMap, *conv22_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.1"); - nvinfer1::IConvolutionLayer *conv22_cv2_2_2 = + nvinfer1::IConvolutionLayer* conv22_cv2_2_2 = network->addConvolutionNd(*conv22_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.2.2.weight"], weightMap["model.22.cv2.2.2.bias"]); - nvinfer1::IElementWiseLayer *conv22_cv3_2_0 = + nvinfer1::IElementWiseLayer* conv22_cv3_2_0 = convBnSiLU(network, weightMap, *conv21->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.2.0"); - nvinfer1::IElementWiseLayer *conv22_cv3_2_1 = - convBnSiLU(network, weightMap, *conv22_cv3_2_0->getOutput(0), - base_out_channel, 3, 1, 1, "model.22.cv3.2.1"); - nvinfer1::IConvolutionLayer *conv22_cv3_2_2 = + nvinfer1::IElementWiseLayer* conv22_cv3_2_1 = convBnSiLU(network, weightMap, *conv22_cv3_2_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.22.cv3.2.1"); + nvinfer1::IConvolutionLayer* conv22_cv3_2_2 = network->addConvolutionNd(*conv22_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.2.2.weight"], weightMap["model.22.cv3.2.2.bias"]); - nvinfer1::ITensor *inputTensor22_2[] = {conv22_cv2_2_2->getOutput(0), conv22_cv3_2_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_2 = 
network->addConcatenation(inputTensor22_2, 2); + nvinfer1::ITensor* inputTensor22_2[] = {conv22_cv2_2_2->getOutput(0), conv22_cv3_2_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_2 = network->addConcatenation(inputTensor22_2, 2); /******************************************************************************************************* ********************************************* YOLOV8 DETECT ****************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer *conv_layers[] = {conv3, conv5, conv7}; + nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7}; int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides); int stridesLength = sizeof(strides) / sizeof(int); - nvinfer1::IShuffleLayer *shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0)); shuffle22_0->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}); - nvinfer1::ISliceLayer *split22_0_0 = network->addSlice( - *shuffle22_0->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[0]) * (kInputW / strides[0])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split22_0_1 = network->addSlice( - *shuffle22_0->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, - nvinfer1::Dims3{1, 1, 1}); - - nvinfer1::IShuffleLayer *dfl22_0 = + nvinfer1::ISliceLayer* split22_0_0 = network->addSlice( + *shuffle22_0->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split22_0_1 = + network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, + nvinfer1::Dims3{1, 1, 1}); + + nvinfer1::IShuffleLayer* dfl22_0 = DFL(network, weightMap, *split22_0_0->getOutput(0), 4, (kInputH / strides[0]) * (kInputW / strides[0]), 1, 1, 0, "model.22.dfl.conv.weight"); - nvinfer1::ITensor *inputTensor22_dfl_0[] = {dfl22_0->getOutput(0), split22_0_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_dfl_0 = network->addConcatenation(inputTensor22_dfl_0, 2); + nvinfer1::ITensor* inputTensor22_dfl_0[] = {dfl22_0->getOutput(0), split22_0_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_dfl_0 = network->addConcatenation(inputTensor22_dfl_0, 2); cat22_dfl_0->setAxis(1); - nvinfer1::IShuffleLayer *shuffle22_1 = network->addShuffle(*cat22_1->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle22_1 = network->addShuffle(*cat22_1->getOutput(0)); shuffle22_1->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}); - nvinfer1::ISliceLayer *split22_1_0 = network->addSlice( - *shuffle22_1->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[1]) * (kInputW / strides[1])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split22_1_1 = network->addSlice( - *shuffle22_1->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer 
*dfl22_1 = + nvinfer1::ISliceLayer* split22_1_0 = network->addSlice( + *shuffle22_1->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split22_1_1 = + network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl22_1 = DFL(network, weightMap, *split22_1_0->getOutput(0), 4, (kInputH / strides[1]) * (kInputW / strides[1]), 1, 1, 0, "model.22.dfl.conv.weight"); - nvinfer1::ITensor *inputTensor22_dfl_1[] = {dfl22_1->getOutput(0), split22_1_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_dfl_1 = network->addConcatenation(inputTensor22_dfl_1, 2); + nvinfer1::ITensor* inputTensor22_dfl_1[] = {dfl22_1->getOutput(0), split22_1_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_dfl_1 = network->addConcatenation(inputTensor22_dfl_1, 2); cat22_dfl_1->setAxis(1); - nvinfer1::IShuffleLayer *shuffle22_2 = network->addShuffle(*cat22_2->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle22_2 = network->addShuffle(*cat22_2->getOutput(0)); shuffle22_2->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}); - nvinfer1::ISliceLayer *split22_2_0 = network->addSlice( - *shuffle22_2->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[2]) * (kInputW / strides[2])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split22_2_1 = network->addSlice( - *shuffle22_2->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl22_2 = + nvinfer1::ISliceLayer* split22_2_0 = network->addSlice( + *shuffle22_2->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split22_2_1 = + network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl22_2 = DFL(network, weightMap, *split22_2_0->getOutput(0), 4, (kInputH / strides[2]) * (kInputW / strides[2]), 1, 1, 0, "model.22.dfl.conv.weight"); - nvinfer1::ITensor *inputTensor22_dfl_2[] = {dfl22_2->getOutput(0), split22_2_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_dfl_2 = network->addConcatenation(inputTensor22_dfl_2, 2); + nvinfer1::ITensor* inputTensor22_dfl_2[] = {dfl22_2->getOutput(0), split22_2_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_dfl_2 = network->addConcatenation(inputTensor22_dfl_2, 2); cat22_dfl_2->setAxis(1); - nvinfer1::IPluginV2Layer *yolo = - addYoLoLayer(network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}, + nvinfer1::IPluginV2Layer* yolo = + addYoLoLayer(network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}, strides, stridesLength, false, false); yolo->getOutput(0)->setName(kOutputTensorName); @@ -367,68 +354,68 @@ nvinfer1::IHostMemory *buildEngineYolov8Det(nvinfer1::IBuilder *builder, nvinfer std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? 
"true" : "false") << std::endl; assert(builder->platformHasFastInt8()); config->setFlag(nvinfer1::BuilderFlag::kINT8); - auto *calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, kInputQuantizationFolder, "int8calib.table", + auto* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, kInputQuantizationFolder, "int8calib.table", kInputTensorName); config->setInt8Calibrator(calibrator); #endif std::cout << "Building engine, please wait for a while..." << std::endl; - nvinfer1::IHostMemory *serialized_model = builder->buildSerializedNetwork(*network, *config); + nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config); std::cout << "Build engine successfully!" << std::endl; delete network; - for (auto &mem: weightMap) { - free((void *) (mem.second.values)); + for (auto& mem : weightMap) { + free((void*)(mem.second.values)); } return serialized_model; } -nvinfer1::IHostMemory * -buildEngineYolov8DetP6(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, nvinfer1::DataType dt, - const std::string &wts_path, float &gd, float &gw, int &max_channels) { +nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels) { std::map weightMap = loadWeights(wts_path); -// nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); - nvinfer1::INetworkDefinition *network = builder->createNetworkV2( + // nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); + nvinfer1::INetworkDefinition* network = builder->createNetworkV2( 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)); /******************************************************************************************************* ****************************************** YOLOV8 INPUT ********************************************** *******************************************************************************************************/ - nvinfer1::ITensor *data = network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kInputH, kInputW}); + nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kInputH, kInputW}); assert(data); /******************************************************************************************************* ***************************************** YOLOV8 BACKBONE ******************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer *conv0 = + nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); - nvinfer1::IElementWiseLayer *conv1 = + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); // 11233 - nvinfer1::IElementWiseLayer *conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), + nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); - nvinfer1::IElementWiseLayer *conv3 = + nvinfer1::IElementWiseLayer* conv3 = convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); // 22466 - nvinfer1::IElementWiseLayer *conv4 = C2F(network, weightMap, 
*conv3->getOutput(0), get_width(256, gw, max_channels), + nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); - nvinfer1::IElementWiseLayer *conv5 = + nvinfer1::IElementWiseLayer* conv5 = convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); // 22466 - nvinfer1::IElementWiseLayer *conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), + nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); - nvinfer1::IElementWiseLayer *conv7 = + nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(768, gw, max_channels), 3, 2, 1, "model.7"); - nvinfer1::IElementWiseLayer *conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(768, gw, max_channels), + nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); - nvinfer1::IElementWiseLayer *conv9 = + nvinfer1::IElementWiseLayer* conv9 = convBnSiLU(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.9"); - nvinfer1::IElementWiseLayer *conv10 = + nvinfer1::IElementWiseLayer* conv10 = C2F(network, weightMap, *conv9->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.10"); - nvinfer1::IElementWiseLayer *conv11 = + nvinfer1::IElementWiseLayer* conv11 = SPPF(network, weightMap, *conv10->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.11"); @@ -439,60 +426,60 @@ buildEngineYolov8DetP6(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *co float scale[] = {1.0, 1.0, 2.0, 2.0}; // scale used for upsampling // P5 - nvinfer1::IResizeLayer *upsample12 = network->addResize(*conv11->getOutput(0)); + nvinfer1::IResizeLayer* upsample12 = network->addResize(*conv11->getOutput(0)); upsample12->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample12->setScales(scale, 4); - nvinfer1::ITensor *concat13_inputs[] = {upsample12->getOutput(0), conv8->getOutput(0)}; - nvinfer1::IConcatenationLayer *concat13 = network->addConcatenation(concat13_inputs, 2); - nvinfer1::IElementWiseLayer *conv14 = + nvinfer1::ITensor* concat13_inputs[] = {upsample12->getOutput(0), conv8->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat13 = network->addConcatenation(concat13_inputs, 2); + nvinfer1::IElementWiseLayer* conv14 = C2(network, weightMap, *concat13->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), false, 0.5, "model.14"); // P4 - nvinfer1::IResizeLayer *upsample15 = network->addResize(*conv14->getOutput(0)); + nvinfer1::IResizeLayer* upsample15 = network->addResize(*conv14->getOutput(0)); upsample15->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample15->setScales(scale, 4); - nvinfer1::ITensor *concat16_inputs[] = {upsample15->getOutput(0), conv6->getOutput(0)}; - nvinfer1::IConcatenationLayer *concat16 = network->addConcatenation(concat16_inputs, 2); - nvinfer1::IElementWiseLayer *conv17 = + nvinfer1::ITensor* concat16_inputs[] = {upsample15->getOutput(0), conv6->getOutput(0)}; + nvinfer1::IConcatenationLayer* 
concat16 = network->addConcatenation(concat16_inputs, 2); + nvinfer1::IElementWiseLayer* conv17 = C2(network, weightMap, *concat16->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.17"); // P3 - nvinfer1::IResizeLayer *upsample18 = network->addResize(*conv17->getOutput(0)); + nvinfer1::IResizeLayer* upsample18 = network->addResize(*conv17->getOutput(0)); upsample18->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample18->setScales(scale, 4); - nvinfer1::ITensor *concat19_inputs[] = {upsample18->getOutput(0), conv4->getOutput(0)}; - nvinfer1::IConcatenationLayer *concat19 = network->addConcatenation(concat19_inputs, 2); - nvinfer1::IElementWiseLayer *conv20 = + nvinfer1::ITensor* concat19_inputs[] = {upsample18->getOutput(0), conv4->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat19 = network->addConcatenation(concat19_inputs, 2); + nvinfer1::IElementWiseLayer* conv20 = C2(network, weightMap, *concat19->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.20"); // Additional layers for P4, P5, P6 // P4/16-medium - nvinfer1::IElementWiseLayer *conv21 = convBnSiLU(network, weightMap, *conv20->getOutput(0), + nvinfer1::IElementWiseLayer* conv21 = convBnSiLU(network, weightMap, *conv20->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.21"); - nvinfer1::ITensor *concat22_inputs[] = {conv21->getOutput(0), conv17->getOutput(0)}; - nvinfer1::IConcatenationLayer *concat22 = network->addConcatenation(concat22_inputs, 2); - nvinfer1::IElementWiseLayer *conv23 = + nvinfer1::ITensor* concat22_inputs[] = {conv21->getOutput(0), conv17->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat22 = network->addConcatenation(concat22_inputs, 2); + nvinfer1::IElementWiseLayer* conv23 = C2(network, weightMap, *concat22->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.23"); // P5/32-large - nvinfer1::IElementWiseLayer *conv24 = convBnSiLU(network, weightMap, *conv23->getOutput(0), + nvinfer1::IElementWiseLayer* conv24 = convBnSiLU(network, weightMap, *conv23->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.24"); - nvinfer1::ITensor *concat25_inputs[] = {conv24->getOutput(0), conv14->getOutput(0)}; - nvinfer1::IConcatenationLayer *concat25 = network->addConcatenation(concat25_inputs, 2); - nvinfer1::IElementWiseLayer *conv26 = + nvinfer1::ITensor* concat25_inputs[] = {conv24->getOutput(0), conv14->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat25 = network->addConcatenation(concat25_inputs, 2); + nvinfer1::IElementWiseLayer* conv26 = C2(network, weightMap, *concat25->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), false, 0.5, "model.26"); // P6/64-xlarge - nvinfer1::IElementWiseLayer *conv27 = convBnSiLU(network, weightMap, *conv26->getOutput(0), + nvinfer1::IElementWiseLayer* conv27 = convBnSiLU(network, weightMap, *conv26->getOutput(0), get_width(768, gw, max_channels), 3, 2, 1, "model.27"); - nvinfer1::ITensor *concat28_inputs[] = {conv27->getOutput(0), conv11->getOutput(0)}; - nvinfer1::IConcatenationLayer *concat28 = network->addConcatenation(concat28_inputs, 2); - nvinfer1::IElementWiseLayer *conv29 = + nvinfer1::ITensor* concat28_inputs[] = {conv27->getOutput(0), conv11->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat28 = network->addConcatenation(concat28_inputs, 2); + 
nvinfer1::IElementWiseLayer* conv29 = C2(network, weightMap, *concat28->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.29"); @@ -503,196 +490,182 @@ buildEngineYolov8DetP6(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *co int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels); // output0 - nvinfer1::IElementWiseLayer *conv30_cv2_0_0 = + nvinfer1::IElementWiseLayer* conv30_cv2_0_0 = convBnSiLU(network, weightMap, *conv20->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.0.0"); - nvinfer1::IElementWiseLayer *conv30_cv2_0_1 = + nvinfer1::IElementWiseLayer* conv30_cv2_0_1 = convBnSiLU(network, weightMap, *conv30_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.0.1"); - nvinfer1::IConvolutionLayer *conv30_cv2_0_2 = + nvinfer1::IConvolutionLayer* conv30_cv2_0_2 = network->addConvolutionNd(*conv30_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.0.2.weight"], weightMap["model.30.cv2.0.2.bias"]); conv30_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv30_cv3_0_0 = + nvinfer1::IElementWiseLayer* conv30_cv3_0_0 = convBnSiLU(network, weightMap, *conv20->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.0.0"); - nvinfer1::IElementWiseLayer *conv30_cv3_0_1 = - convBnSiLU(network, weightMap, *conv30_cv3_0_0->getOutput(0), base_out_channel, 3, 1, 1, - "model.30.cv3.0.1"); - nvinfer1::IConvolutionLayer *conv30_cv3_0_2 = + nvinfer1::IElementWiseLayer* conv30_cv3_0_1 = convBnSiLU(network, weightMap, *conv30_cv3_0_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.30.cv3.0.1"); + nvinfer1::IConvolutionLayer* conv30_cv3_0_2 = network->addConvolutionNd(*conv30_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.0.2.weight"], weightMap["model.30.cv3.0.2.bias"]); conv30_cv3_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv3_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor30_0[] = {conv30_cv2_0_2->getOutput(0), conv30_cv3_0_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_0 = network->addConcatenation(inputTensor30_0, 2); + nvinfer1::ITensor* inputTensor30_0[] = {conv30_cv2_0_2->getOutput(0), conv30_cv3_0_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_0 = network->addConcatenation(inputTensor30_0, 2); // output1 - nvinfer1::IElementWiseLayer *conv30_cv2_1_0 = + nvinfer1::IElementWiseLayer* conv30_cv2_1_0 = convBnSiLU(network, weightMap, *conv23->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.1.0"); - nvinfer1::IElementWiseLayer *conv30_cv2_1_1 = + nvinfer1::IElementWiseLayer* conv30_cv2_1_1 = convBnSiLU(network, weightMap, *conv30_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.1.1"); - nvinfer1::IConvolutionLayer *conv30_cv2_1_2 = + nvinfer1::IConvolutionLayer* conv30_cv2_1_2 = network->addConvolutionNd(*conv30_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.1.2.weight"], weightMap["model.30.cv2.1.2.bias"]); conv30_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv30_cv3_1_0 = + nvinfer1::IElementWiseLayer* conv30_cv3_1_0 = convBnSiLU(network, weightMap, *conv23->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.1.0"); - nvinfer1::IElementWiseLayer *conv30_cv3_1_1 = - convBnSiLU(network, weightMap, *conv30_cv3_1_0->getOutput(0), 
base_out_channel, 3, 1, 1, - "model.30.cv3.1.1"); - nvinfer1::IConvolutionLayer *conv30_cv3_1_2 = + nvinfer1::IElementWiseLayer* conv30_cv3_1_1 = convBnSiLU(network, weightMap, *conv30_cv3_1_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.30.cv3.1.1"); + nvinfer1::IConvolutionLayer* conv30_cv3_1_2 = network->addConvolutionNd(*conv30_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.1.2.weight"], weightMap["model.30.cv3.1.2.bias"]); conv30_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor30_1[] = {conv30_cv2_1_2->getOutput(0), conv30_cv3_1_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_1 = network->addConcatenation(inputTensor30_1, 2); + nvinfer1::ITensor* inputTensor30_1[] = {conv30_cv2_1_2->getOutput(0), conv30_cv3_1_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_1 = network->addConcatenation(inputTensor30_1, 2); // output2 - nvinfer1::IElementWiseLayer *conv30_cv2_2_0 = + nvinfer1::IElementWiseLayer* conv30_cv2_2_0 = convBnSiLU(network, weightMap, *conv26->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.2.0"); - nvinfer1::IElementWiseLayer *conv30_cv2_2_1 = + nvinfer1::IElementWiseLayer* conv30_cv2_2_1 = convBnSiLU(network, weightMap, *conv30_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.2.1"); - nvinfer1::IConvolutionLayer *conv30_cv2_2_2 = + nvinfer1::IConvolutionLayer* conv30_cv2_2_2 = network->addConvolutionNd(*conv30_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.2.2.weight"], weightMap["model.30.cv2.2.2.bias"]); conv30_cv2_2_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv2_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv30_cv3_2_0 = + nvinfer1::IElementWiseLayer* conv30_cv3_2_0 = convBnSiLU(network, weightMap, *conv26->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.2.0"); - nvinfer1::IElementWiseLayer *conv30_cv3_2_1 = convBnSiLU(network, weightMap, *conv30_cv3_2_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv30_cv3_2_1 = convBnSiLU(network, weightMap, *conv30_cv3_2_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.2.1"); - nvinfer1::IConvolutionLayer *conv30_cv3_2_2 = + nvinfer1::IConvolutionLayer* conv30_cv3_2_2 = network->addConvolutionNd(*conv30_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.2.2.weight"], weightMap["model.30.cv3.2.2.bias"]); conv30_cv3_2_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv3_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor30_2[] = {conv30_cv2_2_2->getOutput(0), conv30_cv3_2_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_2 = network->addConcatenation(inputTensor30_2, 2); + nvinfer1::ITensor* inputTensor30_2[] = {conv30_cv2_2_2->getOutput(0), conv30_cv3_2_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_2 = network->addConcatenation(inputTensor30_2, 2); // output3 - nvinfer1::IElementWiseLayer *conv30_cv2_3_0 = + nvinfer1::IElementWiseLayer* conv30_cv2_3_0 = convBnSiLU(network, weightMap, *conv29->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.3.0"); - nvinfer1::IElementWiseLayer *conv30_cv2_3_1 = + nvinfer1::IElementWiseLayer* conv30_cv2_3_1 = convBnSiLU(network, weightMap, *conv30_cv2_3_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.3.1"); - nvinfer1::IConvolutionLayer *conv30_cv2_3_2 = + nvinfer1::IConvolutionLayer* conv30_cv2_3_2 = network->addConvolutionNd(*conv30_cv2_3_1->getOutput(0), 64, nvinfer1::DimsHW{1, 
1}, weightMap["model.30.cv2.3.2.weight"], weightMap["model.30.cv2.3.2.bias"]); conv30_cv2_3_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv2_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv30_cv3_3_0 = + nvinfer1::IElementWiseLayer* conv30_cv3_3_0 = convBnSiLU(network, weightMap, *conv29->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.3.0"); - nvinfer1::IElementWiseLayer *conv30_cv3_3_1 = convBnSiLU(network, weightMap, *conv30_cv3_3_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv30_cv3_3_1 = convBnSiLU(network, weightMap, *conv30_cv3_3_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.3.1"); - nvinfer1::IConvolutionLayer *conv30_cv3_3_2 = + nvinfer1::IConvolutionLayer* conv30_cv3_3_2 = network->addConvolutionNd(*conv30_cv3_3_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.3.2.weight"], weightMap["model.30.cv3.3.2.bias"]); conv30_cv3_3_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv3_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor30_3[] = {conv30_cv2_3_2->getOutput(0), conv30_cv3_3_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_3 = network->addConcatenation(inputTensor30_3, 2); + nvinfer1::ITensor* inputTensor30_3[] = {conv30_cv2_3_2->getOutput(0), conv30_cv3_3_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_3 = network->addConcatenation(inputTensor30_3, 2); /******************************************************************************************************* ********************************************* YOLOV8 DETECT ****************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer *conv_layers[] = {conv3, conv5, conv7, conv9}; + nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7, conv9}; int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides); int stridesLength = sizeof(strides) / sizeof(int); // P3 processing steps (remains unchanged) - nvinfer1::IShuffleLayer *shuffle30_0 = + nvinfer1::IShuffleLayer* shuffle30_0 = network->addShuffle(*cat30_0->getOutput(0)); // Reusing the previous cat30_0 as P3 concatenation layer shuffle30_0->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}); - nvinfer1::ISliceLayer *split30_0_0 = network->addSlice( - *shuffle30_0->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[0]) * (kInputW / strides[0])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split30_0_1 = network->addSlice( - *shuffle30_0->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl30_0 = + nvinfer1::ISliceLayer* split30_0_0 = network->addSlice( + *shuffle30_0->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split30_0_1 = + network->addSlice(*shuffle30_0->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl30_0 = DFL(network, weightMap, *split30_0_0->getOutput(0), 4, (kInputH / strides[0]) * (kInputW / strides[0]), 1, 1, 0, 
"model.30.dfl.conv.weight"); - nvinfer1::ITensor *inputTensor30_dfl_0[] = {dfl30_0->getOutput(0), split30_0_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_dfl_0 = network->addConcatenation(inputTensor30_dfl_0, 2); + nvinfer1::ITensor* inputTensor30_dfl_0[] = {dfl30_0->getOutput(0), split30_0_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_dfl_0 = network->addConcatenation(inputTensor30_dfl_0, 2); cat30_dfl_0->setAxis(1); // P4 processing steps (remains unchanged) - nvinfer1::IShuffleLayer *shuffle30_1 = + nvinfer1::IShuffleLayer* shuffle30_1 = network->addShuffle(*cat30_1->getOutput(0)); // Reusing the previous cat30_1 as P4 concatenation layer shuffle30_1->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}); - nvinfer1::ISliceLayer *split30_1_0 = network->addSlice( - *shuffle30_1->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[1]) * (kInputW / strides[1])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split30_1_1 = network->addSlice( - *shuffle30_1->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl30_1 = + nvinfer1::ISliceLayer* split30_1_0 = network->addSlice( + *shuffle30_1->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split30_1_1 = + network->addSlice(*shuffle30_1->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl30_1 = DFL(network, weightMap, *split30_1_0->getOutput(0), 4, (kInputH / strides[1]) * (kInputW / strides[1]), 1, 1, 0, "model.30.dfl.conv.weight"); - nvinfer1::ITensor *inputTensor30_dfl_1[] = {dfl30_1->getOutput(0), split30_1_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_dfl_1 = network->addConcatenation(inputTensor30_dfl_1, 2); + nvinfer1::ITensor* inputTensor30_dfl_1[] = {dfl30_1->getOutput(0), split30_1_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_dfl_1 = network->addConcatenation(inputTensor30_dfl_1, 2); cat30_dfl_1->setAxis(1); // P5 processing steps (remains unchanged) - nvinfer1::IShuffleLayer *shuffle30_2 = + nvinfer1::IShuffleLayer* shuffle30_2 = network->addShuffle(*cat30_2->getOutput(0)); // Reusing the previous cat30_2 as P5 concatenation layer shuffle30_2->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}); - nvinfer1::ISliceLayer *split30_2_0 = network->addSlice( - *shuffle30_2->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[2]) * (kInputW / strides[2])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split30_2_1 = network->addSlice( - *shuffle30_2->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl30_2 = + nvinfer1::ISliceLayer* split30_2_0 = network->addSlice( + *shuffle30_2->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split30_2_1 = + network->addSlice(*shuffle30_2->getOutput(0), nvinfer1::Dims3{0, 64, 
0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl30_2 = DFL(network, weightMap, *split30_2_0->getOutput(0), 4, (kInputH / strides[2]) * (kInputW / strides[2]), 1, 1, 0, "model.30.dfl.conv.weight"); - nvinfer1::ITensor *inputTensor30_dfl_2[] = {dfl30_2->getOutput(0), split30_2_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_dfl_2 = network->addConcatenation(inputTensor30_dfl_2, 2); + nvinfer1::ITensor* inputTensor30_dfl_2[] = {dfl30_2->getOutput(0), split30_2_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_dfl_2 = network->addConcatenation(inputTensor30_dfl_2, 2); cat30_dfl_2->setAxis(1); // P6 processing steps - nvinfer1::IShuffleLayer *shuffle30_3 = network->addShuffle(*cat30_3->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle30_3 = network->addShuffle(*cat30_3->getOutput(0)); shuffle30_3->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}); - nvinfer1::ISliceLayer *split30_3_0 = network->addSlice( - *shuffle30_3->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[3]) * (kInputW / strides[3])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split30_3_1 = network->addSlice( - *shuffle30_3->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl30_3 = + nvinfer1::ISliceLayer* split30_3_0 = network->addSlice( + *shuffle30_3->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[3]) * (kInputW / strides[3])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split30_3_1 = + network->addSlice(*shuffle30_3->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl30_3 = DFL(network, weightMap, *split30_3_0->getOutput(0), 4, (kInputH / strides[3]) * (kInputW / strides[3]), 1, 1, 0, "model.30.dfl.conv.weight"); - nvinfer1::ITensor *inputTensor30_dfl_3[] = {dfl30_3->getOutput(0), split30_3_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_dfl_3 = network->addConcatenation(inputTensor30_dfl_3, 2); + nvinfer1::ITensor* inputTensor30_dfl_3[] = {dfl30_3->getOutput(0), split30_3_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_dfl_3 = network->addConcatenation(inputTensor30_dfl_3, 2); cat30_dfl_3->setAxis(1); - nvinfer1::IPluginV2Layer *yolo = addYoLoLayer( - network, std::vector{cat30_dfl_0, cat30_dfl_1, cat30_dfl_2, cat30_dfl_3}, + nvinfer1::IPluginV2Layer* yolo = addYoLoLayer( + network, std::vector{cat30_dfl_0, cat30_dfl_1, cat30_dfl_2, cat30_dfl_3}, strides, stridesLength, false, false); yolo->getOutput(0)->setName(kOutputTensorName); network->markOutput(*yolo->getOutput(0)); @@ -705,64 +678,64 @@ buildEngineYolov8DetP6(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *co std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? 
"true" : "false") << std::endl; assert(builder->platformHasFastInt8()); config->setFlag(nvinfer1::BuilderFlag::kINT8); - auto *calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, kInputQuantizationFolder, "int8calib.table", + auto* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, kInputQuantizationFolder, "int8calib.table", kInputTensorName); config->setInt8Calibrator(calibrator); #endif std::cout << "Building engine, please wait for a while..." << std::endl; - nvinfer1::IHostMemory *serialized_model = builder->buildSerializedNetwork(*network, *config); + nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config); std::cout << "Build engine successfully!" << std::endl; delete network; - for (auto &mem: weightMap) { - free((void *) (mem.second.values)); + for (auto& mem : weightMap) { + free((void*)(mem.second.values)); } return serialized_model; } -nvinfer1::IHostMemory * -buildEngineYolov8DetP2(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, nvinfer1::DataType dt, - const std::string &wts_path, float &gd, float &gw, int &max_channels) { +nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels) { std::map weightMap = loadWeights(wts_path); -// nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); - nvinfer1::INetworkDefinition *network = builder->createNetworkV2( + // nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); + nvinfer1::INetworkDefinition* network = builder->createNetworkV2( 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)); /******************************************************************************************************* ****************************************** YOLOV8 INPUT ********************************************** *******************************************************************************************************/ - nvinfer1::ITensor *data = network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kInputH, kInputW}); + nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kInputH, kInputW}); assert(data); /******************************************************************************************************* ***************************************** YOLOV8 BACKBONE ******************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer *conv0 = + nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); - nvinfer1::IElementWiseLayer *conv1 = + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); // 11233 - nvinfer1::IElementWiseLayer *conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), + nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); - nvinfer1::IElementWiseLayer *conv3 = + nvinfer1::IElementWiseLayer* conv3 = convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); // 22466 - nvinfer1::IElementWiseLayer *conv4 = C2F(network, weightMap, 
*conv3->getOutput(0), get_width(256, gw, max_channels), + nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); - nvinfer1::IElementWiseLayer *conv5 = + nvinfer1::IElementWiseLayer* conv5 = convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); // 22466 - nvinfer1::IElementWiseLayer *conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), + nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); - nvinfer1::IElementWiseLayer *conv7 = + nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.7"); // 11233 - nvinfer1::IElementWiseLayer *conv8 = + nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); - nvinfer1::IElementWiseLayer *conv9 = + nvinfer1::IElementWiseLayer* conv9 = SPPF(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.9"); @@ -773,269 +746,255 @@ buildEngineYolov8DetP2(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *co float scale[] = {1.0, 1.0, 2.0, 2.0}; // scale used for upsampling // P4 - nvinfer1::IResizeLayer *upsample10 = network->addResize( + nvinfer1::IResizeLayer* upsample10 = network->addResize( *conv9->getOutput(0)); // Assuming conv9 is the last layer of the backbone as per P5 in your first section. 
upsample10->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample10->setScales(scale, 4); - nvinfer1::ITensor *concat11_inputs[] = { + nvinfer1::ITensor* concat11_inputs[] = { upsample10->getOutput(0), conv6->getOutput(0)}; // Assuming conv6 corresponds to "backbone P4" as per your pseudocode - nvinfer1::IConcatenationLayer *concat11 = network->addConcatenation(concat11_inputs, 2); - nvinfer1::IElementWiseLayer *conv12 = + nvinfer1::IConcatenationLayer* concat11 = network->addConcatenation(concat11_inputs, 2); + nvinfer1::IElementWiseLayer* conv12 = C2F(network, weightMap, *concat11->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.12"); // P3 - nvinfer1::IResizeLayer *upsample13 = network->addResize(*conv12->getOutput(0)); + nvinfer1::IResizeLayer* upsample13 = network->addResize(*conv12->getOutput(0)); upsample13->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample13->setScales(scale, 4); - nvinfer1::ITensor *concat14_inputs[] = {upsample13->getOutput(0), + nvinfer1::ITensor* concat14_inputs[] = {upsample13->getOutput(0), conv4->getOutput(0)}; // Assuming conv4 corresponds to "backbone P3" - nvinfer1::IConcatenationLayer *concat14 = network->addConcatenation(concat14_inputs, 2); - nvinfer1::IElementWiseLayer *conv15 = + nvinfer1::IConcatenationLayer* concat14 = network->addConcatenation(concat14_inputs, 2); + nvinfer1::IElementWiseLayer* conv15 = C2F(network, weightMap, *concat14->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.15"); // P2 - nvinfer1::IResizeLayer *upsample16 = network->addResize(*conv15->getOutput(0)); + nvinfer1::IResizeLayer* upsample16 = network->addResize(*conv15->getOutput(0)); upsample16->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample16->setScales(scale, 4); - nvinfer1::ITensor *concat17_inputs[] = {upsample16->getOutput(0), + nvinfer1::ITensor* concat17_inputs[] = {upsample16->getOutput(0), conv2->getOutput(0)}; // Assuming conv2 corresponds to "backbone P2" - nvinfer1::IConcatenationLayer *concat17 = network->addConcatenation(concat17_inputs, 2); - nvinfer1::IElementWiseLayer *conv18 = + nvinfer1::IConcatenationLayer* concat17 = network->addConcatenation(concat17_inputs, 2); + nvinfer1::IElementWiseLayer* conv18 = C2F(network, weightMap, *concat17->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), false, 0.5, "model.18"); // Additional layers for P3, P4, P5 // Downsample and concatenate for P3 - nvinfer1::IElementWiseLayer *conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), + nvinfer1::IElementWiseLayer* conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.19"); - nvinfer1::ITensor *concat20_inputs[] = { + nvinfer1::ITensor* concat20_inputs[] = { conv19->getOutput(0), conv15->getOutput(0)}; // concatenate with higher-resolution feature map from P3 - nvinfer1::IConcatenationLayer *concat20 = network->addConcatenation(concat20_inputs, 2); - nvinfer1::IElementWiseLayer *conv21 = + nvinfer1::IConcatenationLayer* concat20 = network->addConcatenation(concat20_inputs, 2); + nvinfer1::IElementWiseLayer* conv21 = C2F(network, weightMap, *concat20->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.21"); // Downsample and concatenate for P4 - nvinfer1::IElementWiseLayer *conv22 = convBnSiLU(network, weightMap, 
*conv21->getOutput(0), + nvinfer1::IElementWiseLayer* conv22 = convBnSiLU(network, weightMap, *conv21->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.22"); - nvinfer1::ITensor *concat23_inputs[] = { + nvinfer1::ITensor* concat23_inputs[] = { conv22->getOutput(0), conv12->getOutput(0)}; // concatenate with higher-resolution feature map from P4 - nvinfer1::IConcatenationLayer *concat23 = network->addConcatenation(concat23_inputs, 2); - nvinfer1::IElementWiseLayer *conv24 = + nvinfer1::IConcatenationLayer* concat23 = network->addConcatenation(concat23_inputs, 2); + nvinfer1::IElementWiseLayer* conv24 = C2F(network, weightMap, *concat23->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.24"); // Downsample and concatenate for P5 - nvinfer1::IElementWiseLayer *conv25 = convBnSiLU(network, weightMap, *conv24->getOutput(0), + nvinfer1::IElementWiseLayer* conv25 = convBnSiLU(network, weightMap, *conv24->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.25"); - nvinfer1::ITensor *concat26_inputs[] = { + nvinfer1::ITensor* concat26_inputs[] = { conv25->getOutput(0), conv9->getOutput(0)}; // concatenate with higher-resolution feature map from P5 - nvinfer1::IConcatenationLayer *concat26 = network->addConcatenation(concat26_inputs, 2); - nvinfer1::IElementWiseLayer *conv27 = + nvinfer1::IConcatenationLayer* concat26 = network->addConcatenation(concat26_inputs, 2); + nvinfer1::IElementWiseLayer* conv27 = C2F(network, weightMap, *concat26->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.27"); /******************************************************************************************************* ********************************************* YOLOV8 OUTPUT ****************************************** *******************************************************************************************************/ -// int ch_0 = conv18->getOutput(0)->getDimensions().d[1]; -// int base_in_channel = std::max(16, std::max(ch_0 / 4, 64)); -// int base_out_channel = std::max(ch_0, std::min(kNumClass, 100)); + // int ch_0 = conv18->getOutput(0)->getDimensions().d[1]; + // int base_in_channel = std::max(16, std::max(ch_0 / 4, 64)); + // int base_out_channel = std::max(ch_0, std::min(kNumClass, 100)); int base_in_channel = 64; int base_out_channel = (gw == 0.25) ? 
std::max(32, std::min(kNumClass, 100)) : get_width(128, gw, max_channels); // output0 - nvinfer1::IElementWiseLayer *conv28_cv2_0_0 = + nvinfer1::IElementWiseLayer* conv28_cv2_0_0 = convBnSiLU(network, weightMap, *conv18->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.0.0"); - nvinfer1::IElementWiseLayer *conv28_cv2_0_1 = + nvinfer1::IElementWiseLayer* conv28_cv2_0_1 = convBnSiLU(network, weightMap, *conv28_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.0.1"); - nvinfer1::IConvolutionLayer *conv28_cv2_0_2 = + nvinfer1::IConvolutionLayer* conv28_cv2_0_2 = network->addConvolutionNd(*conv28_cv2_0_1->getOutput(0), base_in_channel, nvinfer1::DimsHW{1, 1}, weightMap["model.28.cv2.0.2.weight"], weightMap["model.28.cv2.0.2.bias"]); conv28_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv28_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv28_cv3_0_0 = + nvinfer1::IElementWiseLayer* conv28_cv3_0_0 = convBnSiLU(network, weightMap, *conv18->getOutput(0), base_out_channel, 3, 1, 1, "model.28.cv3.0.0"); - nvinfer1::IElementWiseLayer *conv28_cv3_0_1 = - convBnSiLU(network, weightMap, *conv28_cv3_0_0->getOutput(0), base_out_channel, 3, 1, 1, - "model.28.cv3.0.1"); - nvinfer1::IConvolutionLayer *conv28_cv3_0_2 = + nvinfer1::IElementWiseLayer* conv28_cv3_0_1 = convBnSiLU(network, weightMap, *conv28_cv3_0_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.28.cv3.0.1"); + nvinfer1::IConvolutionLayer* conv28_cv3_0_2 = network->addConvolutionNd(*conv28_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.28.cv3.0.2.weight"], weightMap["model.28.cv3.0.2.bias"]); conv28_cv3_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv28_cv3_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor28_0[] = {conv28_cv2_0_2->getOutput(0), conv28_cv3_0_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat28_0 = network->addConcatenation(inputTensor28_0, 2); + nvinfer1::ITensor* inputTensor28_0[] = {conv28_cv2_0_2->getOutput(0), conv28_cv3_0_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_0 = network->addConcatenation(inputTensor28_0, 2); // output1 - nvinfer1::IElementWiseLayer *conv28_cv2_1_0 = + nvinfer1::IElementWiseLayer* conv28_cv2_1_0 = convBnSiLU(network, weightMap, *conv21->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.1.0"); - nvinfer1::IElementWiseLayer *conv28_cv2_1_1 = + nvinfer1::IElementWiseLayer* conv28_cv2_1_1 = convBnSiLU(network, weightMap, *conv28_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.1.1"); - nvinfer1::IConvolutionLayer *conv28_cv2_1_2 = + nvinfer1::IConvolutionLayer* conv28_cv2_1_2 = network->addConvolutionNd(*conv28_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.28.cv2.1.2.weight"], weightMap["model.28.cv2.1.2.bias"]); conv28_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv28_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv28_cv3_1_0 = + nvinfer1::IElementWiseLayer* conv28_cv3_1_0 = convBnSiLU(network, weightMap, *conv21->getOutput(0), base_out_channel, 3, 1, 1, "model.28.cv3.1.0"); - nvinfer1::IElementWiseLayer *conv28_cv3_1_1 = convBnSiLU(network, weightMap, *conv28_cv3_1_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv28_cv3_1_1 = convBnSiLU(network, weightMap, *conv28_cv3_1_0->getOutput(0), base_out_channel, 3, 1, 1, "model.28.cv3.1.1"); - nvinfer1::IConvolutionLayer *conv28_cv3_1_2 = + nvinfer1::IConvolutionLayer* conv28_cv3_1_2 = network->addConvolutionNd(*conv28_cv3_1_1->getOutput(0), kNumClass, 
nvinfer1::DimsHW{1, 1}, weightMap["model.28.cv3.1.2.weight"], weightMap["model.28.cv3.1.2.bias"]); conv28_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv28_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor28_1[] = {conv28_cv2_1_2->getOutput(0), conv28_cv3_1_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat28_1 = network->addConcatenation(inputTensor28_1, 2); + nvinfer1::ITensor* inputTensor28_1[] = {conv28_cv2_1_2->getOutput(0), conv28_cv3_1_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_1 = network->addConcatenation(inputTensor28_1, 2); // output2 - nvinfer1::IElementWiseLayer *conv28_cv2_2_0 = + nvinfer1::IElementWiseLayer* conv28_cv2_2_0 = convBnSiLU(network, weightMap, *conv24->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.2.0"); - nvinfer1::IElementWiseLayer *conv28_cv2_2_1 = + nvinfer1::IElementWiseLayer* conv28_cv2_2_1 = convBnSiLU(network, weightMap, *conv28_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.2.1"); - nvinfer1::IConvolutionLayer *conv28_cv2_2_2 = + nvinfer1::IConvolutionLayer* conv28_cv2_2_2 = network->addConvolutionNd(*conv28_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.28.cv2.2.2.weight"], weightMap["model.28.cv2.2.2.bias"]); conv28_cv2_2_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv28_cv2_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv28_cv3_2_0 = + nvinfer1::IElementWiseLayer* conv28_cv3_2_0 = convBnSiLU(network, weightMap, *conv24->getOutput(0), base_out_channel, 3, 1, 1, "model.28.cv3.2.0"); - nvinfer1::IElementWiseLayer *conv28_cv3_2_1 = convBnSiLU(network, weightMap, *conv28_cv3_2_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv28_cv3_2_1 = convBnSiLU(network, weightMap, *conv28_cv3_2_0->getOutput(0), base_out_channel, 3, 1, 1, "model.28.cv3.2.1"); - nvinfer1::IConvolutionLayer *conv28_cv3_2_2 = + nvinfer1::IConvolutionLayer* conv28_cv3_2_2 = network->addConvolutionNd(*conv28_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.28.cv3.2.2.weight"], weightMap["model.28.cv3.2.2.bias"]); conv28_cv3_2_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv28_cv3_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor28_2[] = {conv28_cv2_2_2->getOutput(0), conv28_cv3_2_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat28_2 = network->addConcatenation(inputTensor28_2, 2); + nvinfer1::ITensor* inputTensor28_2[] = {conv28_cv2_2_2->getOutput(0), conv28_cv3_2_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_2 = network->addConcatenation(inputTensor28_2, 2); // output3 - nvinfer1::IElementWiseLayer *conv28_cv2_3_0 = + nvinfer1::IElementWiseLayer* conv28_cv2_3_0 = convBnSiLU(network, weightMap, *conv27->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.3.0"); - nvinfer1::IElementWiseLayer *conv28_cv2_3_1 = + nvinfer1::IElementWiseLayer* conv28_cv2_3_1 = convBnSiLU(network, weightMap, *conv28_cv2_3_0->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.3.1"); - nvinfer1::IConvolutionLayer *conv28_cv2_3_2 = + nvinfer1::IConvolutionLayer* conv28_cv2_3_2 = network->addConvolutionNd(*conv28_cv2_3_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.28.cv2.3.2.weight"], weightMap["model.28.cv2.3.2.bias"]); conv28_cv2_3_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv28_cv2_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv28_cv3_3_0 = + nvinfer1::IElementWiseLayer* conv28_cv3_3_0 = convBnSiLU(network, weightMap, *conv27->getOutput(0), base_out_channel, 3, 1, 1, 
"model.28.cv3.3.0"); - nvinfer1::IElementWiseLayer *conv28_cv3_3_1 = - convBnSiLU(network, weightMap, *conv28_cv3_3_0->getOutput(0), - base_out_channel, 3, 1, 1, "model.28.cv3.3.1"); - nvinfer1::IConvolutionLayer *conv28_cv3_3_2 = + nvinfer1::IElementWiseLayer* conv28_cv3_3_1 = convBnSiLU(network, weightMap, *conv28_cv3_3_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.28.cv3.3.1"); + nvinfer1::IConvolutionLayer* conv28_cv3_3_2 = network->addConvolutionNd(*conv28_cv3_3_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.28.cv3.3.2.weight"], weightMap["model.28.cv3.3.2.bias"]); conv28_cv3_3_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv28_cv3_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor28_3[] = {conv28_cv2_3_2->getOutput(0), conv28_cv3_3_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat28_3 = network->addConcatenation(inputTensor28_3, 2); + nvinfer1::ITensor* inputTensor28_3[] = {conv28_cv2_3_2->getOutput(0), conv28_cv3_3_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_3 = network->addConcatenation(inputTensor28_3, 2); /******************************************************************************************************* ********************************************* YOLOV8 DETECT ****************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer *conv_layers[] = {conv1, conv3, conv5, conv7}; + nvinfer1::IElementWiseLayer* conv_layers[] = {conv1, conv3, conv5, conv7}; int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides); int stridesLength = sizeof(strides) / sizeof(int); // P2 processing steps (remains unchanged) - nvinfer1::IShuffleLayer *shuffle28_0 = network->addShuffle(*cat28_0->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle28_0 = network->addShuffle(*cat28_0->getOutput(0)); shuffle28_0->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}); - nvinfer1::ISliceLayer *split28_0_0 = network->addSlice( - *shuffle28_0->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[0]) * (kInputW / strides[0])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split28_0_1 = network->addSlice( - *shuffle28_0->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl28_0 = + nvinfer1::ISliceLayer* split28_0_0 = network->addSlice( + *shuffle28_0->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split28_0_1 = + network->addSlice(*shuffle28_0->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl28_0 = DFL(network, weightMap, *split28_0_0->getOutput(0), 4, (kInputH / strides[0]) * (kInputW / strides[0]), 1, 1, 0, "model.28.dfl.conv.weight"); - nvinfer1::ITensor *inputTensor28_dfl_0[] = {dfl28_0->getOutput(0), split28_0_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat28_dfl_0 = network->addConcatenation(inputTensor28_dfl_0, 2); + nvinfer1::ITensor* inputTensor28_dfl_0[] = {dfl28_0->getOutput(0), split28_0_1->getOutput(0)}; + 
nvinfer1::IConcatenationLayer* cat28_dfl_0 = network->addConcatenation(inputTensor28_dfl_0, 2); cat28_dfl_0->setAxis(1); // P3 processing steps (remains unchanged) - nvinfer1::IShuffleLayer *shuffle28_1 = network->addShuffle(*cat28_1->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle28_1 = network->addShuffle(*cat28_1->getOutput(0)); shuffle28_1->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}); - nvinfer1::ISliceLayer *split28_1_0 = network->addSlice( - *shuffle28_1->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[1]) * (kInputW / strides[1])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split28_1_1 = network->addSlice( - *shuffle28_1->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl28_1 = + nvinfer1::ISliceLayer* split28_1_0 = network->addSlice( + *shuffle28_1->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split28_1_1 = + network->addSlice(*shuffle28_1->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl28_1 = DFL(network, weightMap, *split28_1_0->getOutput(0), 4, (kInputH / strides[1]) * (kInputW / strides[1]), 1, 1, 0, "model.28.dfl.conv.weight"); - nvinfer1::ITensor *inputTensor28_dfl_1[] = {dfl28_1->getOutput(0), split28_1_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat28_dfl_1 = network->addConcatenation(inputTensor28_dfl_1, 2); + nvinfer1::ITensor* inputTensor28_dfl_1[] = {dfl28_1->getOutput(0), split28_1_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_dfl_1 = network->addConcatenation(inputTensor28_dfl_1, 2); cat28_dfl_1->setAxis(1); // P4 processing steps (remains unchanged) - nvinfer1::IShuffleLayer *shuffle28_2 = network->addShuffle(*cat28_2->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle28_2 = network->addShuffle(*cat28_2->getOutput(0)); shuffle28_2->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}); - nvinfer1::ISliceLayer *split28_2_0 = network->addSlice( - *shuffle28_2->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[2]) * (kInputW / strides[2])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split28_2_1 = network->addSlice( - *shuffle28_2->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl28_2 = + nvinfer1::ISliceLayer* split28_2_0 = network->addSlice( + *shuffle28_2->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split28_2_1 = + network->addSlice(*shuffle28_2->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl28_2 = DFL(network, weightMap, *split28_2_0->getOutput(0), 4, (kInputH / strides[2]) * (kInputW / strides[2]), 1, 1, 0, "model.28.dfl.conv.weight"); - nvinfer1::ITensor *inputTensor28_dfl_2[] = 
{dfl28_2->getOutput(0), split28_2_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat28_dfl_2 = network->addConcatenation(inputTensor28_dfl_2, 2); + nvinfer1::ITensor* inputTensor28_dfl_2[] = {dfl28_2->getOutput(0), split28_2_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_dfl_2 = network->addConcatenation(inputTensor28_dfl_2, 2); cat28_dfl_2->setAxis(1); // P5 processing steps - nvinfer1::IShuffleLayer *shuffle28_3 = network->addShuffle(*cat28_3->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle28_3 = network->addShuffle(*cat28_3->getOutput(0)); shuffle28_3->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}); - nvinfer1::ISliceLayer *split28_3_0 = network->addSlice( - *shuffle28_3->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[3]) * (kInputW / strides[3])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split28_3_1 = network->addSlice( - *shuffle28_3->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl28_3 = + nvinfer1::ISliceLayer* split28_3_0 = network->addSlice( + *shuffle28_3->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[3]) * (kInputW / strides[3])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split28_3_1 = + network->addSlice(*shuffle28_3->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl28_3 = DFL(network, weightMap, *split28_3_0->getOutput(0), 4, (kInputH / strides[3]) * (kInputW / strides[3]), 1, 1, 0, "model.28.dfl.conv.weight"); - nvinfer1::ITensor *inputTensor28_dfl_3[] = {dfl28_3->getOutput(0), split28_3_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat28_dfl_3 = network->addConcatenation(inputTensor28_dfl_3, 2); + nvinfer1::ITensor* inputTensor28_dfl_3[] = {dfl28_3->getOutput(0), split28_3_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_dfl_3 = network->addConcatenation(inputTensor28_dfl_3, 2); cat28_dfl_3->setAxis(1); - nvinfer1::IPluginV2Layer *yolo = addYoLoLayer( - network, std::vector{cat28_dfl_0, cat28_dfl_1, cat28_dfl_2, cat28_dfl_3}, + nvinfer1::IPluginV2Layer* yolo = addYoLoLayer( + network, std::vector{cat28_dfl_0, cat28_dfl_1, cat28_dfl_2, cat28_dfl_3}, strides, stridesLength, false, false); yolo->getOutput(0)->setName(kOutputTensorName); network->markOutput(*yolo->getOutput(0)); @@ -1048,56 +1007,57 @@ buildEngineYolov8DetP2(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *co std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl; assert(builder->platformHasFastInt8()); config->setFlag(nvinfer1::BuilderFlag::kINT8); - auto *calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, kInputQuantizationFolder, "int8calib.table", + auto* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, kInputQuantizationFolder, "int8calib.table", kInputTensorName); config->setInt8Calibrator(calibrator); #endif std::cout << "Building engine, please wait for a while..." << std::endl; - nvinfer1::IHostMemory *serialized_model = builder->buildSerializedNetwork(*network, *config); + nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config); std::cout << "Build engine successfully!" 
<< std::endl; delete network; - for (auto &mem: weightMap) { - free((void *) (mem.second.values)); + for (auto& mem : weightMap) { + free((void*)(mem.second.values)); } return serialized_model; } -static nvinfer1::IElementWiseLayer *convBnSiLUProto(nvinfer1::INetworkDefinition *network, +static nvinfer1::IElementWiseLayer* convBnSiLUProto(nvinfer1::INetworkDefinition* network, std::map weightMap, - nvinfer1::ITensor &input, - int ch, int k, int s, int p, std::string lname) { + nvinfer1::ITensor& input, int ch, int k, int s, int p, + std::string lname) { nvinfer1::Weights bias_empty{nvinfer1::DataType::kFLOAT, nullptr, 0}; - nvinfer1::IConvolutionLayer *conv = + nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(input, ch, nvinfer1::DimsHW{k, k}, weightMap[lname + ".conv.weight"], bias_empty); assert(conv); conv->setStrideNd(nvinfer1::DimsHW{s, s}); conv->setPaddingNd(nvinfer1::DimsHW{p, p}); conv->setName((lname + ".conv").c_str()); - nvinfer1::IScaleLayer *bn = addBatchNorm2d(network, weightMap, *conv->getOutput(0), lname + ".bn", 1e-3); + nvinfer1::IScaleLayer* bn = addBatchNorm2d(network, weightMap, *conv->getOutput(0), lname + ".bn", 1e-3); bn->setName((lname + ".bn").c_str()); // This concat operator is not used for calculation, in order to prevent the operator fusion unrealized error when int8 is quantized. // Error Code 10: Internal Error (Could not find any implementation for node // model.22.proto.cv3.conv + model.22.proto.cv3.sigmoid + PWN(PWN((Unnamed Layer* 353) [Activation]), PWN(model.22.proto.cv3.silu)).) #if defined(USE_INT8) - nvinfer1::ITensor *inputTensors[] = {bn->getOutput(0)}; + nvinfer1::ITensor* inputTensors[] = {bn->getOutput(0)}; auto concat = network->addConcatenation(inputTensors, 1); - nvinfer1::IActivationLayer *sigmoid = network->addActivation(*concat->getOutput(0), nvinfer1::ActivationType::kSIGMOID); + nvinfer1::IActivationLayer* sigmoid = + network->addActivation(*concat->getOutput(0), nvinfer1::ActivationType::kSIGMOID); assert(sigmoid); bn->setName((lname + ".sigmoid").c_str()); - nvinfer1::IElementWiseLayer *ew = - network->addElementWise(*concat->getOutput(0), *sigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD); + nvinfer1::IElementWiseLayer* ew = network->addElementWise(*concat->getOutput(0), *sigmoid->getOutput(0), + nvinfer1::ElementWiseOperation::kPROD); assert(ew); ew->setName((lname + ".silu").c_str()); #else - nvinfer1::IActivationLayer *sigmoid = network->addActivation(*bn->getOutput(0), nvinfer1::ActivationType::kSIGMOID); + nvinfer1::IActivationLayer* sigmoid = network->addActivation(*bn->getOutput(0), nvinfer1::ActivationType::kSIGMOID); assert(sigmoid); bn->setName((lname + ".sigmoid").c_str()); - nvinfer1::IElementWiseLayer *ew = + nvinfer1::IElementWiseLayer* ew = network->addElementWise(*bn->getOutput(0), *sigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD); assert(ew); ew->setName((lname + ".silu").c_str()); @@ -1105,28 +1065,27 @@ static nvinfer1::IElementWiseLayer *convBnSiLUProto(nvinfer1::INetworkDefinition return ew; } -static nvinfer1::IElementWiseLayer *Proto(nvinfer1::INetworkDefinition *network, - std::map &weightMap, nvinfer1::ITensor &input, +static nvinfer1::IElementWiseLayer* Proto(nvinfer1::INetworkDefinition* network, + std::map& weightMap, nvinfer1::ITensor& input, std::string lname, float gw, int max_channels) { int mid_channel = get_width(256, gw, max_channels); auto cv1 = convBnSiLU(network, weightMap, input, mid_channel, 3, 1, 1, "model.22.proto.cv1"); - float *convTranpsose_bais = 
(float *) weightMap["model.22.proto.upsample.bias"].values; + float* convTranpsose_bais = (float*)weightMap["model.22.proto.upsample.bias"].values; int convTranpsose_bais_len = weightMap["model.22.proto.upsample.bias"].count; nvinfer1::Weights bias{nvinfer1::DataType::kFLOAT, convTranpsose_bais, convTranpsose_bais_len}; auto convTranpsose = network->addDeconvolutionNd(*cv1->getOutput(0), mid_channel, nvinfer1::DimsHW{2, 2}, weightMap["model.22.proto.upsample.weight"], bias); assert(convTranpsose); convTranpsose->setStrideNd(nvinfer1::DimsHW{2, 2}); - auto cv2 = convBnSiLU(network, weightMap, *convTranpsose->getOutput(0), mid_channel, 3, 1, 1, - "model.22.proto.cv2"); + auto cv2 = convBnSiLU(network, weightMap, *convTranpsose->getOutput(0), mid_channel, 3, 1, 1, "model.22.proto.cv2"); auto cv3 = convBnSiLUProto(network, weightMap, *cv2->getOutput(0), 32, 1, 1, 0, "model.22.proto.cv3"); assert(cv3); return cv3; } -static nvinfer1::IShuffleLayer *cv4_conv_combined(nvinfer1::INetworkDefinition *network, - std::map<std::string, nvinfer1::Weights> &weightMap, - nvinfer1::ITensor &input, std::string lname, int grid_shape, float gw, +static nvinfer1::IShuffleLayer* cv4_conv_combined(nvinfer1::INetworkDefinition* network, + std::map<std::string, nvinfer1::Weights>& weightMap, + nvinfer1::ITensor& input, std::string lname, int grid_shape, float gw, std::string algo_type) { int mid_channle = 0; int output_channel = 0; @@ -1151,55 +1110,55 @@ static nvinfer1::IShuffleLayer *cv4_conv_combined(nvinfer1::INetworkDefinition * auto cv0 = convBnSiLU(network, weightMap, input, mid_channle, 3, 1, 1, lname + ".0"); auto cv1 = convBnSiLU(network, weightMap, *cv0->getOutput(0), mid_channle, 3, 1, 1, lname + ".1"); - float *cv2_bais_value = (float *) weightMap[lname + ".2" + ".bias"].values; + float* cv2_bais_value = (float*)weightMap[lname + ".2" + ".bias"].values; int cv2_bais_len = weightMap[lname + ".2" + ".bias"].count; nvinfer1::Weights cv2_bais{nvinfer1::DataType::kFLOAT, cv2_bais_value, cv2_bais_len}; auto cv2 = network->addConvolutionNd(*cv1->getOutput(0), output_channel, nvinfer1::DimsHW{1, 1}, weightMap[lname + ".2" + ".weight"], cv2_bais); cv2->setStrideNd(nvinfer1::DimsHW{1, 1}); - nvinfer1::IShuffleLayer *cv2_shuffle = network->addShuffle(*cv2->getOutput(0)); + nvinfer1::IShuffleLayer* cv2_shuffle = network->addShuffle(*cv2->getOutput(0)); cv2_shuffle->setReshapeDimensions(nvinfer1::Dims3{kBatchSize, output_channel, grid_shape}); return cv2_shuffle; } -nvinfer1::IHostMemory * -buildEngineYolov8Seg(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, nvinfer1::DataType dt, - const std::string &wts_path, float &gd, float &gw, int &max_channels) { +nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels) { std::map<std::string, nvinfer1::Weights> weightMap = loadWeights(wts_path); -// nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); - nvinfer1::INetworkDefinition *network = builder->createNetworkV2( + // nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); nvinfer1::INetworkDefinition* network = builder->createNetworkV2( 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)); /******************************************************************************************************* ****************************************** YOLOV8 INPUT **********************************************
*******************************************************************************************************/ - nvinfer1::ITensor *data = network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kInputH, kInputW}); + nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kInputH, kInputW}); assert(data); /******************************************************************************************************* ***************************************** YOLOV8 BACKBONE ******************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer *conv0 = + nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); - nvinfer1::IElementWiseLayer *conv1 = + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); - nvinfer1::IElementWiseLayer *conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), + nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); - nvinfer1::IElementWiseLayer *conv3 = + nvinfer1::IElementWiseLayer* conv3 = convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); - nvinfer1::IElementWiseLayer *conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), + nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); - nvinfer1::IElementWiseLayer *conv5 = + nvinfer1::IElementWiseLayer* conv5 = convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); - nvinfer1::IElementWiseLayer *conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), + nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); - nvinfer1::IElementWiseLayer *conv7 = + nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.7"); - nvinfer1::IElementWiseLayer *conv8 = + nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); - nvinfer1::IElementWiseLayer *conv9 = + nvinfer1::IElementWiseLayer* conv9 = SPPF(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.9"); @@ -1207,39 +1166,39 @@ buildEngineYolov8Seg(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *conf ********************************************* YOLOV8 HEAD ******************************************** *******************************************************************************************************/ float scale[] = {1.0, 1.0, 2.0, 2.0}; - nvinfer1::IResizeLayer *upsample10 = network->addResize(*conv9->getOutput(0)); + nvinfer1::IResizeLayer* upsample10 = network->addResize(*conv9->getOutput(0)); assert(upsample10); 
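    // The four entries of scale follow the explicit-batch NCHW layout (batch, channel, height, width):
    // batch and channel stay at 1.0 while H and W are doubled, so together with kNEAREST interpolation the
    // resize configured below performs the x2 nearest-neighbour upsample that brings the SPPF/P5 feature
    // (conv9) back to P4 resolution before it is concatenated with the stride-16 backbone feature (conv6)
    // in the FPN-style head.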
upsample10->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample10->setScales(scale, 4); - nvinfer1::ITensor *inputTensor11[] = {upsample10->getOutput(0), conv6->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat11 = network->addConcatenation(inputTensor11, 2); - nvinfer1::IElementWiseLayer *conv12 = + nvinfer1::ITensor* inputTensor11[] = {upsample10->getOutput(0), conv6->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat11 = network->addConcatenation(inputTensor11, 2); + nvinfer1::IElementWiseLayer* conv12 = C2F(network, weightMap, *cat11->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.12"); - nvinfer1::IResizeLayer *upsample13 = network->addResize(*conv12->getOutput(0)); + nvinfer1::IResizeLayer* upsample13 = network->addResize(*conv12->getOutput(0)); assert(upsample13); upsample13->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample13->setScales(scale, 4); - nvinfer1::ITensor *inputTensor14[] = {upsample13->getOutput(0), conv4->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat14 = network->addConcatenation(inputTensor14, 2); - nvinfer1::IElementWiseLayer *conv15 = + nvinfer1::ITensor* inputTensor14[] = {upsample13->getOutput(0), conv4->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat14 = network->addConcatenation(inputTensor14, 2); + nvinfer1::IElementWiseLayer* conv15 = C2F(network, weightMap, *cat14->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.15"); - nvinfer1::IElementWiseLayer *conv16 = convBnSiLU(network, weightMap, *conv15->getOutput(0), + nvinfer1::IElementWiseLayer* conv16 = convBnSiLU(network, weightMap, *conv15->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.16"); - nvinfer1::ITensor *inputTensor17[] = {conv16->getOutput(0), conv12->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat17 = network->addConcatenation(inputTensor17, 2); - nvinfer1::IElementWiseLayer *conv18 = + nvinfer1::ITensor* inputTensor17[] = {conv16->getOutput(0), conv12->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat17 = network->addConcatenation(inputTensor17, 2); + nvinfer1::IElementWiseLayer* conv18 = C2F(network, weightMap, *cat17->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.18"); - nvinfer1::IElementWiseLayer *conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), + nvinfer1::IElementWiseLayer* conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.19"); - nvinfer1::ITensor *inputTensor20[] = {conv19->getOutput(0), conv9->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat20 = network->addConcatenation(inputTensor20, 2); - nvinfer1::IElementWiseLayer *conv21 = + nvinfer1::ITensor* inputTensor20[] = {conv19->getOutput(0), conv9->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat20 = network->addConcatenation(inputTensor20, 2); + nvinfer1::IElementWiseLayer* conv21 = C2F(network, weightMap, *cat20->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.21"); @@ -1250,153 +1209,144 @@ buildEngineYolov8Seg(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *conf int base_out_channel = (gw == 0.25) ? 
std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels); // output0 - nvinfer1::IElementWiseLayer *conv22_cv2_0_0 = + nvinfer1::IElementWiseLayer* conv22_cv2_0_0 = convBnSiLU(network, weightMap, *conv15->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.0"); - nvinfer1::IElementWiseLayer *conv22_cv2_0_1 = + nvinfer1::IElementWiseLayer* conv22_cv2_0_1 = convBnSiLU(network, weightMap, *conv22_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.1"); - nvinfer1::IConvolutionLayer *conv22_cv2_0_2 = + nvinfer1::IConvolutionLayer* conv22_cv2_0_2 = network->addConvolutionNd(*conv22_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.0.2.weight"], weightMap["model.22.cv2.0.2.bias"]); conv22_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv22_cv3_0_0 = + nvinfer1::IElementWiseLayer* conv22_cv3_0_0 = convBnSiLU(network, weightMap, *conv15->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.0.0"); - nvinfer1::IElementWiseLayer *conv22_cv3_0_1 = convBnSiLU(network, weightMap, *conv22_cv3_0_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv22_cv3_0_1 = convBnSiLU(network, weightMap, *conv22_cv3_0_0->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.0.1"); - nvinfer1::IConvolutionLayer *conv22_cv3_0_2 = + nvinfer1::IConvolutionLayer* conv22_cv3_0_2 = network->addConvolutionNd(*conv22_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.0.2.weight"], weightMap["model.22.cv3.0.2.bias"]); conv22_cv3_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv3_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor22_0[] = {conv22_cv2_0_2->getOutput(0), conv22_cv3_0_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_0 = network->addConcatenation(inputTensor22_0, 2); + nvinfer1::ITensor* inputTensor22_0[] = {conv22_cv2_0_2->getOutput(0), conv22_cv3_0_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_0 = network->addConcatenation(inputTensor22_0, 2); // output1 - nvinfer1::IElementWiseLayer *conv22_cv2_1_0 = + nvinfer1::IElementWiseLayer* conv22_cv2_1_0 = convBnSiLU(network, weightMap, *conv18->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.0"); - nvinfer1::IElementWiseLayer *conv22_cv2_1_1 = + nvinfer1::IElementWiseLayer* conv22_cv2_1_1 = convBnSiLU(network, weightMap, *conv22_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.1"); - nvinfer1::IConvolutionLayer *conv22_cv2_1_2 = + nvinfer1::IConvolutionLayer* conv22_cv2_1_2 = network->addConvolutionNd(*conv22_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.1.2.weight"], weightMap["model.22.cv2.1.2.bias"]); conv22_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv22_cv3_1_0 = + nvinfer1::IElementWiseLayer* conv22_cv3_1_0 = convBnSiLU(network, weightMap, *conv18->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.1.0"); - nvinfer1::IElementWiseLayer *conv22_cv3_1_1 = convBnSiLU(network, weightMap, *conv22_cv3_1_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv22_cv3_1_1 = convBnSiLU(network, weightMap, *conv22_cv3_1_0->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.1.1"); - nvinfer1::IConvolutionLayer *conv22_cv3_1_2 = + nvinfer1::IConvolutionLayer* conv22_cv3_1_2 = network->addConvolutionNd(*conv22_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.1.2.weight"], 
weightMap["model.22.cv3.1.2.bias"]); conv22_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor22_1[] = {conv22_cv2_1_2->getOutput(0), conv22_cv3_1_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_1 = network->addConcatenation(inputTensor22_1, 2); + nvinfer1::ITensor* inputTensor22_1[] = {conv22_cv2_1_2->getOutput(0), conv22_cv3_1_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_1 = network->addConcatenation(inputTensor22_1, 2); // output2 - nvinfer1::IElementWiseLayer *conv22_cv2_2_0 = + nvinfer1::IElementWiseLayer* conv22_cv2_2_0 = convBnSiLU(network, weightMap, *conv21->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.0"); - nvinfer1::IElementWiseLayer *conv22_cv2_2_1 = + nvinfer1::IElementWiseLayer* conv22_cv2_2_1 = convBnSiLU(network, weightMap, *conv22_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.1"); - nvinfer1::IConvolutionLayer *conv22_cv2_2_2 = + nvinfer1::IConvolutionLayer* conv22_cv2_2_2 = network->addConvolutionNd(*conv22_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.2.2.weight"], weightMap["model.22.cv2.2.2.bias"]); - nvinfer1::IElementWiseLayer *conv22_cv3_2_0 = + nvinfer1::IElementWiseLayer* conv22_cv3_2_0 = convBnSiLU(network, weightMap, *conv21->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.2.0"); - nvinfer1::IElementWiseLayer *conv22_cv3_2_1 = convBnSiLU(network, weightMap, *conv22_cv3_2_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv22_cv3_2_1 = convBnSiLU(network, weightMap, *conv22_cv3_2_0->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.2.1"); - nvinfer1::IConvolutionLayer *conv22_cv3_2_2 = + nvinfer1::IConvolutionLayer* conv22_cv3_2_2 = network->addConvolutionNd(*conv22_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.2.2.weight"], weightMap["model.22.cv3.2.2.bias"]); - nvinfer1::ITensor *inputTensor22_2[] = {conv22_cv2_2_2->getOutput(0), conv22_cv3_2_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_2 = network->addConcatenation(inputTensor22_2, 2); + nvinfer1::ITensor* inputTensor22_2[] = {conv22_cv2_2_2->getOutput(0), conv22_cv3_2_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_2 = network->addConcatenation(inputTensor22_2, 2); /******************************************************************************************************* ********************************************* YOLOV8 DETECT ****************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer *conv_layers[] = {conv3, conv5, conv7}; + nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7}; int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides); int stridesLength = sizeof(strides) / sizeof(int); - nvinfer1::IShuffleLayer *shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0)); shuffle22_0->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}); - nvinfer1::ISliceLayer *split22_0_0 = network->addSlice( - *shuffle22_0->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[0]) * (kInputW / strides[0])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split22_0_1 = 
network->addSlice( - *shuffle22_0->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl22_0 = + nvinfer1::ISliceLayer* split22_0_0 = network->addSlice( + *shuffle22_0->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split22_0_1 = + network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl22_0 = DFL(network, weightMap, *split22_0_0->getOutput(0), 4, (kInputH / strides[0]) * (kInputW / strides[0]), 1, 1, 0, "model.22.dfl.conv.weight"); - nvinfer1::IShuffleLayer *shuffle22_1 = network->addShuffle(*cat22_1->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle22_1 = network->addShuffle(*cat22_1->getOutput(0)); shuffle22_1->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}); - nvinfer1::ISliceLayer *split22_1_0 = network->addSlice( - *shuffle22_1->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[1]) * (kInputW / strides[1])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split22_1_1 = network->addSlice( - *shuffle22_1->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl22_1 = + nvinfer1::ISliceLayer* split22_1_0 = network->addSlice( + *shuffle22_1->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split22_1_1 = + network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl22_1 = DFL(network, weightMap, *split22_1_0->getOutput(0), 4, (kInputH / strides[1]) * (kInputW / strides[1]), 1, 1, 0, "model.22.dfl.conv.weight"); - nvinfer1::IShuffleLayer *shuffle22_2 = network->addShuffle(*cat22_2->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle22_2 = network->addShuffle(*cat22_2->getOutput(0)); shuffle22_2->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}); - nvinfer1::ISliceLayer *split22_2_0 = network->addSlice( - *shuffle22_2->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[2]) * (kInputW / strides[2])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split22_2_1 = network->addSlice( - *shuffle22_2->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, - nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl22_2 = + nvinfer1::ISliceLayer* split22_2_0 = network->addSlice( + *shuffle22_2->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split22_2_1 = + network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, + 
nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl22_2 = DFL(network, weightMap, *split22_2_0->getOutput(0), 4, (kInputH / strides[2]) * (kInputW / strides[2]), 1, 1, 0, "model.22.dfl.conv.weight"); // det0 auto proto_coef_0 = cv4_conv_combined(network, weightMap, *conv15->getOutput(0), "model.22.cv4.0", (kInputH / strides[0]) * (kInputW / strides[0]), gw, "seg"); - nvinfer1::ITensor *inputTensor22_dfl_0[] = {dfl22_0->getOutput(0), split22_0_1->getOutput(0), + nvinfer1::ITensor* inputTensor22_dfl_0[] = {dfl22_0->getOutput(0), split22_0_1->getOutput(0), proto_coef_0->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_dfl_0 = network->addConcatenation(inputTensor22_dfl_0, 3); + nvinfer1::IConcatenationLayer* cat22_dfl_0 = network->addConcatenation(inputTensor22_dfl_0, 3); cat22_dfl_0->setAxis(1); // det1 auto proto_coef_1 = cv4_conv_combined(network, weightMap, *conv18->getOutput(0), "model.22.cv4.1", (kInputH / strides[1]) * (kInputW / strides[1]), gw, "seg"); - nvinfer1::ITensor *inputTensor22_dfl_1[] = {dfl22_1->getOutput(0), split22_1_1->getOutput(0), + nvinfer1::ITensor* inputTensor22_dfl_1[] = {dfl22_1->getOutput(0), split22_1_1->getOutput(0), proto_coef_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_dfl_1 = network->addConcatenation(inputTensor22_dfl_1, 3); + nvinfer1::IConcatenationLayer* cat22_dfl_1 = network->addConcatenation(inputTensor22_dfl_1, 3); cat22_dfl_1->setAxis(1); // det2 auto proto_coef_2 = cv4_conv_combined(network, weightMap, *conv21->getOutput(0), "model.22.cv4.2", (kInputH / strides[2]) * (kInputW / strides[2]), gw, "seg"); - nvinfer1::ITensor *inputTensor22_dfl_2[] = {dfl22_2->getOutput(0), split22_2_1->getOutput(0), + nvinfer1::ITensor* inputTensor22_dfl_2[] = {dfl22_2->getOutput(0), split22_2_1->getOutput(0), proto_coef_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_dfl_2 = network->addConcatenation(inputTensor22_dfl_2, 3); + nvinfer1::IConcatenationLayer* cat22_dfl_2 = network->addConcatenation(inputTensor22_dfl_2, 3); cat22_dfl_2->setAxis(1); - nvinfer1::IPluginV2Layer *yolo = - addYoLoLayer(network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}, + nvinfer1::IPluginV2Layer* yolo = + addYoLoLayer(network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}, strides, stridesLength, true, false); yolo->getOutput(0)->setName(kOutputTensorName); network->markOutput(*yolo->getOutput(0)); @@ -1413,99 +1363,99 @@ buildEngineYolov8Seg(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *conf std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl; assert(builder->platformHasFastInt8()); config->setFlag(nvinfer1::BuilderFlag::kINT8); - auto *calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, kInputQuantizationFolder, "int8calib.table", + auto* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, kInputQuantizationFolder, "int8calib.table", kInputTensorName); config->setInt8Calibrator(calibrator); #endif std::cout << "Building engine, please wait for a while..." << std::endl; - nvinfer1::IHostMemory *serialized_model = builder->buildSerializedNetwork(*network, *config); + nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config); std::cout << "Build engine successfully!" 
<< std::endl; delete network; - for (auto &mem: weightMap) { - free((void *) (mem.second.values)); + for (auto& mem : weightMap) { + free((void*)(mem.second.values)); } return serialized_model; } -nvinfer1::IHostMemory *buildEngineYolov8Pose(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, - nvinfer1::DataType dt, const std::string &wts_path, float &gd, float &gw, - int &max_channels) { +nvinfer1::IHostMemory* buildEngineYolov8Pose(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels) { std::map weightMap = loadWeights(wts_path); -// nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); - nvinfer1::INetworkDefinition *network = builder->createNetworkV2( + // nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); + nvinfer1::INetworkDefinition* network = builder->createNetworkV2( 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)); /******************************************************************************************************* ****************************************** YOLOV8 INPUT ********************************************** *******************************************************************************************************/ - nvinfer1::ITensor *data = network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kInputH, kInputW}); + nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kInputH, kInputW}); assert(data); /******************************************************************************************************* ***************************************** YOLOV8 BACKBONE ******************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer *conv0 = + nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); - nvinfer1::IElementWiseLayer *conv1 = + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); - nvinfer1::IElementWiseLayer *conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), + nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); - nvinfer1::IElementWiseLayer *conv3 = + nvinfer1::IElementWiseLayer* conv3 = convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); - nvinfer1::IElementWiseLayer *conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), + nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); - nvinfer1::IElementWiseLayer *conv5 = + nvinfer1::IElementWiseLayer* conv5 = convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); - nvinfer1::IElementWiseLayer *conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), + nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, 
"model.6"); - nvinfer1::IElementWiseLayer *conv7 = + nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.7"); - nvinfer1::IElementWiseLayer *conv8 = + nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); - nvinfer1::IElementWiseLayer *conv9 = + nvinfer1::IElementWiseLayer* conv9 = SPPF(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.9"); /******************************************************************************************************* ********************************************* YOLOV8 HEAD ******************************************** *******************************************************************************************************/ float scale[] = {1.0, 1.0, 2.0, 2.0}; - nvinfer1::IResizeLayer *upsample10 = network->addResize(*conv9->getOutput(0)); + nvinfer1::IResizeLayer* upsample10 = network->addResize(*conv9->getOutput(0)); assert(upsample10); upsample10->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample10->setScales(scale, 4); - nvinfer1::ITensor *inputTensor11[] = {upsample10->getOutput(0), conv6->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat11 = network->addConcatenation(inputTensor11, 2); - nvinfer1::IElementWiseLayer *conv12 = + nvinfer1::ITensor* inputTensor11[] = {upsample10->getOutput(0), conv6->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat11 = network->addConcatenation(inputTensor11, 2); + nvinfer1::IElementWiseLayer* conv12 = C2F(network, weightMap, *cat11->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.12"); - nvinfer1::IResizeLayer *upsample13 = network->addResize(*conv12->getOutput(0)); + nvinfer1::IResizeLayer* upsample13 = network->addResize(*conv12->getOutput(0)); assert(upsample13); upsample13->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample13->setScales(scale, 4); - nvinfer1::ITensor *inputTensor14[] = {upsample13->getOutput(0), conv4->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat14 = network->addConcatenation(inputTensor14, 2); - nvinfer1::IElementWiseLayer *conv15 = + nvinfer1::ITensor* inputTensor14[] = {upsample13->getOutput(0), conv4->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat14 = network->addConcatenation(inputTensor14, 2); + nvinfer1::IElementWiseLayer* conv15 = C2F(network, weightMap, *cat14->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.15"); - nvinfer1::IElementWiseLayer *conv16 = convBnSiLU(network, weightMap, *conv15->getOutput(0), + nvinfer1::IElementWiseLayer* conv16 = convBnSiLU(network, weightMap, *conv15->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.16"); - nvinfer1::ITensor *inputTensor17[] = {conv16->getOutput(0), conv12->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat17 = network->addConcatenation(inputTensor17, 2); - nvinfer1::IElementWiseLayer *conv18 = + nvinfer1::ITensor* inputTensor17[] = {conv16->getOutput(0), conv12->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat17 = network->addConcatenation(inputTensor17, 2); + nvinfer1::IElementWiseLayer* conv18 = C2F(network, weightMap, *cat17->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.18"); - 
nvinfer1::IElementWiseLayer *conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), + nvinfer1::IElementWiseLayer* conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.19"); - nvinfer1::ITensor *inputTensor20[] = {conv19->getOutput(0), conv9->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat20 = network->addConcatenation(inputTensor20, 2); - nvinfer1::IElementWiseLayer *conv21 = + nvinfer1::ITensor* inputTensor20[] = {conv19->getOutput(0), conv9->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat20 = network->addConcatenation(inputTensor20, 2); + nvinfer1::IElementWiseLayer* conv21 = C2F(network, weightMap, *cat20->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.21"); @@ -1516,90 +1466,87 @@ nvinfer1::IHostMemory *buildEngineYolov8Pose(nvinfer1::IBuilder *builder, nvinfe int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kPoseNumClass, 100)) : get_width(256, gw, max_channels); // output0 - nvinfer1::IElementWiseLayer *conv22_cv2_0_0 = + nvinfer1::IElementWiseLayer* conv22_cv2_0_0 = convBnSiLU(network, weightMap, *conv15->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.0"); - nvinfer1::IElementWiseLayer *conv22_cv2_0_1 = + nvinfer1::IElementWiseLayer* conv22_cv2_0_1 = convBnSiLU(network, weightMap, *conv22_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.1"); - nvinfer1::IConvolutionLayer *conv22_cv2_0_2 = + nvinfer1::IConvolutionLayer* conv22_cv2_0_2 = network->addConvolutionNd(*conv22_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.0.2.weight"], weightMap["model.22.cv2.0.2.bias"]); conv22_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv22_cv3_0_0 = + nvinfer1::IElementWiseLayer* conv22_cv3_0_0 = convBnSiLU(network, weightMap, *conv15->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.0.0"); - nvinfer1::IElementWiseLayer *conv22_cv3_0_1 = convBnSiLU(network, weightMap, *conv22_cv3_0_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv22_cv3_0_1 = convBnSiLU(network, weightMap, *conv22_cv3_0_0->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.0.1"); - nvinfer1::IConvolutionLayer *conv22_cv3_0_2 = + nvinfer1::IConvolutionLayer* conv22_cv3_0_2 = network->addConvolutionNd(*conv22_cv3_0_1->getOutput(0), kPoseNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.0.2.weight"], weightMap["model.22.cv3.0.2.bias"]); conv22_cv3_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv3_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor22_0[] = {conv22_cv2_0_2->getOutput(0), conv22_cv3_0_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_0 = network->addConcatenation(inputTensor22_0, 2); + nvinfer1::ITensor* inputTensor22_0[] = {conv22_cv2_0_2->getOutput(0), conv22_cv3_0_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_0 = network->addConcatenation(inputTensor22_0, 2); // output1 - nvinfer1::IElementWiseLayer *conv22_cv2_1_0 = + nvinfer1::IElementWiseLayer* conv22_cv2_1_0 = convBnSiLU(network, weightMap, *conv18->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.0"); - nvinfer1::IElementWiseLayer *conv22_cv2_1_1 = + nvinfer1::IElementWiseLayer* conv22_cv2_1_1 = convBnSiLU(network, weightMap, *conv22_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.1"); - nvinfer1::IConvolutionLayer *conv22_cv2_1_2 = + nvinfer1::IConvolutionLayer* conv22_cv2_1_2 = 
network->addConvolutionNd(*conv22_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.1.2.weight"], weightMap["model.22.cv2.1.2.bias"]); conv22_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv22_cv3_1_0 = + nvinfer1::IElementWiseLayer* conv22_cv3_1_0 = convBnSiLU(network, weightMap, *conv18->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.1.0"); - nvinfer1::IElementWiseLayer *conv22_cv3_1_1 = convBnSiLU(network, weightMap, *conv22_cv3_1_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv22_cv3_1_1 = convBnSiLU(network, weightMap, *conv22_cv3_1_0->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.1.1"); - nvinfer1::IConvolutionLayer *conv22_cv3_1_2 = + nvinfer1::IConvolutionLayer* conv22_cv3_1_2 = network->addConvolutionNd(*conv22_cv3_1_1->getOutput(0), kPoseNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.1.2.weight"], weightMap["model.22.cv3.1.2.bias"]); conv22_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor22_1[] = {conv22_cv2_1_2->getOutput(0), conv22_cv3_1_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_1 = network->addConcatenation(inputTensor22_1, 2); + nvinfer1::ITensor* inputTensor22_1[] = {conv22_cv2_1_2->getOutput(0), conv22_cv3_1_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_1 = network->addConcatenation(inputTensor22_1, 2); // output2 - nvinfer1::IElementWiseLayer *conv22_cv2_2_0 = + nvinfer1::IElementWiseLayer* conv22_cv2_2_0 = convBnSiLU(network, weightMap, *conv21->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.0"); - nvinfer1::IElementWiseLayer *conv22_cv2_2_1 = + nvinfer1::IElementWiseLayer* conv22_cv2_2_1 = convBnSiLU(network, weightMap, *conv22_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.1"); - nvinfer1::IConvolutionLayer *conv22_cv2_2_2 = + nvinfer1::IConvolutionLayer* conv22_cv2_2_2 = network->addConvolutionNd(*conv22_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.2.2.weight"], weightMap["model.22.cv2.2.2.bias"]); - nvinfer1::IElementWiseLayer *conv22_cv3_2_0 = + nvinfer1::IElementWiseLayer* conv22_cv3_2_0 = convBnSiLU(network, weightMap, *conv21->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.2.0"); - nvinfer1::IElementWiseLayer *conv22_cv3_2_1 = convBnSiLU(network, weightMap, *conv22_cv3_2_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv22_cv3_2_1 = convBnSiLU(network, weightMap, *conv22_cv3_2_0->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.2.1"); - nvinfer1::IConvolutionLayer *conv22_cv3_2_2 = + nvinfer1::IConvolutionLayer* conv22_cv3_2_2 = network->addConvolutionNd(*conv22_cv3_2_1->getOutput(0), kPoseNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.2.2.weight"], weightMap["model.22.cv3.2.2.bias"]); - nvinfer1::ITensor *inputTensor22_2[] = {conv22_cv2_2_2->getOutput(0), conv22_cv3_2_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_2 = network->addConcatenation(inputTensor22_2, 2); + nvinfer1::ITensor* inputTensor22_2[] = {conv22_cv2_2_2->getOutput(0), conv22_cv3_2_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_2 = network->addConcatenation(inputTensor22_2, 2); /******************************************************************************************************* ********************************************* YOLOV8 DETECT ****************************************** 
*******************************************************************************************************/ - nvinfer1::IElementWiseLayer *conv_layers[] = {conv3, conv5, conv7}; + nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7}; int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides); int stridesLength = sizeof(strides) / sizeof(int); /**************************************************************************************P3****************************************************************************************************************************************/ - nvinfer1::IShuffleLayer *shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0)); shuffle22_0->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kPoseNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}); - nvinfer1::ISliceLayer *split22_0_0 = network->addSlice( - *shuffle22_0->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[0]) * (kInputW / strides[0])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split22_0_1 = network->addSlice( - *shuffle22_0->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, + nvinfer1::ISliceLayer* split22_0_0 = network->addSlice( + *shuffle22_0->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split22_0_1 = network->addSlice( + *shuffle22_0->getOutput(0), nvinfer1::Dims3{0, 64, 0}, nvinfer1::Dims3{kBatchSize, kPoseNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl22_0 = + nvinfer1::IShuffleLayer* dfl22_0 = DFL(network, weightMap, *split22_0_0->getOutput(0), 4, (kInputH / strides[0]) * (kInputW / strides[0]), 1, 1, 0, "model.22.dfl.conv.weight"); @@ -1607,26 +1554,23 @@ nvinfer1::IHostMemory *buildEngineYolov8Pose(nvinfer1::IBuilder *builder, nvinfe auto shuffle_conv15 = cv4_conv_combined(network, weightMap, *conv15->getOutput(0), "model.22.cv4.0", (kInputH / strides[0]) * (kInputW / strides[0]), gw, "pose"); - nvinfer1::ITensor *inputTensor22_dfl_0[] = {dfl22_0->getOutput(0), split22_0_1->getOutput(0), + nvinfer1::ITensor* inputTensor22_dfl_0[] = {dfl22_0->getOutput(0), split22_0_1->getOutput(0), shuffle_conv15->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_dfl_0 = network->addConcatenation(inputTensor22_dfl_0, 3); + nvinfer1::IConcatenationLayer* cat22_dfl_0 = network->addConcatenation(inputTensor22_dfl_0, 3); cat22_dfl_0->setAxis(1); /********************************************************************************************P4**********************************************************************************************************************************/ - nvinfer1::IShuffleLayer *shuffle22_1 = network->addShuffle(*cat22_1->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle22_1 = network->addShuffle(*cat22_1->getOutput(0)); shuffle22_1->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kPoseNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}); - nvinfer1::ISliceLayer *split22_1_0 = network->addSlice( - *shuffle22_1->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[1]) * (kInputW / strides[1])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split22_1_1 = network->addSlice( - 
*shuffle22_1->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, + nvinfer1::ISliceLayer* split22_1_0 = network->addSlice( + *shuffle22_1->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split22_1_1 = network->addSlice( + *shuffle22_1->getOutput(0), nvinfer1::Dims3{0, 64, 0}, nvinfer1::Dims3{kBatchSize, kPoseNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl22_1 = + nvinfer1::IShuffleLayer* dfl22_1 = DFL(network, weightMap, *split22_1_0->getOutput(0), 4, (kInputH / strides[1]) * (kInputW / strides[1]), 1, 1, 0, "model.22.dfl.conv.weight"); @@ -1634,39 +1578,36 @@ nvinfer1::IHostMemory *buildEngineYolov8Pose(nvinfer1::IBuilder *builder, nvinfe auto shuffle_conv18 = cv4_conv_combined(network, weightMap, *conv18->getOutput(0), "model.22.cv4.1", (kInputH / strides[1]) * (kInputW / strides[1]), gw, "pose"); - nvinfer1::ITensor *inputTensor22_dfl_1[] = {dfl22_1->getOutput(0), split22_1_1->getOutput(0), + nvinfer1::ITensor* inputTensor22_dfl_1[] = {dfl22_1->getOutput(0), split22_1_1->getOutput(0), shuffle_conv18->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_dfl_1 = network->addConcatenation(inputTensor22_dfl_1, 3); + nvinfer1::IConcatenationLayer* cat22_dfl_1 = network->addConcatenation(inputTensor22_dfl_1, 3); cat22_dfl_1->setAxis(1); /********************************************************************************************P5**********************************************************************************************************************************/ - nvinfer1::IShuffleLayer *shuffle22_2 = network->addShuffle(*cat22_2->getOutput(0)); + nvinfer1::IShuffleLayer* shuffle22_2 = network->addShuffle(*cat22_2->getOutput(0)); shuffle22_2->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kPoseNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}); - nvinfer1::ISliceLayer *split22_2_0 = network->addSlice( - *shuffle22_2->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[2]) * (kInputW / strides[2])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split22_2_1 = network->addSlice( - *shuffle22_2->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, + nvinfer1::ISliceLayer* split22_2_0 = network->addSlice( + *shuffle22_2->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split22_2_1 = network->addSlice( + *shuffle22_2->getOutput(0), nvinfer1::Dims3{0, 64, 0}, nvinfer1::Dims3{kBatchSize, kPoseNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl22_2 = + nvinfer1::IShuffleLayer* dfl22_2 = DFL(network, weightMap, *split22_2_0->getOutput(0), 4, (kInputH / strides[2]) * (kInputW / strides[2]), 1, 1, 0, "model.22.dfl.conv.weight"); // det2 auto shuffle_conv21 = cv4_conv_combined(network, weightMap, *conv21->getOutput(0), "model.22.cv4.2", (kInputH / strides[2]) * (kInputW / strides[2]), gw, "pose"); - nvinfer1::ITensor *inputTensor22_dfl_2[] = {dfl22_2->getOutput(0), split22_2_1->getOutput(0), + nvinfer1::ITensor* inputTensor22_dfl_2[] = {dfl22_2->getOutput(0), split22_2_1->getOutput(0), shuffle_conv21->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_dfl_2 = network->addConcatenation(inputTensor22_dfl_2, 3); + nvinfer1::IConcatenationLayer* cat22_dfl_2 = 
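Note: the 64 channels sliced off before each DFL() call are YOLOv8's box distribution, 4 box sides times 16 bins (reg_max = 16 is the stock YOLOv8 value and an assumption here). The DFL block realises, via a softmax plus a fixed 1x1 convolution loaded from model.22.dfl.conv.weight, the expected bin index per side. A CPU sketch of that expectation for a single side:

#include <algorithm>
#include <cmath>
// Expected value of the 16-bin distribution for one box side (sketch; reg_max = 16 assumed).
inline float dflExpectation(const float* bins, int reg_max = 16) {
    float max_v = bins[0];
    for (int i = 1; i < reg_max; ++i) max_v = std::max(max_v, bins[i]);
    float sum = 0.f;
    for (int i = 0; i < reg_max; ++i) sum += std::exp(bins[i] - max_v);
    float dist = 0.f;
    for (int i = 0; i < reg_max; ++i) dist += i * std::exp(bins[i] - max_v) / sum;
    return dist;  // distance from the anchor point to this side, in units of the level's stride
}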
network->addConcatenation(inputTensor22_dfl_2, 3); cat22_dfl_2->setAxis(1); - nvinfer1::IPluginV2Layer *yolo = - addYoLoLayer(network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}, + nvinfer1::IPluginV2Layer* yolo = + addYoLoLayer(network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}, strides, stridesLength, false, true); yolo->getOutput(0)->setName(kOutputTensorName); network->markOutput(*yolo->getOutput(0)); @@ -1685,62 +1626,62 @@ nvinfer1::IHostMemory *buildEngineYolov8Pose(nvinfer1::IBuilder *builder, nvinfe #endif std::cout << "Building engine, please wait for a while..." << std::endl; - nvinfer1::IHostMemory *serialized_model = builder->buildSerializedNetwork(*network, *config); + nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config); std::cout << "Build engine successfully!" << std::endl; delete network; - for (auto &mem: weightMap) { - free((void *) (mem.second.values)); + for (auto& mem : weightMap) { + free((void*)(mem.second.values)); } return serialized_model; } -nvinfer1::IHostMemory * -buildEngineYolov8PoseP6(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *config, nvinfer1::DataType dt, - const std::string &wts_path, float &gd, float &gw, int &max_channels) { +nvinfer1::IHostMemory* buildEngineYolov8PoseP6(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels) { std::map weightMap = loadWeights(wts_path); -// nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); - nvinfer1::INetworkDefinition *network = builder->createNetworkV2( + // nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U); + nvinfer1::INetworkDefinition* network = builder->createNetworkV2( 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)); /******************************************************************************************************* ****************************************** YOLOV8 INPUT ********************************************** *******************************************************************************************************/ - nvinfer1::ITensor *data = network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kInputH, kInputW}); + nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kInputH, kInputW}); assert(data); /******************************************************************************************************* ***************************************** YOLOV8 BACKBONE ******************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer *conv0 = + nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); - nvinfer1::IElementWiseLayer *conv1 = + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); // 11233 - nvinfer1::IElementWiseLayer *conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), + nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); - nvinfer1::IElementWiseLayer *conv3 = + nvinfer1::IElementWiseLayer* conv3 = convBnSiLU(network, weightMap, *conv2->getOutput(0), 
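Note: get_width and get_depth, used for every channel and repeat count in these builders, are the repo's compound-scaling helpers and their bodies are not part of this diff. A plausible sketch of what the calls assume (round channels to a multiple of 8, cap at max_channels, scale block repeats by gd); treat the exact rounding rule as an assumption:

#include <algorithm>
#include <cmath>
// Assumed behaviour of the scaling helpers (sketch only; the real ones live elsewhere in this repo).
inline int get_width_sketch(int x, float gw, int max_channels, int divisor = 8) {
    int c = static_cast<int>(std::ceil(x * gw / divisor)) * divisor;
    return std::min(c, max_channels);
}
inline int get_depth_sketch(int x, float gd) {
    return std::max(1, static_cast<int>(std::round(x * gd)));
}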
get_width(256, gw, max_channels), 3, 2, 1, "model.3"); // 22466 - nvinfer1::IElementWiseLayer *conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), + nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); - nvinfer1::IElementWiseLayer *conv5 = + nvinfer1::IElementWiseLayer* conv5 = convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); // 22466 - nvinfer1::IElementWiseLayer *conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), + nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); - nvinfer1::IElementWiseLayer *conv7 = + nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(768, gw, max_channels), 3, 2, 1, "model.7"); - nvinfer1::IElementWiseLayer *conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(768, gw, max_channels), + nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); - nvinfer1::IElementWiseLayer *conv9 = + nvinfer1::IElementWiseLayer* conv9 = convBnSiLU(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.9"); - nvinfer1::IElementWiseLayer *conv10 = + nvinfer1::IElementWiseLayer* conv10 = C2F(network, weightMap, *conv9->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.10"); - nvinfer1::IElementWiseLayer *conv11 = + nvinfer1::IElementWiseLayer* conv11 = SPPF(network, weightMap, *conv10->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.11"); @@ -1751,60 +1692,60 @@ buildEngineYolov8PoseP6(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *c float scale[] = {1.0, 1.0, 2.0, 2.0}; // scale used for upsampling // P5 - nvinfer1::IResizeLayer *upsample12 = network->addResize(*conv11->getOutput(0)); + nvinfer1::IResizeLayer* upsample12 = network->addResize(*conv11->getOutput(0)); upsample12->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample12->setScales(scale, 4); - nvinfer1::ITensor *concat13_inputs[] = {upsample12->getOutput(0), conv8->getOutput(0)}; - nvinfer1::IConcatenationLayer *concat13 = network->addConcatenation(concat13_inputs, 2); - nvinfer1::IElementWiseLayer *conv14 = + nvinfer1::ITensor* concat13_inputs[] = {upsample12->getOutput(0), conv8->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat13 = network->addConcatenation(concat13_inputs, 2); + nvinfer1::IElementWiseLayer* conv14 = C2(network, weightMap, *concat13->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), false, 0.5, "model.14"); // P4 - nvinfer1::IResizeLayer *upsample15 = network->addResize(*conv14->getOutput(0)); + nvinfer1::IResizeLayer* upsample15 = network->addResize(*conv14->getOutput(0)); upsample15->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample15->setScales(scale, 4); - nvinfer1::ITensor *concat16_inputs[] = {upsample15->getOutput(0), conv6->getOutput(0)}; - nvinfer1::IConcatenationLayer *concat16 = network->addConcatenation(concat16_inputs, 2); - nvinfer1::IElementWiseLayer *conv17 = 
+ nvinfer1::ITensor* concat16_inputs[] = {upsample15->getOutput(0), conv6->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat16 = network->addConcatenation(concat16_inputs, 2); + nvinfer1::IElementWiseLayer* conv17 = C2(network, weightMap, *concat16->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.17"); // P3 - nvinfer1::IResizeLayer *upsample18 = network->addResize(*conv17->getOutput(0)); + nvinfer1::IResizeLayer* upsample18 = network->addResize(*conv17->getOutput(0)); upsample18->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); upsample18->setScales(scale, 4); - nvinfer1::ITensor *concat19_inputs[] = {upsample18->getOutput(0), conv4->getOutput(0)}; - nvinfer1::IConcatenationLayer *concat19 = network->addConcatenation(concat19_inputs, 2); - nvinfer1::IElementWiseLayer *conv20 = + nvinfer1::ITensor* concat19_inputs[] = {upsample18->getOutput(0), conv4->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat19 = network->addConcatenation(concat19_inputs, 2); + nvinfer1::IElementWiseLayer* conv20 = C2(network, weightMap, *concat19->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.20"); // Additional layers for P4, P5, P6 // P4/16-medium - nvinfer1::IElementWiseLayer *conv21 = convBnSiLU(network, weightMap, *conv20->getOutput(0), + nvinfer1::IElementWiseLayer* conv21 = convBnSiLU(network, weightMap, *conv20->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.21"); - nvinfer1::ITensor *concat22_inputs[] = {conv21->getOutput(0), conv17->getOutput(0)}; - nvinfer1::IConcatenationLayer *concat22 = network->addConcatenation(concat22_inputs, 2); - nvinfer1::IElementWiseLayer *conv23 = + nvinfer1::ITensor* concat22_inputs[] = {conv21->getOutput(0), conv17->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat22 = network->addConcatenation(concat22_inputs, 2); + nvinfer1::IElementWiseLayer* conv23 = C2(network, weightMap, *concat22->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.23"); // P5/32-large - nvinfer1::IElementWiseLayer *conv24 = convBnSiLU(network, weightMap, *conv23->getOutput(0), + nvinfer1::IElementWiseLayer* conv24 = convBnSiLU(network, weightMap, *conv23->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.24"); - nvinfer1::ITensor *concat25_inputs[] = {conv24->getOutput(0), conv14->getOutput(0)}; - nvinfer1::IConcatenationLayer *concat25 = network->addConcatenation(concat25_inputs, 2); - nvinfer1::IElementWiseLayer *conv26 = + nvinfer1::ITensor* concat25_inputs[] = {conv24->getOutput(0), conv14->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat25 = network->addConcatenation(concat25_inputs, 2); + nvinfer1::IElementWiseLayer* conv26 = C2(network, weightMap, *concat25->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), false, 0.5, "model.26"); // P6/64-xlarge - nvinfer1::IElementWiseLayer *conv27 = convBnSiLU(network, weightMap, *conv26->getOutput(0), + nvinfer1::IElementWiseLayer* conv27 = convBnSiLU(network, weightMap, *conv26->getOutput(0), get_width(768, gw, max_channels), 3, 2, 1, "model.27"); - nvinfer1::ITensor *concat28_inputs[] = {conv27->getOutput(0), conv11->getOutput(0)}; - nvinfer1::IConcatenationLayer *concat28 = network->addConcatenation(concat28_inputs, 2); - nvinfer1::IElementWiseLayer *conv29 = + nvinfer1::ITensor* concat28_inputs[] = {conv27->getOutput(0), 
conv11->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat28 = network->addConcatenation(concat28_inputs, 2); + nvinfer1::IElementWiseLayer* conv29 = C2(network, weightMap, *concat28->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.29"); @@ -1815,211 +1756,201 @@ buildEngineYolov8PoseP6(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *c int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kPoseNumClass, 100)) : get_width(256, gw, max_channels); // output0 - nvinfer1::IElementWiseLayer *conv30_cv2_0_0 = + nvinfer1::IElementWiseLayer* conv30_cv2_0_0 = convBnSiLU(network, weightMap, *conv20->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.0.0"); - nvinfer1::IElementWiseLayer *conv30_cv2_0_1 = + nvinfer1::IElementWiseLayer* conv30_cv2_0_1 = convBnSiLU(network, weightMap, *conv30_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.0.1"); - nvinfer1::IConvolutionLayer *conv30_cv2_0_2 = + nvinfer1::IConvolutionLayer* conv30_cv2_0_2 = network->addConvolutionNd(*conv30_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.0.2.weight"], weightMap["model.30.cv2.0.2.bias"]); conv30_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv30_cv3_0_0 = + nvinfer1::IElementWiseLayer* conv30_cv3_0_0 = convBnSiLU(network, weightMap, *conv20->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.0.0"); - nvinfer1::IElementWiseLayer *conv30_cv3_0_1 = convBnSiLU(network, weightMap, *conv30_cv3_0_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv30_cv3_0_1 = convBnSiLU(network, weightMap, *conv30_cv3_0_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.0.1"); - nvinfer1::IConvolutionLayer *conv30_cv3_0_2 = + nvinfer1::IConvolutionLayer* conv30_cv3_0_2 = network->addConvolutionNd(*conv30_cv3_0_1->getOutput(0), kPoseNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.0.2.weight"], weightMap["model.30.cv3.0.2.bias"]); conv30_cv3_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv3_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor30_0[] = {conv30_cv2_0_2->getOutput(0), conv30_cv3_0_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_0 = network->addConcatenation(inputTensor30_0, 2); + nvinfer1::ITensor* inputTensor30_0[] = {conv30_cv2_0_2->getOutput(0), conv30_cv3_0_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_0 = network->addConcatenation(inputTensor30_0, 2); // output1 - nvinfer1::IElementWiseLayer *conv30_cv2_1_0 = + nvinfer1::IElementWiseLayer* conv30_cv2_1_0 = convBnSiLU(network, weightMap, *conv23->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.1.0"); - nvinfer1::IElementWiseLayer *conv30_cv2_1_1 = + nvinfer1::IElementWiseLayer* conv30_cv2_1_1 = convBnSiLU(network, weightMap, *conv30_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.1.1"); - nvinfer1::IConvolutionLayer *conv30_cv2_1_2 = + nvinfer1::IConvolutionLayer* conv30_cv2_1_2 = network->addConvolutionNd(*conv30_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.1.2.weight"], weightMap["model.30.cv2.1.2.bias"]); conv30_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv30_cv3_1_0 = + nvinfer1::IElementWiseLayer* conv30_cv3_1_0 = convBnSiLU(network, weightMap, *conv23->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.1.0"); - nvinfer1::IElementWiseLayer 
*conv30_cv3_1_1 = convBnSiLU(network, weightMap, *conv30_cv3_1_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv30_cv3_1_1 = convBnSiLU(network, weightMap, *conv30_cv3_1_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.1.1"); - nvinfer1::IConvolutionLayer *conv30_cv3_1_2 = + nvinfer1::IConvolutionLayer* conv30_cv3_1_2 = network->addConvolutionNd(*conv30_cv3_1_1->getOutput(0), kPoseNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.1.2.weight"], weightMap["model.30.cv3.1.2.bias"]); conv30_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor30_1[] = {conv30_cv2_1_2->getOutput(0), conv30_cv3_1_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_1 = network->addConcatenation(inputTensor30_1, 2); + nvinfer1::ITensor* inputTensor30_1[] = {conv30_cv2_1_2->getOutput(0), conv30_cv3_1_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_1 = network->addConcatenation(inputTensor30_1, 2); // output2 - nvinfer1::IElementWiseLayer *conv30_cv2_2_0 = + nvinfer1::IElementWiseLayer* conv30_cv2_2_0 = convBnSiLU(network, weightMap, *conv26->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.2.0"); - nvinfer1::IElementWiseLayer *conv30_cv2_2_1 = + nvinfer1::IElementWiseLayer* conv30_cv2_2_1 = convBnSiLU(network, weightMap, *conv30_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.2.1"); - nvinfer1::IConvolutionLayer *conv30_cv2_2_2 = + nvinfer1::IConvolutionLayer* conv30_cv2_2_2 = network->addConvolutionNd(*conv30_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.2.2.weight"], weightMap["model.30.cv2.2.2.bias"]); conv30_cv2_2_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv2_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv30_cv3_2_0 = + nvinfer1::IElementWiseLayer* conv30_cv3_2_0 = convBnSiLU(network, weightMap, *conv26->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.2.0"); - nvinfer1::IElementWiseLayer *conv30_cv3_2_1 = convBnSiLU(network, weightMap, *conv30_cv3_2_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv30_cv3_2_1 = convBnSiLU(network, weightMap, *conv30_cv3_2_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.2.1"); - nvinfer1::IConvolutionLayer *conv30_cv3_2_2 = + nvinfer1::IConvolutionLayer* conv30_cv3_2_2 = network->addConvolutionNd(*conv30_cv3_2_1->getOutput(0), kPoseNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.2.2.weight"], weightMap["model.30.cv3.2.2.bias"]); conv30_cv3_2_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv3_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor30_2[] = {conv30_cv2_2_2->getOutput(0), conv30_cv3_2_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_2 = network->addConcatenation(inputTensor30_2, 2); + nvinfer1::ITensor* inputTensor30_2[] = {conv30_cv2_2_2->getOutput(0), conv30_cv3_2_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_2 = network->addConcatenation(inputTensor30_2, 2); // output3 - nvinfer1::IElementWiseLayer *conv30_cv2_3_0 = + nvinfer1::IElementWiseLayer* conv30_cv2_3_0 = convBnSiLU(network, weightMap, *conv29->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.3.0"); - nvinfer1::IElementWiseLayer *conv30_cv2_3_1 = + nvinfer1::IElementWiseLayer* conv30_cv2_3_1 = convBnSiLU(network, weightMap, *conv30_cv2_3_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.3.1"); - nvinfer1::IConvolutionLayer *conv30_cv2_3_2 = + nvinfer1::IConvolutionLayer* conv30_cv2_3_2 = 
network->addConvolutionNd(*conv30_cv2_3_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.3.2.weight"], weightMap["model.30.cv2.3.2.bias"]); conv30_cv2_3_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv2_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv30_cv3_3_0 = + nvinfer1::IElementWiseLayer* conv30_cv3_3_0 = convBnSiLU(network, weightMap, *conv29->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.3.0"); - nvinfer1::IElementWiseLayer *conv30_cv3_3_1 = convBnSiLU(network, weightMap, *conv30_cv3_3_0->getOutput(0), + nvinfer1::IElementWiseLayer* conv30_cv3_3_1 = convBnSiLU(network, weightMap, *conv30_cv3_3_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.3.1"); - nvinfer1::IConvolutionLayer *conv30_cv3_3_2 = + nvinfer1::IConvolutionLayer* conv30_cv3_3_2 = network->addConvolutionNd(*conv30_cv3_3_1->getOutput(0), kPoseNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.3.2.weight"], weightMap["model.30.cv3.3.2.bias"]); conv30_cv3_3_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv30_cv3_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::ITensor *inputTensor30_3[] = {conv30_cv2_3_2->getOutput(0), conv30_cv3_3_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_3 = network->addConcatenation(inputTensor30_3, 2); + nvinfer1::ITensor* inputTensor30_3[] = {conv30_cv2_3_2->getOutput(0), conv30_cv3_3_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_3 = network->addConcatenation(inputTensor30_3, 2); /******************************************************************************************************* ********************************************* YOLOV8 DETECT ****************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer *conv_layers[] = {conv3, conv5, conv7, conv9}; + nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7, conv9}; int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides); int stridesLength = sizeof(strides) / sizeof(int); // P3 processing steps (remains unchanged) - nvinfer1::IShuffleLayer *shuffle30_0 = + nvinfer1::IShuffleLayer* shuffle30_0 = network->addShuffle(*cat30_0->getOutput(0)); // Reusing the previous cat30_0 as P3 concatenation layer shuffle30_0->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kPoseNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}); - nvinfer1::ISliceLayer *split30_0_0 = network->addSlice( - *shuffle30_0->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[0]) * (kInputW / strides[0])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split30_0_1 = network->addSlice( - *shuffle30_0->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, + nvinfer1::ISliceLayer* split30_0_0 = network->addSlice( + *shuffle30_0->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split30_0_1 = network->addSlice( + *shuffle30_0->getOutput(0), nvinfer1::Dims3{0, 64, 0}, nvinfer1::Dims3{kBatchSize, kPoseNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl30_0 = + nvinfer1::IShuffleLayer* dfl30_0 = DFL(network, weightMap, *split30_0_0->getOutput(0), 4, (kInputH / strides[0]) * (kInputW / strides[0]), 1, 1, 0, "model.30.dfl.conv.weight"); // 
det0 auto shuffle_conv20 = cv4_conv_combined(network, weightMap, *conv20->getOutput(0), "model.30.cv4.0", (kInputH / strides[0]) * (kInputW / strides[0]), gw, "pose"); - nvinfer1::ITensor *inputTensor30_dfl_0[] = {dfl30_0->getOutput(0), split30_0_1->getOutput(0), + nvinfer1::ITensor* inputTensor30_dfl_0[] = {dfl30_0->getOutput(0), split30_0_1->getOutput(0), shuffle_conv20->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_dfl_0 = network->addConcatenation(inputTensor30_dfl_0, 3); + nvinfer1::IConcatenationLayer* cat30_dfl_0 = network->addConcatenation(inputTensor30_dfl_0, 3); cat30_dfl_0->setAxis(1); // P4 processing steps (remains unchanged) - nvinfer1::IShuffleLayer *shuffle30_1 = + nvinfer1::IShuffleLayer* shuffle30_1 = network->addShuffle(*cat30_1->getOutput(0)); // Reusing the previous cat30_1 as P4 concatenation layer shuffle30_1->setReshapeDimensions( nvinfer1::Dims3{kBatchSize, 64 + kPoseNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}); - nvinfer1::ISliceLayer *split30_1_0 = network->addSlice( - *shuffle30_1->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[1]) * (kInputW / strides[1])}, - nvinfer1::Dims3{1, 1, 1}); - nvinfer1::ISliceLayer *split30_1_1 = network->addSlice( - *shuffle30_1->getOutput(0), - nvinfer1::Dims3{0, 64, 0}, + nvinfer1::ISliceLayer* split30_1_0 = network->addSlice( + *shuffle30_1->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split30_1_1 = network->addSlice( + *shuffle30_1->getOutput(0), nvinfer1::Dims3{0, 64, 0}, nvinfer1::Dims3{kBatchSize, kPoseNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims3{1, 1, 1}); - nvinfer1::IShuffleLayer *dfl30_1 = + nvinfer1::IShuffleLayer* dfl30_1 = DFL(network, weightMap, *split30_1_0->getOutput(0), 4, (kInputH / strides[1]) * (kInputW / strides[1]), 1, 1, 0, "model.30.dfl.conv.weight"); // det1 auto shuffle_conv23 = cv4_conv_combined(network, weightMap, *conv23->getOutput(0), "model.30.cv4.1", (kInputH / strides[1]) * (kInputW / strides[1]), gw, "pose"); - nvinfer1::ITensor *inputTensor30_dfl_1[] = {dfl30_1->getOutput(0), split30_1_1->getOutput(0), + nvinfer1::ITensor* inputTensor30_dfl_1[] = {dfl30_1->getOutput(0), split30_1_1->getOutput(0), shuffle_conv23->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_dfl_1 = network->addConcatenation(inputTensor30_dfl_1, 3); + nvinfer1::IConcatenationLayer* cat30_dfl_1 = network->addConcatenation(inputTensor30_dfl_1, 3); cat30_dfl_1->setAxis(1); // P5 processing steps (remains unchanged) - nvinfer1::IShuffleLayer *shuffle30_2 = + nvinfer1::IShuffleLayer* shuffle30_2 = network->addShuffle(*cat30_2->getOutput(0)); // Reusing the previous cat30_2 as P5 concatenation layer - shuffle30_2->setReshapeDimensions(nvinfer1::Dims3{kBatchSize,64 + kPoseNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}); - nvinfer1::ISliceLayer *split30_2_0 = network->addSlice( - *shuffle30_2->getOutput(0), - nvinfer1::Dims3{0,0, 0}, - nvinfer1::Dims3{kBatchSize,64, (kInputH / strides[2]) * (kInputW / strides[2])}, - nvinfer1::Dims3{1,1, 1}); - nvinfer1::ISliceLayer *split30_2_1 = network->addSlice( - *shuffle30_2->getOutput(0), - nvinfer1::Dims3{0,64, 0}, - nvinfer1::Dims3{kBatchSize,kPoseNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, - nvinfer1::Dims3{1,1, 1}); - nvinfer1::IShuffleLayer *dfl30_2 = + shuffle30_2->setReshapeDimensions( + nvinfer1::Dims3{kBatchSize, 64 + 
kPoseNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}); + nvinfer1::ISliceLayer* split30_2_0 = network->addSlice( + *shuffle30_2->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split30_2_1 = network->addSlice( + *shuffle30_2->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kPoseNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl30_2 = DFL(network, weightMap, *split30_2_0->getOutput(0), 4, (kInputH / strides[2]) * (kInputW / strides[2]), 1, 1, 0, "model.30.dfl.conv.weight"); // det2 auto shuffle_conv26 = cv4_conv_combined(network, weightMap, *conv26->getOutput(0), "model.30.cv4.2", (kInputH / strides[2]) * (kInputW / strides[2]), gw, "pose"); - nvinfer1::ITensor *inputTensor30_dfl_2[] = {dfl30_2->getOutput(0), split30_2_1->getOutput(0), + nvinfer1::ITensor* inputTensor30_dfl_2[] = {dfl30_2->getOutput(0), split30_2_1->getOutput(0), shuffle_conv26->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_dfl_2 = network->addConcatenation(inputTensor30_dfl_2, 3); + nvinfer1::IConcatenationLayer* cat30_dfl_2 = network->addConcatenation(inputTensor30_dfl_2, 3); cat30_dfl_2->setAxis(1); // P6 processing steps - nvinfer1::IShuffleLayer *shuffle30_3 = network->addShuffle(*cat30_3->getOutput(0)); - shuffle30_3->setReshapeDimensions(nvinfer1::Dims3{kBatchSize,64 + kPoseNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}); - nvinfer1::ISliceLayer *split30_3_0 = network->addSlice( - *shuffle30_3->getOutput(0), - nvinfer1::Dims3{0,0, 0}, - nvinfer1::Dims3{kBatchSize,64, (kInputH / strides[3]) * (kInputW / strides[3])}, - nvinfer1::Dims3{1,1, 1}); - nvinfer1::ISliceLayer *split30_3_1 = network->addSlice( - *shuffle30_3->getOutput(0), - nvinfer1::Dims3{0,64, 0}, - nvinfer1::Dims3{kBatchSize,kPoseNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}, - nvinfer1::Dims3{1,1, 1}); - nvinfer1::IShuffleLayer *dfl30_3 = + nvinfer1::IShuffleLayer* shuffle30_3 = network->addShuffle(*cat30_3->getOutput(0)); + shuffle30_3->setReshapeDimensions( + nvinfer1::Dims3{kBatchSize, 64 + kPoseNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}); + nvinfer1::ISliceLayer* split30_3_0 = network->addSlice( + *shuffle30_3->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{kBatchSize, 64, (kInputH / strides[3]) * (kInputW / strides[3])}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split30_3_1 = network->addSlice( + *shuffle30_3->getOutput(0), nvinfer1::Dims3{0, 64, 0}, + nvinfer1::Dims3{kBatchSize, kPoseNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::IShuffleLayer* dfl30_3 = DFL(network, weightMap, *split30_3_0->getOutput(0), 4, (kInputH / strides[3]) * (kInputW / strides[3]), 1, 1, 0, "model.30.dfl.conv.weight"); // det3 auto shuffle_conv29 = cv4_conv_combined(network, weightMap, *conv29->getOutput(0), "model.30.cv4.3", (kInputH / strides[3]) * (kInputW / strides[3]), gw, "pose"); - nvinfer1::ITensor *inputTensor30_dfl_3[] = {dfl30_3->getOutput(0), split30_3_1->getOutput(0), + nvinfer1::ITensor* inputTensor30_dfl_3[] = {dfl30_3->getOutput(0), split30_3_1->getOutput(0), shuffle_conv29->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat30_dfl_3 = network->addConcatenation(inputTensor30_dfl_3, 3); + nvinfer1::IConcatenationLayer* cat30_dfl_3 = network->addConcatenation(inputTensor30_dfl_3, 3); cat30_dfl_3->setAxis(1); - 
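Note: the four cat30_dfl_* tensors, one per P3-P6 level, are handed to the YOLO plugin together with the measured strides. For the standard downsampling shown in the backbone (an assumption here), those strides are 8, 16, 32 and 64, so the per-image candidate-cell count is the sum below:

// Sketch: candidate cells across the four P6-model levels (strides assumed 8/16/32/64).
inline int totalCellsP6(int input_h, int input_w) {
    const int strides[4] = {8, 16, 32, 64};
    int total = 0;
    for (int s : strides)
        total += (input_h / s) * (input_w / s);
    return total;  // e.g. a 1280x1280 input gives 25600 + 6400 + 1600 + 400 = 34000 cells
}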
nvinfer1::IPluginV2Layer *yolo = addYoLoLayer( - network, std::vector{cat30_dfl_0, cat30_dfl_1, cat30_dfl_2, cat30_dfl_3}, + nvinfer1::IPluginV2Layer* yolo = addYoLoLayer( + network, std::vector{cat30_dfl_0, cat30_dfl_1, cat30_dfl_2, cat30_dfl_3}, strides, stridesLength, false, true); yolo->getOutput(0)->setName(kOutputTensorName); network->markOutput(*yolo->getOutput(0)); @@ -2038,13 +1969,13 @@ buildEngineYolov8PoseP6(nvinfer1::IBuilder *builder, nvinfer1::IBuilderConfig *c #endif std::cout << "Building engine, please wait for a while..." << std::endl; - nvinfer1::IHostMemory *serialized_model = builder->buildSerializedNetwork(*network, *config); + nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config); std::cout << "Build engine successfully!" << std::endl; delete network; - for (auto &mem: weightMap) { - free((void *) (mem.second.values)); + for (auto& mem : weightMap) { + free((void*)(mem.second.values)); } return serialized_model; } diff --git a/yolov8/yolov8_trt10/src/postprocess.cu b/yolov8/yolov8_trt10/src/postprocess.cu index 3cae0427..ca4bac46 100644 --- a/yolov8/yolov8_trt10/src/postprocess.cu +++ b/yolov8/yolov8_trt10/src/postprocess.cu @@ -1,21 +1,24 @@ // // Created by lindsay on 23-7-17. // -#include "types.h" #include "postprocess.h" +#include "types.h" -static __global__ void -decode_kernel(float *predict, int num_bboxes, float confidence_threshold, float *parray, int max_objects) { +static __global__ void decode_kernel(float* predict, int num_bboxes, float confidence_threshold, float* parray, + int max_objects) { float count = predict[0]; int position = (blockDim.x * blockIdx.x + threadIdx.x); - if (position >= count) return; + if (position >= count) + return; - float *pitem = predict + 1 + position * (sizeof(Detection) / sizeof(float)); + float* pitem = predict + 1 + position * (sizeof(Detection) / sizeof(float)); int index = atomicAdd(parray, 1); - if (index >= max_objects) return; + if (index >= max_objects) + return; float confidence = pitem[4]; - if (confidence < confidence_threshold) return; + if (confidence < confidence_threshold) + return; float left = pitem[0]; float top = pitem[1]; @@ -23,7 +26,7 @@ decode_kernel(float *predict, int num_bboxes, float confidence_threshold, float float bottom = pitem[3]; float label = pitem[5]; - float *pout_item = parray + 1 + index * bbox_element; + float* pout_item = parray + 1 + index * bbox_element; *pout_item++ = left; *pout_item++ = top; *pout_item++ = right; @@ -33,35 +36,37 @@ decode_kernel(float *predict, int num_bboxes, float confidence_threshold, float *pout_item++ = 1; // 1 = keep, 0 = ignore } -static __device__ float -box_iou(float aleft, float atop, float aright, float abottom, float bleft, float btop, float bright, float bbottom) { +static __device__ float box_iou(float aleft, float atop, float aright, float abottom, float bleft, float btop, + float bright, float bbottom) { float cleft = max(aleft, bleft); float ctop = max(atop, btop); float cright = min(aright, bright); float cbottom = min(abottom, bbottom); float c_area = max(cright - cleft, 0.0f) * max(cbottom - ctop, 0.0f); - if (c_area == 0.0f) return 0.0f; + if (c_area == 0.0f) + return 0.0f; float a_area = max(0.0f, aright - aleft) * max(0.0f, abottom - atop); float b_area = max(0.0f, bright - bleft) * max(0.0f, bbottom - btop); return c_area / (a_area + b_area - c_area); } -static __global__ void nms_kernel(float *bboxes, int max_objects, float threshold) { +static __global__ void nms_kernel(float* bboxes, int 
max_objects, float threshold) { int position = (blockDim.x * blockIdx.x + threadIdx.x); int count = bboxes[0]; - if (position >= count) return; + if (position >= count) + return; - float *pcurrent = bboxes + 1 + position * bbox_element; + float* pcurrent = bboxes + 1 + position * bbox_element; for (int i = 0; i < count; ++i) { - float *pitem = bboxes + 1 + i * bbox_element; - if (i == position || pcurrent[5] != pitem[5]) continue; + float* pitem = bboxes + 1 + i * bbox_element; + if (i == position || pcurrent[5] != pitem[5]) + continue; if (pitem[4] >= pcurrent[4]) { - if (pitem[4] == pcurrent[4] && i < position) continue; - float iou = box_iou( - pcurrent[0], pcurrent[1], pcurrent[2], pcurrent[3], - pitem[0], pitem[1], pitem[2], pitem[3] - ); + if (pitem[4] == pcurrent[4] && i < position) + continue; + float iou = + box_iou(pcurrent[0], pcurrent[1], pcurrent[2], pcurrent[3], pitem[0], pitem[1], pitem[2], pitem[3]); if (iou > threshold) { pcurrent[6] = 0; return; @@ -70,14 +75,14 @@ static __global__ void nms_kernel(float *bboxes, int max_objects, float threshol } } -void cuda_decode(float *predict, int num_bboxes, float confidence_threshold, float *parray, int max_objects, +void cuda_decode(float* predict, int num_bboxes, float confidence_threshold, float* parray, int max_objects, cudaStream_t stream) { int block = 256; int grid = ceil(num_bboxes / (float)block); decode_kernel<<>>((float*)predict, num_bboxes, confidence_threshold, parray, max_objects); } -void cuda_nms(float *parray, float nms_threshold, int max_objects, cudaStream_t stream) { +void cuda_nms(float* parray, float nms_threshold, int max_objects, cudaStream_t stream) { int block = max_objects < 256 ? max_objects : 256; int grid = ceil(max_objects / (float)block); nms_kernel<<>>(parray, max_objects, nms_threshold); diff --git a/yolov8/yolov8_trt10/src/preprocess.cu b/yolov8/yolov8_trt10/src/preprocess.cu index 14d9e778..d3d6f879 100644 --- a/yolov8/yolov8_trt10/src/preprocess.cu +++ b/yolov8/yolov8_trt10/src/preprocess.cu @@ -1,15 +1,14 @@ -#include "preprocess.h" #include "cuda_utils.h" +#include "preprocess.h" -static uint8_t *img_buffer_host = nullptr; -static uint8_t *img_buffer_device = nullptr; - +static uint8_t* img_buffer_host = nullptr; +static uint8_t* img_buffer_device = nullptr; -__global__ void -warpaffine_kernel(uint8_t *src, int src_line_size, int src_width, int src_height, float *dst, int dst_width, - int dst_height, uint8_t const_value_st, AffineMatrix d2s, int edge) { +__global__ void warpaffine_kernel(uint8_t* src, int src_line_size, int src_width, int src_height, float* dst, + int dst_width, int dst_height, uint8_t const_value_st, AffineMatrix d2s, int edge) { int position = blockDim.x * blockIdx.x + threadIdx.x; - if (position >= edge) return; + if (position >= edge) + return; float m_x1 = d2s.value[0]; float m_y1 = d2s.value[1]; @@ -41,10 +40,10 @@ warpaffine_kernel(uint8_t *src, int src_line_size, int src_width, int src_height float hy = 1 - ly; float hx = 1 - lx; float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - uint8_t *v1 = const_value; - uint8_t *v2 = const_value; - uint8_t *v3 = const_value; - uint8_t *v4 = const_value; + uint8_t* v1 = const_value; + uint8_t* v2 = const_value; + uint8_t* v3 = const_value; + uint8_t* v4 = const_value; if (y_low >= 0) { if (x_low >= 0) @@ -79,18 +78,15 @@ warpaffine_kernel(uint8_t *src, int src_line_size, int src_width, int src_height // rgbrgbrgb to rrrgggbbb int area = dst_width * dst_height; - float *pdst_c0 = dst + dy * dst_width + dx; - float 
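Note: decode_kernel writes one fixed-size record per candidate into parray after the leading count element: left, top, right, bottom, confidence, class label and a keep flag; bbox_element (defined elsewhere in the repo, presumably 7 for plain detection) strides over exactly those fields. nms_kernel then clears the keep flag of any box that overlaps a same-class box of higher (or equal, by its tie rule) confidence. A host-side reference of the same logic, as a sketch:

#include <algorithm>
// Host reference of the decode/NMS buffer handling (sketch; bbox_element == 7 is inferred, not stated).
inline float iouRef(const float* a, const float* b) {  // both point at [l, t, r, b, ...]
    float cl = std::max(a[0], b[0]), ct = std::max(a[1], b[1]);
    float cr = std::min(a[2], b[2]), cb = std::min(a[3], b[3]);
    float inter = std::max(cr - cl, 0.0f) * std::max(cb - ct, 0.0f);
    if (inter == 0.0f)
        return 0.0f;
    float uni = std::max(0.0f, a[2] - a[0]) * std::max(0.0f, a[3] - a[1]) +
                std::max(0.0f, b[2] - b[0]) * std::max(0.0f, b[3] - b[1]) - inter;
    return inter / uni;
}
inline void nmsRef(float* bboxes, int bbox_element, float threshold) {
    int count = static_cast<int>(bboxes[0]);
    for (int p = 0; p < count; ++p) {
        float* cur = bboxes + 1 + p * bbox_element;
        for (int i = 0; i < count; ++i) {
            float* other = bboxes + 1 + i * bbox_element;
            if (i == p || other[5] != cur[5])
                continue;                        // same slot or different class
            if (other[4] < cur[4])
                continue;                        // only equal-or-higher confidence can suppress
            if (other[4] == cur[4] && i < p)
                continue;                        // equal-confidence tie rule from the kernel
            if (iouRef(cur, other) > threshold) {
                cur[6] = 0.0f;                   // clear the keep flag
                break;
            }
        }
    }
}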
*pdst_c1 = pdst_c0 + area; - float *pdst_c2 = pdst_c1 + area; + float* pdst_c0 = dst + dy * dst_width + dx; + float* pdst_c1 = pdst_c0 + area; + float* pdst_c2 = pdst_c1 + area; *pdst_c0 = c0; *pdst_c1 = c1; *pdst_c2 = c2; } - - - -void cuda_preprocess(uint8_t *src, int src_width, int src_height, float *dst, int dst_width, int dst_height, +void cuda_preprocess(uint8_t* src, int src_width, int src_height, float* dst, int dst_width, int dst_height, cudaStream_t stream) { int img_size = src_width * src_height * 3; // copy data to pinned memory @@ -99,7 +95,7 @@ void cuda_preprocess(uint8_t *src, int src_width, int src_height, float *dst, in CUDA_CHECK(cudaMemcpyAsync(img_buffer_device, img_buffer_host, img_size, cudaMemcpyHostToDevice, stream)); AffineMatrix s2d, d2s; - float scale = std::min(dst_height / (float) src_height, dst_width / (float) src_width); + float scale = std::min(dst_height / (float)src_height, dst_width / (float)src_width); s2d.value[0] = scale; s2d.value[1] = 0; @@ -115,16 +111,12 @@ void cuda_preprocess(uint8_t *src, int src_width, int src_height, float *dst, in int jobs = dst_height * dst_width; int threads = 256; - int blocks = ceil(jobs / (float) threads); - warpaffine_kernel<<>>( - img_buffer_device, src_width * 3, src_width, - src_height, dst, dst_width, - dst_height, 128, d2s, jobs); + int blocks = ceil(jobs / (float)threads); + warpaffine_kernel<<>>(img_buffer_device, src_width * 3, src_width, src_height, dst, + dst_width, dst_height, 128, d2s, jobs); } - -void cuda_batch_preprocess(std::vector &img_batch, - float *dst, int dst_width, int dst_height, +void cuda_batch_preprocess(std::vector& img_batch, float* dst, int dst_width, int dst_height, cudaStream_t stream) { int dst_size = dst_width * dst_height * 3; for (size_t i = 0; i < img_batch.size(); i++) { @@ -134,22 +126,14 @@ void cuda_batch_preprocess(std::vector &img_batch, } } - - - - void cuda_preprocess_init(int max_image_size) { // prepare input data in pinned memory - CUDA_CHECK(cudaMallocHost((void **) &img_buffer_host, max_image_size * 3)); + CUDA_CHECK(cudaMallocHost((void**)&img_buffer_host, max_image_size * 3)); // prepare input data in device memory - CUDA_CHECK(cudaMalloc((void **) &img_buffer_device, max_image_size * 3)); + CUDA_CHECK(cudaMalloc((void**)&img_buffer_device, max_image_size * 3)); } void cuda_preprocess_destroy() { CUDA_CHECK(cudaFree(img_buffer_device)); CUDA_CHECK(cudaFreeHost(img_buffer_host)); } - - - - diff --git a/yolov8/yolov8_trt10/yolov8_cls.cpp b/yolov8/yolov8_trt10/yolov8_cls.cpp old mode 100755 new mode 100644 index 1ab490b1..31b729ab --- a/yolov8/yolov8_trt10/yolov8_cls.cpp +++ b/yolov8/yolov8_trt10/yolov8_cls.cpp @@ -1,13 +1,13 @@ +#include "calibrator.h" +#include "config.h" #include "cuda_utils.h" #include "logging.h" -#include "utils.h" #include "model.h" -#include "config.h" -#include "calibrator.h" +#include "utils.h" -#include #include #include +#include #include #include @@ -16,7 +16,7 @@ using namespace nvinfer1; static Logger gLogger; const static int kOutputSize = kClsNumClass; -void batch_preprocess(std::vector &imgs, float *output, int dst_width = 224, int dst_height = 224) { +void batch_preprocess(std::vector& imgs, float* output, int dst_width = 224, int dst_height = 224) { for (size_t b = 0; b < imgs.size(); b++) { int h = imgs[b].rows; int w = imgs[b].cols; @@ -45,7 +45,7 @@ void batch_preprocess(std::vector &imgs, float *output, int dst_width = } } -std::vector softmax(float *prob, int n) { +std::vector softmax(float* prob, int n) { std::vector res; float 
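Note: cuda_preprocess sets up the usual letterbox warp: scale the source to fit, centre it, and let warpaffine_kernel walk destination pixels through the inverse mapping with bilinear sampling, a constant 128 border and a planar RGB write (the rrrgggbbb comment above). Only the first two matrix entries are visible in this hunk, so the translation terms below are assumptions matching the common tensorrtx layout:

#include <algorithm>
// Sketch of the source-to-destination letterbox matrix (s2d); the kernel itself consumes the
// inverse (d2s) to look up a source location for every destination pixel.
inline void letterboxS2D(int src_w, int src_h, int dst_w, int dst_h, float s2d[6]) {
    float scale = std::min(dst_h / (float)src_h, dst_w / (float)src_w);
    s2d[0] = scale; s2d[1] = 0.0f;  s2d[2] = -scale * src_w * 0.5f + dst_w * 0.5f;
    s2d[3] = 0.0f;  s2d[4] = scale; s2d[5] = -scale * src_h * 0.5f + dst_h * 0.5f;
}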
sum = 0.0f; float t; @@ -60,7 +60,7 @@ std::vector softmax(float *prob, int n) { return res; } -std::vector topk(const std::vector &vec, int k) { +std::vector topk(const std::vector& vec, int k) { std::vector topk_index; std::vector vec_index(vec.size()); std::iota(vec_index.begin(), vec_index.end(), 0); @@ -92,9 +92,10 @@ std::vector read_classes(std::string file_name) { return classes; } -bool -parse_args(int argc, char **argv, std::string &wts, std::string &engine, float &gd, float &gw, std::string &img_dir) { - if (argc < 4) return false; +bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, float& gd, float& gw, + std::string& img_dir) { + if (argc < 4) + return false; if (std::string(argv[1]) == "-s" && (argc == 5)) { wts = std::string(argv[2]); engine = std::string(argv[3]); @@ -126,8 +127,8 @@ parse_args(int argc, char **argv, std::string &wts, std::string &engine, float & return true; } -void prepare_buffers(ICudaEngine *engine, float **gpu_input_buffer, float **gpu_output_buffer, float **cpu_input_buffer, - float **output_buffer_host) { +void prepare_buffers(ICudaEngine* engine, float** gpu_input_buffer, float** gpu_output_buffer, float** cpu_input_buffer, + float** output_buffer_host) { assert(engine->getNbIOTensors() == 2); // In order to bind the buffers, we need to know the names of the input and output tensors. // Note that indices are guaranteed to be less than IEngine::getNbBindings() @@ -142,15 +143,15 @@ void prepare_buffers(ICudaEngine *engine, float **gpu_input_buffer, float **gpu_ assert(false); } // Create GPU buffers on device - CUDA_CHECK(cudaMalloc((void **) gpu_input_buffer, kBatchSize * 3 * kClsInputH * kClsInputW * sizeof(float))); - CUDA_CHECK(cudaMalloc((void **) gpu_output_buffer, kBatchSize * kOutputSize * sizeof(float))); + CUDA_CHECK(cudaMalloc((void**)gpu_input_buffer, kBatchSize * 3 * kClsInputH * kClsInputW * sizeof(float))); + CUDA_CHECK(cudaMalloc((void**)gpu_output_buffer, kBatchSize * kOutputSize * sizeof(float))); *cpu_input_buffer = new float[kBatchSize * 3 * kClsInputH * kClsInputW]; *output_buffer_host = new float[kBatchSize * kOutputSize]; } -void -infer(IExecutionContext &context, cudaStream_t &stream, void **buffers, float *input, float *output, int batchSize) { +void infer(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* input, float* output, + int batchSize) { CUDA_CHECK(cudaMemcpyAsync(buffers[0], input, batchSize * 3 * kClsInputH * kClsInputW * sizeof(float), cudaMemcpyHostToDevice, stream)); context.setInputTensorAddress(kInputTensorName, buffers[0]); @@ -161,13 +162,13 @@ infer(IExecutionContext &context, cudaStream_t &stream, void **buffers, float *i cudaStreamSynchronize(stream); } -void -serialize_engine(unsigned int max_batchsize, float &gd, float &gw, std::string &wts_name, std::string &engine_name) { +void serialize_engine(unsigned int max_batchsize, float& gd, float& gw, std::string& wts_name, + std::string& engine_name) { // Create builder - IBuilder *builder = createInferBuilder(gLogger); - IBuilderConfig *config = builder->createBuilderConfig(); + IBuilder* builder = createInferBuilder(gLogger); + IBuilderConfig* config = builder->createBuilderConfig(); // Create model to populate the network, then set the outputs and create an engine - IHostMemory *serialized_engine = nullptr; + IHostMemory* serialized_engine = nullptr; //engine = buildEngineYolov8Cls(max_batchsize, builder, config, DataType::kFLOAT, gd, gw, wts_name); serialized_engine = buildEngineYolov8Cls(builder, config, 
DataType::kFLOAT, wts_name, gd, gw); assert(serialized_engine); @@ -177,7 +178,7 @@ serialize_engine(unsigned int max_batchsize, float &gd, float &gw, std::string & std::cerr << "Could not open plan output file" << std::endl; assert(false); } - p.write(reinterpret_cast(serialized_engine->data()), serialized_engine->size()); + p.write(reinterpret_cast(serialized_engine->data()), serialized_engine->size()); // Close everything down delete serialized_engine; @@ -185,8 +186,8 @@ serialize_engine(unsigned int max_batchsize, float &gd, float &gw, std::string & delete builder; } -void -deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngine **engine, IExecutionContext **context) { +void deserialize_engine(std::string& engine_name, IRuntime** runtime, ICudaEngine** engine, + IExecutionContext** context) { std::ifstream file(engine_name, std::ios::binary); if (!file.good()) { std::cerr << "read " << engine_name << " error!" << std::endl; @@ -196,7 +197,7 @@ deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngine **e file.seekg(0, file.end); size = file.tellg(); file.seekg(0, file.beg); - char *serialized_engine = new char[size]; + char* serialized_engine = new char[size]; assert(serialized_engine); file.read(serialized_engine, size); file.close(); @@ -210,7 +211,7 @@ deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngine **e delete[] serialized_engine; } -int main(int argc, char **argv) { +int main(int argc, char** argv) { // -s ../models/yolov8n-cls.wts ../models/yolov8n-cls.fp32.trt n // -d ../models/yolov8n-cls.fp32.trt ../images cudaSetDevice(kGpuId); @@ -235,17 +236,17 @@ int main(int argc, char **argv) { } // Deserialize the engine from file - IRuntime *runtime = nullptr; - ICudaEngine *engine = nullptr; - IExecutionContext *context = nullptr; + IRuntime* runtime = nullptr; + ICudaEngine* engine = nullptr; + IExecutionContext* context = nullptr; deserialize_engine(engine_name, &runtime, &engine, &context); cudaStream_t stream; CUDA_CHECK(cudaStreamCreate(&stream)); // Prepare cpu and gpu buffers - float *device_buffers[2]; - float *cpu_input_buffer = nullptr; - float *output_buffer_host = nullptr; + float* device_buffers[2]; + float* cpu_input_buffer = nullptr; + float* output_buffer_host = nullptr; prepare_buffers(engine, &device_buffers[0], &device_buffers[1], &cpu_input_buffer, &output_buffer_host); // Read images from directory @@ -274,18 +275,18 @@ int main(int argc, char **argv) { // Run inference auto start = std::chrono::system_clock::now(); - infer(*context, stream, (void **) device_buffers, cpu_input_buffer, output_buffer_host, kBatchSize); + infer(*context, stream, (void**)device_buffers, cpu_input_buffer, output_buffer_host, kBatchSize); auto end = std::chrono::system_clock::now(); std::cout << "inference time: " << std::chrono::duration_cast(end - start).count() << "ms" << std::endl; // Postprocess and get top-k result for (size_t b = 0; b < img_name_batch.size(); b++) { - float *p = &output_buffer_host[b * kOutputSize]; + float* p = &output_buffer_host[b * kOutputSize]; auto res = softmax(p, kOutputSize); auto topk_idx = topk(res, 3); std::cout << img_name_batch[b] << std::endl; - for (auto idx: topk_idx) { + for (auto idx : topk_idx) { std::cout << " " << classes[idx] << " " << res[idx] << std::endl; } } diff --git a/yolov8/yolov8_trt10/yolov8_det.cpp b/yolov8/yolov8_trt10/yolov8_det.cpp old mode 100755 new mode 100644 index 552df96c..d275e6d6 --- a/yolov8/yolov8_trt10/yolov8_det.cpp +++ 
b/yolov8/yolov8_trt10/yolov8_det.cpp @@ -12,11 +12,11 @@ Logger gLogger; using namespace nvinfer1; const int kOutputSize = kMaxNumOutputBbox * sizeof(Detection) / sizeof(float) + 1; -void serialize_engine(std::string &wts_name, std::string &engine_name, int &is_p, std::string &sub_type, float &gd, - float &gw, int &max_channels) { - IBuilder *builder = createInferBuilder(gLogger); - IBuilderConfig *config = builder->createBuilderConfig(); - IHostMemory *serialized_engine = nullptr; +void serialize_engine(std::string& wts_name, std::string& engine_name, int& is_p, std::string& sub_type, float& gd, + float& gw, int& max_channels) { + IBuilder* builder = createInferBuilder(gLogger); + IBuilderConfig* config = builder->createBuilderConfig(); + IHostMemory* serialized_engine = nullptr; if (is_p == 6) { serialized_engine = buildEngineYolov8DetP6(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels); @@ -32,15 +32,15 @@ void serialize_engine(std::string &wts_name, std::string &engine_name, int &is_p std::cout << "could not open plan output file" << std::endl; assert(false); } - p.write(reinterpret_cast(serialized_engine->data()), serialized_engine->size()); + p.write(reinterpret_cast(serialized_engine->data()), serialized_engine->size()); delete serialized_engine; delete config; delete builder; } -void deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngine **engine, - IExecutionContext **context) { +void deserialize_engine(std::string& engine_name, IRuntime** runtime, ICudaEngine** engine, + IExecutionContext** context) { std::ifstream file(engine_name, std::ios::binary); if (!file.good()) { std::cerr << "read " << engine_name << " error!" << std::endl; @@ -50,7 +50,7 @@ void deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngin file.seekg(0, file.end); size = file.tellg(); file.seekg(0, file.beg); - char *serialized_engine = new char[size]; + char* serialized_engine = new char[size]; assert(serialized_engine); file.read(serialized_engine, size); file.close(); @@ -64,8 +64,8 @@ void deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngin delete[] serialized_engine; } -void prepare_buffer(ICudaEngine *engine, float **input_buffer_device, float **output_buffer_device, - float **output_buffer_host, float **decode_ptr_host, float **decode_ptr_device, +void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, float** output_buffer_device, + float** output_buffer_host, float** decode_ptr_host, float** decode_ptr_device, std::string cuda_post_process) { assert(engine->getNbIOTensors() == 2); // In order to bind the buffers, we need to know the names of the input and output tensors. 
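Note: serialize_engine / deserialize_engine, in both the classification and detection binaries, are a plain plan-file round trip over the TensorRT 10 API. A condensed sketch of the same flow (error handling reduced to return values):

#include <fstream>
#include <iterator>
#include <string>
#include <vector>
#include "NvInfer.h"
// Write a built plan to disk.
inline bool savePlan(const nvinfer1::IHostMemory* plan, const std::string& path) {
    std::ofstream out(path, std::ios::binary);
    if (!out)
        return false;
    out.write(static_cast<const char*>(plan->data()), plan->size());
    return true;
}
// Read it back and deserialize it with an existing IRuntime.
inline nvinfer1::ICudaEngine* loadPlan(nvinfer1::IRuntime* runtime, const std::string& path) {
    std::ifstream in(path, std::ios::binary);
    if (!in)
        return nullptr;
    std::vector<char> blob((std::istreambuf_iterator<char>(in)), std::istreambuf_iterator<char>());
    return runtime->deserializeCudaEngine(blob.data(), blob.size());
}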
@@ -81,8 +81,8 @@ void prepare_buffer(ICudaEngine *engine, float **input_buffer_device, float **ou assert(false); } // Create GPU buffers on device - CUDA_CHECK(cudaMalloc((void **) input_buffer_device, kBatchSize * 3 * kInputH * kInputW * sizeof(float))); - CUDA_CHECK(cudaMalloc((void **) output_buffer_device, kBatchSize * kOutputSize * sizeof(float))); + CUDA_CHECK(cudaMalloc((void**)input_buffer_device, kBatchSize * 3 * kInputH * kInputW * sizeof(float))); + CUDA_CHECK(cudaMalloc((void**)output_buffer_device, kBatchSize * kOutputSize * sizeof(float))); if (cuda_post_process == "c") { *output_buffer_host = new float[kBatchSize * kOutputSize]; } else if (cuda_post_process == "g") { @@ -92,12 +92,12 @@ void prepare_buffer(ICudaEngine *engine, float **input_buffer_device, float **ou } // Allocate memory for decode_ptr_host and copy to device *decode_ptr_host = new float[1 + kMaxNumOutputBbox * bbox_element]; - CUDA_CHECK(cudaMalloc((void **) decode_ptr_device, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element))); + CUDA_CHECK(cudaMalloc((void**)decode_ptr_device, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element))); } } -void infer(IExecutionContext &context, cudaStream_t &stream, void **buffers, float *output, int batchsize, - float *decode_ptr_host, float *decode_ptr_device, int model_bboxes, std::string cuda_post_process) { +void infer(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* output, int batchsize, + float* decode_ptr_host, float* decode_ptr_device, int model_bboxes, std::string cuda_post_process) { // infer on the batch asynchronously, and DMA output back to host auto start = std::chrono::system_clock::now(); context.setInputTensorAddress(kInputTensorName, buffers[0]); @@ -112,7 +112,7 @@ void infer(IExecutionContext &context, cudaStream_t &stream, void **buffers, flo } else if (cuda_post_process == "g") { CUDA_CHECK( cudaMemsetAsync(decode_ptr_device, 0, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element), stream)); - cuda_decode((float *) buffers[1], model_bboxes, kConfThresh, decode_ptr_device, kMaxNumOutputBbox, stream); + cuda_decode((float*)buffers[1], model_bboxes, kConfThresh, decode_ptr_device, kMaxNumOutputBbox, stream); cuda_nms(decode_ptr_device, kNmsThresh, kMaxNumOutputBbox, stream); //cuda nms CUDA_CHECK(cudaMemcpyAsync(decode_ptr_host, decode_ptr_device, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element), cudaMemcpyDeviceToHost, @@ -125,8 +125,8 @@ void infer(IExecutionContext &context, cudaStream_t &stream, void **buffers, flo CUDA_CHECK(cudaStreamSynchronize(stream)); } -bool parse_args(int argc, char **argv, std::string &wts, std::string &engine, int &is_p, std::string &img_dir, - std::string &sub_type, std::string &cuda_post_process, float &gd, float &gw, int &max_channels) { +bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, int& is_p, std::string& img_dir, + std::string& sub_type, std::string& cuda_post_process, float& gd, float& gw, int& max_channels) { if (argc < 4) return false; if (std::string(argv[1]) == "-s" && (argc == 5 || argc == 7)) { @@ -172,7 +172,7 @@ bool parse_args(int argc, char **argv, std::string &wts, std::string &engine, in return true; } -int main(int argc, char **argv) { +int main(int argc, char** argv) { // -s ../models/yolov8n.wts ../models/yolov8n.fp32.trt n // -d ../models/yolov8n.fp32.trt ../images c cudaSetDevice(kGpuId); @@ -203,9 +203,9 @@ int main(int argc, char **argv) { } // Deserialize the engine from file - IRuntime *runtime = nullptr; - ICudaEngine *engine 
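Note: the cudaMalloc sizes in prepare_buffer follow directly from the constants above: an NCHW float input per batch, kOutputSize floats per image (one Detection worth of floats per candidate plus the leading count), and, when post-processing on the GPU, a decode buffer of 1 + kMaxNumOutputBbox * bbox_element floats. Collected in one place as a sketch; all identifiers are constants and types from this repo's headers:

#include <cstddef>
#include "config.h"
#include "postprocess.h"
#include "types.h"
// Allocation sizes used by prepare_buffer, expressed in floats (sketch).
struct DetBufferSizes {
    size_t input_floats;
    size_t output_floats;
    size_t decode_floats;
};
inline DetBufferSizes detBufferSizes() {
    return {static_cast<size_t>(kBatchSize) * 3 * kInputH * kInputW,
            static_cast<size_t>(kBatchSize) * (kMaxNumOutputBbox * sizeof(Detection) / sizeof(float) + 1),
            1 + static_cast<size_t>(kMaxNumOutputBbox) * bbox_element};
}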
= nullptr; - IExecutionContext *context = nullptr; + IRuntime* runtime = nullptr; + ICudaEngine* engine = nullptr; + IExecutionContext* context = nullptr; deserialize_engine(engine_name, &runtime, &engine, &context); cudaStream_t stream; CUDA_CHECK(cudaStreamCreate(&stream)); @@ -213,10 +213,10 @@ int main(int argc, char **argv) { auto out_dims = engine->getTensorShape(kOutputTensorName); model_bboxes = out_dims.d[1]; // Prepare cpu and gpu buffers - float *device_buffers[2]; - float *output_buffer_host = nullptr; - float *decode_ptr_host = nullptr; - float *decode_ptr_device = nullptr; + float* device_buffers[2]; + float* output_buffer_host = nullptr; + float* decode_ptr_host = nullptr; + float* decode_ptr_device = nullptr; // Read images from directory std::vector file_names; @@ -245,7 +245,7 @@ int main(int argc, char **argv) { // Preprocess cuda_batch_preprocess(img_batch, device_buffers[0], kInputW, kInputH, stream); // Run inference - infer(*context, stream, (void **) device_buffers, output_buffer_host, kBatchSize, decode_ptr_host, + infer(*context, stream, (void**)device_buffers, output_buffer_host, kBatchSize, decode_ptr_host, decode_ptr_device, model_bboxes, cuda_post_process); std::vector> res_batch; @@ -262,8 +262,8 @@ int main(int argc, char **argv) { for (size_t k = 0; k < res_batch[j].size(); k++) { std::cout << "image: " << img_name_batch[j] << ", bbox: " << res_batch[j][k].bbox[0] << ", " << res_batch[j][k].bbox[1] << ", " << res_batch[j][k].bbox[2] << ", " - << res_batch[j][k].bbox[3] << ", conf: " << res_batch[j][k].conf << ", class_id: " - << res_batch[j][k].class_id << std::endl; + << res_batch[j][k].bbox[3] << ", conf: " << res_batch[j][k].conf + << ", class_id: " << res_batch[j][k].class_id << std::endl; } } diff --git a/yolov8/yolov8_trt10/yolov8_pose.cpp b/yolov8/yolov8_trt10/yolov8_pose.cpp old mode 100755 new mode 100644 index da1b8cfa..dee8eac4 --- a/yolov8/yolov8_trt10/yolov8_pose.cpp +++ b/yolov8/yolov8_trt10/yolov8_pose.cpp @@ -265,8 +265,8 @@ int main(int argc, char** argv) { for (size_t k = 0; k < res_batch[j].size(); k++) { std::cout << "image: " << img_name_batch[j] << ", bbox: " << res_batch[j][k].bbox[0] << ", " << res_batch[j][k].bbox[1] << ", " << res_batch[j][k].bbox[2] << ", " - << res_batch[j][k].bbox[3] << ", conf: " << res_batch[j][k].conf << ", class_id: " - << res_batch[j][k].class_id << std::endl; + << res_batch[j][k].bbox[3] << ", conf: " << res_batch[j][k].conf + << ", class_id: " << res_batch[j][k].class_id << std::endl; } } } diff --git a/yolov8/yolov8_trt10/yolov8_seg.cpp b/yolov8/yolov8_trt10/yolov8_seg.cpp old mode 100755 new mode 100644 index dce442fd..b54a3323 --- a/yolov8/yolov8_trt10/yolov8_seg.cpp +++ b/yolov8/yolov8_trt10/yolov8_seg.cpp @@ -32,9 +32,9 @@ static cv::Rect get_downscale_rect(float bbox[4], float scale) { return cv::Rect(int(left), int(top), int(right - left), int(bottom - top)); } -std::vector process_mask(const float *proto, int proto_size, std::vector &dets) { +std::vector process_mask(const float* proto, int proto_size, std::vector& dets) { - std::vector masks; + std::vector masks; for (size_t i = 0; i < dets.size(); i++) { cv::Mat mask_mat = cv::Mat::zeros(kInputH / 4, kInputW / 4, CV_32FC1); @@ -56,11 +56,11 @@ std::vector process_mask(const float *proto, int proto_size, std::vect return masks; } -void serialize_engine(std::string &wts_name, std::string &engine_name, std::string &sub_type, float &gd, float &gw, - int &max_channels) { - IBuilder *builder = createInferBuilder(gLogger); - IBuilderConfig *config = 
builder->createBuilderConfig(); - IHostMemory *serialized_engine = nullptr; +void serialize_engine(std::string& wts_name, std::string& engine_name, std::string& sub_type, float& gd, float& gw, + int& max_channels) { + IBuilder* builder = createInferBuilder(gLogger); + IBuilderConfig* config = builder->createBuilderConfig(); + IHostMemory* serialized_engine = nullptr; serialized_engine = buildEngineYolov8Seg(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels); @@ -70,15 +70,15 @@ void serialize_engine(std::string &wts_name, std::string &engine_name, std::stri std::cout << "could not open plan output file" << std::endl; assert(false); } - p.write(reinterpret_cast(serialized_engine->data()), serialized_engine->size()); + p.write(reinterpret_cast(serialized_engine->data()), serialized_engine->size()); delete serialized_engine; delete config; delete builder; } -void deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngine **engine, - IExecutionContext **context) { +void deserialize_engine(std::string& engine_name, IRuntime** runtime, ICudaEngine** engine, + IExecutionContext** context) { std::ifstream file(engine_name, std::ios::binary); if (!file.good()) { std::cerr << "read " << engine_name << " error!" << std::endl; @@ -88,7 +88,7 @@ void deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngin file.seekg(0, file.end); size = file.tellg(); file.seekg(0, file.beg); - char *serialized_engine = new char[size]; + char* serialized_engine = new char[size]; assert(serialized_engine); file.read(serialized_engine, size); file.close(); @@ -102,9 +102,9 @@ void deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngin delete[] serialized_engine; } -void prepare_buffer(ICudaEngine *engine, float **input_buffer_device, float **output_buffer_device, - float **output_seg_buffer_device, float **output_buffer_host, float **output_seg_buffer_host, - float **decode_ptr_host, float **decode_ptr_device, std::string cuda_post_process) { +void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, float** output_buffer_device, + float** output_seg_buffer_device, float** output_buffer_host, float** output_seg_buffer_host, + float** decode_ptr_host, float** decode_ptr_device, std::string cuda_post_process) { assert(engine->getNbIOTensors() == 3); // In order to bind the buffers, we need to know the names of the input and output tensors. 
// Note that indices are guaranteed to be less than IEngine::getNbBindings() @@ -124,9 +124,9 @@ void prepare_buffer(ICudaEngine *engine, float **input_buffer_device, float **ou assert(false); } // Create GPU buffers on device - CUDA_CHECK(cudaMalloc((void **) input_buffer_device, kBatchSize * 3 * kInputH * kInputW * sizeof(float))); - CUDA_CHECK(cudaMalloc((void **) output_buffer_device, kBatchSize * kOutputSize * sizeof(float))); - CUDA_CHECK(cudaMalloc((void **) output_seg_buffer_device, kBatchSize * kOutputSegSize * sizeof(float))); + CUDA_CHECK(cudaMalloc((void**)input_buffer_device, kBatchSize * 3 * kInputH * kInputW * sizeof(float))); + CUDA_CHECK(cudaMalloc((void**)output_buffer_device, kBatchSize * kOutputSize * sizeof(float))); + CUDA_CHECK(cudaMalloc((void**)output_seg_buffer_device, kBatchSize * kOutputSegSize * sizeof(float))); if (cuda_post_process == "c") { *output_buffer_host = new float[kBatchSize * kOutputSize]; @@ -138,12 +138,12 @@ void prepare_buffer(ICudaEngine *engine, float **input_buffer_device, float **ou } // Allocate memory for decode_ptr_host and copy to device *decode_ptr_host = new float[1 + kMaxNumOutputBbox * bbox_element]; - CUDA_CHECK(cudaMalloc((void **) decode_ptr_device, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element))); + CUDA_CHECK(cudaMalloc((void**)decode_ptr_device, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element))); } } -void infer(IExecutionContext &context, cudaStream_t &stream, void **buffers, float *output, float *output_seg, - int batchsize, float *decode_ptr_host, float *decode_ptr_device, int model_bboxes, +void infer(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* output, float* output_seg, + int batchsize, float* decode_ptr_host, float* decode_ptr_device, int model_bboxes, std::string cuda_post_process) { // infer on the batch asynchronously, and DMA output back to host auto start = std::chrono::system_clock::now(); @@ -166,7 +166,7 @@ void infer(IExecutionContext &context, cudaStream_t &stream, void **buffers, flo } else if (cuda_post_process == "g") { CUDA_CHECK( cudaMemsetAsync(decode_ptr_device, 0, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element), stream)); - cuda_decode((float *) buffers[1], model_bboxes, kConfThresh, decode_ptr_device, kMaxNumOutputBbox, stream); + cuda_decode((float*)buffers[1], model_bboxes, kConfThresh, decode_ptr_device, kMaxNumOutputBbox, stream); cuda_nms(decode_ptr_device, kNmsThresh, kMaxNumOutputBbox, stream); //cuda nms CUDA_CHECK(cudaMemcpyAsync(decode_ptr_host, decode_ptr_device, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element), cudaMemcpyDeviceToHost, @@ -179,9 +179,9 @@ void infer(IExecutionContext &context, cudaStream_t &stream, void **buffers, flo CUDA_CHECK(cudaStreamSynchronize(stream)); } -bool parse_args(int argc, char **argv, std::string &wts, std::string &engine, std::string &img_dir, - std::string &sub_type, std::string &cuda_post_process, std::string &labels_filename, float &gd, - float &gw, int &max_channels) { +bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, std::string& img_dir, + std::string& sub_type, std::string& cuda_post_process, std::string& labels_filename, float& gd, + float& gw, int& max_channels) { if (argc < 4) return false; if (std::string(argv[1]) == "-s" && argc == 5) { @@ -222,7 +222,7 @@ bool parse_args(int argc, char **argv, std::string &wts, std::string &engine, st return true; } -int main(int argc, char **argv) { +int main(int argc, char** argv) { // -s ../models/yolov8n-seg.wts 
../models/yolov8n-seg.fp32.trt n // -d ../models/yolov8n-seg.fp32.trt ../images c coco.txt cudaSetDevice(kGpuId); @@ -252,9 +252,9 @@ int main(int argc, char **argv) { } // Deserialize the engine from file - IRuntime *runtime = nullptr; - ICudaEngine *engine = nullptr; - IExecutionContext *context = nullptr; + IRuntime* runtime = nullptr; + ICudaEngine* engine = nullptr; + IExecutionContext* context = nullptr; deserialize_engine(engine_name, &runtime, &engine, &context); cudaStream_t stream; CUDA_CHECK(cudaStreamCreate(&stream)); @@ -262,14 +262,14 @@ int main(int argc, char **argv) { auto out_dims = engine->getTensorShape(kOutputTensorName); model_bboxes = out_dims.d[1]; // Prepare cpu and gpu buffers - float *device_buffers[3]; - float *output_buffer_host = nullptr; - float *output_seg_buffer_host = nullptr; - float *decode_ptr_host = nullptr; - float *decode_ptr_device = nullptr; + float* device_buffers[3]; + float* output_buffer_host = nullptr; + float* output_seg_buffer_host = nullptr; + float* decode_ptr_host = nullptr; + float* decode_ptr_device = nullptr; // Read images from directory - std::vector file_names; + std::vector file_names; if (read_files_in_dir(img_dir.c_str(), file_names) < 0) { std::cerr << "read_files_in_dir failed." << std::endl; return -1; @@ -285,8 +285,8 @@ int main(int argc, char **argv) { // // batch predict for (size_t i = 0; i < file_names.size(); i += kBatchSize) { // Get a batch of images - std::vector img_batch; - std::vector img_name_batch; + std::vector img_batch; + std::vector img_name_batch; for (size_t j = i; j < i + kBatchSize && j < file_names.size(); j++) { cv::Mat img = cv::imread(img_dir + "/" + file_names[j]); img_batch.push_back(img); @@ -295,14 +295,14 @@ int main(int argc, char **argv) { // Preprocess cuda_batch_preprocess(img_batch, device_buffers[0], kInputW, kInputH, stream); // Run inference - infer(*context, stream, (void **) device_buffers, output_buffer_host, output_seg_buffer_host, kBatchSize, + infer(*context, stream, (void**)device_buffers, output_buffer_host, output_seg_buffer_host, kBatchSize, decode_ptr_host, decode_ptr_device, model_bboxes, cuda_post_process); - std::vector > res_batch; + std::vector> res_batch; if (cuda_post_process == "c") { // NMS batch_nms(res_batch, output_buffer_host, img_batch.size(), kOutputSize, kConfThresh, kNmsThresh); for (size_t b = 0; b < img_batch.size(); b++) { - auto &res = res_batch[b]; + auto& res = res_batch[b]; cv::Mat img = img_batch[b]; auto masks = process_mask(&output_seg_buffer_host[b * kOutputSegSize], kOutputSegSize, res); draw_mask_bbox(img, res, masks, labels_map); @@ -320,8 +320,8 @@ int main(int argc, char **argv) { for (size_t k = 0; k < res_batch[j].size(); k++) { std::cout << "image: " << img_name_batch[j] << ", bbox: " << res_batch[j][k].bbox[0] << ", " << res_batch[j][k].bbox[1] << ", " << res_batch[j][k].bbox[2] << ", " - << res_batch[j][k].bbox[3] << ", conf: " << res_batch[j][k].conf << ", class_id: " - << res_batch[j][k].class_id << std::endl; + << res_batch[j][k].bbox[3] << ", conf: " << res_batch[j][k].conf + << ", class_id: " << res_batch[j][k].class_id << std::endl; } } }
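
Context for reviewers, not part of the patch: the infer() functions touched above rely on the TensorRT 10 name-based I/O API (setInputTensorAddress/setTensorAddress followed by enqueueV3 on a CUDA stream) rather than the removed bindings/enqueueV2 interface. The sketch below is a minimal illustration of that call pattern under the assumption that the elided bodies follow it; the run_once helper, its parameters, and the reduced error handling are illustrative and do not appear in the sources.

// Illustrative sketch only (assumed pattern, not code from the diff).
#include <cstddef>
#include <iostream>

#include <NvInfer.h>
#include <cuda_runtime_api.h>

// input_name/output_name correspond to kInputTensorName/kOutputTensorName from config.h;
// input_device/output_device are the buffers cudaMalloc'd in prepare_buffer().
void run_once(nvinfer1::IExecutionContext& context, cudaStream_t stream, const char* input_name,
              const char* output_name, void* input_device, void* output_device, float* output_host,
              size_t output_bytes) {
    // TensorRT 10 addresses I/O tensors by name instead of by binding index.
    bool ok = context.setInputTensorAddress(input_name, input_device) &&
              context.setTensorAddress(output_name, output_device) &&
              context.enqueueV3(stream);  // queue inference asynchronously on the stream
    if (!ok) {
        std::cerr << "TensorRT enqueue failed" << std::endl;
        return;
    }
    // DMA the raw output back to the host, then wait for everything queued on the stream.
    cudaMemcpyAsync(output_host, output_device, output_bytes, cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);
}

In the GPU post-processing path ("g") the same stream also carries cuda_decode and cuda_nms before the device-to-host copy, so the single cudaStreamSynchronize at the end of infer() is what makes decode_ptr_host valid for the batch loop.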