From 58b68c12742d3ed05dce308ccd72b9a3aa56ee9a Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Tue, 24 Sep 2024 12:26:38 +0300 Subject: [PATCH] Add new `CompiledNetwork` container for blob return --- .../src/al/include/intel_npu/al/icompiler.hpp | 16 ++++++++++++++-- .../include/driver_compiler_adapter.hpp | 2 +- .../include/zero_compiler_in_driver.hpp | 8 +++----- .../compiler/src/driver_compiler_adapter.cpp | 2 +- .../compiler/src/zero_compiler_in_driver.cpp | 18 ++++++++++-------- .../src/plugin/src/compiled_model.cpp | 8 ++++---- 6 files changed, 33 insertions(+), 21 deletions(-) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp index 57b0a44b1a0c65..6f422c5b3ba00c 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp @@ -151,6 +151,18 @@ struct NetworkDescription final { NetworkMetadata metadata; }; +/** + * @struct CompiledNetwork + * @brief Custom container for compiled model, used for model export + * Underlying container will be empty for optimized memory consumption + */ + +struct CompiledNetwork { + const uint8_t* data; + size_t size; + std::vector container; +}; + /** * @interface ICompiler * @brief An interface to be implemented by a concrete compiler to provide @@ -203,8 +215,8 @@ class ICompiler : public std::enable_shared_from_this { // Driver compiler can use this to release graphHandle, if we do not have executor virtual void release([[maybe_unused]] std::shared_ptr networkDescription){}; - virtual std::pair getCompiledNetwork(std::shared_ptr networkDescription) { - return {networkDescription->compiledNetwork.data(), networkDescription->compiledNetwork.size()}; + virtual CompiledNetwork getCompiledNetwork(std::shared_ptr networkDescription) { + return CompiledNetwork{networkDescription->compiledNetwork.data(), networkDescription->compiledNetwork.size(), networkDescription->compiledNetwork}; } protected: diff --git a/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp index 74488644a75ad6..b9ee9a9be03481 100644 --- a/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp @@ -36,7 +36,7 @@ class LevelZeroCompilerAdapter final : public ICompiler { void release(std::shared_ptr networkDescription) override; - std::pair getCompiledNetwork(std::shared_ptr networkDescription) override; + CompiledNetwork getCompiledNetwork(std::shared_ptr networkDescription) override; private: /** diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp index fa2ad1c0ac8ace..a8a44b7cf6c0fe 100644 --- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp +++ b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp @@ -105,7 +105,7 @@ class LevelZeroCompilerInDriver final : public ICompiler { void release(std::shared_ptr networkDescription) override; - std::pair getCompiledNetwork(std::shared_ptr networkDescription) override; + CompiledNetwork getCompiledNetwork(std::shared_ptr networkDescription) override; private: NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const; @@ -130,14 +130,12 @@ class LevelZeroCompilerInDriver final : public ICompiler { template = true> void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, - ze_graph_handle_t graphHandle, - std::shared_ptr networkDescription, + ze_graph_handle_t graphHandle, std::vector& blob, uint8_t** blobPtr, size_t* blobSize) const; template = true> void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, - ze_graph_handle_t graphHandle, - std::shared_ptr, + ze_graph_handle_t graphHandle, std::vector& /* unusedBlob */, uint8_t** blobPtr, size_t* blobSize) const; template = true> diff --git a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp index f84c8b0d224fd2..683f56730f1607 100644 --- a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp @@ -115,7 +115,7 @@ void LevelZeroCompilerAdapter::release(std::shared_ptr apiAdapter->release(std::move(networkDescription)); } -std::pair LevelZeroCompilerAdapter::getCompiledNetwork( +CompiledNetwork LevelZeroCompilerAdapter::getCompiledNetwork( std::shared_ptr networkDescription) { _logger.info("getCompiledNetwork - using adapter to perform getCompiledNetwork(networkDescription)"); return apiAdapter->getCompiledNetwork(std::move(networkDescription)); diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp index eee6d7117e2736..1c6f067852cfe7 100644 --- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp +++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp @@ -366,10 +366,11 @@ template template > void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, ze_graph_handle_t graphHandle, - std::shared_ptr networkDescription, + std::vector& blob, uint8_t** blobPtr, size_t* blobSize) const { // Get blob size first auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, blobSize, nullptr); + blob.resize(*blobSize); OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, "Failed to compile network. L0 pfnGetNativeBinary get blob size", @@ -382,7 +383,7 @@ void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditabl getLatestBuildError()); // Get blob data - result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, blobSize, std::const_pointer_cast(networkDescription)->compiledNetwork.data()); + result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, blobSize, blob.data()); OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, "Failed to compile network. L0 pfnGetNativeBinary get blob data", @@ -394,14 +395,14 @@ void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditabl ". ", getLatestBuildError()); - *blobPtr = std::const_pointer_cast(networkDescription)->compiledNetwork.data(); + *blobPtr = blob.data(); } template template > void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, ze_graph_handle_t graphHandle, - std::shared_ptr, + std::vector& /* unusedBlob */, uint8_t** blobPtr, size_t* blobSize) const { // Get blob ptr and size auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, blobSize, blobPtr); @@ -418,7 +419,7 @@ void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditabl } template -std::pair LevelZeroCompilerInDriver::getCompiledNetwork( +CompiledNetwork LevelZeroCompilerInDriver::getCompiledNetwork( std::shared_ptr networkDescription) { if (networkDescription->metadata.graphHandle != nullptr && networkDescription->compiledNetwork.size() == 0) { _logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle"); @@ -426,14 +427,15 @@ std::pair LevelZeroCompilerInDriver::get uint8_t* blobPtr; size_t blobSize = -1; + std::vector blob; - getNativeBinary(_graphDdiTableExt, graphHandle, networkDescription, &blobPtr, &blobSize); + getNativeBinary(_graphDdiTableExt, graphHandle, blob, &blobPtr, &blobSize); _logger.info("LevelZeroCompilerInDriver getCompiledNetwork returning blob"); - return {blobPtr, blobSize}; + return CompiledNetwork{blobPtr, blobSize, std::move(blob)}; } else { _logger.info("return the blob from network description"); - return {networkDescription->compiledNetwork.data(), networkDescription->compiledNetwork.size()}; + return CompiledNetwork{networkDescription->compiledNetwork.data(), networkDescription->compiledNetwork.size(), networkDescription->compiledNetwork}; } } diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp index 782b9b5a34ff7f..14cf6f91c06d7d 100644 --- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp @@ -27,9 +27,9 @@ constexpr std::string_view NO_EXECUTOR_FOR_INFERENCE = "Can't create infer request!\n" "Please make sure that the device is available. Only exports can be made."; -std::uint32_t hash(std::pair blob) { +std::uint32_t hash(const intel_npu::CompiledNetwork& blob) { std::uint32_t result = 1171117u; - for (const uint8_t* it = blob.first; it != blob.first + blob.second; ++it) { + for (const uint8_t* it = blob.data; it != blob.data + blob.size; ++it) { result = ((result << 7) + result) + static_cast(*it); } return result; @@ -141,11 +141,11 @@ std::shared_ptr CompiledModel::create_sync_infer_request( void CompiledModel::export_model(std::ostream& stream) const { _logger.debug("CompiledModel::export_model"); const auto&& blob = _compiler->getCompiledNetwork(_networkPtr); - stream.write(reinterpret_cast(blob.first), blob.second); + stream.write(reinterpret_cast(blob.data), blob.size); if (_logger.level() == ov::log::Level::INFO) { std::stringstream str; - str << "Blob size: " << blob.second << ", hash: " << std::hex << hash(blob); + str << "Blob size: " << blob.size << ", hash: " << std::hex << hash(blob); _logger.info(str.str().c_str()); if (!stream) {