From ed220a4984f4c2349a1523902f1ff9fc552348b5 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Wed, 27 Nov 2024 18:29:41 +0200 Subject: [PATCH] Add `BlobContainer` class and derivates for each `std::vector` and `std::shared_ptr` blob types --- .../include/intel_npu/common/igraph.hpp | 61 ++++++++++++++++++- .../common/include/intel_npu/common/npu.hpp | 2 +- .../include/driver_compiler_adapter.hpp | 2 +- .../compiler_adapter/include/driver_graph.hpp | 2 +- .../include/plugin_compiler_adapter.hpp | 2 +- .../compiler_adapter/include/plugin_graph.hpp | 2 +- .../src/driver_compiler_adapter.cpp | 6 +- .../src/compiler_adapter/src/driver_graph.cpp | 9 +-- .../src/plugin_compiler_adapter.cpp | 17 +++--- .../src/compiler_adapter/src/plugin_graph.cpp | 6 +- .../intel_npu/src/plugin/include/plugin.hpp | 8 ++- .../intel_npu/src/plugin/src/plugin.cpp | 22 ++++--- 12 files changed, 99 insertions(+), 40 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index b8e327259ac04f..19fba9253426f7 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -16,9 +16,66 @@ namespace intel_npu { +class BlobContainer { +public: + virtual void* get_ptr() { + OPENVINO_THROW("const BlobContainer::get_ptr() method is not implemented!"); + } + + virtual size_t size() const { + OPENVINO_THROW("BlobContainer::size() method is not implemented!"); + } + + virtual bool release_from_memory() { + OPENVINO_THROW("BlobContainer::release_from_memory() method is not implemented!"); + } +}; + +class BlobContainerVector : public BlobContainer { +public: + BlobContainerVector(std::vector blob) : _ownershipBlob(std::move(blob)) {} + + void* get_ptr() override { + return reinterpret_cast(_ownershipBlob.data()); + } + + size_t size() const override { + return _ownershipBlob.size(); + } + + bool release_from_memory() override { + _ownershipBlob.clear(); + _ownershipBlob.shrink_to_fit(); + return true; + } + +private: + std::vector _ownershipBlob; +}; + +class BlobContainerAlignedBuffer : public BlobContainer { +public: + BlobContainerAlignedBuffer(const std::shared_ptr& blobSO) : _ownershipBlob(blobSO) {} + + void* get_ptr() override { + return _ownershipBlob->get_ptr(); + } + + size_t size() const override { + return _ownershipBlob->size(); + } + + bool release_from_memory() override { + return false; + } + +private: + std::shared_ptr _ownershipBlob; +}; + class IGraph : public std::enable_shared_from_this { public: - IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, std::optional> blob) + IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, std::optional> blob) : _handle(handle), _metadata(std::move(metadata)) { if (blob.has_value()) { @@ -98,7 +155,7 @@ class IGraph : public std::enable_shared_from_this { // first inference starts running std::mutex _mutex; - std::shared_ptr _blob; + std::unique_ptr _blob; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp index 61ff29e5904297..b53482506360eb 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp @@ -58,7 +58,7 @@ class ICompilerAdapter { public: virtual std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const = 0; - virtual std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const = 0; + virtual std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const = 0; virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; virtual ~ICompilerAdapter() = default; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index 31f5fe9aafb986..38e6aab1a21765 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -26,7 +26,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const override; + std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index 3edcfdb14395c6..ef70ae01cc62af 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -21,7 +21,7 @@ class DriverGraph final : public IGraph { ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::optional> blob); + std::optional> blob); void export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index c20fe5ef11643e..89e0b81426ef40 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -21,7 +21,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const override; + std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp index 0459abd8c354f2..5d0ab241bcd9c8 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp @@ -23,7 +23,7 @@ class PluginGraph final : public IGraph { const std::shared_ptr& zeroInitStruct, ze_graph_handle_t graphHandle, NetworkMetadata metadata, - std::shared_ptr blobSOPtr, + std::unique_ptr blobPtr, const Config& config); void export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 8de79fd5fcb842..5790a243148a09 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -206,11 +206,11 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr DriverCompilerAdapter::parse(std::shared_ptr networkSOPtr, const Config& config) const { +std::shared_ptr DriverCompilerAdapter::parse(std::unique_ptr blobPtr, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); _logger.debug("parse start"); - ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(networkSOPtr->get_ptr()), networkSOPtr->size()); + ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); _logger.debug("parse end"); OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); @@ -221,7 +221,7 @@ std::shared_ptr DriverCompilerAdapter::parse(std::shared_ptr>(std::move(networkSOPtr))); + std::optional>(std::move(blobPtr))); } ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index b66cca1ac0be0f..d80fc33ccc48bd 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -15,7 +15,7 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::optional> blob) + std::optional> blob) : IGraph(graphHandle, std::move(metadata), std::move(blob)), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), @@ -142,13 +142,10 @@ bool DriverGraph::release_blob(const Config& config) { return false; } - if (_blob.use_count() > 1) { - // blob is not allocated by plugin, no need for memory optimization - return false; + if(!_blob->release_from_memory()) { + return false; } - _blob.reset(); - _logger.debug("Blob is released"); return true; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index accafa7c8365a0..b7c3d0c75c8f4f 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -83,7 +83,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrcompile(model, config); - auto networkSO = std::make_shared>(std::move(networkDesc.compiledNetwork)); + auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); _logger.debug("compile end"); ze_graph_handle_t graphHandle = nullptr; @@ -91,28 +91,27 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphHandle(networkSO->data(), networkSO->size()); + graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); } } - auto networkSOPtr = std::make_shared>>>(reinterpret_cast(networkSO->data()), networkSO->size(), networkSO); return std::make_shared(_zeGraphExt, _compiler, _zeroInitStruct, graphHandle, std::move(networkDesc.metadata), - networkSOPtr, + std::move(blobPtr), config); } -std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptr networkSOPtr, const Config& config) const { +std::shared_ptr PluginCompilerAdapter::parse(std::unique_ptr blobPtr, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); _logger.debug("parse start"); - std::vector network(networkSOPtr->size()); - network.assign(reinterpret_cast(networkSOPtr->get_ptr()), reinterpret_cast(networkSOPtr->get_ptr()) + networkSOPtr->size()); + std::vector network(blobPtr->size()); + network.assign(reinterpret_cast(blobPtr->get_ptr()), reinterpret_cast(blobPtr->get_ptr()) + blobPtr->size()); auto networkMeta = _compiler->parse(network, config); network.clear(); network.shrink_to_fit(); @@ -121,7 +120,7 @@ std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptrgetGraphHandle(reinterpret_cast(networkSOPtr->get_ptr()), networkSOPtr->size()); + graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); } return std::make_shared(_zeGraphExt, @@ -129,7 +128,7 @@ std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptr& zeGraphExt, const std::shared_ptr& zeroInitStruct, ze_graph_handle_t graphHandle, NetworkMetadata metadata, - std::shared_ptr blobSOPtr, + std::unique_ptr blobPtr, const Config& config) - : IGraph(graphHandle, std::move(metadata), std::optional>(std::move(blobSOPtr))), + : IGraph(graphHandle, std::move(metadata), std::optional>(std::move(blobPtr))), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _compiler(compiler), @@ -58,7 +58,7 @@ std::vector PluginGraph::process_profiling_output(const std:: // Only if we work with std::vector blobs, but then IGraph needs to have 2 declarations for the same blob // Maybe if we templatize blob in IGraph to be either std::vector or std::shared_ptr? std::vector blob(_blob->size()); - blob.assign(reinterpret_cast(_blob->get_ptr()), reinterpret_cast(_blob->get_ptr()) + _blob->size()); + blob.assign(reinterpret_cast(_blob->get_ptr()), reinterpret_cast(_blob->get_ptr()) + _blob->size()); return _compiler->process_profiling_output(profData, blob, config); } diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp index a8c11e6d6ce9f6..e205ce22e913e9 100644 --- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp +++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp @@ -44,14 +44,16 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& stream, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::shared_ptr model_buffer, - const ov::AnyMap& properties) const override; + std::shared_ptr import_model(std::istream& /* unusedStream */, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const override; std::shared_ptr import_model(std::istream& stream, const ov::SoPtr& context, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::shared_ptr model_buffer, + std::shared_ptr import_model(std::istream& stream, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index f5667d88985d78..163f5ea9fa2a2c 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -769,15 +769,16 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c auto compiler = getCompiler(localConfig); auto graphSize = getFileSize(stream); - auto blobSO = std::make_shared>(graphSize); - stream.read(reinterpret_cast(blobSO->data()), graphSize); + + std::vector blob(graphSize); + stream.read(reinterpret_cast(blob.data()), graphSize); if (!stream) { OPENVINO_THROW("Failed to read data from stream!"); } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - auto blobSOPtr = std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, blobSO); - auto graph = compiler->parse(std::move(blobSOPtr), localConfig); + auto blobContainerPtr = std::make_unique(std::move(blob)); + auto graph = compiler->parse(std::move(blobContainerPtr), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -795,7 +796,9 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c return compiledModel; } -std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const { +std::shared_ptr Plugin::import_model(std::istream& /* unusedStream */, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); @@ -820,8 +823,8 @@ std::shared_ptr Plugin::import_model(std::shared_ptrparse(model_buffer, localConfig); + auto blobContainerPtr = std::make_unique(model_buffer); + auto graph = compiler->parse(std::move(blobContainerPtr), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -850,7 +853,8 @@ std::shared_ptr Plugin::import_model(std::istream& stream, return import_model(stream, context, properties); } -std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, +std::shared_ptr Plugin::import_model(std::istream& stream, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const { auto casted = std::dynamic_pointer_cast(context._ptr); @@ -858,7 +862,7 @@ std::shared_ptr Plugin::import_model(std::shared_ptr& model,