diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 2101f81fc57e6b..9d12180791e159 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -17,6 +17,63 @@ namespace intel_npu { +class BlobContainer { +public: + virtual void* get_ptr() { + OPENVINO_THROW("const BlobContainer::get_ptr() method is not implemented!"); + } + + virtual size_t size() const { + OPENVINO_THROW("BlobContainer::size() method is not implemented!"); + } + + virtual bool release_from_memory() { + OPENVINO_THROW("BlobContainer::release_from_memory() method is not implemented!"); + } +}; + +class BlobContainerVector : public BlobContainer { +public: + BlobContainerVector(std::vector blob) : _ownershipBlob(std::move(blob)) {} + + void* get_ptr() override { + return reinterpret_cast(_ownershipBlob.data()); + } + + size_t size() const override { + return _ownershipBlob.size(); + } + + bool release_from_memory() override { + _ownershipBlob.clear(); + _ownershipBlob.shrink_to_fit(); + return true; + } + +private: + std::vector _ownershipBlob; +}; + +class BlobContainerAlignedBuffer : public BlobContainer { +public: + BlobContainerAlignedBuffer(const std::shared_ptr& blobSO) : _ownershipBlob(blobSO) {} + + void* get_ptr() override { + return _ownershipBlob->get_ptr(); + } + + size_t size() const override { + return _ownershipBlob->size(); + } + + bool release_from_memory() override { + return false; + } + +private: + std::shared_ptr _ownershipBlob; +}; + class IGraph : public std::enable_shared_from_this { public: IGraph(ze_graph_handle_t handle, @@ -90,6 +147,7 @@ class IGraph : public std::enable_shared_from_this { // first inference starts running std::mutex _mutex; +<<<<<<< HEAD <<<<<<< HEAD std::vector _blob; @@ -106,6 +164,9 @@ class IGraph : public std::enable_shared_from_this { ======= std::shared_ptr _blob; >>>>>>> 25b5c05976 (Keep `shared_ptr` of blob in IGraph to fix `export_model` for import scenario) +======= + std::unique_ptr _blob; +>>>>>>> 94e33c4e24 (Add `BlobContainer` class and derivates for each `std::vector` and `std::shared_ptr` blob types) }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index f195d1635b4481..d28037cd069345 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -23,7 +23,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const override; + std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index 3edcfdb14395c6..ef70ae01cc62af 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -21,7 +21,7 @@ class DriverGraph final : public IGraph { ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::optional> blob); + std::optional> blob); void export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index c19d6b0bc2e0fc..291cea90eab464 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -21,7 +21,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const override; + std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp index 0459abd8c354f2..5d0ab241bcd9c8 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp @@ -23,7 +23,7 @@ class PluginGraph final : public IGraph { const std::shared_ptr& zeroInitStruct, ze_graph_handle_t graphHandle, NetworkMetadata metadata, - std::shared_ptr blobSOPtr, + std::unique_ptr blobPtr, const Config& config); void export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 91df33f08be017..8eae64d28a3620 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -203,11 +203,11 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr DriverCompilerAdapter::parse(std::shared_ptr networkSOPtr, const Config& config) const { +std::shared_ptr DriverCompilerAdapter::parse(std::unique_ptr blobPtr, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); _logger.debug("parse start"); - ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(networkSOPtr->get_ptr()), networkSOPtr->size()); + ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); _logger.debug("parse end"); OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); @@ -218,7 +218,7 @@ std::shared_ptr DriverCompilerAdapter::parse(std::shared_ptr>(std::move(networkSOPtr))); + std::optional>(std::move(blobPtr))); } ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index 8eca6a528390ab..458fa7762658c8 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -152,13 +152,10 @@ bool DriverGraph::release_blob(const Config& config) { return false; } - if (_blob.use_count() > 1) { - // blob is not allocated by plugin, no need for memory optimization - return false; + if(!_blob->release_from_memory()) { + return false; } - _blob.reset(); - _logger.debug("Blob is released"); return true; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 7474b7e5c4a845..bb5364a7aa1645 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -80,7 +80,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrcompile(model, config); - auto networkSO = std::make_shared>(std::move(networkDesc.compiledNetwork)); + auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); _logger.debug("compile end"); ze_graph_handle_t graphHandle = nullptr; @@ -88,28 +88,27 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphHandle(networkSO->data(), networkSO->size()); + graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); } } - auto networkSOPtr = std::make_shared>>>(reinterpret_cast(networkSO->data()), networkSO->size(), networkSO); return std::make_shared(_zeGraphExt, _compiler, _zeroInitStruct, graphHandle, std::move(networkDesc.metadata), - networkSOPtr, + std::move(blobPtr), config); } -std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptr networkSOPtr, const Config& config) const { +std::shared_ptr PluginCompilerAdapter::parse(std::unique_ptr blobPtr, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); _logger.debug("parse start"); - std::vector network(networkSOPtr->size()); - network.assign(reinterpret_cast(networkSOPtr->get_ptr()), reinterpret_cast(networkSOPtr->get_ptr()) + networkSOPtr->size()); + std::vector network(blobPtr->size()); + network.assign(reinterpret_cast(blobPtr->get_ptr()), reinterpret_cast(blobPtr->get_ptr()) + blobPtr->size()); auto networkMeta = _compiler->parse(network, config); network.clear(); network.shrink_to_fit(); @@ -118,7 +117,7 @@ std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptrgetGraphHandle(reinterpret_cast(networkSOPtr->get_ptr()), networkSOPtr->size()); + graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); } return std::make_shared(_zeGraphExt, @@ -126,7 +125,7 @@ std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptr PluginGraph::process_profiling_output(const std::vector& profData, const Config& config) const { - - // Need to fix increased memory usage below, ov::SharedBuffer won't permit us to get underlying shared buffer as it is private - // Only if we work with std::vector blobs, but then IGraph needs to have 2 declarations for the same blob - // Maybe if we templatize blob in IGraph to be either std::vector or std::shared_ptr? std::vector blob(_blob->size()); - blob.assign(reinterpret_cast(_blob->get_ptr()), reinterpret_cast(_blob->get_ptr()) + _blob->size()); + blob.assign(reinterpret_cast(_blob->get_ptr()), reinterpret_cast(_blob->get_ptr()) + _blob->size()); return _compiler->process_profiling_output(profData, blob, config); } diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp index 832fa0666c8fbf..c91af51c5443ce 100644 --- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp +++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp @@ -44,14 +44,16 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& stream, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::shared_ptr model_buffer, - const ov::AnyMap& properties) const override; + std::shared_ptr import_model(std::istream& /* unusedStream */, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const override; std::shared_ptr import_model(std::istream& stream, const ov::SoPtr& context, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::shared_ptr model_buffer, + std::shared_ptr import_model(std::istream& stream, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index a11990e6db8045..139db401e2ef5e 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -776,15 +776,16 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); auto graphSize = getFileSize(stream); - auto blobSO = std::make_shared>(graphSize); - stream.read(reinterpret_cast(blobSO->data()), graphSize); + + std::vector blob(graphSize); + stream.read(reinterpret_cast(blob.data()), graphSize); if (!stream) { OPENVINO_THROW("Failed to read data from stream!"); } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - auto blobSOPtr = std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, blobSO); - auto graph = compiler->parse(std::move(blobSOPtr), localConfig); + auto blobContainerPtr = std::make_unique(std::move(blob)); + auto graph = compiler->parse(std::move(blobContainerPtr), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -802,7 +803,9 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c return compiledModel; } -std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const { +std::shared_ptr Plugin::import_model(std::istream& /* unusedStream */, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); @@ -827,8 +830,8 @@ std::shared_ptr Plugin::import_model(std::shared_ptrparse(model_buffer, localConfig); + auto blobContainerPtr = std::make_unique(model_buffer); + auto graph = compiler->parse(std::move(blobContainerPtr), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -857,7 +860,8 @@ std::shared_ptr Plugin::import_model(std::istream& stream, return import_model(stream, context, properties); } -std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, +std::shared_ptr Plugin::import_model(std::istream& stream, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const { auto casted = std::dynamic_pointer_cast(context._ptr); @@ -865,7 +869,7 @@ std::shared_ptr Plugin::import_model(std::shared_ptr& model,