diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp index 859675344c98b8..fff78ab89750b1 100644 --- a/src/core/dev_api/openvino/runtime/shared_buffer.hpp +++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp @@ -12,7 +12,7 @@ namespace ov { template class SharedBuffer : public ov::AlignedBuffer { public: - SharedBuffer(char* data, size_t size, const T& shared_object) : _shared_object(shared_object) { + SharedBuffer(char* data, size_t size, T shared_object) : _shared_object(std::move(shared_object)) { m_allocated_buffer = data; m_aligned_buffer = data; m_byte_size = size; diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index a2175d7a7d16bc..91aed7793f5f17 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -26,7 +26,7 @@ class IGraph : public std::enable_shared_from_this { } } - virtual void export_blob(std::ostream& stream) = 0; + virtual void export_blob(std::ostream& stream) const = 0; virtual std::vector process_profiling_output(const std::vector& profData, const Config& config) const = 0; diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp index 5dbe0698819fc7..a86aa62bd0963b 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp @@ -58,7 +58,7 @@ class ICompilerAdapter { public: virtual std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const = 0; - virtual std::shared_ptr parse(const std::shared_ptr& networkSO, const Config& config) const = 0; + virtual std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const = 0; virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; virtual ov::intel_npu::CompilerType getCompilerType() const = 0; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index 29e766dcc54141..9f30c82da7f0d7 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -26,7 +26,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(const std::shared_ptr& networkSO, const Config& config) const override; + std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index b360f4dea384ca..6dc1a0a5bc39e6 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -24,7 +24,7 @@ class DriverGraph final : public IGraph { const Config& config, std::optional> blob); - void export_blob(std::ostream& stream) override; + void export_blob(std::ostream& stream) const override; std::vector process_profiling_output(const std::vector& profData, const Config& config) const override; @@ -36,10 +36,16 @@ class DriverGraph final : public IGraph { ~DriverGraph() override; private: + bool release_blob(const Config& config); + std::shared_ptr _zeGraphExt; std::shared_ptr _zeroInitStruct; Logger _logger; + + // In the case of the import path, the blob is released after graph initialization so it can not be any longer + // exported + bool _blobIsReleased = false; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 6ce36e25f57f59..841d5a6c3c6131 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -22,7 +22,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(const std::shared_ptr& networkSO, const Config& config) const override; + std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp index 929d64ea7f73a0..0459abd8c354f2 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp @@ -23,10 +23,10 @@ class PluginGraph final : public IGraph { const std::shared_ptr& zeroInitStruct, ze_graph_handle_t graphHandle, NetworkMetadata metadata, - const std::shared_ptr& blobSO, + std::shared_ptr blobSOPtr, const Config& config); - void export_blob(std::ostream& stream) override; + void export_blob(std::ostream& stream) const override; std::vector process_profiling_output(const std::vector& profData, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index daadd0dc80e535..8de79fd5fcb842 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -206,11 +206,11 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr DriverCompilerAdapter::parse(const std::shared_ptr& networkSO, const Config& config) const { +std::shared_ptr DriverCompilerAdapter::parse(std::shared_ptr networkSOPtr, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); _logger.debug("parse start"); - ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(networkSO->get_ptr()), networkSO->size()); + ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(networkSOPtr->get_ptr()), networkSOPtr->size()); _logger.debug("parse end"); OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); @@ -221,7 +221,7 @@ std::shared_ptr DriverCompilerAdapter::parse(const std::shared_ptr>(networkSO)); + std::optional>(std::move(networkSOPtr))); } ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index cea7e5bf4c94b9..64a41d7a1a9fb2 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -32,17 +32,18 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, initialize(config); } -void DriverGraph::export_blob(std::ostream& stream) { - if (_blob.get() == nullptr) { - const uint8_t* blobPtr = nullptr; - size_t blobSize = -1; - std::shared_ptr> blob; - - _zeGraphExt->getGraphBinary(_handle, *blob, blobPtr, blobSize); - _blob = std::make_shared>>>(reinterpret_cast(const_cast(blobPtr)), blobSize, blob); +void DriverGraph::export_blob(std::ostream& stream) const { + const uint8_t* blobPtr = nullptr; + size_t blobSize = -1; + std::vector blob; + + if (_blobIsReleased) { + OPENVINO_THROW("Model was imported (not compiled) by the plugin. Model export is forbidden in this case!"); } - stream.write(reinterpret_cast(_blob->get_ptr()), _blob->size()); + _zeGraphExt->getGraphBinary(_handle, blob, blobPtr, blobSize); + + stream.write(reinterpret_cast(blobPtr), blobSize); if (!stream) { _logger.error("Write blob to stream failed. Blob is broken!"); @@ -51,12 +52,12 @@ void DriverGraph::export_blob(std::ostream& stream) { if (_logger.level() >= ov::log::Level::INFO) { std::uint32_t result = 1171117u; - for (const uint8_t* it = reinterpret_cast(_blob->get_ptr()); it != reinterpret_cast(_blob->get_ptr()) + _blob->size(); ++it) { + for (const uint8_t* it = blobPtr; it != blobPtr + blobSize; ++it) { result = ((result << 7) + result) + static_cast(*it); } std::stringstream str; - str << "Blob size: " << _blob->size() << ", hash: " << std::hex << result; + str << "Blob size: " << blobSize << ", hash: " << std::hex << result; _logger.info(str.str().c_str()); } _logger.info("Write blob to stream successfully."); @@ -120,8 +121,34 @@ void DriverGraph::initialize(const Config& config) { _zeGraphExt->initializeGraph(_handle, config); _logger.debug("Graph initialize finish"); + + // We are allowed to release the original blob because weights were loaded in NPU memory during + // _zeGraphExt->initializeGraph(). The driver will not access the original blob from this moment on, so we are + // releasing it here to avoid unnecessary memory usage. + _blobIsReleased = release_blob(config); } +bool DriverGraph::release_blob(const Config& config) { + if (_blob == nullptr || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 || + config.get()) { + return false; + } + + ze_graph_properties_2_t properties = {}; + properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; + _zeroInitStruct->getGraphDdiTable().pfnGetProperties2(_handle, &properties); + + if (~properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) { + return false; + } + + _blob.reset(); + + _logger.debug("Blob is released"); + + return true; +}; + DriverGraph::~DriverGraph() { if (_handle != nullptr) { auto result = _zeGraphExt->destroyGraph(_handle); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 37a18aa27921be..accafa7c8365a0 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -107,12 +107,12 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr PluginCompilerAdapter::parse(const std::shared_ptr& networkSO, const Config& config) const { +std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptr networkSOPtr, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); _logger.debug("parse start"); - std::vector network(networkSO->size()); - network.assign(reinterpret_cast(networkSO->get_ptr()), reinterpret_cast(networkSO->get_ptr()) + networkSO->size()); + std::vector network(networkSOPtr->size()); + network.assign(reinterpret_cast(networkSOPtr->get_ptr()), reinterpret_cast(networkSOPtr->get_ptr()) + networkSOPtr->size()); auto networkMeta = _compiler->parse(network, config); network.clear(); network.shrink_to_fit(); @@ -121,7 +121,7 @@ std::shared_ptr PluginCompilerAdapter::parse(const std::shared_ptrgetGraphHandle(reinterpret_cast(networkSO->get_ptr()), networkSO->size()); + graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(networkSOPtr->get_ptr()), networkSOPtr->size()); } return std::make_shared(_zeGraphExt, @@ -129,7 +129,7 @@ std::shared_ptr PluginCompilerAdapter::parse(const std::shared_ptr& zeGraphExt, const std::shared_ptr& zeroInitStruct, ze_graph_handle_t graphHandle, NetworkMetadata metadata, - const std::shared_ptr& blobSO, + std::shared_ptr blobSOPtr, const Config& config) - : IGraph(graphHandle, std::move(metadata), std::optional>(blobSO)), + : IGraph(graphHandle, std::move(metadata), std::optional>(std::move(blobSOPtr))), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _compiler(compiler), @@ -31,7 +31,7 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, initialize(config); } -void PluginGraph::export_blob(std::ostream& stream) { +void PluginGraph::export_blob(std::ostream& stream) const { stream.write(reinterpret_cast(_blob->get_ptr()), _blob->size()); if (!stream) { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 2719a7f092e4f7..b519fc3fa7b3b3 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -777,7 +777,8 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - auto graph = compiler->parse(std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, blobSO), localConfig); + auto blobSOPtr = std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, std::move(blobSO)); + auto graph = compiler->parse(std::move(blobSOPtr), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy =