diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp index 8cb6fe8b2c44db..b347b457fc7e5e 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp @@ -13,41 +13,42 @@ namespace intel_npu { class BlobContainer { public: - virtual void* get_ptr() = 0; + BlobContainer() = default; - virtual size_t size() const = 0; + BlobContainer(std::vector blob) : _blob(std::move(blob)) {} - virtual bool release_from_memory() = 0; - - virtual std::vector get_ownership_blob() = 0; - - virtual ~BlobContainer() = default; -}; - -class BlobContainerVector : public BlobContainer { -public: - BlobContainerVector(std::vector blob) : _ownershipBlob(std::move(blob)) {} - - void* get_ptr() override { - return reinterpret_cast(_ownershipBlob.data()); + virtual const void* get_ptr() const { + return _blob.data(); } - size_t size() const override { - return _ownershipBlob.size(); + virtual size_t size() const { + return _blob.size(); } - bool release_from_memory() override { - _ownershipBlob.clear(); - _ownershipBlob.shrink_to_fit(); - return true; + virtual bool release_from_memory() const { + if (_shouldDeallocate) { + _blob.clear(); + _blob.shrink_to_fit(); + return true; + } + _shouldDeallocate = true; + return false; } - std::vector get_ownership_blob() override { - return _ownershipBlob; + virtual const std::vector& get_blob() const { + // when underlying blob object was accessed, + // prevent deallocation on next `release_from_memory` call + _shouldDeallocate = false; + return _blob; } + virtual ~BlobContainer() = default; + +protected: + mutable std::vector _blob; + private: - std::vector _ownershipBlob; + mutable bool _shouldDeallocate = true; }; class BlobContainerAlignedBuffer : public BlobContainer { @@ -55,32 +56,35 @@ class BlobContainerAlignedBuffer : 
public BlobContainer { BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, size_t ovHeaderOffset, uint64_t blobSize) - : _blobSize(blobSize), + : _size(blobSize), _ovHeaderOffset(ovHeaderOffset), - _ownershipBlob(blobSO) {} + _blobSO(blobSO) {} - void* get_ptr() override { - return _ownershipBlob->get_ptr(_ovHeaderOffset); + const void* get_ptr() const override { + return _blobSO->get_ptr(_ovHeaderOffset); } size_t size() const override { - return _blobSize; + return _size; } - bool release_from_memory() override { + bool release_from_memory() const override { + BlobContainer::release_from_memory(); return false; } - std::vector get_ownership_blob() override { - std::vector blob(_blobSize); - blob.assign(reinterpret_cast(this->get_ptr()), reinterpret_cast(this->get_ptr()) + this->size()); - return blob; + const std::vector& get_blob() const override { + BlobContainer::release_from_memory(); + _blob.resize(_size); + _blob.assign(reinterpret_cast(this->get_ptr()), + reinterpret_cast(this->get_ptr()) + _size); + return _blob; } private: - uint64_t _blobSize; + uint64_t _size; size_t _ovHeaderOffset; - std::shared_ptr _ownershipBlob; + std::shared_ptr _blobSO; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index d10d2e194449aa..6f728ed5271678 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -80,7 +80,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrcompile(model, config); - auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); + auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); _logger.debug("compile end"); ze_graph_handle_t graphHandle = nullptr; @@ -110,8 +110,9 @@ std::shared_ptr 
PluginCompilerAdapter::parse(std::unique_ptrget_ownership_blob(); + const auto& blob = blobPtr->get_blob(); auto networkMeta = _compiler->parse(blob, config); + blobPtr->release_from_memory(); _logger.debug("parse end"); ze_graph_handle_t graphHandle = nullptr; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index 8546f04a84ae95..512422b99a19ad 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -56,8 +56,15 @@ size_t PluginGraph::export_blob(std::ostream& stream) const { std::vector PluginGraph::process_profiling_output(const std::vector& profData, const Config& config) const { - const auto& blob = _blobPtr->get_ownership_blob(); - return _compiler->process_profiling_output(profData, blob, config); + std::vector profilingInfo; + const auto& blob = _blobPtr->get_blob(); + try { + profilingInfo = _compiler->process_profiling_output(profData, blob, config); + } catch (const std::exception& ex) { + _logger.error(ex.what()); + } + _blobPtr->release_from_memory(); + return std::move(profilingInfo); } void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index fb863e1cfa7f0f..57b05b8517dba6 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -636,13 +636,13 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< auto localConfig = merge_configs(_globalConfig, localPropertiesMap); update_log_level(localPropertiesMap); - const auto set_cache_dir = localConfig.get(); + /* const auto set_cache_dir = localConfig.get(); if (!set_cache_dir.empty()) { const auto compilerType = localConfig.get(); if (compilerType == ov::intel_npu::CompilerType::MLIR) { OPENVINO_THROW("Option 
'CACHE_DIR' is not supported with MLIR compiler type"); } - } + } */ const auto platform = _backends->getCompilationPlatform(localConfig.get(), localConfig.get()); auto device = _backends->getDevice(localConfig.get()); @@ -806,7 +806,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - blobPtr = std::make_unique(std::move(blob)); + blobPtr = std::make_unique(std::move(blob)); } else { blobPtr = std::make_unique(modelBuffer, stream.tellg(), graphSize); }