From 2ebf0436f8aeb3c24c710f470e6132a811df1af3 Mon Sep 17 00:00:00 2001
From: MirceaDan99
Date: Wed, 11 Dec 2024 10:23:06 +0200
Subject: [PATCH] Avoid creating new plugin option for `CACHED_MODEL_BUFFER` &
 use `ov::AnyMap` passed by `core` to extract value of memory mapped model
 buffer

---
 .../openvino/runtime/shared_buffer.hpp        |  6 -----
 .../al/include/intel_npu/config/runtime.hpp   | 25 -----------------
 .../intel_npu/src/al/src/config/runtime.cpp   | 27 -------------------
 .../intel_npu/src/plugin/src/plugin.cpp       | 15 +++++++----
 4 files changed, 10 insertions(+), 63 deletions(-)

diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp
index 503b6fc42475b3..2c784ef6081c35 100644
--- a/src/core/dev_api/openvino/runtime/shared_buffer.hpp
+++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp
@@ -12,12 +12,6 @@ namespace ov {
 template <typename T>
 class SharedBuffer : public ov::AlignedBuffer {
 public:
-
-    SharedBuffer(const SharedBuffer&) = delete;
-    SharedBuffer(SharedBuffer&&) = default;
-    SharedBuffer& operator=(const SharedBuffer&) = delete;
-    SharedBuffer& operator=(SharedBuffer&&) = default;
-
     SharedBuffer(char* data, size_t size, const T& shared_object) : _shared_object(shared_object) {
         m_allocated_buffer = data;
         m_aligned_buffer = data;
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp
index 4c8044d4de26c5..1fc3a3e20965c6 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp
@@ -288,29 +288,4 @@ struct RUN_INFERENCES_SEQUENTIALLY final : OptionBase<RUN_INFERENCES_SEQUENTIALLY, bool> {
         return OptionMode::RunTime;
     }
 };
-//
-// CACHED_MODEL_BUFFER
-//
-struct CACHED_MODEL_BUFFER final : OptionBase<CACHED_MODEL_BUFFER, std::shared_ptr<ov::AlignedBuffer>> {
-    static std::string_view key() {
-        return ov::internal::cached_model_buffer.name();
-    }
-
-    static constexpr std::string_view getTypeName() {
-        return "std::shared_ptr<ov::AlignedBuffer>";
-    }
-
-    static std::shared_ptr<ov::AlignedBuffer> defaultValue() {
-        return nullptr;
-    }
-
-    static OptionMode mode() {
-        return OptionMode::RunTime;
-    }
-
-    static std::shared_ptr<ov::AlignedBuffer> parse(std::string_view val);
-
-    static std::string toString(const std::shared_ptr<ov::AlignedBuffer>& val);
-};
-
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/al/src/config/runtime.cpp b/src/plugins/intel_npu/src/al/src/config/runtime.cpp
index dba16fa7a4929b..007cccc451e964 100644
--- a/src/plugins/intel_npu/src/al/src/config/runtime.cpp
+++ b/src/plugins/intel_npu/src/al/src/config/runtime.cpp
@@ -30,7 +30,6 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) {
     desc.add<TURBO>();
     desc.add<BYPASS_UMD_CACHING>();
     desc.add<RUN_INFERENCES_SEQUENTIALLY>();
-    desc.add<CACHED_MODEL_BUFFER>();
 }
 
 // Heuristically obtained number. Varies depending on the values of PLATFORM and PERFORMANCE_HINT
@@ -158,29 +157,3 @@ std::string intel_npu::WORKLOAD_TYPE::toString(const ov::WorkloadType& val) {
     ss << val;
     return ss.str();
 }
-
-//
-// CACHED_MODEL_BUFFER
-//
-
-std::shared_ptr<ov::AlignedBuffer> intel_npu::CACHED_MODEL_BUFFER::parse(std::string_view val) {
-    std::istringstream ss = std::istringstream(std::string(val));
-    void* modelBufferPtr;
-
-    ss >> modelBufferPtr;
-    // If we don't "steal" below resources from the casted ov::AlignedBuffer, parsed blob will be freed
-    // after core.import_model causing problems at inference.get_profiling_info()
-    auto* modelBufferSO = dynamic_cast<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>*>(static_cast<ov::AlignedBuffer*>(modelBufferPtr));
-    std::shared_ptr<ov::MappedMemory> mappedMemorySOPtr;
-    auto modelBufferSOPtr = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(nullptr, 0, mappedMemorySOPtr);
-    *modelBufferSOPtr = std::move(*modelBufferSO);
-
-    return modelBufferSOPtr;
-}
-
-std::string intel_npu::CACHED_MODEL_BUFFER::toString(const std::shared_ptr<ov::AlignedBuffer>& val) {
-    std::ostringstream ss;
-    void* modelBufferPtr = static_cast<void*>(val.get());
-    ss << modelBufferPtr;
-    return ss.str();
-}
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index fc52adcbdacff9..bd100e44394bc0 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -752,7 +752,14 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
     OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model");
     OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs");
 
-    const std::map<std::string, std::string> propertiesMap = any_copy(properties);
+    auto _properties = properties;
+    std::shared_ptr<ov::AlignedBuffer> modelBuffer;
+    if (_properties.count(ov::internal::cached_model_buffer.name())) {
+        modelBuffer = _properties.at(ov::internal::cached_model_buffer.name()).as<std::shared_ptr<ov::AlignedBuffer>>();
+        _properties.erase(ov::internal::cached_model_buffer.name());
+    }
+
+    const std::map<std::string, std::string> propertiesMap = any_copy(_properties);
     auto localConfig = merge_configs(_globalConfig, propertiesMap, OptionMode::RunTime);
     _logger.setLevel(localConfig.get<LOG_LEVEL>());
     const auto platform = _backends->getCompilationPlatform(localConfig.get<PLATFORM>(), localConfig.get<DEVICE_ID>());
@@ -767,8 +774,6 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
                       "The usage of a compiled model can lead to undefined behavior. Please use OpenVINO IR instead!");
     }
 
-    auto model_buffer = localConfig.get<CACHED_MODEL_BUFFER>();
-
     OV_ITT_TASK_NEXT(PLUGIN_IMPORT_MODEL, "parse");
 
     std::shared_ptr<ov::ICompiledModel> compiledModel;
@@ -778,7 +783,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
 
         std::unique_ptr<BlobContainer> blobPtr;
 
-        if (model_buffer == nullptr) {
+        if (modelBuffer == nullptr) {
             auto graphSize = getFileSize(stream);
 
             std::vector<uint8_t> blob(graphSize);
@@ -790,7 +795,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
 
             blobPtr = std::move(std::make_unique<BlobContainerVector>(std::move(blob)));
         } else {
-            blobPtr = std::move(std::make_unique<BlobContainerAlignedBuffer>(model_buffer, stream.tellg()));
+            blobPtr = std::move(std::make_unique<BlobContainerAlignedBuffer>(modelBuffer, stream.tellg()));
        }
 
         auto graph = compiler->parse(std::move(blobPtr), localConfig);
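
Note (illustrative sketch, not part of the patch): the change assumes the core side puts the memory-mapped blob into the `ov::AnyMap` under `ov::internal::cached_model_buffer` before calling `Plugin::import_model`. A minimal caller-side sketch in C++ using the dev-API types touched above; `map_cached_blob` and `example` are hypothetical names, and the exact header paths are assumptions:

    #include <memory>
    #include <string>

    #include "openvino/core/any.hpp"
    #include "openvino/runtime/aligned_buffer.hpp"
    #include "openvino/runtime/internal_properties.hpp"
    #include "openvino/runtime/shared_buffer.hpp"
    #include "openvino/util/mmap_object.hpp"

    // Hypothetical helper: memory-map a cached blob and wrap it in an
    // ov::SharedBuffer so the mapping stays alive for as long as the
    // buffer is referenced by the plugin.
    std::shared_ptr<ov::AlignedBuffer> map_cached_blob(const std::string& path) {
        std::shared_ptr<ov::MappedMemory> mapped = ov::load_mmap_object(path);
        return std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(
            mapped->data(), mapped->size(), mapped);
    }

    // Illustrative call site: the buffer rides along in the AnyMap and is
    // popped by Plugin::import_model() before any_copy() flattens the
    // remaining properties into std::map<std::string, std::string>.
    void example(ov::AnyMap& properties) {
        properties[ov::internal::cached_model_buffer.name()] = map_cached_blob("model.blob");
    }

Passing the buffer as a typed `ov::Any` also removes the need for the deleted `CACHED_MODEL_BUFFER::parse()`/`toString()` pair, which round-tripped a raw pointer through a string and had to move-steal the `ov::SharedBuffer` contents so the blob would outlive `core.import_model` and keep `inference.get_profiling_info()` working.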