From 5f43fa87315e7a1c8e6495cc31af93e4ff963d61 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Tue, 26 Nov 2024 10:18:46 +0200 Subject: [PATCH] Refactor `import_model` new API to accept only either `std::istream` or `ov::AlignedBuffer` --- .../dev_api/openvino/runtime/iplugin.hpp | 6 +- src/inference/src/dev/core_impl.cpp | 4 +- src/inference/src/dev/iplugin.cpp | 6 +- src/inference/src/dev/plugin.cpp | 9 +-- src/inference/src/dev/plugin.hpp | 5 +- src/plugins/intel_cpu/src/plugin.cpp | 46 ++++++++++- src/plugins/intel_cpu/src/plugin.h | 6 +- src/plugins/intel_cpu/src/utils/serialize.cpp | 7 +- src/plugins/intel_cpu/src/utils/serialize.hpp | 4 +- .../intel_npu/src/plugin/include/plugin.hpp | 6 +- .../intel_npu/src/plugin/src/plugin.cpp | 77 ++++++++++++++----- 11 files changed, 121 insertions(+), 55 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp index e88c3e4a539d15..7e6f9a917a1d1e 100644 --- a/src/inference/dev_api/openvino/runtime/iplugin.hpp +++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp @@ -193,8 +193,7 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this import_model(std::istream& model, - std::shared_ptr model_buffer, + virtual std::shared_ptr import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const; /** @@ -207,8 +206,7 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this import_model(std::istream& model, - std::shared_ptr model_buffer, + virtual std::shared_ptr import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const; diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index d30581c0c98beb..fff63bed147325 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1460,8 +1460,8 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( } } if (model_buffer) { - compiled_model = context ? 
plugin.import_model(networkStream, model_buffer, context, update_config) - : plugin.import_model(networkStream, model_buffer, update_config); + compiled_model = context ? plugin.import_model(model_buffer, context, update_config) + : plugin.import_model(model_buffer, update_config); } else { compiled_model = context ? plugin.import_model(networkStream, context, update_config) : plugin.import_model(networkStream, update_config); diff --git a/src/inference/src/dev/iplugin.cpp b/src/inference/src/dev/iplugin.cpp index 42d735baa0449a..403beb3ccd0451 100644 --- a/src/inference/src/dev/iplugin.cpp +++ b/src/inference/src/dev/iplugin.cpp @@ -57,14 +57,12 @@ const std::string& ov::IPlugin::get_device_name() const { return m_plugin_name; } -std::shared_ptr ov::IPlugin::import_model(std::istream& model, - std::shared_ptr model_buffer, +std::shared_ptr ov::IPlugin::import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const{ OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); } -std::shared_ptr ov::IPlugin::import_model(std::istream& model, - std::shared_ptr model_buffer, +std::shared_ptr ov::IPlugin::import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const{ OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp index 23e0e04bb6d0e0..a6f211743d5c7f 100644 --- a/src/inference/src/dev/plugin.cpp +++ b/src/inference/src/dev/plugin.cpp @@ -79,15 +79,14 @@ ov::SoPtr ov::Plugin::import_model(std::istream& model, OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so}); } -ov::SoPtr ov::Plugin::import_model(std::istream& model, std::shared_ptr model_buffer, const ov::AnyMap& properties) const { - OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so}); +ov::SoPtr ov::Plugin::import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) 
const { + OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model_buffer, properties), m_so}); } -ov::SoPtr ov::Plugin::import_model(std::istream& model, - std::shared_ptr model_buffer, +ov::SoPtr ov::Plugin::import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& config) const { - OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, context, config), m_so}); + OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model_buffer, context, config), m_so}); } ov::SoPtr ov::Plugin::create_context(const AnyMap& params) const { diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp index 004fcc04446c0a..792900c4698f2d 100644 --- a/src/inference/src/dev/plugin.hpp +++ b/src/inference/src/dev/plugin.hpp @@ -59,10 +59,9 @@ class Plugin { const ov::SoPtr& context, const ov::AnyMap& config) const; - SoPtr import_model(std::istream& model, std::shared_ptr model_buffer, const ov::AnyMap& properties) const; + SoPtr import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const; - SoPtr import_model(std::istream& model, - std::shared_ptr model_buffer, + SoPtr import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& config) const; diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index f81e531698eb01..82fa70822838eb 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -553,12 +553,48 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const { - return import_model(model_stream, nullptr, config); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); + + CacheDecrypt decrypt{ codec_xor }; + bool decript_from_string = false; + if (config.count(ov::cache_encryption_callbacks.name())) { + auto encryption_callbacks = 
config.at(ov::cache_encryption_callbacks.name()).as(); + decrypt.m_decrypt_str = encryption_callbacks.decrypt; + decript_from_string = true; + } + + ModelDeserializer deserializer( + model_stream, + nullptr, + [this](const std::shared_ptr& model, const std::shared_ptr& weights) { + return get_core()->read_model(model, weights); + }, + decrypt, decript_from_string); + + std::shared_ptr model; + deserializer >> model; + + Config conf = engConfig; + Config::ModelType modelType = getModelType(model); + + // check ov::loaded_from_cache property and erase it to avoid exception in readProperties. + auto _config = config; + const auto& it = _config.find(ov::loaded_from_cache.name()); + bool loaded_from_cache = false; + if (it != _config.end()) { + loaded_from_cache = it->second.as(); + _config.erase(it); + } + conf.readProperties(_config, modelType); + + // import config props from caching model + calculate_streams(conf, model, true); + auto compiled_model = std::make_shared(model, shared_from_this(), conf, loaded_from_cache); + return compiled_model; } -std::shared_ptr Plugin::import_model(std::istream& model_stream, - std::shared_ptr model_buffer, +std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, const ov::AnyMap& config) const { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); @@ -570,8 +606,10 @@ std::shared_ptr Plugin::import_model(std::istream& model_str decript_from_string = true; } + std::stringstream empty_model_stream(""); + ModelDeserializer deserializer( - model_stream, + empty_model_stream, model_buffer, [this](const std::shared_ptr& model, const std::shared_ptr& weights) { return get_core()->read_model(model, weights); diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index b33a6d922e0cae..013b9bf8a65d16 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -34,11 +34,9 @@ class Plugin : public ov::IPlugin { "import_model with RemoteContext is 
not supported by CPU plugin!"); }; - std::shared_ptr import_model(std::istream& model, - std::shared_ptr model_buffer, + std::shared_ptr import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::istream& model, - std::shared_ptr model_buffer, + std::shared_ptr import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override { OPENVINO_THROW_NOT_IMPLEMENTED( diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp index 33d8140fbe4a84..afe8efb3019a0e 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -47,14 +47,13 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream, void ModelDeserializer::operator>>(std::shared_ptr& model) { if (m_model_buffer) { - process_mmap(model, m_model_buffer); + process_mmap(m_model_buffer); } else { process_stream(model); } } -void ModelDeserializer::process_mmap(std::shared_ptr& model, - const std::shared_ptr& mmemory) { +void ModelDeserializer::process_mmap(const std::shared_ptr& mmemory) { // Note: Don't use seekg with mmaped stream. This may affect the performance of some models. // Get file size before seek content. // Blob from cache may have other header, so need to skip this. 
@@ -107,7 +106,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, hdr.model_size, xml_buff); - model = m_model_builder(model_buf, weights_buf); + auto model = m_model_builder(model_buf, weights_buf); // Set Info pugi::xml_node root = xml_in_out_doc.child("cnndata"); diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp index 4dfdd6b22afbd4..f018b3ca415952 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.hpp +++ b/src/plugins/intel_cpu/src/utils/serialize.hpp @@ -31,7 +31,7 @@ class ModelDeserializer { public: typedef std::function(const std::shared_ptr&, const std::shared_ptr&)> ModelBuilder; - ModelDeserializer(std::istream& model, + ModelDeserializer(std::istream& model_stream, std::shared_ptr model_buffer, ModelBuilder fn, const CacheDecrypt& encrypt_fn, @@ -44,7 +44,7 @@ class ModelDeserializer { protected: static void set_info(pugi::xml_node& root, std::shared_ptr& model); - void process_mmap(std::shared_ptr& model, const std::shared_ptr& memory); + void process_mmap(const std::shared_ptr& memory); void process_stream(std::shared_ptr& model); diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp index 9d6e813d402c83..a8c11e6d6ce9f6 100644 --- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp +++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp @@ -44,16 +44,14 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& stream, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::istream& stream, - std::shared_ptr model_buffer, + std::shared_ptr import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const override; std::shared_ptr import_model(std::istream& stream, const ov::SoPtr& context, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::istream& stream, - std::shared_ptr model_buffer, + std::shared_ptr 
import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 46c66ad01aaed6..2719a7f092e4f7 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -744,10 +744,58 @@ ov::SoPtr Plugin::get_default_context(const ov::AnyMap&) con } std::shared_ptr Plugin::import_model(std::istream& stream, const ov::AnyMap& properties) const { - return import_model(stream, nullptr, properties); + OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); + OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); + + const std::map propertiesMap = any_copy(properties); + auto localConfig = merge_configs(_globalConfig, propertiesMap, OptionMode::RunTime); + _logger.setLevel(localConfig.get()); + const auto platform = _backends->getCompilationPlatform(localConfig.get(), localConfig.get()); + localConfig.update({{ov::intel_npu::platform.name(), platform}}); + auto device = _backends->getDevice(localConfig.get()); + + set_batch_config(_backends->isBatchingSupported(), localConfig); + + const auto loadedFromCache = localConfig.get(); + if (!loadedFromCache) { + _logger.warning( + "The usage of a compiled model can lead to undefined behavior. 
Please use OpenVINO IR instead!"); + } + + OV_ITT_TASK_NEXT(PLUGIN_IMPORT_MODEL, "parse"); + + std::shared_ptr compiledModel; + + try { + auto compiler = getCompiler(localConfig); + + auto graphSize = getFileSize(stream); + auto blobSO = std::make_shared>(graphSize); + stream.read(reinterpret_cast(blobSO->data()), graphSize); + if (!stream) { + OPENVINO_THROW("Failed to read data from stream!"); + } + _logger.debug("Successfully read %zu bytes into blob.", graphSize); + + auto graph = compiler->parse(std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, blobSO), localConfig); + graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); + + const std::shared_ptr modelDummy = + create_dummy_model(graph->get_metadata().inputs, graph->get_metadata().outputs); + + compiledModel = std::make_shared(modelDummy, shared_from_this(), device, graph, localConfig); + } catch (const std::exception& ex) { + OPENVINO_THROW("Can't import network: ", ex.what()); + } catch (...) 
{ + OPENVINO_THROW("NPU import_model got unexpected exception from CompiledModel"); + } + + OV_ITT_TASK_SKIP(PLUGIN_IMPORT_MODEL); + + return compiledModel; } -std::shared_ptr Plugin::import_model(std::istream& stream, std::shared_ptr model_buffer, const ov::AnyMap& properties) const { +std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); @@ -773,19 +821,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, s try { auto compiler = getCompiler(localConfig); - std::shared_ptr graph; - if (model_buffer != nullptr) { - graph = compiler->parse(model_buffer, localConfig); - } else { - auto graphSize = getFileSize(stream); - auto blobSO = std::make_shared>(graphSize); - stream.read(reinterpret_cast(blobSO->data()), graphSize); - if (!stream) { - OPENVINO_THROW("Failed to read data from stream!"); - } - _logger.debug("Successfully read %zu bytes into blob.", graphSize); - graph = compiler->parse(std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, blobSO), localConfig); - } + auto graph = compiler->parse(model_buffer, localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -806,12 +842,15 @@ std::shared_ptr Plugin::import_model(std::istream& stream, s std::shared_ptr Plugin::import_model(std::istream& stream, const ov::SoPtr& context, const ov::AnyMap& properties) const { + auto casted = std::dynamic_pointer_cast(context._ptr); + if (casted == nullptr) { + OPENVINO_THROW("Invalid remote context type. 
Can't cast to ov::intel_npu::RemoteContext type"); + } - return import_model(stream, nullptr, context, properties); + return import_model(stream, properties); } -std::shared_ptr Plugin::import_model(std::istream& stream, - std::shared_ptr model_buffer, +std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const { auto casted = std::dynamic_pointer_cast(context._ptr); @@ -819,7 +858,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type"); } - return import_model(stream, model_buffer, properties); + return import_model(model_buffer, properties); } ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& model,