From 5f43fa87315e7a1c8e6495cc31af93e4ff963d61 Mon Sep 17 00:00:00 2001
From: MirceaDan99 <mircea-aurelian.dan@intel.com>
Date: Tue, 26 Nov 2024 10:18:46 +0200
Subject: [PATCH] Refactor new `import_model` API to accept either
 `std::istream` or `ov::AlignedBuffer`, but not both

---
 .../dev_api/openvino/runtime/iplugin.hpp      |  6 +-
 src/inference/src/dev/core_impl.cpp           |  4 +-
 src/inference/src/dev/iplugin.cpp             |  6 +-
 src/inference/src/dev/plugin.cpp              |  9 +--
 src/inference/src/dev/plugin.hpp              |  5 +-
 src/plugins/intel_cpu/src/plugin.cpp          | 46 ++++++++++-
 src/plugins/intel_cpu/src/plugin.h            |  6 +-
 src/plugins/intel_cpu/src/utils/serialize.cpp |  7 +-
 src/plugins/intel_cpu/src/utils/serialize.hpp |  4 +-
 .../intel_npu/src/plugin/include/plugin.hpp   |  6 +-
 .../intel_npu/src/plugin/src/plugin.cpp       | 77 ++++++++++++++-----
 11 files changed, 121 insertions(+), 55 deletions(-)
diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp
index e88c3e4a539d15..7e6f9a917a1d1e 100644
--- a/src/inference/dev_api/openvino/runtime/iplugin.hpp
+++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp
@@ -193,8 +193,7 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this<IPlugin
      * @param properties A ov::AnyMap of properties
      * @return An Compiled model
      */
-    virtual std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
-                                                             std::shared_ptr<ov::AlignedBuffer> model_buffer,
+    virtual std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                              const ov::AnyMap& properties) const;
 
     /**
@@ -207,8 +206,7 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this<IPlugin
      * @param properties A ov::AnyMap of properties
      * @return An Compiled model
      */
-    virtual std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
-                                                             std::shared_ptr<ov::AlignedBuffer> model_buffer,
+    virtual std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                              const ov::SoPtr<ov::IRemoteContext>& context,
                                                              const ov::AnyMap& properties) const;
 
diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index d30581c0c98beb..fff63bed147325 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -1460,8 +1460,8 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
                     }
                 }
                 if (model_buffer) {
-                    compiled_model = context ? plugin.import_model(networkStream, model_buffer, context, update_config)
-                                             : plugin.import_model(networkStream, model_buffer, update_config);
+                    compiled_model = context ? plugin.import_model(model_buffer, context, update_config)
+                                             : plugin.import_model(model_buffer, update_config);
                 } else {
                     compiled_model = context ? plugin.import_model(networkStream, context, update_config)
                                              : plugin.import_model(networkStream, update_config);
diff --git a/src/inference/src/dev/iplugin.cpp b/src/inference/src/dev/iplugin.cpp
index 42d735baa0449a..403beb3ccd0451 100644
--- a/src/inference/src/dev/iplugin.cpp
+++ b/src/inference/src/dev/iplugin.cpp
@@ -57,14 +57,12 @@ const std::string& ov::IPlugin::get_device_name() const {
     return m_plugin_name;
 }
 
-std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::istream& model,
-                                                              std::shared_ptr<ov::AlignedBuffer> model_buffer,
+std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                               const ov::AnyMap& properties) const{
     OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented");
 }
 
-std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::istream& model,
-                                                              std::shared_ptr<ov::AlignedBuffer> model_buffer,
+std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                               const ov::SoPtr<ov::IRemoteContext>& context,
                                                               const ov::AnyMap& properties) const{
     OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented");
diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp
index 23e0e04bb6d0e0..a6f211743d5c7f 100644
--- a/src/inference/src/dev/plugin.cpp
+++ b/src/inference/src/dev/plugin.cpp
@@ -79,15 +79,14 @@ ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
     OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so});
 }
 
-ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model, std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const {
-    OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so});
+ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const {
+    OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model_buffer, properties), m_so});
 }
 
-ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
-                                                       std::shared_ptr<ov::AlignedBuffer> model_buffer,
+ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                        const ov::SoPtr<ov::IRemoteContext>& context,
                                                        const ov::AnyMap& config) const {
-    OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, context, config), m_so});
+    OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model_buffer, context, config), m_so});
 }
 
 ov::SoPtr<ov::IRemoteContext> ov::Plugin::create_context(const AnyMap& params) const {
diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp
index 004fcc04446c0a..792900c4698f2d 100644
--- a/src/inference/src/dev/plugin.hpp
+++ b/src/inference/src/dev/plugin.hpp
@@ -59,10 +59,9 @@ class Plugin {
                                            const ov::SoPtr<ov::IRemoteContext>& context,
                                            const ov::AnyMap& config) const;
 
-    SoPtr<ov::ICompiledModel> import_model(std::istream& model, std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const;
+    SoPtr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const;
 
-    SoPtr<ov::ICompiledModel> import_model(std::istream& model,
-                                           std::shared_ptr<ov::AlignedBuffer> model_buffer,
+    SoPtr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                            const ov::SoPtr<ov::IRemoteContext>& context,
                                            const ov::AnyMap& config) const;
 
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index f81e531698eb01..82fa70822838eb 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -553,12 +553,48 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&
 
 std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream,
                                                          const ov::AnyMap& config) const {
-    return import_model(model_stream, nullptr, config);
+    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model");
+
+    CacheDecrypt decrypt{ codec_xor };
+    bool decript_from_string = false;
+    if (config.count(ov::cache_encryption_callbacks.name())) {
+        auto encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as<EncryptionCallbacks>();
+        decrypt.m_decrypt_str = encryption_callbacks.decrypt;
+        decript_from_string = true;
+    }
+
+    ModelDeserializer deserializer(
+        model_stream,
+        nullptr,
+        [this](const std::shared_ptr<ov::AlignedBuffer>& model, const std::shared_ptr<ov::AlignedBuffer>& weights) {
+            return get_core()->read_model(model, weights);
+        },
+        decrypt, decript_from_string);
+
+    std::shared_ptr<ov::Model> model;
+    deserializer >> model;
+
+    Config conf = engConfig;
+    Config::ModelType modelType = getModelType(model);
+
+    // check ov::loaded_from_cache property and erase it to avoid exception in readProperties.
+    auto _config = config;
+    const auto& it = _config.find(ov::loaded_from_cache.name());
+    bool loaded_from_cache = false;
+    if (it != _config.end()) {
+        loaded_from_cache = it->second.as<bool>();
+        _config.erase(it);
+    }
+    conf.readProperties(_config, modelType);
+
+    // import config props from caching model
+    calculate_streams(conf, model, true);
+    auto compiled_model = std::make_shared<CompiledModel>(model, shared_from_this(), conf, loaded_from_cache);
+    return compiled_model;
 }
 
 
-std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream,
-                                                         std::shared_ptr<ov::AlignedBuffer> model_buffer,
+std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                          const ov::AnyMap& config) const {
     OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model");
 
@@ -570,8 +606,10 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_str
         decript_from_string = true;
     }
 
+    std::stringstream empty_model_stream("");
+
     ModelDeserializer deserializer(
-        model_stream,
+        empty_model_stream,
         model_buffer,
         [this](const std::shared_ptr<ov::AlignedBuffer>& model, const std::shared_ptr<ov::AlignedBuffer>& weights) {
             return get_core()->read_model(model, weights);
diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h
index b33a6d922e0cae..013b9bf8a65d16 100644
--- a/src/plugins/intel_cpu/src/plugin.h
+++ b/src/plugins/intel_cpu/src/plugin.h
@@ -34,11 +34,9 @@ class Plugin : public ov::IPlugin {
             "import_model with RemoteContext is not supported by CPU plugin!");
     };
 
-    std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
-                                                     std::shared_ptr<ov::AlignedBuffer> model_buffer,
+    std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                      const ov::AnyMap& properties) const override;
-    std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
-                                                     std::shared_ptr<ov::AlignedBuffer> model_buffer,
+    std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                      const ov::SoPtr<ov::IRemoteContext>& context,
                                                      const ov::AnyMap& properties) const override {
         OPENVINO_THROW_NOT_IMPLEMENTED(
diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp
index 4dfdd6b22afbd4..f018b3ca415952 100644
--- a/src/plugins/intel_cpu/src/utils/serialize.hpp
+++ b/src/plugins/intel_cpu/src/utils/serialize.hpp
@@ -31,7 +31,7 @@ class ModelDeserializer {
 public:
     typedef std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&, const std::shared_ptr<ov::AlignedBuffer>&)> ModelBuilder;
 
-    ModelDeserializer(std::istream& model,
+    ModelDeserializer(std::istream& model_stream,
                       std::shared_ptr<ov::AlignedBuffer> model_buffer,
                       ModelBuilder fn,
                       const CacheDecrypt& encrypt_fn,
diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp
index 9d6e813d402c83..a8c11e6d6ce9f6 100644
--- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp
+++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp
@@ -44,16 +44,14 @@ class Plugin : public ov::IPlugin {
 
     std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream, const ov::AnyMap& properties) const override;
 
-    std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
-                                                    std::shared_ptr<ov::AlignedBuffer> model_buffer,
+    std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                     const ov::AnyMap& properties) const override;
 
     std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
                                                      const ov::SoPtr<ov::IRemoteContext>& context,
                                                      const ov::AnyMap& properties) const override;
 
-    std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
-                                                     std::shared_ptr<ov::AlignedBuffer> model_buffer,
+    std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                      const ov::SoPtr<ov::IRemoteContext>& context,
                                                      const ov::AnyMap& properties) const override;
 
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 46c66ad01aaed6..2719a7f092e4f7 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -744,10 +744,58 @@ ov::SoPtr<ov::IRemoteContext> Plugin::get_default_context(const ov::AnyMap&) con
 }
 
 std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, const ov::AnyMap& properties) const {
-    return import_model(stream, nullptr, properties);
+    OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model");
+    OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs");
+
+    const std::map<std::string, std::string> propertiesMap = any_copy(properties);
+    auto localConfig = merge_configs(_globalConfig, propertiesMap, OptionMode::RunTime);
+    _logger.setLevel(localConfig.get<LOG_LEVEL>());
+    const auto platform = _backends->getCompilationPlatform(localConfig.get<PLATFORM>(), localConfig.get<DEVICE_ID>());
+    localConfig.update({{ov::intel_npu::platform.name(), platform}});
+    auto device = _backends->getDevice(localConfig.get<DEVICE_ID>());
+
+    set_batch_config(_backends->isBatchingSupported(), localConfig);
+
+    const auto loadedFromCache = localConfig.get<LOADED_FROM_CACHE>();
+    if (!loadedFromCache) {
+        _logger.warning(
+            "The usage of a compiled model can lead to undefined behavior. Please use OpenVINO IR instead!");
+    }
+
+    OV_ITT_TASK_NEXT(PLUGIN_IMPORT_MODEL, "parse");
+
+    std::shared_ptr<ov::ICompiledModel> compiledModel;
+
+    try {
+        auto compiler = getCompiler(localConfig);
+
+        auto graphSize = getFileSize(stream);
+        auto blobSO = std::make_shared<std::vector<uint8_t>>(graphSize);
+        stream.read(reinterpret_cast<char*>(blobSO->data()), graphSize);
+        if (!stream) {
+            OPENVINO_THROW("Failed to read data from stream!");
+        }
+        _logger.debug("Successfully read %zu bytes into blob.", graphSize);
+
+        auto graph = compiler->parse(std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blobSO->data()), graphSize, blobSO), localConfig);
+        graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));
+
+        const std::shared_ptr<ov::Model> modelDummy =
+            create_dummy_model(graph->get_metadata().inputs, graph->get_metadata().outputs);
+
+        compiledModel = std::make_shared<CompiledModel>(modelDummy, shared_from_this(), device, graph, localConfig);
+    } catch (const std::exception& ex) {
+        OPENVINO_THROW("Can't import network: ", ex.what());
+    } catch (...) {
+        OPENVINO_THROW("NPU import_model got unexpected exception from CompiledModel");
+    }
+
+    OV_ITT_TASK_SKIP(PLUGIN_IMPORT_MODEL);
+
+    return compiledModel;
 }
 
-std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const { 
+std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const { 
     OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model");
     OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs");
 
@@ -773,19 +821,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, s
     try {
         auto compiler = getCompiler(localConfig);
 
-        std::shared_ptr<IGraph> graph;
-        if (model_buffer != nullptr) {
-            graph = compiler->parse(model_buffer, localConfig);
-        } else {
-            auto graphSize = getFileSize(stream);
-            auto blobSO = std::make_shared<std::vector<uint8_t>>(graphSize);
-            stream.read(reinterpret_cast<char*>(blobSO->data()), graphSize);
-            if (!stream) {
-                OPENVINO_THROW("Failed to read data from stream!");
-            }
-            _logger.debug("Successfully read %zu bytes into blob.", graphSize);
-            graph = compiler->parse(std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blobSO->data()), graphSize, blobSO), localConfig);
-        }
+        auto graph = compiler->parse(model_buffer, localConfig);
         graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));
 
         const std::shared_ptr<ov::Model> modelDummy =
@@ -806,12 +842,15 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, s
 std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream,
                                                          const ov::SoPtr<ov::IRemoteContext>& context,
                                                          const ov::AnyMap& properties) const {
+    // The context is only validated; the import itself goes through the context-free stream overload.
+    if (std::dynamic_pointer_cast<RemoteContextImpl>(context._ptr) == nullptr) {
+        OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type");
+    }
 
-    return import_model(stream, nullptr, context, properties);
+    return import_model(stream, properties);
 }
 
-std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream,
-                                                         std::shared_ptr<ov::AlignedBuffer> model_buffer,
+std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                          const ov::SoPtr<ov::IRemoteContext>& context,
                                                          const ov::AnyMap& properties) const {
     auto casted = std::dynamic_pointer_cast<RemoteContextImpl>(context._ptr);
@@ -819,7 +858,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream,
         OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type");
     }
 
-    return import_model(stream, model_buffer, properties);
+    return import_model(model_buffer, properties);
 }
 
 ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>& model,