From 4ff7d8b776c68fb082f081ba14802ae7353045a4 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Thu, 14 Nov 2024 14:05:13 +0200 Subject: [PATCH] Add `ov::internal::caching_with_mmap` property logic --- .../common/include/intel_npu/common/npu.hpp | 8 +++++ .../include/driver_compiler_adapter.hpp | 4 ++- .../include/plugin_compiler_adapter.hpp | 6 +++- .../include/ze_graph_ext_wrappers.hpp | 2 ++ .../ze_graph_ext_wrappers_interface.hpp | 3 ++ .../src/driver_compiler_adapter.cpp | 23 ++++++++++-- .../src/plugin_compiler_adapter.cpp | 5 +-- .../src/ze_graph_ext_wrappers.cpp | 24 +++++++++++++ .../intel_npu/src/plugin/include/metrics.hpp | 2 +- .../intel_npu/src/plugin/src/plugin.cpp | 36 ++++++++++++------- 10 files changed, 94 insertions(+), 19 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp index b34f2deee6c61e..0ea654bbce4bff 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp @@ -56,12 +56,20 @@ class IEngineBackend : public std::enable_shared_from_this { class ICompilerAdapter { public: + ICompilerAdapter(ov::intel_npu::CompilerType compilerType) : _compilerType(compilerType) {} virtual std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const = 0; virtual std::shared_ptr parse(std::vector network, const Config& config) const = 0; + virtual std::shared_ptr parse(const std::shared_ptr& mmapNetwork, const Config& config) const = 0; virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; virtual ~ICompilerAdapter() = default; + + ov::intel_npu::CompilerType getCompilerType() { + return _compilerType; + } +private: + ov::intel_npu::CompilerType _compilerType; }; //------------------------------------------------------------------------------ diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index dc000b99d7446b..d77d0768f981e9 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -22,12 +22,14 @@ namespace intel_npu { class DriverCompilerAdapter final : public ICompilerAdapter { public: - DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct); + DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct, ov::intel_npu::CompilerType compilerType); std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; std::shared_ptr parse(std::vector network, const Config& config) const override; + std::shared_ptr parse(const std::shared_ptr& mmapNetwork, const Config& config) const; + ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; private: diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index eab8a19627cd1c..d2512b72c26acc 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -17,12 +17,16 @@ namespace intel_npu { class PluginCompilerAdapter final : public ICompilerAdapter { public: - PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct); + PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, ov::intel_npu::CompilerType compilerType); std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; std::shared_ptr parse(std::vector network, const Config& config) const override; + std::shared_ptr parse(const std::shared_ptr& mmapNetwork, const Config& config) const override { + OPENVINO_THROW("CIP needs a blob vector!"); + } + ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; private: diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index 1bc58b153a48ff..5e02562f8535c2 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -60,6 +60,8 @@ class ZeGraphExtWrappers final : public ZeGraphExtWrappersInterface { ze_graph_handle_t getGraphHandle(const std::vector& network) const override; + ze_graph_handle_t getGraphHandle(const std::shared_ptr& mmapNetwork) const override; + NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const override; _ze_result_t destroyGraph(ze_graph_handle_t graphHandle) override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers_interface.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers_interface.hpp index ac44f9853e11e3..b37561b31e70c2 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers_interface.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers_interface.hpp @@ -7,6 +7,7 @@ #include #include "intel_npu/network_metadata.hpp" +#include "openvino/runtime/aligned_buffer.hpp" namespace intel_npu { @@ -23,6 +24,8 @@ class ZeGraphExtWrappersInterface { virtual ze_graph_handle_t getGraphHandle(const std::vector& network) const = 0; + virtual ze_graph_handle_t getGraphHandle(const std::shared_ptr& mmapNetwork) const = 0; + virtual NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const = 0; virtual _ze_result_t destroyGraph(ze_graph_handle_t graphHandle) = 0; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index b4da8a2bcc316b..cc4a4c9e12b4a3 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -141,9 +141,10 @@ std::string rankToLegacyLayoutString(const size_t rank) { namespace intel_npu { -DriverCompilerAdapter::DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct) +DriverCompilerAdapter::DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct, ov::intel_npu::CompilerType compilerType) : _zeroInitStruct(zeroInitStruct), - _logger("DriverCompilerAdapter", Logger::global().level()) { + _logger("DriverCompilerAdapter", Logger::global().level()), + ICompilerAdapter(compilerType) { _logger.debug("initialize DriverCompilerAdapter start"); uint32_t graphExtVersion = _zeroInitStruct->getGraphDdiTable().version(); @@ -246,6 +247,24 @@ std::shared_ptr DriverCompilerAdapter::parse(std::vector networ std::optional>(std::move(network))); } +std::shared_ptr DriverCompilerAdapter::parse(const std::shared_ptr& mmapNetwork, const Config& config) const { + OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); + + _logger.debug("parse start"); + ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(mmapNetwork); + _logger.debug("parse end"); + + OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); + auto networkMeta = _zeGraphExt->getNetworkMeta(graphHandle); + + return std::make_shared(_zeGraphExt, + _zeroInitStruct, + graphHandle, + std::move(networkMeta), + config, + std::nullopt); +} + ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, const Config& config) const { OV_ITT_TASK_CHAIN(query_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "query"); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 73dd3817e24812..96c3ae1ea0c501 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -52,9 +52,10 @@ ov::SoPtr loadCompiler(const std::string& libpath) { namespace intel_npu { -PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct) +PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, ov::intel_npu::CompilerType compilerType) : _zeroInitStruct(zeroInitStruct), - _logger("PluginCompilerAdapter", Logger::global().level()) { + _logger("PluginCompilerAdapter", Logger::global().level()), + ICompilerAdapter(compilerType) { _logger.debug("initialize PluginCompilerAdapter start"); _logger.info("MLIR compiler will be used."); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index fad389ca30e0c7..778102eccd930f 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -417,6 +417,30 @@ ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const std:: return graphHandle; } +template +ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const std::shared_ptr& mmapNetwork) const { + ze_graph_handle_t graphHandle; + + if (mmapNetwork->size() == 0) { + OPENVINO_THROW("Empty blob"); + } + + ze_graph_desc_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, + nullptr, + ZE_GRAPH_FORMAT_NATIVE, + mmapNetwork->size(), + reinterpret_cast(mmapNetwork->get_ptr()), + nullptr}; + + auto result = _zeroInitStruct->getGraphDdiTable().pfnCreate(_zeroInitStruct->getContext(), + _zeroInitStruct->getDevice(), + &desc, + &graphHandle); + THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnCreate", result, _zeroInitStruct->getGraphDdiTable()); + + return graphHandle; +} + /** * @brief Extracts the I/O metadata from Level Zero specific structures and converts them into OpenVINO specific * ones. diff --git a/src/plugins/intel_npu/src/plugin/include/metrics.hpp b/src/plugins/intel_npu/src/plugin/include/metrics.hpp index 7bce9eb0881a51..9dc24908633c5a 100644 --- a/src/plugins/intel_npu/src/plugin/include/metrics.hpp +++ b/src/plugins/intel_npu/src/plugin/include/metrics.hpp @@ -67,7 +67,7 @@ class Metrics final { ov::intel_npu::batch_mode.name(), ov::hint::execution_mode.name()}; - const std::vector _internalSupportedProperties = {ov::internal::caching_properties.name()}; + const std::vector _internalSupportedProperties = {ov::internal::caching_properties.name(), ov::internal::caching_with_mmap.name()}; // Metric to provide a hint for a range for number of async infer requests. (bottom bound, upper bound, step) const std::tuple _rangeForAsyncInferRequests{1u, 10u, 1u}; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 9f77d952fd813b..fe171293fe76d8 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -21,6 +21,7 @@ #include "openvino/op/parameter.hpp" #include "openvino/runtime/intel_npu/properties.hpp" #include "openvino/runtime/properties.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "plugin_compiler_adapter.hpp" #include "remote_context.hpp" #include "zero_backend.hpp" @@ -768,16 +769,27 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c try { auto compiler = getCompiler(localConfig); - auto graphSize = getFileSize(stream); - - std::vector blob(graphSize); - stream.read(reinterpret_cast(blob.data()), graphSize); - if (!stream) { - OPENVINO_THROW("Failed to read data from stream!"); + std::shared_ptr graph; + if (compiler->getCompilerType() == ov::intel_npu::CompilerType::DRIVER) { + if (auto mmap_buffer = dynamic_cast(stream.rdbuf())) { + graph = compiler->parse(mmap_buffer->get_buffer(), localConfig); + goto GRAPH_PARSED; + } } - _logger.debug("Successfully read %zu bytes into blob.", graphSize); - auto graph = compiler->parse(std::move(blob), localConfig); + { + auto graphSize = getFileSize(stream); + + std::vector blob(graphSize); + stream.read(reinterpret_cast(blob.data()), graphSize); + if (!stream) { + OPENVINO_THROW("Failed to read data from stream!"); + } + _logger.debug("Successfully read %zu bytes into blob.", graphSize); + + graph = compiler->parse(std::move(blob), localConfig); + } +GRAPH_PARSED: graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -835,15 +847,15 @@ std::unique_ptr Plugin::getCompiler(const Config& config) cons switch (compilerType) { case ov::intel_npu::CompilerType::MLIR: { if (_backends->getBackendName() != "LEVEL0") { - return std::make_unique(nullptr); + return std::make_unique(nullptr, compilerType); } auto zeroBackend = std::dynamic_pointer_cast(_backends->getIEngineBackend()._ptr); if (zeroBackend == nullptr) { - return std::make_unique(nullptr); + return std::make_unique(nullptr, compilerType); } - return std::make_unique(zeroBackend->getInitStruct()); + return std::make_unique(zeroBackend->getInitStruct(), compilerType); } case ov::intel_npu::CompilerType::DRIVER: { if (_backends->getBackendName() != "LEVEL0") { @@ -855,7 +867,7 @@ std::unique_ptr Plugin::getCompiler(const Config& config) cons OPENVINO_THROW("Failed to cast zeroBackend, zeroBackend is a nullptr"); } - return std::make_unique(zeroBackend->getInitStruct()); + return std::make_unique(zeroBackend->getInitStruct(), compilerType); } default: OPENVINO_THROW("Invalid NPU_COMPILER_TYPE");