From 8cdc4ea30fc5e460e407170af6af46312ef82fc6 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Thu, 12 Dec 2024 16:37:04 +0200 Subject: [PATCH] Prepare `BlobContainerAlignedBuffer` for `OV versioning metadata` --- src/plugins/intel_cpu/src/plugin.cpp | 23 +++--- src/plugins/intel_cpu/src/plugin.h | 6 +- src/plugins/intel_cpu/src/utils/serialize.cpp | 70 +++++++++++-------- src/plugins/intel_cpu/src/utils/serialize.hpp | 8 ++- .../intel_npu/common/blob_container.hpp | 16 +++-- .../compiler_adapter/include/driver_graph.hpp | 2 +- .../src/compiler_adapter/src/driver_graph.cpp | 2 +- .../src/compiler_adapter/src/plugin_graph.cpp | 2 +- .../intel_npu/src/plugin/src/plugin.cpp | 20 +++--- 9 files changed, 86 insertions(+), 63 deletions(-) diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 27d906befbd2da..103c33b6c00be4 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -7,6 +7,7 @@ #include "cpu_streams_calculation.hpp" #include "internal_properties.hpp" #include "itt.h" +#include "openvino/op/paged_attention.hpp" #include "openvino/runtime/intel_cpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/properties.hpp" @@ -19,7 +20,6 @@ #include "utils/precision_support.h" #include "utils/serialize.hpp" #include "weights_cache.hpp" -#include "openvino/op/paged_attention.hpp" #if defined(__linux__) # include @@ -200,7 +200,7 @@ static Config::ModelType getModelType(const std::shared_ptr& model) return Config::ModelType::CNN; if ((op::util::has_op_with_type(model) && model->get_variables().size() > 0) || - op::util::has_op_with_type(model)) + op::util::has_op_with_type(model)) return Config::ModelType::LLM; return Config::ModelType::Unknown; @@ -441,15 +441,17 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio return decltype(ov::supported_properties)::value_type(std::move(supportedProperties)); } else if (ov::internal::supported_properties == name) { - return decltype(ov::internal::supported_properties)::value_type{ + return decltype(ov::internal::supported_properties)::value_type { ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, #if !defined(OPENVINO_ARCH_ARM) && !(defined(__APPLE__) || defined(__MACOSX)) - ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO}, #endif - ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, - ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), - ov::PropertyMutability::RO}}; + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, + ov::PropertyName { + ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO + } + }; } else if (name == ov::device::full_name) { return decltype(ov::device::full_name)::value_type(deviceFullName); } else if (name == ov::available_devices) { @@ -551,7 +553,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); - CacheDecrypt decrypt{ codec_xor }; + CacheDecrypt decrypt{codec_xor}; bool decript_from_string = false; if (config.count(ov::cache_encryption_callbacks.name())) { const auto& encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as(); @@ -572,7 +574,8 @@ std::shared_ptr Plugin::import_model(std::istream& model_str [this](const std::shared_ptr& model, const std::shared_ptr& weights) { return get_core()->read_model(model, weights); }, - decrypt, decript_from_string); + decrypt, + decript_from_string); std::shared_ptr model; deserializer >> model; diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 8973478d30403f..a67602ec4ece12 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -20,8 +20,7 @@ class Plugin : public ov::IPlugin { std::shared_ptr compile_model(const std::shared_ptr& model, const ov::AnyMap& properties, const ov::SoPtr& context) const override { - OPENVINO_THROW_NOT_IMPLEMENTED( - "compile_model with RemoteContext is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED("compile_model with RemoteContext is not supported by CPU plugin!"); }; void set_property(const ov::AnyMap& properties) override; @@ -30,8 +29,7 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& model, const ov::SoPtr& context, const ov::AnyMap& properties) const override { - OPENVINO_THROW_NOT_IMPLEMENTED( - "import_model with RemoteContext is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!"); }; ov::SupportedOpsMap query_model(const std::shared_ptr& model, diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp index 33d8140fbe4a84..55b53116e4ac01 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -14,7 +14,8 @@ namespace intel_cpu { ////////// ModelSerializer ////////// ModelSerializer::ModelSerializer(std::ostream& ostream, CacheEncrypt encrypt_fn) - : m_ostream(ostream), m_cache_encrypt(std::move(encrypt_fn)) {} + : m_ostream(ostream), + m_cache_encrypt(std::move(encrypt_fn)) {} void ModelSerializer::operator<<(const std::shared_ptr& model) { auto serialize_info = [&](std::ostream& stream) { @@ -35,22 +36,25 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream, ModelBuilder fn, const CacheDecrypt& decrypt_fn, bool decript_from_string) - : m_istream(model_stream), m_model_builder(std::move(fn)), m_decript_from_string(decript_from_string), m_model_buffer(model_buffer) { - if (m_decript_from_string) { - m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str; - } else { - m_cache_decrypt.m_decrypt_char = decrypt_fn.m_decrypt_char; - } + : m_istream(model_stream), + m_model_builder(std::move(fn)), + m_decript_from_string(decript_from_string), + m_model_buffer(model_buffer) { + if (m_decript_from_string) { + m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str; + } else { + m_cache_decrypt.m_decrypt_char = decrypt_fn.m_decrypt_char; } +} - void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) {} +void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) {} - void ModelDeserializer::operator>>(std::shared_ptr& model) { - if (m_model_buffer) { - process_mmap(model, m_model_buffer); - } else { - process_stream(model); - } +void ModelDeserializer::operator>>(std::shared_ptr& model) { + if (m_model_buffer) { + process_mmap(model, m_model_buffer); + } else { + process_stream(model); + } } void ModelDeserializer::process_mmap(std::shared_ptr& model, @@ -77,7 +81,10 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, // Read model input/output precisions. pugi::xml_document xml_in_out_doc; if (hdr.custom_data_size > 0lu) { - auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, hdr.custom_data_size, pugi::parse_default, pugi::encoding_utf8); + auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, + hdr.custom_data_size, + pugi::parse_default, + pugi::encoding_utf8); if (res.status != pugi::status_ok) { OPENVINO_THROW("[CPU] Could to deserialize custom data."); } @@ -86,7 +93,10 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, // Map blob content std::shared_ptr weights_buf; if (hdr.consts_size) { - weights_buf = std::make_shared>>(buffer_base + hdr.consts_offset, hdr.consts_size, mmemory); + weights_buf = + std::make_shared>>(buffer_base + hdr.consts_offset, + hdr.consts_size, + mmemory); } // XML content @@ -103,9 +113,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, xml_buff->assign(buffer_base + hdr.model_offset, hdr.model_size); } std::shared_ptr model_buf = - std::make_shared>>(&((*xml_buff)[0]), - hdr.model_size, - xml_buff); + std::make_shared>>(&((*xml_buff)[0]), hdr.model_size, xml_buff); model = m_model_builder(model_buf, weights_buf); @@ -150,7 +158,7 @@ void ModelDeserializer::process_stream(std::shared_ptr& model) { auto data_blob = std::make_shared(ov::element::u8, ov::Shape({hdr.consts_size})); m_istream.seekg(hdr.consts_offset); if (hdr.consts_size) { - m_istream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); + m_istream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); } // read XML content @@ -162,16 +170,20 @@ void ModelDeserializer::process_stream(std::shared_ptr& model) { if (m_decript_from_string) { *xml_string = m_cache_decrypt.m_decrypt_str(*xml_string); } else { - m_cache_decrypt.m_decrypt_char(const_cast(xml_string->data()), xml_string->data(), xml_string->size()); + m_cache_decrypt.m_decrypt_char(const_cast(xml_string->data()), + xml_string->data(), + xml_string->size()); } } - auto model_buf = std::make_shared>>(const_cast(xml_string->data()), - xml_string->size(), - xml_string); - auto weights_buf = std::make_shared>>(reinterpret_cast(data_blob->data(ov::element::u8)), - hdr.consts_size, - data_blob); + auto model_buf = + std::make_shared>>(const_cast(xml_string->data()), + xml_string->size(), + xml_string); + auto weights_buf = std::make_shared>>( + reinterpret_cast(data_blob->data(ov::element::u8)), + hdr.consts_size, + data_blob); model = m_model_builder(model_buf, weights_buf); @@ -180,5 +192,5 @@ void ModelDeserializer::process_stream(std::shared_ptr& model) { set_info(root, model); } -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp index 4dfdd6b22afbd4..0821b1160c38d7 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.hpp +++ b/src/plugins/intel_cpu/src/utils/serialize.hpp @@ -29,7 +29,9 @@ class ModelSerializer { class ModelDeserializer { public: - typedef std::function(const std::shared_ptr&, const std::shared_ptr&)> ModelBuilder; + typedef std::function(const std::shared_ptr&, + const std::shared_ptr&)> + ModelBuilder; ModelDeserializer(std::istream& model, std::shared_ptr model_buffer, @@ -55,5 +57,5 @@ class ModelDeserializer { std::shared_ptr m_model_buffer; }; -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp index fbceccb26824e0..6dcc30c487e46a 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp @@ -46,16 +46,19 @@ class BlobContainerVector : public BlobContainer { class BlobContainerAlignedBuffer : public BlobContainer { public: - BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, size_t offset) - : _ownershipBlob(blobSO), - _offset(offset) {} + BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, + size_t ovHeaderOffset, + uint64_t blobSize) + : _blobSize(blobSize), + _ovHeaderOffset(ovHeaderOffset), + _ownershipBlob(blobSO) {} void* get_ptr() override { - return _ownershipBlob->get_ptr(_offset); + return _ownershipBlob->get_ptr(_ovHeaderOffset); } size_t size() const override { - return _ownershipBlob->size(); + return _blobSize; } bool release_from_memory() override { @@ -63,8 +66,9 @@ class BlobContainerAlignedBuffer : public BlobContainer { } private: + uint64_t _blobSize; + size_t _ovHeaderOffset; std::shared_ptr _ownershipBlob; - size_t _offset; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index 2c845227e8aac7..92063703b6f577 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -21,7 +21,7 @@ class DriverGraph final : public IGraph { ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::unique_ptr blob); + std::unique_ptr blobPtr); size_t export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index f34c72ad015cf5..52bdf55cb28cf5 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -52,7 +52,7 @@ size_t DriverGraph::export_blob(std::ostream& stream) const { if (_logger.level() >= ov::log::Level::INFO) { std::uint32_t result = 1171117u; - for (const uint8_t* it = blobPtr; it != blobPtr + blobSize; ++it) { + for (const uint8_t* it = blobPtr; it != blobPtr + blobSize; ++it) { result = ((result << 7) + result) + static_cast(*it); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index a54d14c0462587..90e531d5b940c7 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -30,7 +30,7 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, initialize(config); } -void PluginGraph::export_blob(std::ostream& stream) const { +size_t PluginGraph::export_blob(std::ostream& stream) const { stream.write(reinterpret_cast(_blobPtr->get_ptr()), _blobPtr->size()); if (!stream) { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 29b675b9baa659..313dd1aeea6534 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -7,11 +7,8 @@ #include #include "compiled_model.hpp" -#include "npuw/compiled_model.hpp" -#include "npuw/llm_compiled_model.hpp" -#include "npuw/serialization.hpp" -#include "driver_compiler_adapter.hpp" #include "compiler_adapter_factory.hpp" +#include "driver_compiler_adapter.hpp" #include "intel_npu/common/device_helpers.hpp" #include "intel_npu/common/icompiler_adapter.hpp" #include "intel_npu/common/igraph.hpp" @@ -23,6 +20,8 @@ #include "intel_npu/utils/zero/zero_init.hpp" #include "metadata.hpp" #include "npuw/compiled_model.hpp" +#include "npuw/llm_compiled_model.hpp" +#include "npuw/serialization.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/parameter.hpp" #include "openvino/runtime/intel_npu/properties.hpp" @@ -762,7 +761,8 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c std::shared_ptr modelBuffer; if (npu_plugin_properties.count(ov::internal::cached_model_buffer.name())) { - modelBuffer = npu_plugin_properties.at(ov::internal::cached_model_buffer.name()).as>(); + modelBuffer = + npu_plugin_properties.at(ov::internal::cached_model_buffer.name()).as>(); npu_plugin_properties.erase(ov::internal::cached_model_buffer.name()); } @@ -790,11 +790,15 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c CompilerAdapterFactory compilerAdapterFactory; auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); + auto storedMeta = read_metadata_from(stream); + if (!storedMeta->is_compatible()) { + OPENVINO_THROW("Incompatible blob version!"); + } + std::unique_ptr blobPtr; + auto graphSize = storedMeta->get_blob_size(); if (modelBuffer == nullptr) { - auto graphSize = getFileSize(stream); - std::vector blob(graphSize); stream.read(reinterpret_cast(blob.data()), graphSize); if (!stream) { @@ -804,7 +808,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c blobPtr = std::make_unique(std::move(blob)); } else { - blobPtr = std::make_unique(modelBuffer, stream.tellg()); + blobPtr = std::make_unique(modelBuffer, stream.tellg(), graphSize); } auto graph = compiler->parse(std::move(blobPtr), localConfig);