Skip to content

Commit

Permalink
Prepare BlobContainerAlignedBuffer for OV versioning metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
MirceaDan99 committed Jan 14, 2025
1 parent 082c52d commit 8cdc4ea
Show file tree
Hide file tree
Showing 9 changed files with 86 additions and 63 deletions.
23 changes: 13 additions & 10 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "cpu_streams_calculation.hpp"
#include "internal_properties.hpp"
#include "itt.h"
#include "openvino/op/paged_attention.hpp"
#include "openvino/runtime/intel_cpu/properties.hpp"
#include "openvino/runtime/internal_properties.hpp"
#include "openvino/runtime/properties.hpp"
Expand All @@ -19,7 +20,6 @@
#include "utils/precision_support.h"
#include "utils/serialize.hpp"
#include "weights_cache.hpp"
#include "openvino/op/paged_attention.hpp"

#if defined(__linux__)
# include <signal.h>
Expand Down Expand Up @@ -200,7 +200,7 @@ static Config::ModelType getModelType(const std::shared_ptr<const Model>& model)
return Config::ModelType::CNN;

if ((op::util::has_op_with_type<op::v13::ScaledDotProductAttention>(model) && model->get_variables().size() > 0) ||
op::util::has_op_with_type<ov::op::PagedAttentionExtension>(model))
op::util::has_op_with_type<ov::op::PagedAttentionExtension>(model))
return Config::ModelType::LLM;

return Config::ModelType::Unknown;
Expand Down Expand Up @@ -441,15 +441,17 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio

return decltype(ov::supported_properties)::value_type(std::move(supportedProperties));
} else if (ov::internal::supported_properties == name) {
return decltype(ov::internal::supported_properties)::value_type{
return decltype(ov::internal::supported_properties)::value_type {
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
#if !defined(OPENVINO_ARCH_ARM) && !(defined(__APPLE__) || defined(__MACOSX))
ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO},
#endif
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(),
ov::PropertyMutability::RO}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName {
ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO
}
};
} else if (name == ov::device::full_name) {
return decltype(ov::device::full_name)::value_type(deviceFullName);
} else if (name == ov::available_devices) {
Expand Down Expand Up @@ -551,7 +553,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model");

CacheDecrypt decrypt{ codec_xor };
CacheDecrypt decrypt{codec_xor};
bool decript_from_string = false;
if (config.count(ov::cache_encryption_callbacks.name())) {
const auto& encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as<EncryptionCallbacks>();
Expand All @@ -572,7 +574,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_str
[this](const std::shared_ptr<ov::AlignedBuffer>& model, const std::shared_ptr<ov::AlignedBuffer>& weights) {
return get_core()->read_model(model, weights);
},
decrypt, decript_from_string);
decrypt,
decript_from_string);

std::shared_ptr<ov::Model> model;
deserializer >> model;
Expand Down
6 changes: 2 additions & 4 deletions src/plugins/intel_cpu/src/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ class Plugin : public ov::IPlugin {
std::shared_ptr<ov::ICompiledModel> compile_model(const std::shared_ptr<const ov::Model>& model,
const ov::AnyMap& properties,
const ov::SoPtr<ov::IRemoteContext>& context) const override {
OPENVINO_THROW_NOT_IMPLEMENTED(
"compile_model with RemoteContext is not supported by CPU plugin!");
OPENVINO_THROW_NOT_IMPLEMENTED("compile_model with RemoteContext is not supported by CPU plugin!");
};

void set_property(const ov::AnyMap& properties) override;
Expand All @@ -30,8 +29,7 @@ class Plugin : public ov::IPlugin {
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override {
OPENVINO_THROW_NOT_IMPLEMENTED(
"import_model with RemoteContext is not supported by CPU plugin!");
OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!");
};

ov::SupportedOpsMap query_model(const std::shared_ptr<const ov::Model>& model,
Expand Down
70 changes: 41 additions & 29 deletions src/plugins/intel_cpu/src/utils/serialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ namespace intel_cpu {
////////// ModelSerializer //////////

ModelSerializer::ModelSerializer(std::ostream& ostream, CacheEncrypt encrypt_fn)
: m_ostream(ostream), m_cache_encrypt(std::move(encrypt_fn)) {}
: m_ostream(ostream),
m_cache_encrypt(std::move(encrypt_fn)) {}

void ModelSerializer::operator<<(const std::shared_ptr<ov::Model>& model) {
auto serialize_info = [&](std::ostream& stream) {
Expand All @@ -35,22 +36,25 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream,
ModelBuilder fn,
const CacheDecrypt& decrypt_fn,
bool decript_from_string)
: m_istream(model_stream), m_model_builder(std::move(fn)), m_decript_from_string(decript_from_string), m_model_buffer(model_buffer) {
if (m_decript_from_string) {
m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str;
} else {
m_cache_decrypt.m_decrypt_char = decrypt_fn.m_decrypt_char;
}
: m_istream(model_stream),
m_model_builder(std::move(fn)),
m_decript_from_string(decript_from_string),
m_model_buffer(model_buffer) {
if (m_decript_from_string) {
m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str;
} else {
m_cache_decrypt.m_decrypt_char = decrypt_fn.m_decrypt_char;
}
}

void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model) {}
void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model) {}

void ModelDeserializer::operator>>(std::shared_ptr<ov::Model>& model) {
if (m_model_buffer) {
process_mmap(model, m_model_buffer);
} else {
process_stream(model);
}
void ModelDeserializer::operator>>(std::shared_ptr<ov::Model>& model) {
if (m_model_buffer) {
process_mmap(model, m_model_buffer);
} else {
process_stream(model);
}
}

void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
Expand All @@ -77,7 +81,10 @@ void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
// Read model input/output precisions.
pugi::xml_document xml_in_out_doc;
if (hdr.custom_data_size > 0lu) {
auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, hdr.custom_data_size, pugi::parse_default, pugi::encoding_utf8);
auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset,
hdr.custom_data_size,
pugi::parse_default,
pugi::encoding_utf8);
if (res.status != pugi::status_ok) {
OPENVINO_THROW("[CPU] Could to deserialize custom data.");
}
Expand All @@ -86,7 +93,10 @@ void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
// Map blob content
std::shared_ptr<ov::AlignedBuffer> weights_buf;
if (hdr.consts_size) {
weights_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::AlignedBuffer>>>(buffer_base + hdr.consts_offset, hdr.consts_size, mmemory);
weights_buf =
std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::AlignedBuffer>>>(buffer_base + hdr.consts_offset,
hdr.consts_size,
mmemory);
}

// XML content
Expand All @@ -103,9 +113,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
xml_buff->assign(buffer_base + hdr.model_offset, hdr.model_size);
}
std::shared_ptr<ov::AlignedBuffer> model_buf =
std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(&((*xml_buff)[0]),
hdr.model_size,
xml_buff);
std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(&((*xml_buff)[0]), hdr.model_size, xml_buff);

model = m_model_builder(model_buf, weights_buf);

Expand Down Expand Up @@ -150,7 +158,7 @@ void ModelDeserializer::process_stream(std::shared_ptr<ov::Model>& model) {
auto data_blob = std::make_shared<ov::Tensor>(ov::element::u8, ov::Shape({hdr.consts_size}));
m_istream.seekg(hdr.consts_offset);
if (hdr.consts_size) {
m_istream.read(static_cast<char *>(data_blob->data(ov::element::u8)), hdr.consts_size);
m_istream.read(static_cast<char*>(data_blob->data(ov::element::u8)), hdr.consts_size);
}

// read XML content
Expand All @@ -162,16 +170,20 @@ void ModelDeserializer::process_stream(std::shared_ptr<ov::Model>& model) {
if (m_decript_from_string) {
*xml_string = m_cache_decrypt.m_decrypt_str(*xml_string);
} else {
m_cache_decrypt.m_decrypt_char(const_cast<char*>(xml_string->data()), xml_string->data(), xml_string->size());
m_cache_decrypt.m_decrypt_char(const_cast<char*>(xml_string->data()),
xml_string->data(),
xml_string->size());
}
}

auto model_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(const_cast<char*>(xml_string->data()),
xml_string->size(),
xml_string);
auto weights_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::Tensor>>>(reinterpret_cast<char*>(data_blob->data(ov::element::u8)),
hdr.consts_size,
data_blob);
auto model_buf =
std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(const_cast<char*>(xml_string->data()),
xml_string->size(),
xml_string);
auto weights_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::Tensor>>>(
reinterpret_cast<char*>(data_blob->data(ov::element::u8)),
hdr.consts_size,
data_blob);

model = m_model_builder(model_buf, weights_buf);

Expand All @@ -180,5 +192,5 @@ void ModelDeserializer::process_stream(std::shared_ptr<ov::Model>& model) {
set_info(root, model);
}

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov
8 changes: 5 additions & 3 deletions src/plugins/intel_cpu/src/utils/serialize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ class ModelSerializer {

class ModelDeserializer {
public:
typedef std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&, const std::shared_ptr<ov::AlignedBuffer>&)> ModelBuilder;
typedef std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&,
const std::shared_ptr<ov::AlignedBuffer>&)>
ModelBuilder;

ModelDeserializer(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
Expand All @@ -55,5 +57,5 @@ class ModelDeserializer {
std::shared_ptr<ov::AlignedBuffer> m_model_buffer;
};

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -46,25 +46,29 @@ class BlobContainerVector : public BlobContainer {

// BlobContainer implementation whose data lives inside a shared ov::AlignedBuffer.
// The container keeps a shared_ptr to that buffer and hands out a pointer obtained
// from it at a stored offset.
//
// NOTE(review): this span is taken from a rendered diff, so the pre-change and
// post-change lines of the commit appear side by side (two constructors, two
// `return` statements per accessor, both `_offset` and `_ovHeaderOffset`). The
// three-argument constructor and the `_blobSize` / `_ovHeaderOffset` members look
// like the post-change version - confirm against the actual header before relying
// on either variant.
class BlobContainerAlignedBuffer : public BlobContainer {
public:
// Two-argument form: stores the shared buffer and the offset later passed to
// ov::AlignedBuffer::get_ptr(). Appears to be the pre-change constructor.
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO, size_t offset)
: _ownershipBlob(blobSO),
_offset(offset) {}
// Three-argument form: additionally records an explicit blob size, so size() no
// longer has to report the size of the whole underlying buffer.
// @param blobSO          shared buffer that holds the blob
// @param ovHeaderOffset  offset handed to ov::AlignedBuffer::get_ptr() by get_ptr()
// @param blobSize        value reported by size()
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO,
size_t ovHeaderOffset,
uint64_t blobSize)
: _blobSize(blobSize),
_ovHeaderOffset(ovHeaderOffset),
_ownershipBlob(blobSO) {}

// Returns the pointer the underlying buffer yields for the stored offset.
// The first `return` uses `_offset`, the second uses `_ovHeaderOffset`; only one
// of them belongs to any given revision of the file (see NOTE above).
void* get_ptr() override {
return _ownershipBlob->get_ptr(_offset);
return _ownershipBlob->get_ptr(_ovHeaderOffset);
}

// Reports the blob size. One variant forwards the size of the whole underlying
// buffer, the other returns the size supplied at construction.
size_t size() const override {
return _ownershipBlob->size();
return _blobSize;
}

// Always returns false; nothing is released here.
bool release_from_memory() override {
return false;
}

private:
// Declaration order (_blobSize, _ovHeaderOffset, _ownershipBlob) matches the
// member-initializer order of the three-argument constructor.
uint64_t _blobSize;
size_t _ovHeaderOffset;
// Shared handle to the buffer that holds the blob data.
std::shared_ptr<ov::AlignedBuffer> _ownershipBlob;
// Offset used by the two-argument constructor variant.
size_t _offset;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class DriverGraph final : public IGraph {
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
const Config& config,
std::unique_ptr<BlobContainer> blob);
std::unique_ptr<BlobContainer> blobPtr);

size_t export_blob(std::ostream& stream) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ size_t DriverGraph::export_blob(std::ostream& stream) const {

if (_logger.level() >= ov::log::Level::INFO) {
std::uint32_t result = 1171117u;
for (const uint8_t* it = blobPtr; it != blobPtr + blobSize; ++it) {
for (const uint8_t* it = blobPtr; it != blobPtr + blobSize; ++it) {
result = ((result << 7) + result) + static_cast<uint32_t>(*it);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
initialize(config);
}

void PluginGraph::export_blob(std::ostream& stream) const {
size_t PluginGraph::export_blob(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(_blobPtr->get_ptr()), _blobPtr->size());

if (!stream) {
Expand Down
20 changes: 12 additions & 8 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,8 @@
#include <fstream>

#include "compiled_model.hpp"
#include "npuw/compiled_model.hpp"
#include "npuw/llm_compiled_model.hpp"
#include "npuw/serialization.hpp"
#include "driver_compiler_adapter.hpp"
#include "compiler_adapter_factory.hpp"
#include "driver_compiler_adapter.hpp"
#include "intel_npu/common/device_helpers.hpp"
#include "intel_npu/common/icompiler_adapter.hpp"
#include "intel_npu/common/igraph.hpp"
Expand All @@ -23,6 +20,8 @@
#include "intel_npu/utils/zero/zero_init.hpp"
#include "metadata.hpp"
#include "npuw/compiled_model.hpp"
#include "npuw/llm_compiled_model.hpp"
#include "npuw/serialization.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"
Expand Down Expand Up @@ -762,7 +761,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c

std::shared_ptr<ov::AlignedBuffer> modelBuffer;
if (npu_plugin_properties.count(ov::internal::cached_model_buffer.name())) {
modelBuffer = npu_plugin_properties.at(ov::internal::cached_model_buffer.name()).as<std::shared_ptr<ov::AlignedBuffer>>();
modelBuffer =
npu_plugin_properties.at(ov::internal::cached_model_buffer.name()).as<std::shared_ptr<ov::AlignedBuffer>>();
npu_plugin_properties.erase(ov::internal::cached_model_buffer.name());
}

Expand Down Expand Up @@ -790,11 +790,15 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
CompilerAdapterFactory compilerAdapterFactory;
auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig);

auto storedMeta = read_metadata_from(stream);
if (!storedMeta->is_compatible()) {
OPENVINO_THROW("Incompatible blob version!");
}

std::unique_ptr<BlobContainer> blobPtr;
auto graphSize = storedMeta->get_blob_size();

if (modelBuffer == nullptr) {
auto graphSize = getFileSize(stream);

std::vector<uint8_t> blob(graphSize);
stream.read(reinterpret_cast<char*>(blob.data()), graphSize);
if (!stream) {
Expand All @@ -804,7 +808,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c

blobPtr = std::make_unique<BlobContainerVector>(std::move(blob));
} else {
blobPtr = std::make_unique<BlobContainerAlignedBuffer>(modelBuffer, stream.tellg());
blobPtr = std::make_unique<BlobContainerAlignedBuffer>(modelBuffer, stream.tellg(), graphSize);
}

auto graph = compiler->parse(std::move(blobPtr), localConfig);
Expand Down

0 comments on commit 8cdc4ea

Please sign in to comment.