Commit 5f43fa8

Refactor import_model new API to accept only either std::istream or `ov::AlignedBuffer`

MirceaDan99 committed Nov 26, 2024
1 parent 41b08f6 commit 5f43fa8
Showing 11 changed files with 121 additions and 55 deletions.
6 changes: 2 additions & 4 deletions src/inference/dev_api/openvino/runtime/iplugin.hpp
@@ -193,8 +193,7 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this<IPlugin
* @param properties A ov::AnyMap of properties
* @return An Compiled model
*/
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const;

/**
@@ -207,8 +206,7 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this<IPlugin
* @param properties A ov::AnyMap of properties
* @return An Compiled model
*/
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const;
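For orientation, here is a minimal sketch (not part of this commit) of how a plugin could consume the buffer that the new overloads receive; `peek_header` is an illustrative helper name, and the include path is assumed from the current core layout:

```cpp
#include <algorithm>
#include <memory>
#include <string>

#include "openvino/runtime/aligned_buffer.hpp"

// Illustrative helper (not part of this commit): peek at the start of the cached
// blob held by the ov::AlignedBuffer that the new overloads receive. AlignedBuffer
// exposes the raw pointer and size of the (possibly memory-mapped) blob, so a
// plugin can parse it in place instead of copying it out of a stream.
static std::string peek_header(const std::shared_ptr<ov::AlignedBuffer>& model_buffer,
                               size_t header_size) {
    header_size = std::min(header_size, model_buffer->size());
    return std::string(model_buffer->get_ptr<char>(), header_size);
}
```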

4 changes: 2 additions & 2 deletions src/inference/src/dev/core_impl.cpp
@@ -1460,8 +1460,8 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
}
}
if (model_buffer) {
compiled_model = context ? plugin.import_model(networkStream, model_buffer, context, update_config)
: plugin.import_model(networkStream, model_buffer, update_config);
compiled_model = context ? plugin.import_model(model_buffer, context, update_config)
: plugin.import_model(model_buffer, update_config);
} else {
compiled_model = context ? plugin.import_model(networkStream, context, update_config)
: plugin.import_model(networkStream, update_config);
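For context, a hedged user-level sketch of how this dispatch is reached: with a cache directory set, the second `compile_model` call goes through `load_model_from_cache`, and `model_buffer` is non-null only when the cached blob was memory-mapped (the exact condition depends on core settings not shown in this hunk). The file name and device below are illustrative:

```cpp
#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("model_cache"));

    // First call compiles the model and writes the blob into the cache directory.
    auto first = core.compile_model("model.xml", "CPU");

    // Second call reaches load_model_from_cache(); depending on how the cached
    // blob is opened (memory-mapped buffer vs. plain stream), the plugin gets
    // either the ov::AlignedBuffer overload or the std::istream overload above.
    auto second = core.compile_model("model.xml", "CPU");
    return 0;
}
```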
6 changes: 2 additions & 4 deletions src/inference/src/dev/iplugin.cpp
@@ -57,14 +57,12 @@ const std::string& ov::IPlugin::get_device_name() const {
return m_plugin_name;
}

std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const{
OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented");
}

std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const{
OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented");
9 changes: 4 additions & 5 deletions src/inference/src/dev/plugin.cpp
@@ -79,15 +79,14 @@ ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so});
}

ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model, std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const {
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so});
ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const {
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model_buffer, properties), m_so});
}

ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const {
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, context, config), m_so});
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model_buffer, context, config), m_so});
}

ov::SoPtr<ov::IRemoteContext> ov::Plugin::create_context(const AnyMap& params) const {
5 changes: 2 additions & 3 deletions src/inference/src/dev/plugin.hpp
@@ -59,10 +59,9 @@ class Plugin {
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const;

SoPtr<ov::ICompiledModel> import_model(std::istream& model, std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const;
SoPtr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const;

SoPtr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
SoPtr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const;

46 changes: 42 additions & 4 deletions src/plugins/intel_cpu/src/plugin.cpp
@@ -553,12 +553,48 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&

std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream,
const ov::AnyMap& config) const {
return import_model(model_stream, nullptr, config);
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model");

CacheDecrypt decrypt{ codec_xor };
bool decript_from_string = false;
if (config.count(ov::cache_encryption_callbacks.name())) {
auto encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as<EncryptionCallbacks>();
decrypt.m_decrypt_str = encryption_callbacks.decrypt;
decript_from_string = true;
}

ModelDeserializer deserializer(
model_stream,
nullptr,
[this](const std::shared_ptr<ov::AlignedBuffer>& model, const std::shared_ptr<ov::AlignedBuffer>& weights) {
return get_core()->read_model(model, weights);
},
decrypt, decript_from_string);

std::shared_ptr<ov::Model> model;
deserializer >> model;

Config conf = engConfig;
Config::ModelType modelType = getModelType(model);

// check ov::loaded_from_cache property and erase it to avoid exception in readProperties.
auto _config = config;
const auto& it = _config.find(ov::loaded_from_cache.name());
bool loaded_from_cache = false;
if (it != _config.end()) {
loaded_from_cache = it->second.as<bool>();
_config.erase(it);
}
conf.readProperties(_config, modelType);

// import config props from caching model
calculate_streams(conf, model, true);
auto compiled_model = std::make_shared<CompiledModel>(model, shared_from_this(), conf, loaded_from_cache);
return compiled_model;
}


std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& config) const {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model");

@@ -570,8 +606,10 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_str
decript_from_string = true;
}

std::stringstream empty_model_stream("");

ModelDeserializer deserializer(
model_stream,
empty_model_stream,
model_buffer,
[this](const std::shared_ptr<ov::AlignedBuffer>& model, const std::shared_ptr<ov::AlignedBuffer>& weights) {
return get_core()->read_model(model, weights);
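Since both CPU import paths above read `ov::cache_encryption_callbacks` from the config, here is a hedged user-level sketch of supplying those callbacks; the XOR lambda is only a trivial stand-in, not a recommendation, and the model path is illustrative:

```cpp
#include <string>

#include "openvino/runtime/core.hpp"
#include "openvino/runtime/properties.hpp"

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("model_cache"));

    ov::EncryptionCallbacks callbacks;
    // Trivial, illustrative transform only; a real deployment would plug in proper
    // encryption. XOR with a constant is its own inverse, so decrypt == encrypt.
    callbacks.encrypt = [](const std::string& s) {
        std::string r = s;
        for (auto& c : r)
            c ^= 0x5A;
        return r;
    };
    callbacks.decrypt = callbacks.encrypt;

    // The callbacks travel with the compile/import config; on a cache hit the CPU
    // plugin reads ov::cache_encryption_callbacks as shown in the hunk above.
    auto compiled = core.compile_model("model.xml", "CPU",
                                       ov::cache_encryption_callbacks(callbacks));
    return 0;
}
```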
6 changes: 2 additions & 4 deletions src/plugins/intel_cpu/src/plugin.h
@@ -34,11 +34,9 @@ class Plugin : public ov::IPlugin {
"import_model with RemoteContext is not supported by CPU plugin!");
};

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const override;
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override {
OPENVINO_THROW_NOT_IMPLEMENTED(
7 changes: 3 additions & 4 deletions src/plugins/intel_cpu/src/utils/serialize.cpp
@@ -47,14 +47,13 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream,

void ModelDeserializer::operator>>(std::shared_ptr<ov::Model>& model) {
if (m_model_buffer) {
process_mmap(model, m_model_buffer);
process_mmap(m_model_buffer);
} else {
process_stream(model);
}
}

void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
const std::shared_ptr<ov::AlignedBuffer>& mmemory) {
void ModelDeserializer::process_mmap(const std::shared_ptr<ov::AlignedBuffer>& mmemory) {
// Note: Don't use seekg with mmaped stream. This may affect the performance of some models.
// Get file size before seek content.
// Blob from cache may have other header, so need to skip this.
@@ -107,7 +106,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
hdr.model_size,
xml_buff);

model = m_model_builder(model_buf, weights_buf);
auto model = m_model_builder(model_buf, weights_buf);

// Set Info
pugi::xml_node root = xml_in_out_doc.child("cnndata");
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/utils/serialize.hpp
@@ -31,7 +31,7 @@ class ModelDeserializer {
public:
typedef std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&, const std::shared_ptr<ov::AlignedBuffer>&)> ModelBuilder;

ModelDeserializer(std::istream& model,
ModelDeserializer(std::istream& model_stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
ModelBuilder fn,
const CacheDecrypt& encrypt_fn,
@@ -44,7 +44,7 @@
protected:
static void set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model);

void process_mmap(std::shared_ptr<ov::Model>& model, const std::shared_ptr<ov::AlignedBuffer>& memory);
void process_mmap(const std::shared_ptr<ov::AlignedBuffer>& memory);

void process_stream(std::shared_ptr<ov::Model>& model);

6 changes: 2 additions & 4 deletions src/plugins/intel_npu/src/plugin/include/plugin.hpp
@@ -44,16 +44,14 @@ class Plugin : public ov::IPlugin {

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream, const ov::AnyMap& properties) const override;

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const override;

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override;

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override;

77 changes: 58 additions & 19 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -744,10 +744,58 @@ ov::SoPtr<ov::IRemoteContext> Plugin::get_default_context(const ov::AnyMap&) con
}

std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, const ov::AnyMap& properties) const {
return import_model(stream, nullptr, properties);
OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model");
OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs");

const std::map<std::string, std::string> propertiesMap = any_copy(properties);
auto localConfig = merge_configs(_globalConfig, propertiesMap, OptionMode::RunTime);
_logger.setLevel(localConfig.get<LOG_LEVEL>());
const auto platform = _backends->getCompilationPlatform(localConfig.get<PLATFORM>(), localConfig.get<DEVICE_ID>());
localConfig.update({{ov::intel_npu::platform.name(), platform}});
auto device = _backends->getDevice(localConfig.get<DEVICE_ID>());

set_batch_config(_backends->isBatchingSupported(), localConfig);

const auto loadedFromCache = localConfig.get<LOADED_FROM_CACHE>();
if (!loadedFromCache) {
_logger.warning(
"The usage of a compiled model can lead to undefined behavior. Please use OpenVINO IR instead!");
}

OV_ITT_TASK_NEXT(PLUGIN_IMPORT_MODEL, "parse");

std::shared_ptr<ov::ICompiledModel> compiledModel;

try {
auto compiler = getCompiler(localConfig);

auto graphSize = getFileSize(stream);
auto blobSO = std::make_shared<std::vector<uint8_t>>(graphSize);
stream.read(reinterpret_cast<char*>(blobSO->data()), graphSize);
if (!stream) {
OPENVINO_THROW("Failed to read data from stream!");
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);

auto graph = compiler->parse(std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blobSO->data()), graphSize, blobSO), localConfig);
graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));

const std::shared_ptr<ov::Model> modelDummy =
create_dummy_model(graph->get_metadata().inputs, graph->get_metadata().outputs);

compiledModel = std::make_shared<CompiledModel>(modelDummy, shared_from_this(), device, graph, localConfig);
} catch (const std::exception& ex) {
OPENVINO_THROW("Can't import network: ", ex.what());
} catch (...) {
OPENVINO_THROW("NPU import_model got unexpected exception from CompiledModel");
}

OV_ITT_TASK_SKIP(PLUGIN_IMPORT_MODEL);

return compiledModel;
}
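The stream overload above copies the blob into a `std::vector` and wraps it in `ov::SharedBuffer` so the compiler's `parse` sees the same buffer type in both overloads. A hedged standalone sketch of that wrapping pattern (the file path and helper name are illustrative):

```cpp
#include <cstdint>
#include <fstream>
#include <memory>
#include <string>
#include <vector>

#include "openvino/runtime/shared_buffer.hpp"

// Illustrative helper (not part of this commit): load a blob file and expose it as an
// ov::AlignedBuffer, mirroring how the stream overload above wraps the bytes it reads.
std::shared_ptr<ov::AlignedBuffer> wrap_blob(const std::string& path) {
    std::ifstream file(path, std::ios::binary | std::ios::ate);
    const auto size = static_cast<size_t>(file.tellg());
    file.seekg(0, std::ios::beg);

    auto storage = std::make_shared<std::vector<uint8_t>>(size);
    file.read(reinterpret_cast<char*>(storage->data()), size);

    // SharedBuffer derives from AlignedBuffer and keeps 'storage' alive for as long
    // as the returned buffer is referenced.
    return std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(
        reinterpret_cast<char*>(storage->data()), size, storage);
}
```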

std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const {
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const {
OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model");
OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs");

@@ -773,19 +821,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, s
try {
auto compiler = getCompiler(localConfig);

std::shared_ptr<IGraph> graph;
if (model_buffer != nullptr) {
graph = compiler->parse(model_buffer, localConfig);
} else {
auto graphSize = getFileSize(stream);
auto blobSO = std::make_shared<std::vector<uint8_t>>(graphSize);
stream.read(reinterpret_cast<char*>(blobSO->data()), graphSize);
if (!stream) {
OPENVINO_THROW("Failed to read data from stream!");
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);
graph = compiler->parse(std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blobSO->data()), graphSize, blobSO), localConfig);
}
auto graph = compiler->parse(model_buffer, localConfig);
graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));

const std::shared_ptr<ov::Model> modelDummy =
Expand All @@ -806,20 +842,23 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, s
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const {
auto casted = std::dynamic_pointer_cast<RemoteContextImpl>(context._ptr);
if (casted == nullptr) {
OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type");
}

return import_model(stream, nullptr, context, properties);
return import_model(stream, context, properties);
}

std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const {
auto casted = std::dynamic_pointer_cast<RemoteContextImpl>(context._ptr);
if (casted == nullptr) {
OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type");
}

return import_model(stream, model_buffer, properties);
return import_model(model_buffer, properties);
}

ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>& model,
