Commit 2ebf043

Avoid creating new plugin option for CACHED_MODEL_BUFFER & use `ov::AnyMap` passed by `core` to extract value of memory mapped model buffer
MirceaDan99 committed Dec 11, 2024
1 parent 6b5a462 commit 2ebf043
Showing 4 changed files with 10 additions and 63 deletions.
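Context for the change: instead of round-tripping a raw buffer pointer through a string-typed plugin option, the plugin now reads the typed value straight out of the `ov::AnyMap` that `core` passes to `import_model`. The snippet below is a minimal, self-contained sketch of that pattern using only the public `ov::Any` API; the literal key "CACHED_MODEL_BUFFER" and the `shared_ptr<int>` payload are illustrative stand-ins (the real code uses `ov::internal::cached_model_buffer.name()` and `std::shared_ptr<ov::AlignedBuffer>`, as the diffs below show).

#include <memory>

#include "openvino/core/any.hpp"

int main() {
    ov::AnyMap properties;
    auto value = std::make_shared<int>(7);      // stand-in for the mapped model buffer
    properties["CACHED_MODEL_BUFFER"] = value;  // producer side: store the value as-is, no serialization

    // Consumer side: typed read-back, then erase the entry so it never
    // reaches the string-based config parsing.
    auto restored = properties.at("CACHED_MODEL_BUFFER").as<std::shared_ptr<int>>();
    properties.erase("CACHED_MODEL_BUFFER");
    return (*restored == 7) ? 0 : 1;
}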
6 changes: 0 additions & 6 deletions src/core/dev_api/openvino/runtime/shared_buffer.hpp
@@ -12,12 +12,6 @@ namespace ov {
 template <typename T>
 class SharedBuffer : public ov::AlignedBuffer {
 public:
-
-    SharedBuffer(const SharedBuffer&) = delete;
-    SharedBuffer(SharedBuffer&&) = default;
-    SharedBuffer& operator=(const SharedBuffer&) = delete;
-    SharedBuffer& operator=(SharedBuffer&&) = default;
-
     SharedBuffer(char* data, size_t size, const T& shared_object) : _shared_object(shared_object) {
         m_allocated_buffer = data;
         m_aligned_buffer = data;
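An aside on why these four declarations existed at all: they appear to have been needed only so that the now-removed CACHED_MODEL_BUFFER::parse (see runtime.cpp below) could move-assign one SharedBuffer into another to "steal" its resources. The sketch below is a generic C++ illustration, not OpenVINO code: assuming a class declares its own destructor (as buffer types typically do), the compiler stops generating move operations, so they must be defaulted by hand for std::move to perform a real move, while the copies are deleted to forbid accidental duplication of the underlying memory.

#include <utility>

struct Buffer {
    char* data = nullptr;

    Buffer() = default;
    ~Buffer() {}                     // user-declared destructor suppresses implicit moves

    Buffer(const Buffer&) = delete;  // mirror of the removed declarations
    Buffer(Buffer&&) = default;
    Buffer& operator=(const Buffer&) = delete;
    Buffer& operator=(Buffer&&) = default;
};

int main() {
    Buffer a;
    Buffer b = std::move(a);  // real move; without the defaulted move constructor
                              // this would not compile, since the copy is deleted
    (void)b;
    return 0;
}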
25 changes: 0 additions & 25 deletions src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp
@@ -288,29 +288,4 @@ struct RUN_INFERENCES_SEQUENTIALLY final : OptionBase<RUN_INFERENCES_SEQUENTIALL
     }
 };
 
-//
-// CACHED_MODEL_BUFFER
-//
-struct CACHED_MODEL_BUFFER final : OptionBase<CACHED_MODEL_BUFFER, std::shared_ptr<ov::AlignedBuffer>> {
-    static std::string_view key() {
-        return ov::internal::cached_model_buffer.name();
-    }
-
-    static constexpr std::string_view getTypeName() {
-        return "std::shared_ptr<ov::AlignedBuffer>";
-    }
-
-    static std::shared_ptr<ov::AlignedBuffer> defaultValue() {
-        return nullptr;
-    }
-
-    static OptionMode mode() {
-        return OptionMode::RunTime;
-    }
-
-    static std::shared_ptr<ov::AlignedBuffer> parse(std::string_view val);
-
-    static std::string toString(const std::shared_ptr<ov::AlignedBuffer>& val);
-};
-
 }  // namespace intel_npu
27 changes: 0 additions & 27 deletions src/plugins/intel_npu/src/al/src/config/runtime.cpp
@@ -30,7 +30,6 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) {
     desc.add<TURBO>();
     desc.add<BYPASS_UMD_CACHING>();
     desc.add<RUN_INFERENCES_SEQUENTIALLY>();
-    desc.add<CACHED_MODEL_BUFFER>();
 }
 
 // Heuristically obtained number. Varies depending on the values of PLATFORM and PERFORMANCE_HINT
@@ -158,29 +157,3 @@ std::string intel_npu::WORKLOAD_TYPE::toString(const ov::WorkloadType& val) {
     ss << val;
     return ss.str();
 }
-
-//
-// CACHED_MODEL_BUFFER
-//
-
-std::shared_ptr<ov::AlignedBuffer> intel_npu::CACHED_MODEL_BUFFER::parse(std::string_view val) {
-    std::istringstream ss = std::istringstream(std::string(val));
-    void* modelBufferPtr;
-
-    ss >> modelBufferPtr;
-    // If we don't "steal" below resources from the casted ov::AlignedBuffer, parsed blob will be freed
-    // after core.import_model causing problems at inference.get_profiling_info()
-    auto* modelBufferSO = dynamic_cast<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>*>(static_cast<ov::AlignedBuffer*>(modelBufferPtr));
-    std::shared_ptr<ov::MappedMemory> mappedMemorySOPtr;
-    auto modelBufferSOPtr = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(nullptr, 0, mappedMemorySOPtr);
-    *modelBufferSOPtr = std::move(*modelBufferSO);
-
-    return modelBufferSOPtr;
-}
-
-std::string intel_npu::CACHED_MODEL_BUFFER::toString(const std::shared_ptr<ov::AlignedBuffer>& val) {
-    std::ostringstream ss;
-    void* modelBufferPtr = static_cast<void*>(val.get());
-    ss << modelBufferPtr;
-    return ss.str();
-}
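For reference, the removed parse/toString pair relied on printing the raw buffer address into a string and reading it back, which stays safe only while both sides agree on the address format and the pointed-to object outlives the round trip. A minimal stand-alone illustration of that round trip (not OpenVINO code):

#include <iostream>
#include <sstream>

int main() {
    int payload = 42;

    std::ostringstream out;
    out << static_cast<void*>(&payload);           // toString(): pointer -> text

    std::istringstream in(out.str());
    void* raw = nullptr;
    in >> raw;                                     // parse(): text -> pointer again
    std::cout << *static_cast<int*>(raw) << "\n";  // prints 42 while `payload` is alive
    return 0;
}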
15 changes: 10 additions & 5 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -752,7 +752,14 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
     OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model");
     OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs");
 
-    const std::map<std::string, std::string> propertiesMap = any_copy(properties);
+    auto _properties = properties;
+    std::shared_ptr<ov::AlignedBuffer> modelBuffer;
+    if (_properties.count(ov::internal::cached_model_buffer.name())) {
+        modelBuffer = _properties.at(ov::internal::cached_model_buffer.name()).as<std::shared_ptr<ov::AlignedBuffer>>();
+        _properties.erase(ov::internal::cached_model_buffer.name());
+    }
+
+    const std::map<std::string, std::string> propertiesMap = any_copy(_properties);
     auto localConfig = merge_configs(_globalConfig, propertiesMap, OptionMode::RunTime);
     _logger.setLevel(localConfig.get<LOG_LEVEL>());
     const auto platform = _backends->getCompilationPlatform(localConfig.get<PLATFORM>(), localConfig.get<DEVICE_ID>());
@@ -767,8 +774,6 @@
                      "The usage of a compiled model can lead to undefined behavior. Please use OpenVINO IR instead!");
     }
 
-    auto model_buffer = localConfig.get<CACHED_MODEL_BUFFER>();
-
     OV_ITT_TASK_NEXT(PLUGIN_IMPORT_MODEL, "parse");
 
     std::shared_ptr<ov::ICompiledModel> compiledModel;
@@ -778,7 +783,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
 
     std::unique_ptr<BlobContainer> blobPtr;
 
-    if (model_buffer == nullptr) {
+    if (modelBuffer == nullptr) {
         auto graphSize = getFileSize(stream);
 
         std::vector<uint8_t> blob(graphSize);
@@ -790,7 +795,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
 
         blobPtr = std::move(std::make_unique<BlobContainerVector>(std::move(blob)));
     } else {
-        blobPtr = std::move(std::make_unique<BlobContainerAlignedBuffer>(model_buffer, stream.tellg()));
+        blobPtr = std::move(std::make_unique<BlobContainerAlignedBuffer>(modelBuffer, stream.tellg()));
     }
 
     auto graph = compiler->parse(std::move(blobPtr), localConfig);
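How the buffer gets into the map in the first place is not part of this diff. The sketch below is a hypothetical caller-side view, under the assumption that `core` memory-maps the cached blob and stores it under `ov::internal::cached_model_buffer` before calling `import_model`; `ov::load_mmap_object` and the `SharedBuffer` constructor match the public headers, while the helper name `make_import_properties` is invented for illustration.

#include <memory>
#include <string>

#include "openvino/core/any.hpp"
#include "openvino/runtime/internal_properties.hpp"
#include "openvino/runtime/shared_buffer.hpp"
#include "openvino/util/mmap_object.hpp"

// Hypothetical helper: wrap a memory-mapped blob so the NPU plugin can pick
// it up from the properties map inside import_model.
ov::AnyMap make_import_properties(const std::string& blob_path) {
    std::shared_ptr<ov::MappedMemory> mapped = ov::load_mmap_object(blob_path);
    std::shared_ptr<ov::AlignedBuffer> buffer =
        std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(mapped->data(),
                                                                              mapped->size(),
                                                                              mapped);
    ov::AnyMap properties;
    properties[ov::internal::cached_model_buffer.name()] = buffer;
    return properties;  // plugin->import_model(stream, properties) then extracts it
}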
