Skip to content

Commit

Permalink
Add BlobContainer class and derived classes for each `std::vector<uint8_t…
Browse files Browse the repository at this point in the history
…>` and `std::shared_ptr<ov::AlignedBuffer>` blob types
  • Loading branch information
MirceaDan99 committed Dec 9, 2024
1 parent 41cab91 commit d29e036
Show file tree
Hide file tree
Showing 12 changed files with 98 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,63 @@

namespace intel_npu {

/**
 * @brief Polymorphic base for objects that hold a compiled blob.
 *
 * Derived classes decide how the blob bytes are stored. The default implementation of every
 * accessor throws, so a derived class must override each method it intends to support.
 */
class BlobContainer {
public:
    /**
     * @brief Virtual destructor.
     *
     * Instances are owned and destroyed through `std::unique_ptr<BlobContainer>`; without a
     * virtual destructor, deleting a derived container through the base pointer is undefined
     * behaviour and the derived class's storage may never be released.
     */
    virtual ~BlobContainer() = default;

    /**
     * @brief Returns a pointer to the first byte of the blob.
     * @throws Always throws (via OPENVINO_THROW) in the base class.
     */
    virtual void* get_ptr() {
        OPENVINO_THROW("BlobContainer::get_ptr() method is not implemented!");
    }

    /**
     * @brief Returns the blob size in bytes.
     * @throws Always throws (via OPENVINO_THROW) in the base class.
     */
    virtual size_t size() const {
        OPENVINO_THROW("BlobContainer::size() method is not implemented!");
    }

    /**
     * @brief Asks the container to drop the blob storage it holds.
     * @return In derived classes, true if the storage was released and false otherwise.
     * @throws Always throws (via OPENVINO_THROW) in the base class.
     */
    virtual bool release_from_memory() {
        OPENVINO_THROW("BlobContainer::release_from_memory() method is not implemented!");
    }
};

/**
 * @brief BlobContainer that owns the blob bytes in a `std::vector<uint8_t>`.
 */
class BlobContainerVector : public BlobContainer {
public:
    /**
     * @brief Takes ownership of the given bytes.
     * @param blob Blob bytes; taken by value and moved into the container.
     */
    BlobContainerVector(std::vector<uint8_t> blob) : _ownershipBlob(std::move(blob)) {}

    /// @return Pointer to the first byte of the owned vector's storage.
    void* get_ptr() override {
        uint8_t* const firstByte = _ownershipBlob.data();
        return firstByte;
    }

    /// @return Number of bytes currently held by the owned vector.
    size_t size() const override {
        const size_t byteCount = _ownershipBlob.size();
        return byteCount;
    }

    /**
     * @brief Empties the owned vector and requests that its capacity be given back.
     * @return Always true.
     */
    bool release_from_memory() override {
        _ownershipBlob.clear();
        // shrink_to_fit() is what asks the vector to drop the now-unused capacity.
        _ownershipBlob.shrink_to_fit();
        return true;
    }

private:
    std::vector<uint8_t> _ownershipBlob;  // the blob bytes owned by this container
};

class BlobContainerAlignedBuffer : public BlobContainer {
public:
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO) : _ownershipBlob(blobSO) {}

void* get_ptr() override {
return _ownershipBlob->get_ptr();
}

size_t size() const override {
return _ownershipBlob->size();
}

bool release_from_memory() override {
return false;
}

private:
std::shared_ptr<ov::AlignedBuffer> _ownershipBlob;
};

class IGraph : public std::enable_shared_from_this<IGraph> {
public:
IGraph(ze_graph_handle_t handle,
Expand Down Expand Up @@ -90,6 +147,7 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
// first inference starts running
std::mutex _mutex;

<<<<<<< HEAD
<<<<<<< HEAD
std::vector<uint8_t> _blob;

Expand All @@ -106,6 +164,9 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
=======
std::shared_ptr<ov::AlignedBuffer> _blob;
>>>>>>> 25b5c05976 (Keep `shared_ptr` of blob in IGraph to fix `export_model` for import scenario)
=======
std::unique_ptr<BlobContainer> _blob;
>>>>>>> 94e33c4e24 (Add `BlobContainer` class and derivates for each `std::vector<uint8_t>` and `std::shared_ptr<ov::AlignedBuffer>` blob types)
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class ICompilerAdapter {
public:
virtual std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
const Config& config) const = 0;
virtual std::shared_ptr<IGraph> parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const = 0;
virtual std::shared_ptr<IGraph> parse(std::unique_ptr<BlobContainer> blobPtr, const Config& config) const = 0;
virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;

virtual ~ICompilerAdapter() = default;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const override;
std::shared_ptr<IGraph> parse(std::unique_ptr<BlobContainer> blobPtr, const Config& config) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class DriverGraph final : public IGraph {
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
const Config& config,
std::optional<std::shared_ptr<ov::AlignedBuffer>> blob);
std::optional<std::unique_ptr<BlobContainer>> blob);

void export_blob(std::ostream& stream) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const override;
std::shared_ptr<IGraph> parse(std::unique_ptr<BlobContainer> blobPtr, const Config& config) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class PluginGraph final : public IGraph {
const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
std::shared_ptr<ov::AlignedBuffer> blobSOPtr,
std::unique_ptr<BlobContainer> blobPtr,
const Config& config);

void export_blob(std::ostream& stream) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,11 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compile(const std::shared_ptr<con
std::nullopt);
}

std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const {
std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::unique_ptr<BlobContainer> blobPtr, const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");

_logger.debug("parse start");
ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(networkSOPtr->get_ptr()), networkSOPtr->size());
ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()), blobPtr->size());
_logger.debug("parse end");

OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta");
Expand All @@ -221,7 +221,7 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::shared_ptr<ov::Aligned
graphHandle,
std::move(networkMeta),
config,
std::optional<std::shared_ptr<ov::AlignedBuffer>>(std::move(networkSOPtr)));
std::optional<std::unique_ptr<BlobContainer>>(std::move(blobPtr)));
}

ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr<const ov::Model>& model,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,10 @@ bool DriverGraph::release_blob(const Config& config) {
return false;
}

if (_blob.use_count() > 1) {
// blob is not allocated by plugin, no need for memory optimization
return false;
if(!_blob->release_from_memory()) {
return false;
}

_blob.reset();

_logger.debug("Blob is released");

return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,36 +83,35 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con

_logger.debug("compile start");
auto networkDesc = _compiler->compile(model, config);
auto networkSO = std::make_shared<std::vector<uint8_t>>(std::move(networkDesc.compiledNetwork));
auto blobPtr = std::make_unique<BlobContainerVector>(std::move(networkDesc.compiledNetwork));
_logger.debug("compile end");

ze_graph_handle_t graphHandle = nullptr;

if (_zeGraphExt) {
// Depending on the config, we may get an error when trying to get the graph handle from the compiled network
try {
graphHandle = _zeGraphExt->getGraphHandle(networkSO->data(), networkSO->size());
graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()), blobPtr->size());
} catch (...) {
_logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not "
"allowed. Only exports are available");
}
}
auto networkSOPtr = std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(networkSO->data()), networkSO->size(), networkSO);
return std::make_shared<PluginGraph>(_zeGraphExt,
_compiler,
_zeroInitStruct,
graphHandle,
std::move(networkDesc.metadata),
networkSOPtr,
std::move(blobPtr),
config);
}

std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const {
std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::unique_ptr<BlobContainer> blobPtr, const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");

_logger.debug("parse start");
std::vector<uint8_t> network(networkSOPtr->size());
network.assign(reinterpret_cast<uint8_t*>(networkSOPtr->get_ptr()), reinterpret_cast<uint8_t*>(networkSOPtr->get_ptr()) + networkSOPtr->size());
std::vector<uint8_t> network(blobPtr->size());
network.assign(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()), reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()) + blobPtr->size());
auto networkMeta = _compiler->parse(network, config);
network.clear();
network.shrink_to_fit();
Expand All @@ -121,15 +120,15 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::shared_ptr<ov::Aligned
ze_graph_handle_t graphHandle = nullptr;

if (_zeGraphExt) {
graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(networkSOPtr->get_ptr()), networkSOPtr->size());
graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()), blobPtr->size());
}

return std::make_shared<PluginGraph>(_zeGraphExt,
_compiler,
_zeroInitStruct,
graphHandle,
std::move(networkMeta),
std::move(networkSOPtr),
std::move(blobPtr),
config);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,8 @@ void PluginGraph::export_blob(std::ostream& stream) const {

std::vector<ov::ProfilingInfo> PluginGraph::process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const {

// Need to fix increased memory usage below, ov::SharedBuffer won't permit us to get underlying shared buffer as it is private
// Only if we work with std::vector<uint8_t> blobs, but then IGraph needs to have 2 declarations for the same blob
// Maybe if we templatize blob in IGraph to be either std::vector<uint8_t> or std::shared_ptr<ov::AlignedBuffer>?
std::vector<uint8_t> blob(_blob->size());
blob.assign(reinterpret_cast<uint8_t*>(_blob->get_ptr()), reinterpret_cast<uint8_t*>(_blob->get_ptr()) + _blob->size());
blob.assign(reinterpret_cast<const uint8_t*>(_blob->get_ptr()), reinterpret_cast<const uint8_t*>(_blob->get_ptr()) + _blob->size());
return _compiler->process_profiling_output(profData, blob, config);
}

Expand Down
8 changes: 5 additions & 3 deletions src/plugins/intel_npu/src/plugin/include/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,16 @@ class Plugin : public ov::IPlugin {

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream, const ov::AnyMap& properties) const override;

std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const override;
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& /* unusedStream */,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const override;

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override;

std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override;

Expand Down
22 changes: 13 additions & 9 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -775,15 +775,16 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
auto compiler = getCompiler(localConfig);

auto graphSize = getFileSize(stream);
auto blobSO = std::make_shared<std::vector<uint8_t>>(graphSize);
stream.read(reinterpret_cast<char*>(blobSO->data()), graphSize);

std::vector<uint8_t> blob(graphSize);
stream.read(reinterpret_cast<char*>(blob.data()), graphSize);
if (!stream) {
OPENVINO_THROW("Failed to read data from stream!");
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);

auto blobSOPtr = std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blobSO->data()), graphSize, blobSO);
auto graph = compiler->parse(std::move(blobSOPtr), localConfig);
auto blobContainerPtr = std::make_unique<BlobContainerVector>(std::move(blob));
auto graph = compiler->parse(std::move(blobContainerPtr), localConfig);
graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));

const std::shared_ptr<ov::Model> modelDummy =
Expand All @@ -801,7 +802,9 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
return compiledModel;
}

std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const {
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& /* unusedStream */,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const {
OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model");
OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs");

Expand All @@ -826,8 +829,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::Ali

try {
auto compiler = getCompiler(localConfig);

auto graph = compiler->parse(model_buffer, localConfig);
auto blobContainerPtr = std::make_unique<BlobContainerAlignedBuffer>(model_buffer);
auto graph = compiler->parse(std::move(blobContainerPtr), localConfig);
graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));

const std::shared_ptr<ov::Model> modelDummy =
Expand Down Expand Up @@ -856,15 +859,16 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream,
return import_model(stream, context, properties);
}

std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const {
auto casted = std::dynamic_pointer_cast<RemoteContextImpl>(context._ptr);
if (casted == nullptr) {
OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type");
}

return import_model(model_buffer, properties);
return import_model(stream, model_buffer, properties);
}

ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>& model,
Expand Down

0 comments on commit d29e036

Please sign in to comment.