Skip to content

Commit

Permalink
Keep shared_ptr of blob in IGraph to fix export_model for import scenario
Browse files
Browse the repository at this point in the history
  • Loading branch information
MirceaDan99 committed Nov 20, 2024
1 parent 11714e1 commit 25b5c05
Show file tree
Hide file tree
Showing 10 changed files with 46 additions and 82 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,22 @@
#include "intel_npu/network_metadata.hpp"
#include "intel_npu/utils/zero/zero_utils.hpp"
#include "intel_npu/utils/zero/zero_wrappers.hpp"
#include "openvino/runtime/aligned_buffer.hpp"
#include "openvino/runtime/profiling_info.hpp"

namespace intel_npu {

class IGraph : public std::enable_shared_from_this<IGraph> {
public:
IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, std::optional<std::vector<uint8_t>> blob)
IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, std::optional<std::shared_ptr<ov::AlignedBuffer>> blob)
: _handle(handle),
_metadata(std::move(metadata)) {
if (blob.has_value()) {
_blob = std::move(*blob);
}
}

virtual void export_blob(std::ostream& stream) const = 0;
virtual void export_blob(std::ostream& stream) = 0;

virtual std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const = 0;
Expand Down Expand Up @@ -97,7 +98,7 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
// first inference starts running
std::mutex _mutex;

std::vector<uint8_t> _blob;
std::shared_ptr<ov::AlignedBuffer> _blob;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ class DriverCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(std::vector<uint8_t> network, const Config& config) const override;
// Vector-based overload required by the ICompilerAdapter interface: it unconditionally throws.
// Blobs are expected to be parsed through the std::shared_ptr<ov::AlignedBuffer> overload instead.
std::shared_ptr<IGraph> parse(std::vector<uint8_t> network, const Config& config) const override {
OPENVINO_THROW("CID should not parse from std::vector anymore!");
}

std::shared_ptr<IGraph> parse(const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork, const Config& config) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "intel_npu/common/igraph.hpp"
#include "intel_npu/utils/zero/zero_init.hpp"
#include "openvino/runtime/shared_buffer.hpp"
#include "ze_graph_ext_wrappers.hpp"

namespace intel_npu {
Expand All @@ -21,9 +22,9 @@ class DriverGraph final : public IGraph {
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
const Config& config,
std::optional<std::vector<uint8_t>> blob);
std::optional<std::shared_ptr<ov::AlignedBuffer>> blob);

void export_blob(std::ostream& stream) const override;
void export_blob(std::ostream& stream) override;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const override;
Expand All @@ -35,16 +36,10 @@ class DriverGraph final : public IGraph {
~DriverGraph() override;

private:
bool release_blob(const Config& config);

std::shared_ptr<ZeGraphExtWrappers> _zeGraphExt;
std::shared_ptr<ZeroInitStructsHolder> _zeroInitStruct;

Logger _logger;

// In the case of the import path, the blob is released after graph initialization so it can not be any longer
// exported
bool _blobIsReleased = false;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class PluginGraph final : public IGraph {
std::vector<uint8_t> blob,
const Config& config);

void export_blob(std::ostream& stream) const override;
void export_blob(std::ostream& stream) override;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include "intel_npu/utils/zero/zero_init.hpp"
#include "intel_npu/utils/zero/zero_types.hpp"

#include "openvino/runtime/aligned_buffer.hpp"

namespace intel_npu {

using SerializedIR = std::pair<size_t, std::shared_ptr<uint8_t>>;
Expand All @@ -37,9 +39,9 @@ class ZeGraphExtWrappers {

ze_graph_handle_t getGraphHandle(const std::vector<uint8_t>& network) const;

ze_graph_handle_t getGraphHandle(const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork) const override;
ze_graph_handle_t getGraphHandle(const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork) const;

NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const override;
NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const;

_ze_result_t destroyGraph(ze_graph_handle_t graphHandle);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,24 +206,6 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compile(const std::shared_ptr<con
std::nullopt);
}

std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::vector<uint8_t> network, const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");

_logger.debug("parse start");
ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(network);
_logger.debug("parse end");

OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta");
auto networkMeta = _zeGraphExt->getNetworkMeta(graphHandle);

return std::make_shared<DriverGraph>(_zeGraphExt,
_zeroInitStruct,
graphHandle,
std::move(networkMeta),
config,
std::optional<std::vector<uint8_t>>(std::move(network)));
}

std::shared_ptr<IGraph> DriverCompilerAdapter::parse(const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork, const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");

Expand All @@ -239,7 +221,7 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::parse(const std::shared_ptr<ov::A
graphHandle,
std::move(networkMeta),
config,
std::nullopt);
std::optional<std::shared_ptr<ov::AlignedBuffer>>(mmapNetwork));
}

ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr<const ov::Model>& model,
Expand Down
50 changes: 11 additions & 39 deletions src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ DriverGraph::DriverGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
const Config& config,
std::optional<std::vector<uint8_t>> blob)
std::optional<std::shared_ptr<ov::AlignedBuffer>> blob)
: IGraph(graphHandle, std::move(metadata), std::move(blob)),
_zeGraphExt(zeGraphExt),
_zeroInitStruct(zeroInitStruct),
Expand All @@ -32,18 +32,17 @@ DriverGraph::DriverGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
initialize(config);
}

void DriverGraph::export_blob(std::ostream& stream) const {
const uint8_t* blobPtr = nullptr;
size_t blobSize = -1;
std::vector<uint8_t> blob;
void DriverGraph::export_blob(std::ostream& stream) {
if (_blob.get() == nullptr) {
const uint8_t* blobPtr = nullptr;
size_t blobSize = -1;
std::shared_ptr<std::vector<uint8_t>> blob;

if (_blobIsReleased) {
OPENVINO_THROW("Model was imported (not compiled) by the plugin. Model export is forbidden in this case!");
_zeGraphExt->getGraphBinary(_handle, *blob, blobPtr, blobSize);
_blob = std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(const_cast<uint8_t*>(blobPtr)), blobSize, blob);
}

_zeGraphExt->getGraphBinary(_handle, blob, blobPtr, blobSize);

stream.write(reinterpret_cast<const char*>(blobPtr), blobSize);
stream.write(reinterpret_cast<const char*>(_blob->get_ptr()), _blob->size());

if (!stream) {
_logger.error("Write blob to stream failed. Blob is broken!");
Expand All @@ -52,12 +51,12 @@ void DriverGraph::export_blob(std::ostream& stream) const {

if (_logger.level() >= ov::log::Level::INFO) {
std::uint32_t result = 1171117u;
for (const uint8_t* it = blobPtr; it != blobPtr + blobSize; ++it) {
for (const uint8_t* it = reinterpret_cast<const uint8_t*>(_blob->get_ptr()); it != reinterpret_cast<const uint8_t*>(_blob->get_ptr()) + _blob->size(); ++it) {
result = ((result << 7) + result) + static_cast<uint32_t>(*it);
}

std::stringstream str;
str << "Blob size: " << blobSize << ", hash: " << std::hex << result;
str << "Blob size: " << _blob->size() << ", hash: " << std::hex << result;
_logger.info(str.str().c_str());
}
_logger.info("Write blob to stream successfully.");
Expand Down Expand Up @@ -121,35 +120,8 @@ void DriverGraph::initialize(const Config& config) {
_zeGraphExt->initializeGraph(_handle, config);

_logger.debug("Graph initialize finish");

// We are allowed to release the original blob because weights were loaded in NPU memory during
// _zeGraphExt->initializeGraph(). The driver will not access the original blob from this moment on, so we are
// releasing it here to avoid unnecessary memory usage.
_blobIsReleased = release_blob(config);
}

bool DriverGraph::release_blob(const Config& config) {
if (_blob.empty() || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 ||
config.get<PERF_COUNT>()) {
return false;
}

ze_graph_properties_2_t properties = {};
properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
_zeroInitStruct->getGraphDdiTable().pfnGetProperties2(_handle, &properties);

if (~properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) {
return false;
}

_blob.clear();
_blob.shrink_to_fit();

_logger.debug("Blob is released");

return true;
};

DriverGraph::~DriverGraph() {
if (_handle != nullptr) {
auto result = _zeGraphExt->destroyGraph(_handle);
Expand Down
19 changes: 13 additions & 6 deletions src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "intel_npu/config/common.hpp"
#include "intel_npu/config/runtime.hpp"
#include "intel_npu/utils/zero/zero_api.hpp"
#include "openvino/runtime/shared_buffer.hpp"

namespace intel_npu {

Expand All @@ -17,7 +18,7 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
NetworkMetadata metadata,
std::vector<uint8_t> blob,
const Config& config)
: IGraph(graphHandle, std::move(metadata), std::optional<std::vector<uint8_t>>(std::move(blob))),
: IGraph(graphHandle, std::move(metadata), std::optional<std::shared_ptr<ov::AlignedBuffer>>(std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blob.data()), blob.size(), std::make_shared<std::vector<uint8_t>>(std::move(blob))))),
_zeGraphExt(zeGraphExt),
_zeroInitStruct(zeroInitStruct),
_compiler(compiler),
Expand All @@ -30,8 +31,8 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
initialize(config);
}

void PluginGraph::export_blob(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(_blob.data()), _blob.size());
void PluginGraph::export_blob(std::ostream& stream) {
stream.write(reinterpret_cast<const char*>(_blob->get_ptr()), _blob->size());

if (!stream) {
_logger.error("Write blob to stream failed. Blob is broken!");
Expand All @@ -40,20 +41,26 @@ void PluginGraph::export_blob(std::ostream& stream) const {

if (_logger.level() >= ov::log::Level::INFO) {
std::uint32_t result = 1171117u;
for (const uint8_t* it = _blob.data(); it != _blob.data() + _blob.size(); ++it) {
for (const uint8_t* it = reinterpret_cast<const uint8_t*>(_blob->get_ptr()); it != reinterpret_cast<const uint8_t*>(_blob->get_ptr()) + _blob->size(); ++it) {
result = ((result << 7) + result) + static_cast<uint32_t>(*it);
}

std::stringstream str;
str << "Blob size: " << _blob.size() << ", hash: " << std::hex << result;
str << "Blob size: " << _blob->size() << ", hash: " << std::hex << result;
_logger.info(str.str().c_str());
}
_logger.info("Write blob to stream successfully.");
}

/// Forwards raw profiling data to the compiler together with a byte-vector copy of the compiled blob.
///
/// @param profData Raw profiling buffer.
/// @param config   Configuration passed through to the compiler.
/// @return The result of _compiler->process_profiling_output().
std::vector<ov::ProfilingInfo> PluginGraph::process_profiling_output(const std::vector<uint8_t>& profData,
                                                                     const Config& config) const {
    // TODO: avoid this copy. The compiler call takes a std::vector<uint8_t>, but _blob is an ov::AlignedBuffer and
    // ov::SharedBuffer keeps the object it wraps private, so the original vector cannot be recovered from it.
    // Possible ways out: keep two blob declarations in IGraph, or templatize the blob type in IGraph
    // (std::vector<uint8_t> vs. std::shared_ptr<ov::AlignedBuffer>).
    //
    // Build the vector directly from the byte range: one allocation and one copy, instead of allocating a
    // zero-filled vector of the full size and then assigning over it.
    const auto* blobBegin = reinterpret_cast<const uint8_t*>(_blob->get_ptr());
    const std::vector<uint8_t> blob(blobBegin, blobBegin + _blob->size());
    return _compiler->process_profiling_output(profData, blob, config);
}

void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -387,8 +387,7 @@ ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const std::vector<uint8_t>&
return graphHandle;
}

template <ze_graph_ext_version_t TableExtension>
ze_graph_handle_t ZeGraphExtWrappers<TableExtension>::getGraphHandle(const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork) const {
ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork) const {
ze_graph_handle_t graphHandle;

if (mmapNetwork->size() == 0) {
Expand Down
6 changes: 5 additions & 1 deletion src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -773,8 +773,12 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
if (compiler->getCompilerType() == ov::intel_npu::CompilerType::DRIVER) {
if (auto mmap_buffer = dynamic_cast<ov::OwningSharedStreamBuffer*>(stream.rdbuf())) {
graph = compiler->parse(mmap_buffer->get_buffer(), localConfig);
goto GRAPH_PARSED;
} else {
auto graphSize = getFileSize(stream);
std::vector<uint8_t> blob(graphSize);
graph = compiler->parse(std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blob.data()), blob.size(), std::make_shared<std::vector<uint8_t>>(std::move(blob))), localConfig);
}
goto GRAPH_PARSED;
}

{
Expand Down

0 comments on commit 25b5c05

Please sign in to comment.