Skip to content

Commit

Permalink
Re-add DriverGraph::release_blob method and adapt to `ov::AlignedBu…
Browse files Browse the repository at this point in the history
…ffer` (no release for mmap shared object)
  • Loading branch information
MirceaDan99 committed Nov 26, 2024
1 parent 5f43fa8 commit 8c53310
Show file tree
Hide file tree
Showing 12 changed files with 65 additions and 31 deletions.
2 changes: 1 addition & 1 deletion src/core/dev_api/openvino/runtime/shared_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace ov {
template <typename T>
class SharedBuffer : public ov::AlignedBuffer {
public:
SharedBuffer(char* data, size_t size, const T& shared_object) : _shared_object(shared_object) {
SharedBuffer(char* data, size_t size, T shared_object) : _shared_object(std::move(shared_object)) {
m_allocated_buffer = data;
m_aligned_buffer = data;
m_byte_size = size;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
}
}

virtual void export_blob(std::ostream& stream) = 0;
virtual void export_blob(std::ostream& stream) const = 0;

virtual std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class ICompilerAdapter {
public:
virtual std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
const Config& config) const = 0;
virtual std::shared_ptr<IGraph> parse(const std::shared_ptr<ov::AlignedBuffer>& networkSO, const Config& config) const = 0;
virtual std::shared_ptr<IGraph> parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const = 0;
virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;
virtual ov::intel_npu::CompilerType getCompilerType() const = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(const std::shared_ptr<ov::AlignedBuffer>& networkSO, const Config& config) const override;
std::shared_ptr<IGraph> parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class DriverGraph final : public IGraph {
const Config& config,
std::optional<std::shared_ptr<ov::AlignedBuffer>> blob);

void export_blob(std::ostream& stream) override;
void export_blob(std::ostream& stream) const override;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const override;
Expand All @@ -36,10 +36,16 @@ class DriverGraph final : public IGraph {
~DriverGraph() override;

private:
bool release_blob(const Config& config);

std::shared_ptr<ZeGraphExtWrappers> _zeGraphExt;
std::shared_ptr<ZeroInitStructsHolder> _zeroInitStruct;

Logger _logger;

// In the case of the import path, the blob is released after graph initialization, so it can no longer be
// exported
bool _blobIsReleased = false;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(const std::shared_ptr<ov::AlignedBuffer>& networkSO, const Config& config) const override;
std::shared_ptr<IGraph> parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ class PluginGraph final : public IGraph {
const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
const std::shared_ptr<ov::AlignedBuffer>& blobSO,
std::shared_ptr<ov::AlignedBuffer> blobSOPtr,
const Config& config);

void export_blob(std::ostream& stream) override;
void export_blob(std::ostream& stream) const override;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,11 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compile(const std::shared_ptr<con
std::nullopt);
}

std::shared_ptr<IGraph> DriverCompilerAdapter::parse(const std::shared_ptr<ov::AlignedBuffer>& networkSO, const Config& config) const {
std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");

_logger.debug("parse start");
ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(networkSO->get_ptr()), networkSO->size());
ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(networkSOPtr->get_ptr()), networkSOPtr->size());
_logger.debug("parse end");

OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta");
Expand All @@ -221,7 +221,7 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::parse(const std::shared_ptr<ov::A
graphHandle,
std::move(networkMeta),
config,
std::optional<std::shared_ptr<ov::AlignedBuffer>>(networkSO));
std::optional<std::shared_ptr<ov::AlignedBuffer>>(std::move(networkSOPtr)));
}

ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr<const ov::Model>& model,
Expand Down
49 changes: 38 additions & 11 deletions src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,18 @@ DriverGraph::DriverGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
initialize(config);
}

void DriverGraph::export_blob(std::ostream& stream) {
if (_blob.get() == nullptr) {
const uint8_t* blobPtr = nullptr;
size_t blobSize = -1;
std::shared_ptr<std::vector<uint8_t>> blob;

_zeGraphExt->getGraphBinary(_handle, *blob, blobPtr, blobSize);
_blob = std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(const_cast<uint8_t*>(blobPtr)), blobSize, blob);
void DriverGraph::export_blob(std::ostream& stream) const {
const uint8_t* blobPtr = nullptr;
size_t blobSize = -1;
std::vector<uint8_t> blob;

if (_blobIsReleased) {
OPENVINO_THROW("Model was imported (not compiled) by the plugin. Model export is forbidden in this case!");
}

stream.write(reinterpret_cast<const char*>(_blob->get_ptr()), _blob->size());
_zeGraphExt->getGraphBinary(_handle, blob, blobPtr, blobSize);

stream.write(reinterpret_cast<const char*>(blobPtr), blobSize);

if (!stream) {
_logger.error("Write blob to stream failed. Blob is broken!");
Expand All @@ -51,12 +52,12 @@ void DriverGraph::export_blob(std::ostream& stream) {

if (_logger.level() >= ov::log::Level::INFO) {
std::uint32_t result = 1171117u;
for (const uint8_t* it = reinterpret_cast<const uint8_t*>(_blob->get_ptr()); it != reinterpret_cast<const uint8_t*>(_blob->get_ptr()) + _blob->size(); ++it) {
for (const uint8_t* it = blobPtr; it != blobPtr + blobSize; ++it) {
result = ((result << 7) + result) + static_cast<uint32_t>(*it);
}

std::stringstream str;
str << "Blob size: " << _blob->size() << ", hash: " << std::hex << result;
str << "Blob size: " << blobSize << ", hash: " << std::hex << result;
_logger.info(str.str().c_str());
}
_logger.info("Write blob to stream successfully.");
Expand Down Expand Up @@ -120,8 +121,34 @@ void DriverGraph::initialize(const Config& config) {
_zeGraphExt->initializeGraph(_handle, config);

_logger.debug("Graph initialize finish");

// We are allowed to release the original blob because weights were loaded in NPU memory during
// _zeGraphExt->initializeGraph(). The driver will not access the original blob from this moment on, so we are
// releasing it here to avoid unnecessary memory usage.
_blobIsReleased = release_blob(config);
}

bool DriverGraph::release_blob(const Config& config) {
if (_blob == nullptr || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 ||
config.get<PERF_COUNT>()) {
return false;
}

ze_graph_properties_2_t properties = {};
properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
_zeroInitStruct->getGraphDdiTable().pfnGetProperties2(_handle, &properties);

if (~properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) {
return false;
}

_blob.reset();

_logger.debug("Blob is released");

return true;
};

DriverGraph::~DriverGraph() {
if (_handle != nullptr) {
auto result = _zeGraphExt->destroyGraph(_handle);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,12 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con
config);
}

std::shared_ptr<IGraph> PluginCompilerAdapter::parse(const std::shared_ptr<ov::AlignedBuffer>& networkSO, const Config& config) const {
std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");

_logger.debug("parse start");
std::vector<uint8_t> network(networkSO->size());
network.assign(reinterpret_cast<uint8_t*>(networkSO->get_ptr()), reinterpret_cast<uint8_t*>(networkSO->get_ptr()) + networkSO->size());
std::vector<uint8_t> network(networkSOPtr->size());
network.assign(reinterpret_cast<uint8_t*>(networkSOPtr->get_ptr()), reinterpret_cast<uint8_t*>(networkSOPtr->get_ptr()) + networkSOPtr->size());
auto networkMeta = _compiler->parse(network, config);
network.clear();
network.shrink_to_fit();
Expand All @@ -121,15 +121,15 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(const std::shared_ptr<ov::A
ze_graph_handle_t graphHandle = nullptr;

if (_zeGraphExt) {
graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(networkSO->get_ptr()), networkSO->size());
graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(networkSOPtr->get_ptr()), networkSOPtr->size());
}

return std::make_shared<PluginGraph>(_zeGraphExt,
_compiler,
_zeroInitStruct,
graphHandle,
std::move(networkMeta),
networkSO,
std::move(networkSOPtr),
config);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
const std::shared_ptr<ov::AlignedBuffer>& blobSO,
std::shared_ptr<ov::AlignedBuffer> blobSOPtr,
const Config& config)
: IGraph(graphHandle, std::move(metadata), std::optional<std::shared_ptr<ov::AlignedBuffer>>(blobSO)),
: IGraph(graphHandle, std::move(metadata), std::optional<std::shared_ptr<ov::AlignedBuffer>>(std::move(blobSOPtr))),
_zeGraphExt(zeGraphExt),
_zeroInitStruct(zeroInitStruct),
_compiler(compiler),
Expand All @@ -31,7 +31,7 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
initialize(config);
}

void PluginGraph::export_blob(std::ostream& stream) {
void PluginGraph::export_blob(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(_blob->get_ptr()), _blob->size());

if (!stream) {
Expand Down
3 changes: 2 additions & 1 deletion src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);

auto graph = compiler->parse(std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blobSO->data()), graphSize, blobSO), localConfig);
auto blobSOPtr = std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blobSO->data()), graphSize, std::move(blobSO));
auto graph = compiler->parse(std::move(blobSOPtr), localConfig);
graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));

const std::shared_ptr<ov::Model> modelDummy =
Expand Down

0 comments on commit 8c53310

Please sign in to comment.