Skip to content

Commit

Permalink
Re-add DriverGraph::release_blob method and adapt to `ov::AlignedBuffer` (no release for mmap shared object)
Browse files Browse the repository at this point in the history
  • Loading branch information
MirceaDan99 committed Nov 26, 2024
1 parent 5f43fa8 commit b73c51d
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
}
}

virtual void export_blob(std::ostream& stream) = 0;
virtual void export_blob(std::ostream& stream) const = 0;

virtual std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class ICompilerAdapter {
public:
virtual std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
const Config& config) const = 0;
virtual std::shared_ptr<IGraph> parse(const std::shared_ptr<ov::AlignedBuffer>& networkSO, const Config& config) const = 0;
virtual std::shared_ptr<IGraph> parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const = 0;
virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;
virtual ov::intel_npu::CompilerType getCompilerType() const = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(const std::shared_ptr<ov::AlignedBuffer>& networkSO, const Config& config) const override;
std::shared_ptr<IGraph> parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class DriverGraph final : public IGraph {
const Config& config,
std::optional<std::shared_ptr<ov::AlignedBuffer>> blob);

void export_blob(std::ostream& stream) override;
void export_blob(std::ostream& stream) const override;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const override;
Expand All @@ -36,10 +36,16 @@ class DriverGraph final : public IGraph {
~DriverGraph() override;

private:
bool release_blob(const Config& config);

std::shared_ptr<ZeGraphExtWrappers> _zeGraphExt;
std::shared_ptr<ZeroInitStructsHolder> _zeroInitStruct;

Logger _logger;

// In the case of the import path, the blob is released after graph initialization, so it can no longer be
// exported
bool _blobIsReleased = false;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(const std::shared_ptr<ov::AlignedBuffer>& networkSO, const Config& config) const override;
std::shared_ptr<IGraph> parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ class PluginGraph final : public IGraph {
const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
const std::shared_ptr<ov::AlignedBuffer>& blobSO,
std::shared_ptr<ov::AlignedBuffer> blobSOPtr,
const Config& config);

void export_blob(std::ostream& stream) override;
void export_blob(std::ostream& stream) const override;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,11 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compile(const std::shared_ptr<con
std::nullopt);
}

std::shared_ptr<IGraph> DriverCompilerAdapter::parse(const std::shared_ptr<ov::AlignedBuffer>& networkSO, const Config& config) const {
std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");

_logger.debug("parse start");
ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(networkSO->get_ptr()), networkSO->size());
ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(networkSOPtr->get_ptr()), networkSOPtr->size());
_logger.debug("parse end");

OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta");
Expand All @@ -221,7 +221,7 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::parse(const std::shared_ptr<ov::A
graphHandle,
std::move(networkMeta),
config,
std::optional<std::shared_ptr<ov::AlignedBuffer>>(networkSO));
std::optional<std::shared_ptr<ov::AlignedBuffer>>(std::move(networkSOPtr)));
}

ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr<const ov::Model>& model,
Expand Down
54 changes: 43 additions & 11 deletions src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,18 @@ DriverGraph::DriverGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
initialize(config);
}

void DriverGraph::export_blob(std::ostream& stream) {
if (_blob.get() == nullptr) {
const uint8_t* blobPtr = nullptr;
size_t blobSize = -1;
std::shared_ptr<std::vector<uint8_t>> blob;

_zeGraphExt->getGraphBinary(_handle, *blob, blobPtr, blobSize);
_blob = std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(const_cast<uint8_t*>(blobPtr)), blobSize, blob);
void DriverGraph::export_blob(std::ostream& stream) const {
const uint8_t* blobPtr = nullptr;
size_t blobSize = -1;
std::vector<uint8_t> blob;

if (_blobIsReleased) {
OPENVINO_THROW("Model was imported (not compiled) by the plugin. Model export is forbidden in this case!");
}

stream.write(reinterpret_cast<const char*>(_blob->get_ptr()), _blob->size());
_zeGraphExt->getGraphBinary(_handle, blob, blobPtr, blobSize);

stream.write(reinterpret_cast<const char*>(blobPtr), blobSize);

if (!stream) {
_logger.error("Write blob to stream failed. Blob is broken!");
Expand All @@ -51,12 +52,12 @@ void DriverGraph::export_blob(std::ostream& stream) {

if (_logger.level() >= ov::log::Level::INFO) {
std::uint32_t result = 1171117u;
for (const uint8_t* it = reinterpret_cast<const uint8_t*>(_blob->get_ptr()); it != reinterpret_cast<const uint8_t*>(_blob->get_ptr()) + _blob->size(); ++it) {
for (const uint8_t* it = blobPtr; it != blobPtr + blobSize; ++it) {
result = ((result << 7) + result) + static_cast<uint32_t>(*it);
}

std::stringstream str;
str << "Blob size: " << _blob->size() << ", hash: " << std::hex << result;
str << "Blob size: " << blobSize << ", hash: " << std::hex << result;
_logger.info(str.str().c_str());
}
_logger.info("Write blob to stream successfully.");
Expand Down Expand Up @@ -120,8 +121,39 @@ void DriverGraph::initialize(const Config& config) {
_zeGraphExt->initializeGraph(_handle, config);

_logger.debug("Graph initialize finish");

// We are allowed to release the original blob because weights were loaded in NPU memory during
// _zeGraphExt->initializeGraph(). The driver will not access the original blob from this moment on, so we are
// releasing it here to avoid unnecessary memory usage.
_blobIsReleased = release_blob(config);
}

bool DriverGraph::release_blob(const Config& config) {
if (_blob == nullptr || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 ||
config.get<PERF_COUNT>()) {
return false;
}

ze_graph_properties_2_t properties = {};
properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
_zeroInitStruct->getGraphDdiTable().pfnGetProperties2(_handle, &properties);

if (~properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) {
return false;
}

if (_blob.use_count() > 1) {
// blob is not allocated by plugin, no need for memory optimization
return false;
}

_blob.reset();

_logger.debug("Blob is released");

return true;
};

DriverGraph::~DriverGraph() {
if (_handle != nullptr) {
auto result = _zeGraphExt->destroyGraph(_handle);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,12 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con
config);
}

std::shared_ptr<IGraph> PluginCompilerAdapter::parse(const std::shared_ptr<ov::AlignedBuffer>& networkSO, const Config& config) const {
std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");

_logger.debug("parse start");
std::vector<uint8_t> network(networkSO->size());
network.assign(reinterpret_cast<uint8_t*>(networkSO->get_ptr()), reinterpret_cast<uint8_t*>(networkSO->get_ptr()) + networkSO->size());
std::vector<uint8_t> network(networkSOPtr->size());
network.assign(reinterpret_cast<uint8_t*>(networkSOPtr->get_ptr()), reinterpret_cast<uint8_t*>(networkSOPtr->get_ptr()) + networkSOPtr->size());
auto networkMeta = _compiler->parse(network, config);
network.clear();
network.shrink_to_fit();
Expand All @@ -121,15 +121,15 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(const std::shared_ptr<ov::A
ze_graph_handle_t graphHandle = nullptr;

if (_zeGraphExt) {
graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(networkSO->get_ptr()), networkSO->size());
graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(networkSOPtr->get_ptr()), networkSOPtr->size());
}

return std::make_shared<PluginGraph>(_zeGraphExt,
_compiler,
_zeroInitStruct,
graphHandle,
std::move(networkMeta),
networkSO,
std::move(networkSOPtr),
config);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
const std::shared_ptr<ov::AlignedBuffer>& blobSO,
std::shared_ptr<ov::AlignedBuffer> blobSOPtr,
const Config& config)
: IGraph(graphHandle, std::move(metadata), std::optional<std::shared_ptr<ov::AlignedBuffer>>(blobSO)),
: IGraph(graphHandle, std::move(metadata), std::optional<std::shared_ptr<ov::AlignedBuffer>>(std::move(blobSOPtr))),
_zeGraphExt(zeGraphExt),
_zeroInitStruct(zeroInitStruct),
_compiler(compiler),
Expand All @@ -31,7 +31,7 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
initialize(config);
}

void PluginGraph::export_blob(std::ostream& stream) {
void PluginGraph::export_blob(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(_blob->get_ptr()), _blob->size());

if (!stream) {
Expand Down
3 changes: 2 additions & 1 deletion src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);

auto graph = compiler->parse(std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blobSO->data()), graphSize, blobSO), localConfig);
auto blobSOPtr = std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blobSO->data()), graphSize, blobSO);
auto graph = compiler->parse(std::move(blobSOPtr), localConfig);
graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));

const std::shared_ptr<ov::Model> modelDummy =
Expand Down

0 comments on commit b73c51d

Please sign in to comment.