Skip to content

Commit

Permalink
Add ov::internal::caching_with_mmap property logic
Browse files Browse the repository at this point in the history
  • Loading branch information
MirceaDan99 committed Nov 14, 2024
1 parent 7880504 commit 4ff7d8b
Show file tree
Hide file tree
Showing 10 changed files with 94 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,20 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {

/**
 * @brief Common interface of the NPU compiler adapters.
 *
 * Every adapter remembers the ov::intel_npu::CompilerType it was created for, so callers can pick a code
 * path that depends on the compiler in use.
 */
class ICompilerAdapter {
public:
    /**
     * @param compilerType Compiler type this adapter was created for; returned by getCompilerType().
     */
    explicit ICompilerAdapter(ov::intel_npu::CompilerType compilerType) : _compilerType(compilerType) {}

    /**
     * @brief Produces a graph from an OpenVINO model.
     * @param model Model to compile.
     * @param config Configuration forwarded to the implementation.
     */
    virtual std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
                                            const Config& config) const = 0;

    /**
     * @brief Produces a graph from a blob held in a byte vector.
     * @param network Blob bytes, taken by value.
     * @param config Configuration forwarded to the implementation.
     */
    virtual std::shared_ptr<IGraph> parse(std::vector<uint8_t> network, const Config& config) const = 0;

    /**
     * @brief Produces a graph from a blob held in an ov::AlignedBuffer.
     * @param mmapNetwork Shared buffer holding the blob (presumably memory-mapped, judging by the name).
     * @param config Configuration forwarded to the implementation.
     */
    virtual std::shared_ptr<IGraph> parse(const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork,
                                          const Config& config) const = 0;

    /**
     * @brief Reports which operations of the model are supported.
     * @param model Model to inspect.
     * @param config Configuration forwarded to the implementation.
     */
    virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;

    virtual ~ICompilerAdapter() = default;

    /**
     * @brief Returns the compiler type passed to the constructor.
     *
     * Declared const so it can also be called through const references and pointers.
     */
    ov::intel_npu::CompilerType getCompilerType() const {
        return _compilerType;
    }

private:
    ov::intel_npu::CompilerType _compilerType;
};

//------------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ namespace intel_npu {

class DriverCompilerAdapter final : public ICompilerAdapter {
public:
DriverCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct);
DriverCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct, ov::intel_npu::CompilerType compilerType);

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(std::vector<uint8_t> network, const Config& config) const override;

// Implements ICompilerAdapter::parse for blobs supplied through an ov::AlignedBuffer. Marked `override` like
// the sibling declarations so that any signature drift from the base class is caught at compile time.
std::shared_ptr<IGraph> parse(const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork, const Config& config) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

private:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,16 @@ namespace intel_npu {

class PluginCompilerAdapter final : public ICompilerAdapter {
public:
PluginCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct);
PluginCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct, ov::intel_npu::CompilerType compilerType);

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(std::vector<uint8_t> network, const Config& config) const override;

// AlignedBuffer-based parsing is not supported by this adapter: the call unconditionally throws and both
// arguments are ignored. Callers have to use the std::vector<uint8_t> overload instead.
// NOTE(review): "CIP" presumably stands for "compiler in plugin" — confirm.
std::shared_ptr<IGraph> parse(const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork, const Config& config) const override {
OPENVINO_THROW("CIP needs a blob vector!");
}

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

private:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ class ZeGraphExtWrappers final : public ZeGraphExtWrappersInterface {

ze_graph_handle_t getGraphHandle(const std::vector<uint8_t>& network) const override;

ze_graph_handle_t getGraphHandle(const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork) const override;

NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const override;

_ze_result_t destroyGraph(ze_graph_handle_t graphHandle) override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <ze_graph_ext.h>

#include "intel_npu/network_metadata.hpp"
#include "openvino/runtime/aligned_buffer.hpp"

namespace intel_npu {

Expand All @@ -23,6 +24,8 @@ class ZeGraphExtWrappersInterface {

virtual ze_graph_handle_t getGraphHandle(const std::vector<uint8_t>& network) const = 0;

virtual ze_graph_handle_t getGraphHandle(const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork) const = 0;

virtual NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const = 0;

virtual _ze_result_t destroyGraph(ze_graph_handle_t graphHandle) = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,10 @@ std::string rankToLegacyLayoutString(const size_t rank) {

namespace intel_npu {

DriverCompilerAdapter::DriverCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct)
DriverCompilerAdapter::DriverCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct, ov::intel_npu::CompilerType compilerType)
: _zeroInitStruct(zeroInitStruct),
_logger("DriverCompilerAdapter", Logger::global().level()) {
_logger("DriverCompilerAdapter", Logger::global().level()),
ICompilerAdapter(compilerType) {
_logger.debug("initialize DriverCompilerAdapter start");

uint32_t graphExtVersion = _zeroInitStruct->getGraphDdiTable().version();
Expand Down Expand Up @@ -246,6 +247,24 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::vector<uint8_t> networ
std::optional<std::vector<uint8_t>>(std::move(network)));
}

/**
 * @brief Builds a DriverGraph from a blob held in an ov::AlignedBuffer.
 *
 * The buffer is passed to the graph extension wrapper to obtain a graph handle, the network metadata is then
 * queried for that handle, and both are wrapped into a DriverGraph.
 *
 * @param mmapNetwork Shared buffer holding the blob.
 * @param config Configuration stored in the resulting graph.
 * @return The newly created graph.
 */
std::shared_ptr<IGraph> DriverCompilerAdapter::parse(const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork,
                                                     const Config& config) const {
    OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");

    _logger.debug("parse start");
    ze_graph_handle_t handle = _zeGraphExt->getGraphHandle(mmapNetwork);
    _logger.debug("parse end");

    OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta");
    NetworkMetadata metadata = _zeGraphExt->getNetworkMeta(handle);

    // No blob vector is handed to the graph on this path, hence std::nullopt as the last argument.
    // NOTE(review): the graph therefore does not hold a reference to mmapNetwork — confirm the buffer is not
    // needed after the handle has been created.
    return std::make_shared<DriverGraph>(_zeGraphExt, _zeroInitStruct, handle, std::move(metadata), config, std::nullopt);
}

ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr<const ov::Model>& model,
const Config& config) const {
OV_ITT_TASK_CHAIN(query_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "query");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ ov::SoPtr<intel_npu::ICompiler> loadCompiler(const std::string& libpath) {

namespace intel_npu {

PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct)
PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct, ov::intel_npu::CompilerType compilerType)
: _zeroInitStruct(zeroInitStruct),
_logger("PluginCompilerAdapter", Logger::global().level()) {
_logger("PluginCompilerAdapter", Logger::global().level()),
ICompilerAdapter(compilerType) {
_logger.debug("initialize PluginCompilerAdapter start");

_logger.info("MLIR compiler will be used.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,30 @@ ze_graph_handle_t ZeGraphExtWrappers<TableExtension>::getGraphHandle(const std::
return graphHandle;
}

/**
 * @brief Creates a graph handle from a blob held in an ov::AlignedBuffer.
 *
 * The graph descriptor is filled with the buffer's size and data pointer (native graph format) and passed to
 * the graph DDI table's pfnCreate; this function itself does not copy the blob.
 *
 * @param mmapNetwork Shared buffer holding the blob; must be non-null and non-empty.
 * @return The handle written by pfnCreate.
 * @throws An OpenVINO exception ("Empty blob") if the buffer is missing or has size 0, or whatever
 *         THROW_ON_FAIL_FOR_LEVELZERO_EXT raises when pfnCreate reports a failure.
 */
template <ze_graph_ext_version_t TableExtension>
ze_graph_handle_t ZeGraphExtWrappers<TableExtension>::getGraphHandle(
    const std::shared_ptr<ov::AlignedBuffer>& mmapNetwork) const {
    // A null buffer is treated like an empty one instead of being dereferenced.
    if (mmapNetwork == nullptr || mmapNetwork->size() == 0) {
        OPENVINO_THROW("Empty blob");
    }

    ze_graph_desc_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES,
                            nullptr,
                            ZE_GRAPH_FORMAT_NATIVE,
                            mmapNetwork->size(),
                            reinterpret_cast<const uint8_t*>(mmapNetwork->get_ptr()),
                            nullptr};

    // Start from a known value so the handle is never read uninitialized, whatever pfnCreate does with it.
    ze_graph_handle_t graphHandle = nullptr;
    auto result = _zeroInitStruct->getGraphDdiTable().pfnCreate(_zeroInitStruct->getContext(),
                                                                _zeroInitStruct->getDevice(),
                                                                &desc,
                                                                &graphHandle);
    THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnCreate", result, _zeroInitStruct->getGraphDdiTable());

    return graphHandle;
}

/**
* @brief Extracts the I/O metadata from Level Zero specific structures and converts them into OpenVINO specific
* ones.
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/include/metrics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class Metrics final {
ov::intel_npu::batch_mode.name(),
ov::hint::execution_mode.name()};

const std::vector<ov::PropertyName> _internalSupportedProperties = {ov::internal::caching_properties.name()};
const std::vector<ov::PropertyName> _internalSupportedProperties = {ov::internal::caching_properties.name(), ov::internal::caching_with_mmap.name()};

// Metric to provide a hint for a range for number of async infer requests. (bottom bound, upper bound, step)
const std::tuple<uint32_t, uint32_t, uint32_t> _rangeForAsyncInferRequests{1u, 10u, 1u};
Expand Down
36 changes: 24 additions & 12 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "openvino/op/parameter.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/runtime/shared_buffer.hpp"
#include "plugin_compiler_adapter.hpp"
#include "remote_context.hpp"
#include "zero_backend.hpp"
Expand Down Expand Up @@ -768,16 +769,27 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
try {
auto compiler = getCompiler(localConfig);

auto graphSize = getFileSize(stream);

std::vector<uint8_t> blob(graphSize);
stream.read(reinterpret_cast<char*>(blob.data()), graphSize);
if (!stream) {
OPENVINO_THROW("Failed to read data from stream!");
std::shared_ptr<IGraph> graph;
if (compiler->getCompilerType() == ov::intel_npu::CompilerType::DRIVER) {
if (auto mmap_buffer = dynamic_cast<ov::OwningSharedStreamBuffer*>(stream.rdbuf())) {
graph = compiler->parse(mmap_buffer->get_buffer(), localConfig);
goto GRAPH_PARSED;
}
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);

auto graph = compiler->parse(std::move(blob), localConfig);
{
auto graphSize = getFileSize(stream);

std::vector<uint8_t> blob(graphSize);
stream.read(reinterpret_cast<char*>(blob.data()), graphSize);
if (!stream) {
OPENVINO_THROW("Failed to read data from stream!");
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);

graph = compiler->parse(std::move(blob), localConfig);
}
GRAPH_PARSED:
graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));

const std::shared_ptr<ov::Model> modelDummy =
Expand Down Expand Up @@ -835,15 +847,15 @@ std::unique_ptr<ICompilerAdapter> Plugin::getCompiler(const Config& config) cons
switch (compilerType) {
case ov::intel_npu::CompilerType::MLIR: {
if (_backends->getBackendName() != "LEVEL0") {
return std::make_unique<PluginCompilerAdapter>(nullptr);
return std::make_unique<PluginCompilerAdapter>(nullptr, compilerType);
}

auto zeroBackend = std::dynamic_pointer_cast<ZeroEngineBackend>(_backends->getIEngineBackend()._ptr);
if (zeroBackend == nullptr) {
return std::make_unique<PluginCompilerAdapter>(nullptr);
return std::make_unique<PluginCompilerAdapter>(nullptr, compilerType);
}

return std::make_unique<PluginCompilerAdapter>(zeroBackend->getInitStruct());
return std::make_unique<PluginCompilerAdapter>(zeroBackend->getInitStruct(), compilerType);
}
case ov::intel_npu::CompilerType::DRIVER: {
if (_backends->getBackendName() != "LEVEL0") {
Expand All @@ -855,7 +867,7 @@ std::unique_ptr<ICompilerAdapter> Plugin::getCompiler(const Config& config) cons
OPENVINO_THROW("Failed to cast zeroBackend, zeroBackend is a nullptr");
}

return std::make_unique<DriverCompilerAdapter>(zeroBackend->getInitStruct());
return std::make_unique<DriverCompilerAdapter>(zeroBackend->getInitStruct(), compilerType);
}
default:
OPENVINO_THROW("Invalid NPU_COMPILER_TYPE");
Expand Down

0 comments on commit 4ff7d8b

Please sign in to comment.