Skip to content

Commit

Permalink
Add new CACHED_MODEL_BUFFER runtime property & update changes from …
Browse files Browse the repository at this point in the history
…new `import_model` API POC
  • Loading branch information
MirceaDan99 committed Dec 10, 2024
1 parent 2c52a77 commit e2c1b93
Show file tree
Hide file tree
Showing 17 changed files with 181 additions and 47 deletions.
25 changes: 25 additions & 0 deletions src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,4 +288,29 @@ struct RUN_INFERENCES_SEQUENTIALLY final : OptionBase<RUN_INFERENCES_SEQUENTIALL
}
};

//
// CACHED_MODEL_BUFFER
//
/// Option descriptor whose value is a shared handle to an `ov::AlignedBuffer`.
/// The option is registered under the name of `ov::internal::cached_model_buffer`
/// and is reported as a run-time option.
struct CACHED_MODEL_BUFFER final : OptionBase<CACHED_MODEL_BUFFER, std::shared_ptr<ov::AlignedBuffer>> {
    // String <-> value conversions; both are defined out of line.
    static std::shared_ptr<ov::AlignedBuffer> parse(std::string_view val);
    static std::string toString(const std::shared_ptr<ov::AlignedBuffer>& val);

    /// Name under which the option is looked up.
    static std::string_view key() {
        return ov::internal::cached_model_buffer.name();
    }

    /// Human-readable spelling of the option's value type.
    static constexpr std::string_view getTypeName() {
        return "std::shared_ptr<ov::AlignedBuffer>";
    }

    /// No buffer is attached unless one is explicitly provided.
    static std::shared_ptr<ov::AlignedBuffer> defaultValue() {
        return {};
    }

    /// Reports this option as a run-time one.
    static OptionMode mode() {
        return OptionMode::RunTime;
    }
};

} // namespace intel_npu
21 changes: 21 additions & 0 deletions src/plugins/intel_npu/src/al/src/config/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) {
desc.add<TURBO>();
desc.add<BYPASS_UMD_CACHING>();
desc.add<RUN_INFERENCES_SEQUENTIALLY>();
desc.add<CACHED_MODEL_BUFFER>();
}

// Heuristically obtained number. Varies depending on the values of PLATFORM and PERFORMANCE_HINT
Expand Down Expand Up @@ -155,3 +156,23 @@ std::string intel_npu::WORKLOAD_TYPE::toString(const ov::WorkloadType& val) {
ss << val;
return ss.str();
}

//
// CACHED_MODEL_BUFFER
//

std::shared_ptr<ov::AlignedBuffer> intel_npu::CACHED_MODEL_BUFFER::parse(std::string_view val) {
std::istringstream ss = std::istringstream(std::string(val));
void* modelBufferPtr;

ss >> modelBufferPtr;

return std::shared_ptr<ov::AlignedBuffer>(static_cast<ov::AlignedBuffer*>(modelBufferPtr));
}

/// Renders the address held by `val` as text, using the stream's pointer formatting.
/// Only the raw address is written; nothing about the buffer contents is serialised.
std::string intel_npu::CACHED_MODEL_BUFFER::toString(const std::shared_ptr<ov::AlignedBuffer>& val) {
    std::ostringstream out;
    out << static_cast<void*>(val.get());
    return out.str();
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,77 @@
#include "intel_npu/utils/zero/zero_utils.hpp"
#include "intel_npu/utils/zero/zero_wrappers.hpp"
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/runtime/shared_buffer.hpp"

namespace intel_npu {

/// Base interface for objects that hold a blob of bytes.
/// Every operation throws by default; concrete containers override what they support.
class BlobContainer {
public:
    /// @return Pointer to the first byte of the blob.
    virtual void* get_ptr() {
        // The method is not const-qualified, so the message must not claim it is.
        OPENVINO_THROW("BlobContainer::get_ptr() method is not implemented!");
    }

    /// @return Number of bytes reachable through get_ptr().
    virtual size_t size() const {
        OPENVINO_THROW("BlobContainer::size() method is not implemented!");
    }

    /// Asks the container to drop its blob.
    /// @return true when the blob memory was actually released, false otherwise.
    virtual bool release_from_memory() {
        OPENVINO_THROW("BlobContainer::release_from_memory() method is not implemented!");
    }

    virtual ~BlobContainer() = default;
};

/// Blob container that owns its bytes in a `std::vector<uint8_t>`.
class BlobContainerVector : public BlobContainer {
public:
    /// Takes over the storage of `blob`.
    BlobContainerVector(std::vector<uint8_t> blob) : _ownershipBlob(std::move(blob)) {}

    /// @return Number of bytes currently held.
    size_t size() const override {
        return _ownershipBlob.size();
    }

    /// @return Pointer to the first stored byte.
    void* get_ptr() override {
        // uint8_t* converts to void* implicitly; no cast is required.
        return _ownershipBlob.data();
    }

    /// Empties the vector and asks it to give its capacity back.
    /// @return Always true.
    bool release_from_memory() override {
        _ownershipBlob.clear();
        _ownershipBlob.shrink_to_fit();
        return true;
    }

private:
    std::vector<uint8_t> _ownershipBlob;
};

/// Blob container that views a shared `ov::AlignedBuffer`, starting `offset` bytes in.
/// The buffer is kept alive through the stored shared pointer.
class BlobContainerAlignedBuffer : public BlobContainer {
public:
    /// @param blobSO Shared buffer that contains the blob.
    /// @param offset Number of leading bytes of the buffer to skip.
    BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO, size_t offset)
        : _ownershipBlob(blobSO),
          _offset(offset) {}

    /// @return Pointer to the blob start, i.e. the buffer start advanced by the offset.
    void* get_ptr() override {
        return _ownershipBlob->get_ptr(_offset);
    }

    /// @return Number of bytes readable from get_ptr().
    size_t size() const override {
        // get_ptr() already skips `_offset` bytes, so the readable span is what remains
        // after the offset; reporting the full buffer size would let callers read past
        // the end of the buffer.
        const size_t total = _ownershipBlob->size();
        return _offset < total ? total - _offset : 0;
    }

    /// The buffer is shared, so it is never released from here.
    /// @return Always false.
    bool release_from_memory() override {
        return false;
    }

private:
    std::shared_ptr<ov::AlignedBuffer> _ownershipBlob;
    size_t _offset;
};

class IGraph : public std::enable_shared_from_this<IGraph> {
public:
IGraph(ze_graph_handle_t handle,
NetworkMetadata metadata,
const Config& config,
std::optional<std::vector<uint8_t>> blob);
std::optional<std::unique_ptr<BlobContainer>> blobPtr);

virtual void export_blob(std::ostream& stream) const = 0;

Expand Down Expand Up @@ -89,7 +151,7 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
// first inference starts running
std::mutex _mutex;

std::vector<uint8_t> _blob;
std::unique_ptr<BlobContainer> _blobPtr;

uint32_t _unique_id = 0;
uint32_t _last_submitted_id;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class ICompilerAdapter {
public:
virtual std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
const Config& config) const = 0;
virtual std::shared_ptr<IGraph> parse(std::vector<uint8_t> network, const Config& config) const = 0;
virtual std::shared_ptr<IGraph> parse(std::unique_ptr<BlobContainer> blobPtr, const Config& config) const = 0;
virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;

virtual ~ICompilerAdapter() = default;
Expand Down
6 changes: 3 additions & 3 deletions src/plugins/intel_npu/src/common/src/igraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ namespace intel_npu {
IGraph::IGraph(ze_graph_handle_t handle,
NetworkMetadata metadata,
const Config& config,
std::optional<std::vector<uint8_t>> blob)
std::optional<std::unique_ptr<BlobContainer>> blobPtr)
: _handle(handle),
_metadata(std::move(metadata)),
_logger("IGraph", config.get<LOG_LEVEL>()) {
if (blob.has_value()) {
_blob = std::move(*blob);
if (blobPtr.has_value()) {
_blobPtr = std::move(*blobPtr);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(std::vector<uint8_t> network, const Config& config) const override;
std::shared_ptr<IGraph> parse(std::unique_ptr<BlobContainer> blobPtr, const Config& config) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class DriverGraph final : public IGraph {
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
const Config& config,
std::optional<std::vector<uint8_t>> blob);
std::optional<std::unique_ptr<BlobContainer>> blob);

void export_blob(std::ostream& stream) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(std::vector<uint8_t> network, const Config& config) const override;
std::shared_ptr<IGraph> parse(std::unique_ptr<BlobContainer> blobPtr, const Config& config) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class PluginGraph final : public IGraph {
const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
std::vector<uint8_t> blob,
std::unique_ptr<BlobContainer> blobPtr,
const Config& config);

void export_blob(std::ostream& stream) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class ZeGraphExtWrappers {
const std::string& buildFlags,
const uint32_t& flags) const;

ze_graph_handle_t getGraphHandle(const std::vector<uint8_t>& network) const;
ze_graph_handle_t getGraphHandle(const uint8_t* data, size_t size) const;

NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,13 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compile(const std::shared_ptr<con
std::nullopt);
}

std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::vector<uint8_t> network, const Config& config) const {
std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::unique_ptr<BlobContainer> blobPtr,
const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");

_logger.debug("parse start");
ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(network);
ze_graph_handle_t graphHandle =
_zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()), blobPtr->size());
_logger.debug("parse end");

OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta");
Expand All @@ -221,7 +223,7 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::vector<uint8_t> networ
graphHandle,
std::move(networkMeta),
config,
std::optional<std::vector<uint8_t>>(std::move(network)));
std::optional<std::unique_ptr<BlobContainer>>(std::move(blobPtr)));
}

ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr<const ov::Model>& model,
Expand Down
11 changes: 6 additions & 5 deletions src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ DriverGraph::DriverGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
const Config& config,
std::optional<std::vector<uint8_t>> blob)
: IGraph(graphHandle, std::move(metadata), config, std::move(blob)),
std::optional<std::unique_ptr<BlobContainer>> blobPtr)
: IGraph(graphHandle, std::move(metadata), config, std::move(blobPtr)),
_zeGraphExt(zeGraphExt),
_zeroInitStruct(zeroInitStruct),
_logger("DriverGraph", config.get<LOG_LEVEL>()) {
Expand Down Expand Up @@ -139,7 +139,7 @@ void DriverGraph::initialize(const Config& config) {
}

bool DriverGraph::release_blob(const Config& config) {
if (_blob.empty() || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 ||
if (_blobPtr == nullptr || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 ||
config.get<PERF_COUNT>()) {
return false;
}
Expand All @@ -152,8 +152,9 @@ bool DriverGraph::release_blob(const Config& config) {
return false;
}

_blob.clear();
_blob.shrink_to_fit();
if (!_blobPtr->release_from_memory()) {
return false;
}

_logger.debug("Blob is released");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,48 +83,56 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con

_logger.debug("compile start");
auto networkDesc = _compiler->compile(model, config);
auto blobPtr = std::make_unique<BlobContainerVector>(std::move(networkDesc.compiledNetwork));
_logger.debug("compile end");

ze_graph_handle_t graphHandle = nullptr;

if (_zeGraphExt) {
// Depending on the config, we may get an error when trying to get the graph handle from the compiled network
try {
graphHandle = _zeGraphExt->getGraphHandle(networkDesc.compiledNetwork);
graphHandle =
_zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()), blobPtr->size());
} catch (...) {
_logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not "
"allowed. Only exports are available");
}
}

return std::make_shared<PluginGraph>(_zeGraphExt,
_compiler,
_zeroInitStruct,
graphHandle,
std::move(networkDesc.metadata),
std::move(networkDesc.compiledNetwork),
std::move(blobPtr),
config);
}

std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::vector<uint8_t> network, const Config& config) const {
std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::unique_ptr<BlobContainer> blobPtr,
const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");

_logger.debug("parse start");
std::vector<uint8_t> network(blobPtr->size());
network.assign(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()),
reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()) + blobPtr->size());
auto networkMeta = _compiler->parse(network, config);
network.clear();
network.shrink_to_fit();
_logger.debug("parse end");

ze_graph_handle_t graphHandle = nullptr;

if (_zeGraphExt) {
graphHandle = _zeGraphExt->getGraphHandle(network);
graphHandle =
_zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()), blobPtr->size());
}

return std::make_shared<PluginGraph>(_zeGraphExt,
_compiler,
_zeroInitStruct,
graphHandle,
std::move(networkMeta),
std::move(network),
std::move(blobPtr),
config);
}

Expand Down
20 changes: 14 additions & 6 deletions src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
std::vector<uint8_t> blob,
std::unique_ptr<BlobContainer> blobPtr,
const Config& config)
: IGraph(graphHandle, std::move(metadata), config, std::optional<std::vector<uint8_t>>(std::move(blob))),
: IGraph(graphHandle,
std::move(metadata),
config,
std::optional<std::unique_ptr<BlobContainer>>(std::move(blobPtr))),
_zeGraphExt(zeGraphExt),
_zeroInitStruct(zeroInitStruct),
_compiler(compiler),
Expand All @@ -31,7 +34,7 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
}

void PluginGraph::export_blob(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(_blob.data()), _blob.size());
stream.write(reinterpret_cast<const char*>(_blobPtr->get_ptr()), _blobPtr->size());

if (!stream) {
_logger.error("Write blob to stream failed. Blob is broken!");
Expand All @@ -40,20 +43,25 @@ void PluginGraph::export_blob(std::ostream& stream) const {

if (_logger.level() >= ov::log::Level::INFO) {
std::uint32_t result = 1171117u;
for (const uint8_t* it = _blob.data(); it != _blob.data() + _blob.size(); ++it) {
for (const uint8_t* it = reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr());
it != reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr()) + _blobPtr->size();
++it) {
result = ((result << 7) + result) + static_cast<uint32_t>(*it);
}

std::stringstream str;
str << "Blob size: " << _blob.size() << ", hash: " << std::hex << result;
str << "Blob size: " << _blobPtr->size() << ", hash: " << std::hex << result;
_logger.info(str.str().c_str());
}
_logger.info("Write blob to stream successfully.");
}

std::vector<ov::ProfilingInfo> PluginGraph::process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const {
return _compiler->process_profiling_output(profData, _blob, config);
std::vector<uint8_t> blob(_blobPtr->size());
blob.assign(reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr()),
reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr()) + _blobPtr->size());
return _compiler->process_profiling_output(profData, blob, config);
}

void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const {
Expand Down
Loading

0 comments on commit e2c1b93

Please sign in to comment.