Skip to content

Commit

Permalink
POC/use_mmap_at_import_model v1
Browse files Browse the repository at this point in the history
  • Loading branch information
MirceaDan99 committed Aug 19, 2024
1 parent 3692cf8 commit b2b8dc9
Show file tree
Hide file tree
Showing 10 changed files with 145 additions and 35 deletions.
42 changes: 34 additions & 8 deletions src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
#include "openvino/core/type/element_type.hpp"
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/util/mmap_object.hpp"

#include <optional>

namespace intel_npu {

Expand Down Expand Up @@ -131,22 +134,41 @@ struct NetworkMetadata final {
* to provide such information about a network as description of inputs and outputs,
* name and compiled network in a format executable by device
*/
struct NetworkDescription final {
NetworkDescription(std::vector<uint8_t>&& compiledNetwork, NetworkMetadata&& metadata)
: compiledNetwork(std::move(compiledNetwork)),
metadata(std::move(metadata)) {}

// NOTE(review): kept as macros (not inline functions) because NetworkDescriptionT is
// declared below their first textual position; consider converting to inline helpers
// once declaration order is rearranged. Expansions are parenthesized so they compose
// safely inside larger expressions.
// True when `network` is backed by an in-memory std::vector<uint8_t> blob.
#define NetworkDescriptionCastCheck(network) (dynamic_cast<const NetworkDescriptionT<std::vector<uint8_t>>*>(network.get()) != nullptr)
// Downcast to the vector-backed description; yields nullptr on type mismatch.
#define NetworkDescriptionPtrCast1(network) (std::dynamic_pointer_cast<const NetworkDescriptionT<std::vector<uint8_t>>>(network))
// Downcast to the mmap-backed description; yields nullptr on type mismatch.
#define NetworkDescriptionPtrCast2(network) (std::dynamic_pointer_cast<const NetworkDescriptionT<std::shared_ptr<ov::MappedMemory>>>(network))

/**
 * @brief Polymorphic base for a compiled-network description.
 * @details Holds only the network metadata; the compiled blob storage lives in the
 * NetworkDescriptionT<T> subclass so it can be either an owned byte vector or a
 * memory-mapped file. Copy is deleted to prevent accidental blob duplication;
 * the destructor is virtual because instances are held through base pointers.
 */
struct NetworkDescription {
    // Force move semantics to prevent blob copies
    explicit NetworkDescription(NetworkMetadata&& metadata) : metadata(std::move(metadata)) {}
    NetworkDescription(const NetworkDescription&) = delete;
    NetworkDescription(NetworkDescription&&) = default;
    NetworkDescription& operator=(const NetworkDescription&) = delete;
    NetworkDescription& operator=(NetworkDescription&&) = default;

    // Virtual: concrete objects are NetworkDescriptionT<T> accessed via base pointers.
    virtual ~NetworkDescription() = default;

    NetworkMetadata metadata;
};

/**
 * @brief Storage-specific compiled-network description.
 * @tparam T Blob storage type: std::vector<uint8_t> (owned copy) or
 *           std::shared_ptr<ov::MappedMemory> (memory-mapped file).
 */
template <typename T>
struct NetworkDescriptionT : public NetworkDescription {
    // Move both arguments into place; the previous assignment-in-body form copied
    // the blob despite the rvalue-reference parameter.
    NetworkDescriptionT(T&& compiledNetwork, NetworkMetadata&& metadata)
        : NetworkDescription(std::move(metadata)),
          compiledNetwork(std::move(compiledNetwork)) {}

    NetworkDescriptionT(const NetworkDescriptionT<T>&) = delete;
    NetworkDescriptionT(NetworkDescriptionT<T>&&) = default;
    NetworkDescriptionT& operator=(const NetworkDescriptionT<T>&) = delete;
    NetworkDescriptionT& operator=(NetworkDescriptionT<T>&&) = default;
    ~NetworkDescriptionT() override = default;

    T compiledNetwork;
};

/**
* @interface ICompiler
* @brief An interface to be implemented by a concrete compiler to provide
Expand Down Expand Up @@ -190,12 +212,16 @@ class ICompiler : public std::enable_shared_from_this<ICompiler> {
* to be used for creating network description
* @return a shared pointer on an object implementing NetworkDescription interface
*/
virtual NetworkMetadata parse(const std::vector<uint8_t>& network, const Config& config) const = 0;
virtual NetworkMetadata parse(const std::shared_ptr<ov::MappedMemory>& mmapNetwork, const Config& config) const = 0;

virtual std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const std::vector<uint8_t>& network,
const Config& config) const = 0;

virtual std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const std::shared_ptr<ov::MappedMemory>& network,
const Config& config) const = 0;

protected:
virtual ~ICompiler() = default;
};
Expand Down
15 changes: 13 additions & 2 deletions src/plugins/intel_npu/src/backend/src/zero_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,20 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i
ze_graph_desc_t desc{ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES,
nullptr,
ZE_GRAPH_FORMAT_NATIVE,
_networkDesc->compiledNetwork.size(),
_networkDesc->compiledNetwork.data(),
0,
nullptr,
nullptr};

if (NetworkDescriptionCastCheck(_networkDesc)) {
auto _networkDescCast = NetworkDescriptionPtrCast1(_networkDesc);
desc.inputSize = _networkDescCast->compiledNetwork.size();
desc.pInput = _networkDescCast->compiledNetwork.data();
} else {
auto _networkDescCast = NetworkDescriptionPtrCast2(_networkDesc);
desc.inputSize = _networkDescCast->compiledNetwork->size();
desc.pInput = reinterpret_cast<uint8_t*>(_networkDescCast->compiledNetwork->data());
}

zeroUtils::throwOnFail(
"pfnCreate",
_graph_ddi_table_ext->pfnCreate(_initStructs->getContext(), _initStructs->getDevice(), &desc, &_graph));
Expand Down
7 changes: 5 additions & 2 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -595,10 +595,13 @@ std::vector<ov::ProfilingInfo> ZeroInferRequest::get_profiling_info() const {
// processing to the compiler
const auto& networkDesc = compiledModel.get_network_description();
const auto& compiler = compiledModel.get_compiler();
const auto& blob = networkDesc->compiledNetwork;
auto profData = get_raw_profiling_data();
_logger.debug("InferRequest::get_profiling_info complete with compiler->process_profiling_output().");
return compiler->process_profiling_output(profData, blob, compilerConfig);
if (NetworkDescriptionCastCheck(networkDesc) == false) {
auto networkDescCast = NetworkDescriptionPtrCast2(networkDesc);
return compiler->process_profiling_output(profData, networkDescCast->compiledNetwork, compilerConfig);
}
return compiler->process_profiling_output(profData, NetworkDescriptionPtrCast1(networkDesc)->compiledNetwork, compilerConfig);
} else {
auto proftype = _config.get<PROFILING_TYPE>();
if (proftype == ov::intel_npu::ProfilingType::INFER) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,16 @@ class LevelZeroCompilerAdapter final : public ICompiler {

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override final;

NetworkMetadata parse(const std::vector<uint8_t>& network, const Config& config) const override final;
NetworkMetadata parse(const std::shared_ptr<ov::MappedMemory>& mmapNetwork, const Config& config) const override final;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const std::vector<uint8_t>& network,
const Config& config) const override final;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const std::shared_ptr<ov::MappedMemory>& mmapNetwork,
const Config& config) const override final;

private:
/**
* @brief Separate externals calls to separate class
Expand Down
44 changes: 44 additions & 0 deletions src/plugins/intel_npu/src/compiler/include/iexternal_compiler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once
#include "intel_npu/al/icompiler.hpp"

namespace intel_npu {
namespace driverCompilerAdapter {

// Serialized OpenVINO IR handed to the external compiler: the model graph as XML
// plus its binary weights, each carried in a stream.
struct IR {
std::stringstream xml;
std::stringstream weights;
};

/**
* @brief Interface for external compiler
* @details Isolate external API calls from general logic
*/
class IExternalCompiler {
public:
virtual ~IExternalCompiler() = default;

/**
* @brief Get opset supported by compiler
*/
virtual uint32_t getSupportedOpset() const = 0;

/**
* @brief Get query result for current network
* @return Names of the operations supported by the compiler for this IR.
*/
virtual std::unordered_set<std::string> getQueryResult(IR& irModel, const Config& config) const = 0;

/**
* @brief Sends the serialized model and its I/O metadata to the driver for compilation.
* @return The compiled model descriptor corresponding to the previously given network.
* NOTE(review): NetworkDescription is a polymorphic, move-only base whose blob
* storage lives in NetworkDescriptionT<T>; returning it by value slices off the
* compiled blob — confirm whether this should return a (shared) pointer instead.
*/
virtual NetworkDescription compileIR(const std::shared_ptr<const ov::Model>& model,
IR& irModel,
const Config& config) const = 0;
/**
* @brief Extracts I/O metadata from a previously compiled, memory-mapped blob.
* @param mmapBlob Memory-mapped compiled network (not parsed as IR).
*/
virtual NetworkMetadata parseBlob(const std::shared_ptr<ov::MappedMemory>& mmapBlob, const Config& config) const = 0;
};
} // namespace driverCompilerAdapter
} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,20 @@ class LevelZeroCompilerInDriver final : public ICompiler {
ze_device_graph_properties_t deviceGraphProperties,
ze_graph_handle_t& graphHandle) const;

NetworkMetadata parse(const std::vector<uint8_t>& network, const Config& config) const override final;
NetworkMetadata parse(const std::shared_ptr<ov::MappedMemory>& mmapNetwork, const Config& config) const override final;

// Profiling post-processing for vector-backed blobs is not handled on the
// in-driver compiler path — callers are expected to use a different decoding
// route (TODO confirm which component decodes profiling here).
std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const std::vector<uint8_t>& network,
const Config& config) const override final {
OPENVINO_THROW("Profiling post-processing is not implemented.");
}

// Mmap-backed counterpart of the overload above; equally unimplemented on the
// in-driver compiler path.
std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const std::shared_ptr<ov::MappedMemory>& mmapNetwork,
const Config& config) const override final {
OPENVINO_THROW("Profiling post-processing is not implemented.");
}

template <typename T = TableExtension, std::enable_if_t<!NotSupportQuery(T), bool> = true>
std::unordered_set<std::string> getQueryResultFromSupportedLayers(
ze_result_t result,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,9 @@ ov::SupportedOpsMap LevelZeroCompilerAdapter::query(const std::shared_ptr<const
return apiAdapter->query(model, config);
}

/**
 * @brief Extracts network metadata from a memory-mapped compiled blob.
 * @details Thin forwarder: delegates the actual parsing to the underlying
 * driver-side adapter. (The span previously contained both the old
 * vector-based and the new mmap-based signature lines back-to-back — a diff
 * artifact that would not compile; only the mmap signature is kept.)
 */
NetworkMetadata LevelZeroCompilerAdapter::parse(const std::shared_ptr<ov::MappedMemory>& mmapNetwork,
                                                const Config& config) const {
    _logger.debug("parse start");
    return apiAdapter->parse(mmapNetwork, config);
}

std::vector<ov::ProfilingInfo> LevelZeroCompilerAdapter::process_profiling_output(const std::vector<uint8_t>&,
Expand All @@ -206,5 +206,11 @@ std::vector<ov::ProfilingInfo> LevelZeroCompilerAdapter::process_profiling_outpu
OPENVINO_THROW("Profiling post-processing is not implemented.");
}

// Mmap-blob overload: profiling post-processing is not supported by this
// adapter; any caller reaching this path is an error.
std::vector<ov::ProfilingInfo> LevelZeroCompilerAdapter::process_profiling_output(const std::vector<uint8_t>&,
const std::shared_ptr<ov::MappedMemory>&,
const Config&) const {
OPENVINO_THROW("Profiling post-processing is not implemented.");
}

} // namespace driverCompilerAdapter
} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -870,23 +870,23 @@ NetworkDescription LevelZeroCompilerInDriver<TableExtension>::compile(const std:
}

_logger.debug("compile end");
return NetworkDescription(std::move(blob), std::move(networkMeta));
return NetworkDescriptionT<std::vector<uint8_t>>(std::move(blob), std::move(networkMeta));
}

template <typename TableExtension>
NetworkMetadata LevelZeroCompilerInDriver<TableExtension>::parse(const std::vector<uint8_t>& network,
NetworkMetadata LevelZeroCompilerInDriver<TableExtension>::parse(const std::shared_ptr<ov::MappedMemory>& mmapNetwork,
const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "LevelZeroCompilerInDriver::parse", "desc");
ze_graph_handle_t graphHandle;

if (!network.empty()) {
if (mmapNetwork->size() > 0) {
_logger.debug("Import network case");
ze_graph_format_t format = ZE_GRAPH_FORMAT_NATIVE;
ze_graph_desc_t desc{ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES,
nullptr,
format,
network.size(),
network.data(),
mmapNetwork->size(),
reinterpret_cast<uint8_t*>(mmapNetwork->data()),
nullptr};

auto result = _graphDdiTableExt->pfnCreate(_context, _deviceHandle, &desc, &graphHandle);
Expand Down
22 changes: 16 additions & 6 deletions src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,17 +128,27 @@ std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request(
}

/**
 * @brief Serializes the compiled blob into the given stream.
 * @details Resolves the blob's storage first: vector-backed (compile path) or
 * mmap-backed (import path). The size/hash log is only produced for the
 * vector-backed case, mirroring the original behavior.
 * (Stale diff lines after the write block referenced an undefined `blob` and
 * would not compile; they are removed. `data`/`size` are now initialized so a
 * failed cast cannot lead to a write from indeterminate values.)
 */
void CompiledModel::export_model(std::ostream& stream) const {
    const uint8_t* data = nullptr;
    size_t size = 0;
    if (NetworkDescriptionCastCheck(_networkPtr)) {
        auto networkPtrCast = NetworkDescriptionPtrCast1(_networkPtr);
        const auto& blob = networkPtrCast->compiledNetwork;
        data = blob.data();
        size = blob.size();
        std::stringstream str;
        str << "Blob size: " << blob.size() << ", hash: " << std::hex << hash(blob);
        _logger.info(str.str().c_str());
    } else {
        auto networkPtrCast = NetworkDescriptionPtrCast2(_networkPtr);
        data = reinterpret_cast<const uint8_t*>(networkPtrCast->compiledNetwork->data());
        size = networkPtrCast->compiledNetwork->size();
    }
    stream.write(reinterpret_cast<const char*>(data), size);
    if (!stream) {
        _logger.error("Write blob to stream failed. Blob is broken!");
    } else {
        _logger.info("Write blob to stream successfully.");
    }
}

std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
Expand Down
16 changes: 8 additions & 8 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "openvino/op/parameter.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/util/mmap_object.hpp"
#include "remote_context.hpp"

using namespace intel_npu;
Expand Down Expand Up @@ -742,21 +743,20 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c

auto graphSize = getFileSize(stream);

std::vector<uint8_t> blob(graphSize);
stream.read(reinterpret_cast<char*>(blob.data()), graphSize);
if (!stream) {
OPENVINO_THROW("Failed to read data from stream!");
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);
std::ofstream tmpBlob("tmpBlob.blob", std::ios::binary);
tmpBlob << stream.rdbuf();
tmpBlob.close();

auto mMapBlob = ov::load_mmap_object("tmpBlob.blob");

auto meta = compiler->parse(blob, localConfig);
auto meta = compiler->parse(mMapBlob, localConfig);
meta.name = "net" + std::to_string(_compiledModelLoadCounter++);

const std::shared_ptr<ov::Model> modelDummy = create_dummy_model(meta.inputs, meta.outputs);

bool profiling = localConfig.get<PERF_COUNT>();

auto networkDescription = std::make_shared<const NetworkDescription>(std::move(blob), std::move(meta));
auto networkDescription = std::make_shared<const NetworkDescriptionT<std::shared_ptr<ov::MappedMemory>>>(std::move(mMapBlob), std::move(meta));

compiledModel = std::make_shared<CompiledModel>(modelDummy,
shared_from_this(),
Expand Down

0 comments on commit b2b8dc9

Please sign in to comment.