Skip to content

Commit

Permalink
Add new CompiledNetwork container for blob return
Browse files Browse the repository at this point in the history
  • Loading branch information
MirceaDan99 committed Sep 24, 2024
1 parent 13aff9b commit 58b68c1
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 21 deletions.
16 changes: 14 additions & 2 deletions src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,18 @@ struct NetworkDescription final {
NetworkMetadata metadata;
};

/**
 * @struct CompiledNetwork
 * @brief View over the bytes of a compiled model, bundled with optional backing storage.
 * @details Used when exporting a model. `data` and `size` describe the bytes to export.
 * `container` may hold the buffer that `data` points into; it may also be left empty when
 * the bytes are stored elsewhere, which avoids holding a second copy of the blob.
 *
 * Readers must always go through `data`/`size`, never through `container`.
 *
 * @note Copying this struct copies `container` but leaves `data` pointing at the buffer it
 * referred to before the copy. Prefer moving the struct or binding it by reference.
 */
struct CompiledNetwork {
    /// First byte of the compiled blob; nullptr when no blob is attached.
    const uint8_t* data = nullptr;
    /// Number of bytes readable starting at `data`.
    size_t size = 0;
    /// Optional storage for the blob; may be empty when `data` refers to memory held elsewhere.
    std::vector<uint8_t> container;
};

/**
* @interface ICompiler
* @brief An interface to be implemented by a concrete compiler to provide
Expand Down Expand Up @@ -203,8 +215,8 @@ class ICompiler : public std::enable_shared_from_this<ICompiler> {
// Driver compiler can use this to release graphHandle, if we do not have executor
virtual void release([[maybe_unused]] std::shared_ptr<const NetworkDescription> networkDescription){};

virtual std::pair<const uint8_t*, size_t> getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) {
return {networkDescription->compiledNetwork.data(), networkDescription->compiledNetwork.size()};
/**
 * @brief Default accessor for the compiled blob stored in a network description.
 * @param networkDescription Description whose `compiledNetwork` vector is exposed.
 * @return A CompiledNetwork whose `data`/`size` refer to the description's own vector and
 * whose `container` is a copy of that vector.
 */
// NOTE(review): `data` points at networkDescription->compiledNetwork, not at the copy placed in
// `container`, so the view is only valid while the description is alive. Confirm whether the
// copy is needed at all.
virtual CompiledNetwork getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) {
    const auto& storedBlob = networkDescription->compiledNetwork;
    return CompiledNetwork{storedBlob.data(), storedBlob.size(), storedBlob};
}

protected:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class LevelZeroCompilerAdapter final : public ICompiler {

void release(std::shared_ptr<const NetworkDescription> networkDescription) override;

std::pair<const uint8_t*, size_t> getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) override;
CompiledNetwork getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) override;

private:
/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class LevelZeroCompilerInDriver final : public ICompiler {

void release(std::shared_ptr<const NetworkDescription> networkDescription) override;

std::pair<const uint8_t*, size_t> getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) override;
CompiledNetwork getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) override;

private:
NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const;
Expand All @@ -130,14 +130,12 @@ class LevelZeroCompilerInDriver final : public ICompiler {

template <typename T = TableExtension, typename std::enable_if_t<UseCopyForNativeBinary(T), bool> = true>
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::shared_ptr<const NetworkDescription> networkDescription,
ze_graph_handle_t graphHandle, std::vector<uint8_t>& blob,
uint8_t** blobPtr, size_t* blobSize) const;

template <typename T = TableExtension, typename std::enable_if_t<!UseCopyForNativeBinary(T), bool> = true>
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::shared_ptr<const NetworkDescription>,
ze_graph_handle_t graphHandle, std::vector<uint8_t>& /* unusedBlob */,
uint8_t** blobPtr, size_t* blobSize) const;

template <typename T = TableExtension, typename std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ void LevelZeroCompilerAdapter::release(std::shared_ptr<const NetworkDescription>
apiAdapter->release(std::move(networkDescription));
}

std::pair<const uint8_t*, size_t> LevelZeroCompilerAdapter::getCompiledNetwork(
CompiledNetwork LevelZeroCompilerAdapter::getCompiledNetwork(
std::shared_ptr<const NetworkDescription> networkDescription) {
_logger.info("getCompiledNetwork - using adapter to perform getCompiledNetwork(networkDescription)");
return apiAdapter->getCompiledNetwork(std::move(networkDescription));
Expand Down
18 changes: 10 additions & 8 deletions src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,10 +366,11 @@ template <typename TableExtension>
template <typename T, std::enable_if_t<UseCopyForNativeBinary(T), bool>>
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::shared_ptr<const NetworkDescription> networkDescription,
std::vector<uint8_t>& blob,
uint8_t** blobPtr, size_t* blobSize) const {
// Get blob size first
auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, blobSize, nullptr);
blob.resize(*blobSize);

OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
"Failed to compile network. L0 pfnGetNativeBinary get blob size",
Expand All @@ -382,7 +383,7 @@ void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditabl
getLatestBuildError());

// Get blob data
result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, blobSize, std::const_pointer_cast<NetworkDescription>(networkDescription)->compiledNetwork.data());
result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, blobSize, blob.data());

OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
"Failed to compile network. L0 pfnGetNativeBinary get blob data",
Expand All @@ -394,14 +395,14 @@ void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditabl
". ",
getLatestBuildError());

*blobPtr = std::const_pointer_cast<NetworkDescription>(networkDescription)->compiledNetwork.data();
*blobPtr = blob.data();
}

template <typename TableExtension>
template <typename T, std::enable_if_t<!UseCopyForNativeBinary(T), bool>>
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::shared_ptr<const NetworkDescription>,
std::vector<uint8_t>& /* unusedBlob */,
uint8_t** blobPtr, size_t* blobSize) const {
// Get blob ptr and size
auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, blobSize, blobPtr);
Expand All @@ -418,22 +419,23 @@ void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditabl
}

/**
 * @brief Returns the compiled blob for the given network description.
 * @details When the description carries a graph handle but its `compiledNetwork` vector is
 * empty, the blob is obtained through getNativeBinary(). Otherwise the blob already stored in
 * the description is returned.
 * @param networkDescription Description providing `metadata.graphHandle` and `compiledNetwork`.
 * @return The blob pointer and size; the CompiledNetwork form also carries a storage vector.
 */
template <typename TableExtension>
std::pair<const uint8_t*, size_t> LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
CompiledNetwork LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
std::shared_ptr<const NetworkDescription> networkDescription) {
if (networkDescription->metadata.graphHandle != nullptr && networkDescription->compiledNetwork.size() == 0) {
_logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle");
ze_graph_handle_t graphHandle = static_cast<ze_graph_handle_t>(networkDescription->metadata.graphHandle);

// blobPtr and blobSize are out-parameters written by getNativeBinary().
// NOTE(review): blobPtr starts uninitialized and blobSize starts at SIZE_MAX (-1 converted to size_t).
uint8_t* blobPtr;
size_t blobSize = -1;
// Storage handed to getNativeBinary(): the copying overload resizes and fills it, the other
// overload ignores it and leaves it empty.
std::vector<uint8_t> blob;

getNativeBinary(_graphDdiTableExt, graphHandle, networkDescription, &blobPtr, &blobSize);
getNativeBinary(_graphDdiTableExt, graphHandle, blob, &blobPtr, &blobSize);

_logger.info("LevelZeroCompilerInDriver getCompiledNetwork returning blob");
return {blobPtr, blobSize};
// blob is moved into the result so it stays alive together with blobPtr/blobSize.
return CompiledNetwork{blobPtr, blobSize, std::move(blob)};
} else {
_logger.info("return the blob from network description");
return {networkDescription->compiledNetwork.data(), networkDescription->compiledNetwork.size()};
// The pointer and size refer to the description's own vector; the third member is a copy of it.
return CompiledNetwork{networkDescription->compiledNetwork.data(), networkDescription->compiledNetwork.size(), networkDescription->compiledNetwork};
}
}

Expand Down
8 changes: 4 additions & 4 deletions src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ constexpr std::string_view NO_EXECUTOR_FOR_INFERENCE =
"Can't create infer request!\n"
"Please make sure that the device is available. Only exports can be made.";

std::uint32_t hash(std::pair<const uint8_t*, size_t> blob) {
std::uint32_t hash(const intel_npu::CompiledNetwork& blob) {
std::uint32_t result = 1171117u;
for (const uint8_t* it = blob.first; it != blob.first + blob.second; ++it) {
for (const uint8_t* it = blob.data; it != blob.data + blob.size; ++it) {
result = ((result << 7) + result) + static_cast<uint32_t>(*it);
}
return result;
Expand Down Expand Up @@ -141,11 +141,11 @@ std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request(
void CompiledModel::export_model(std::ostream& stream) const {
_logger.debug("CompiledModel::export_model");
const auto&& blob = _compiler->getCompiledNetwork(_networkPtr);
stream.write(reinterpret_cast<const char*>(blob.first), blob.second);
stream.write(reinterpret_cast<const char*>(blob.data), blob.size);

if (_logger.level() == ov::log::Level::INFO) {
std::stringstream str;
str << "Blob size: " << blob.second << ", hash: " << std::hex << hash(blob);
str << "Blob size: " << blob.size << ", hash: " << std::hex << hash(blob);
_logger.info(str.str().c_str());

if (!stream) {
Expand Down

0 comments on commit 58b68c1

Please sign in to comment.