Skip to content

Commit

Permalink
POC for supporting blob metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
MirceaDan99 committed Dec 15, 2024
1 parent 99b823b commit 33817f4
Show file tree
Hide file tree
Showing 12 changed files with 487 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,29 +47,27 @@ class BlobContainerVector : public BlobContainer {
// Blob container that holds a std::shared_ptr<ov::AlignedBuffer> (_ownershipBlob) as the storage of the blob.
// NOTE(review): this listing is a flattened diff, so removed and added lines appear side by side (two
// constructor parameter/initializer lists, two return statements in size(), two declarations of
// _ownershipBlob). Confirm which lines are live against the actual header.
class BlobContainerAlignedBuffer : public BlobContainer {
public:
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO,
size_t ovHeaderOffset,
size_t metadataSize)
: _ownershipBlob(blobSO),
size_t ovHeaderOffset, uint64_t blobSize)
: _blobSize(blobSize),
_ovHeaderOffset(ovHeaderOffset),
_metadataSize(metadataSize) {}
_ownershipBlob(blobSO) {}

// Returns the pointer the aligned buffer yields for offset _ovHeaderOffset.
void* get_ptr() override {
return _ownershipBlob->get_ptr(_ovHeaderOffset);
}

// Returns the size of the blob.
// NOTE(review): one variant derives it from the buffer size minus header offset and metadata size, the
// other returns the stored _blobSize; presumably the latter is the added line — confirm.
size_t size() const override {
// remove OV header offset and metadata from blob size
return _ownershipBlob->size() - _ovHeaderOffset - _metadataSize;
return _blobSize;
}

// Always reports false.
bool release_from_memory() override {
return false;
}

private:
std::shared_ptr<ov::AlignedBuffer> _ownershipBlob;
uint64_t _blobSize;
size_t _ovHeaderOffset;
size_t _metadataSize;
std::shared_ptr<ov::AlignedBuffer> _ownershipBlob;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
const Config& config,
std::unique_ptr<BlobContainer> blobPtr);

virtual void export_blob(std::ostream& stream) const = 0;
virtual size_t export_blob(std::ostream& stream) const = 0;

virtual std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class DriverGraph final : public IGraph {
const Config& config,
std::unique_ptr<BlobContainer> blob);

void export_blob(std::ostream& stream) const override;
size_t export_blob(std::ostream& stream) const override;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class PluginGraph final : public IGraph {
std::unique_ptr<BlobContainer> blobPtr,
const Config& config);

void export_blob(std::ostream& stream) const override;
size_t export_blob(std::ostream& stream) const override;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ DriverGraph::DriverGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
initialize(config);
}

void DriverGraph::export_blob(std::ostream& stream) const {
size_t DriverGraph::export_blob(std::ostream& stream) const {
const uint8_t* blobPtr = nullptr;
size_t blobSize = -1;
size_t blobSize;
std::vector<uint8_t> blob;

if (_blobIsReleased) {
Expand All @@ -47,7 +47,7 @@ void DriverGraph::export_blob(std::ostream& stream) const {

if (!stream) {
_logger.error("Write blob to stream failed. Blob is broken!");
return;
return 0;
}

if (_logger.level() >= ov::log::Level::INFO) {
Expand All @@ -61,6 +61,7 @@ void DriverGraph::export_blob(std::ostream& stream) const {
_logger.info(str.str().c_str());
}
_logger.info("Write blob to stream successfully.");
return blobSize;
}

std::vector<ov::ProfilingInfo> DriverGraph::process_profiling_output(const std::vector<uint8_t>& profData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
initialize(config);
}

void PluginGraph::export_blob(std::ostream& stream) const {
size_t PluginGraph::export_blob(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(_blobPtr->get_ptr()), _blobPtr->size());

if (!stream) {
_logger.error("Write blob to stream failed. Blob is broken!");
return;
return 0;
}

if (_logger.level() >= ov::log::Level::INFO) {
Expand All @@ -51,6 +51,7 @@ void PluginGraph::export_blob(std::ostream& stream) const {
_logger.info(str.str().c_str());
}
_logger.info("Write blob to stream successfully.");
return _blobPtr->size();
}

std::vector<ov::ProfilingInfo> PluginGraph::process_profiling_output(const std::vector<uint8_t>& profData,
Expand Down
178 changes: 178 additions & 0 deletions src/plugins/intel_npu/src/plugin/include/metadata.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <stdint.h>

#include <iosfwd>
#include <memory>
#include <string>
#include <string_view>
#include <vector>

#include <openvino/runtime/aligned_buffer.hpp>

namespace intel_npu {

/**
 * @brief Magic bytes used for identifying NPU blobs.
 *
 * @note Declared `inline` so that every translation unit including this header refers to one and the same
 * object instead of getting its own internal-linkage copy.
 */
inline constexpr std::string_view MAGIC_BYTES = "OVNPU";

/**
 * @brief Returns a uint32_t value which represents two uint16_t values concatenated.
 * @details Convention for bumping the metadata version:
 *    - Increment Major in case of: removing a current field OR adding a new field in between fields.
 *    - Increment Minor in case of: adding a new field at the end.
 *
 * @param major Major version; placed in the upper 16 bits of the result.
 * @param minor Minor version; placed in the lower 16 bits of the result.
 * @return Major and minor versions concatenated into a single uint32_t value.
 */
constexpr uint32_t make_version(uint16_t major, uint16_t minor) {
    // Widen before shifting: a bare `major << 16` is evaluated in signed int after integral promotion, which
    // makes the result for major >= 0x8000 depend on implementation-defined signed conversion.
    return (static_cast<uint32_t>(major) << 16) | static_cast<uint32_t>(minor);
}

/**
 * @brief Extracts the major version from a packed version value.
 *
 * @param version Packed version; the major part is read from the upper 16 bits.
 * @return Major version.
 */
constexpr uint16_t get_major(uint32_t version) {
    const uint32_t upperHalf = version >> 16;
    return static_cast<uint16_t>(upperHalf);
}

/**
 * @brief Extracts the minor version from a packed version value.
 *
 * @param version Packed version; the minor part is read from the lower 16 bits.
 * @return Minor version.
 */
constexpr uint16_t get_minor(uint32_t version) {
    const uint32_t lowerHalf = version & 0x0000ffffu;
    return static_cast<uint16_t>(lowerHalf);
}

/**
 * @brief List of supported version formats.
 *
 * @note Declared `inline` so the constant is a single entity across all translation units that include
 * this header.
 */
inline constexpr uint32_t METADATA_VERSION_1_0{make_version(1, 0)};

/**
 * @brief Current metadata version.
 */
inline constexpr uint32_t CURRENT_METADATA_VERSION{METADATA_VERSION_1_0};

/**
* @brief Holds an OpenVINO version string together with its size.
*/
struct OpenvinoVersion {
private:
// Version string, returned by get_version().
std::string _version;
// Size of the version string, exposed by reference through get_size().
uint32_t _size;

public:
/**
* @brief Constructs the object from a version string.
*
* @note NOTE(review): presumably initializes both the stored string and its size; the definition is not
* visible in this header — confirm.
*/
OpenvinoVersion(std::string_view version);

/**
* @brief Reads version data from a stream.
*/
void read(std::istream& stream);

/**
* @brief Gets the version string.
*/
std::string get_version();

/**
* @brief Gets the size of version string as reference.
* @return Reference to the size.
*
* @note Needed as reference for reading its binary representation in memory.
* @see Metadata::write()
*/
uint32_t& get_size();
};

/**
* @brief Abstract interface for blob metadata: stream (de)serialization, a compatibility check and
* accessors for the blob size and the OV header offset.
*/
struct MetadataBase {
/**
* @brief Reads metadata from a stream.
*/
virtual void read(std::istream& stream) = 0;

/**
* @brief Writes metadata to a stream.
*/
virtual void write(std::ostream& stream) = 0;

/**
* @brief Reports whether the metadata is compatible; the exact criteria are defined by each implementation.
*/
virtual bool is_compatible() = 0;

/**
* @brief Gets the blob size held by the metadata.
*
* @note NOTE(review): presumably the size in bytes of the blob without the metadata — confirm.
*/
virtual uint64_t get_blob_size() const = 0;

/**
* @brief Gets the OV header offset held by the metadata.
*/
virtual size_t get_ov_header_offset() const = 0;

// Virtual destructor: implementations are handed out as std::unique_ptr<MetadataBase>.
virtual ~MetadataBase() = default;
};

/**
* @brief Template for metadata class handling.
*
* @tparam version Metadata version value the specialization handles (see make_version()).
*
* @attention It's a must to have metadata version as first field in any metadata specialization.
*
* @note The primary template adds nothing to MetadataBase; each supported version provides its own
* specialization.
*/
template <uint32_t version>
struct Metadata : public MetadataBase {};

/**
* @brief Template specialization for metadata version 1.0.
*/
template <>
struct Metadata<METADATA_VERSION_1_0> : public MetadataBase {
private:
// Blob data size; see get_blob_size().
uint64_t _blobDataSize;
// Metadata version value; see set_version().
uint32_t _version;
// OV header offset; see get_ov_header_offset().
size_t _ovHeaderOffset;
// OpenVINO version information; see set_ov_version().
OpenvinoVersion _ovVersion;

public:
/**
* @brief Default constructor.
*
* @note NOTE(review): the initial field values are set in the definition, which is not visible here — confirm.
*/
Metadata();

/**
* @brief Constructs the metadata from an OV header offset and a blob data size.
*/
Metadata(size_t ovHeaderOffset, uint64_t blobDataSize);

/**
* @brief Reads metadata from a stream.
*/
void read(std::istream& stream) override;

/**
* @brief Writes metadata to a stream.
*/
void write(std::ostream& stream) override;

/**
* @brief Checks if metadata is supported.
*
* @return Returns:
* - false:
* - if blob metadata does not match current metadata.
* - if blob OpenVINO version does not match current one.
*
* - true: if all versions match.
*
* @note The version check can be disabled if the "NPU_DISABLE_VERSION_CHECK" environment variable is set to '1'.
*/
bool is_compatible() override;

/**
* @brief Sets the metadata version value.
*/
void set_version(uint32_t newVersion);

/**
* @brief Sets the OpenVINO version information.
*/
void set_ov_version(const OpenvinoVersion& newVersion);

/**
* @brief Gets the blob data size.
*/
uint64_t get_blob_size() const override;

/**
* @brief Gets the OV header offset.
*/
size_t get_ov_header_offset() const override;
};

/**
* @brief Creates a Metadata object.
*
* @param version Metadata version value (major and minor concatenated, see make_version()).
* @param ovHeaderOffset OV header offset passed on to the created object.
* @param blobDataSize Blob data size passed on to the created object.
* @return Unique pointer to the created MetadataBase object if the major version is supported; otherwise, returns
* 'nullptr'.
*/
std::unique_ptr<MetadataBase> create_metadata(uint32_t version, size_t ovHeaderOffset, uint64_t blobDataSize);

/**
* @brief Reads metadata from a blob.
*
* @param stream Stream holding the blob.
* @return If the blob is versioned and its major version is supported, returns a unique pointer to the read
* MetadataBase object; otherwise, returns 'nullptr'.
*/
std::unique_ptr<MetadataBase> read_metadata_from(std::istream& stream);

/**
* @brief Overload of read_metadata_from() that additionally receives the model buffer.
*
* @note NOTE(review): presumably follows the same 'nullptr' contract as the stream-only overload and uses
* modelBuffer as the blob storage; the definition is not visible here — confirm.
*/
std::unique_ptr<MetadataBase> read_metadata_from(std::istream& stream, const std::shared_ptr<ov::AlignedBuffer>& modelBuffer);
} // namespace intel_npu
8 changes: 7 additions & 1 deletion src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "intel_npu/config/compiler.hpp"
#include "intel_npu/config/config.hpp"
#include "intel_npu/config/runtime.hpp"
#include "metadata.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/runtime/properties.hpp"
Expand Down Expand Up @@ -72,7 +73,12 @@ std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request(

// Exports the compiled model to `stream`. As written, the output is, in order: the graph blob written by
// export_blob(), the metadata written by Metadata<CURRENT_METADATA_VERSION>::write(), the raw bytes of the
// size returned by export_blob(), and finally MAGIC_BYTES.
// NOTE(review): this listing is a flattened diff; the bare `_graph->export_blob(stream);` call is presumably
// the removed line and only the call that captures the returned size is live — confirm.
void CompiledModel::export_model(std::ostream& stream) const {
_logger.debug("CompiledModel::export_model");
_graph->export_blob(stream);
size_t blobSizeBeforeVersioning = _graph->export_blob(stream);

auto meta = Metadata<CURRENT_METADATA_VERSION>();
meta.write(stream);
// NOTE(review): the size is written as sizeof(size_t) raw bytes, so its on-disk width and byte order follow
// the build platform — confirm the reader expects exactly that.
stream.write(reinterpret_cast<const char*>(&blobSizeBeforeVersioning), sizeof(blobSizeBeforeVersioning));
stream.write(MAGIC_BYTES.data(), MAGIC_BYTES.size());
}

std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
Expand Down
Loading

0 comments on commit 33817f4

Please sign in to comment.