Skip to content

Commit 9e2224e

Browse files
committed
POC for supporting blob metadata to be read from `std::istream` instead
of `std::vector<uint8_t>`
1 parent 4c989ab commit 9e2224e

File tree

6 files changed

+208
-153
lines changed

6 files changed

+208
-153
lines changed

src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,29 +47,27 @@ class BlobContainerVector : public BlobContainer {
4747
/**
 * @brief BlobContainer implementation backed by a shared ov::AlignedBuffer.
 *
 * The container keeps the buffer alive through a shared_ptr and exposes the bytes starting at `_ovHeaderOffset`.
 * In the added lines of this diff the blob size is passed in by the caller (`blobSize`); the removed lines
 * computed it as buffer size minus header offset minus metadata size.
 */
class BlobContainerAlignedBuffer : public BlobContainer {
4848
public:
4949
// Stores the buffer handle, the offset of the blob inside it and (added lines) the blob size.
// The added initializer list follows the added member declaration order below.
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO,
50-
size_t ovHeaderOffset,
51-
size_t metadataSize)
52-
: _ownershipBlob(blobSO),
50+
size_t ovHeaderOffset, uint64_t blobSize)
51+
: _blobSize(blobSize),
5352
_ovHeaderOffset(ovHeaderOffset),
54-
_metadataSize(metadataSize) {}
53+
_ownershipBlob(blobSO) {}
5554

5655
// Returns the address inside the shared buffer at byte offset `_ovHeaderOffset`.
void* get_ptr() override {
5756
return _ownershipBlob->get_ptr(_ovHeaderOffset);
5857
}
5958

6059
// Returns the blob size in bytes.
// NOTE(review): `_blobSize` is uint64_t while the return type is size_t; on a target where size_t is
// narrower than 64 bits this conversion would truncate - confirm such targets are not supported.
size_t size() const override {
61-
// remove OV header offset and metadata from blob size
62-
return _ownershipBlob->size() - _ovHeaderOffset - _metadataSize;
60+
return _blobSize;
6361
}
6462

6563
// Always returns false.
bool release_from_memory() override {
6664
return false;
6765
}
6866

6967
private:
70-
std::shared_ptr<ov::AlignedBuffer> _ownershipBlob;
68+
uint64_t _blobSize;
7169
size_t _ovHeaderOffset;
72-
size_t _metadataSize;
70+
std::shared_ptr<ov::AlignedBuffer> _ownershipBlob;
7371
};
7472

7573
} // namespace intel_npu

src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
1515
const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
1616
ze_graph_handle_t graphHandle,
1717
NetworkMetadata metadata,
18-
std::vector<uint8_t> blob,
18+
std::unique_ptr<BlobContainer> blobPtr,
1919
const Config& config)
20-
: IGraph(graphHandle, std::move(metadata), std::optional<std::vector<uint8_t>>(std::move(blob))),
20+
: IGraph(graphHandle, std::move(metadata), config, std::move(blobPtr)),
2121
_zeGraphExt(zeGraphExt),
2222
_zeroInitStruct(zeroInitStruct),
2323
_compiler(compiler),
@@ -31,7 +31,7 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
3131
}
3232

3333
size_t PluginGraph::export_blob(std::ostream& stream) const {
34-
stream.write(reinterpret_cast<const char*>(_blob.data()), _blob.size());
34+
stream.write(reinterpret_cast<const char*>(_blobPtr->get_ptr()), _blobPtr->size());
3535

3636
if (!stream) {
3737
_logger.error("Write blob to stream failed. Blob is broken!");
@@ -40,21 +40,26 @@ size_t PluginGraph::export_blob(std::ostream& stream) const {
4040

4141
if (_logger.level() >= ov::log::Level::INFO) {
4242
std::uint32_t result = 1171117u;
43-
for (const uint8_t* it = _blob.data(); it != _blob.data() + _blob.size(); ++it) {
43+
for (const uint8_t* it = reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr());
44+
it != reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr()) + _blobPtr->size();
45+
++it) {
4446
result = ((result << 7) + result) + static_cast<uint32_t>(*it);
4547
}
4648

4749
std::stringstream str;
48-
str << "Blob size: " << _blob.size() << ", hash: " << std::hex << result;
50+
str << "Blob size: " << _blobPtr->size() << ", hash: " << std::hex << result;
4951
_logger.info(str.str().c_str());
5052
}
5153
_logger.info("Write blob to stream successfully.");
52-
return _blob.size();
54+
return _blobPtr->size();
5355
}
5456

5557
std::vector<ov::ProfilingInfo> PluginGraph::process_profiling_output(const std::vector<uint8_t>& profData,
5658
const Config& config) const {
57-
return _compiler->process_profiling_output(profData, _blob, config);
59+
std::vector<uint8_t> blob(_blobPtr->size());
60+
blob.assign(reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr()),
61+
reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr()) + _blobPtr->size());
62+
return _compiler->process_profiling_output(profData, blob, config);
5863
}
5964

6065
void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const {
@@ -116,6 +121,16 @@ void PluginGraph::initialize(const Config& config) {
116121

117122
_zeGraphExt->initializeGraph(_handle, config);
118123

124+
if (config.get<BATCH_MODE>() != ov::intel_npu::BatchMode::COMPILER) {
125+
_batch_size = get_batch_size(_metadata);
126+
}
127+
128+
if (config.get<RUN_INFERENCES_SEQUENTIALLY>()) {
129+
auto number_of_command_lists = _batch_size.has_value() ? *_batch_size : 1;
130+
131+
_last_submitted_event.resize(number_of_command_lists);
132+
}
133+
119134
_logger.debug("Graph initialize finish");
120135
}
121136

src/plugins/intel_npu/src/plugin/include/metadata.hpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#include <string>
1111
#include <vector>
1212

13+
#include <openvino/runtime/aligned_buffer.hpp>
14+
1315
namespace intel_npu {
1416

1517
/**
@@ -98,6 +100,10 @@ struct MetadataBase {
98100

99101
virtual bool is_compatible() = 0;
100102

103+
virtual uint64_t get_blob_size() const = 0;
104+
105+
virtual size_t get_ov_header_offset() const = 0;
106+
101107
virtual ~MetadataBase() = default;
102108
};
103109

@@ -114,13 +120,17 @@ struct Metadata : public MetadataBase {};
114120
*/
115121
template <>
116122
struct Metadata<METADATA_VERSION_1_0> : public MetadataBase {
117-
protected:
123+
private:
124+
uint64_t _blobDataSize;
118125
uint32_t _version;
126+
size_t _ovHeaderOffset;
119127
OpenvinoVersion _ovVersion;
120128

121129
public:
122130
Metadata();
123131

132+
Metadata(size_t ovHeaderOffset, uint64_t blobDataSize);
133+
124134
void read(std::istream& stream) override;
125135

126136
void write(std::ostream& stream) override;
@@ -142,6 +152,10 @@ struct Metadata<METADATA_VERSION_1_0> : public MetadataBase {
142152
void set_version(uint32_t newVersion);
143153

144154
void set_ov_version(const OpenvinoVersion& newVersion);
155+
156+
uint64_t get_blob_size() const override;
157+
158+
size_t get_ov_header_offset() const override;
145159
};
146160

147161
/**
@@ -150,14 +164,15 @@ struct Metadata<METADATA_VERSION_1_0> : public MetadataBase {
150164
* @return Unique pointer to the created MetadataBase object if the major version is supported; otherwise, returns
151165
* 'nullptr'.
152166
*/
153-
std::unique_ptr<MetadataBase> create_metadata(uint32_t version);
167+
std::unique_ptr<MetadataBase> create_metadata(uint32_t version, size_t ovHeaderOffset, uint64_t blobDataSize);
154168

155169
/**
156170
* @brief Reads metadata from a blob.
157171
*
158172
* @return If the blob is versioned and its major version is supported, returns an unique pointer to the read
159173
* MetadataBase object; otherwise, returns 'nullptr'.
160174
*/
161-
std::unique_ptr<MetadataBase> read_metadata_from(const std::vector<uint8_t>& blob);
175+
std::unique_ptr<MetadataBase> read_metadata_from(std::istream& stream);
162176

177+
std::unique_ptr<MetadataBase> read_metadata_from(std::istream& stream, const std::shared_ptr<ov::AlignedBuffer>& modelBuffer);
163178
} // namespace intel_npu

src/plugins/intel_npu/src/plugin/src/metadata.cpp

Lines changed: 98 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,34 @@
1010
#include "intel_npu/config/config.hpp"
1111
#include "intel_npu/utils/logger/logger.hpp"
1212
#include "openvino/core/version.hpp"
13+
#include "openvino/runtime/shared_buffer.hpp"
14+
namespace {
15+
16+
size_t getFileSize(std::istream& stream) {
17+
auto log = intel_npu::Logger::global().clone("getFileSize");
18+
if (!stream) {
19+
OPENVINO_THROW("Stream is in bad status! Please check the passed stream status!");
20+
}
21+
22+
const size_t streamStart = stream.tellg();
23+
stream.seekg(0, std::ios_base::end);
24+
const size_t streamEnd = stream.tellg();
25+
stream.seekg(streamStart, std::ios_base::beg);
26+
27+
log.debug("Read blob size: streamStart=%zu, streamEnd=%zu", streamStart, streamEnd);
28+
29+
if (streamEnd < streamStart) {
30+
OPENVINO_THROW("Invalid stream size: streamEnd (",
31+
streamEnd,
32+
") is not larger than streamStart (",
33+
streamStart,
34+
")!");
35+
}
36+
37+
return streamEnd - streamStart;
38+
}
39+
40+
} // namespace
1341

1442
namespace intel_npu {
1543

@@ -24,7 +52,15 @@ void OpenvinoVersion::read(std::istream& stream) {
2452
}
2553

2654
// Default constructor. In the added lines the blob data size and the OV header offset are initialized to 0;
// the version is METADATA_VERSION_1_0 and the OpenVINO version is taken from
// ov::get_openvino_version().buildNumber.
Metadata<METADATA_VERSION_1_0>::Metadata()
27-
: _version{METADATA_VERSION_1_0},
55+
: _blobDataSize{0},
56+
_version{METADATA_VERSION_1_0},
57+
_ovHeaderOffset{0},
58+
_ovVersion{ov::get_openvino_version().buildNumber} {}
59+
60+
// Constructs version 1.0 metadata that records the given OV header offset and blob data size.
// The version is METADATA_VERSION_1_0 and the OpenVINO version is taken from
// ov::get_openvino_version().buildNumber.
Metadata<METADATA_VERSION_1_0>::Metadata(size_t ovHeaderOffset, uint64_t blobDataSize)
61+
: _blobDataSize{blobDataSize},
62+
_version{METADATA_VERSION_1_0},
63+
_ovHeaderOffset{ovHeaderOffset},
2864
_ovVersion{ov::get_openvino_version().buildNumber} {}
2965

3066
void Metadata<METADATA_VERSION_1_0>::read(std::istream& stream) {
@@ -41,13 +77,13 @@ void Metadata<METADATA_VERSION_1_0>::write(std::ostream& stream) {
4177
stream.write(_ovVersion.get_version().data(), _ovVersion.get_version().size());
4278
}
4379

44-
std::unique_ptr<MetadataBase> create_metadata(uint32_t version) {
80+
// Factory for metadata objects, selected by `version`.
// For METADATA_VERSION_1_0 the added line forwards `ovHeaderOffset` and `blobDataSize` to the constructor.
// For any other version the added line returns nullptr, where the removed line threw
// "Invalid metadata version!" - callers must therefore check the result for nullptr.
std::unique_ptr<MetadataBase> create_metadata(uint32_t version, size_t ovHeaderOffset, uint64_t blobDataSize) {
4581
switch (version) {
4682
case METADATA_VERSION_1_0:
47-
return std::make_unique<Metadata<METADATA_VERSION_1_0>>();
83+
return std::make_unique<Metadata<METADATA_VERSION_1_0>>(ovHeaderOffset, blobDataSize);
4884

4985
default:
50-
OPENVINO_THROW("Invalid metadata version!");
86+
return nullptr;
5187
}
5288
}
5389

@@ -75,35 +111,69 @@ bool Metadata<METADATA_VERSION_1_0>::is_compatible() {
75111
return true;
76112
}
77113

78-
std::unique_ptr<MetadataBase> read_metadata_from(const std::vector<uint8_t>& blob) {
114+
std::unique_ptr<MetadataBase> read_metadata_from(std::istream& stream) {
79115
Logger logger("NPUPlugin", Logger::global().level());
80116
size_t magicBytesSize = MAGIC_BYTES.size();
81117
std::string blobMagicBytes;
82118
blobMagicBytes.resize(magicBytesSize);
83119

84-
auto metadataIterator = blob.end() - magicBytesSize;
85-
std::memcpy(blobMagicBytes.data(), &(*metadataIterator), magicBytesSize);
120+
size_t currentStreamPos = stream.tellg();
121+
size_t streamSize = getFileSize(stream);
122+
stream.seekg(streamSize - magicBytesSize, std::ios::beg);
123+
stream.read(blobMagicBytes.data(), magicBytesSize);
86124
if (MAGIC_BYTES != blobMagicBytes) {
87-
OPENVINO_THROW("Blob is missing NPU metadata!");
125+
logger.error("Blob is missing NPU metadata!");
126+
return nullptr;
88127
}
89128

90129
uint64_t blobDataSize;
91-
metadataIterator -= sizeof(blobDataSize);
92-
std::memcpy(&blobDataSize, &(*metadataIterator), sizeof(blobDataSize));
93-
metadataIterator = blob.begin() + blobDataSize;
130+
stream.seekg(streamSize - magicBytesSize - sizeof(blobDataSize), std::ios::beg);
131+
stream.read(reinterpret_cast<char*>(&blobDataSize), sizeof(blobDataSize));
132+
stream.seekg(currentStreamPos + blobDataSize, std::ios::beg);
133+
134+
uint32_t metaVersion;
135+
stream.read(reinterpret_cast<char*>(&metaVersion), sizeof(metaVersion));
136+
137+
auto storedMeta = create_metadata(metaVersion, currentStreamPos, blobDataSize);
138+
if (storedMeta != nullptr) {
139+
storedMeta->read(stream);
140+
} else {
141+
logger.warning("Imported blob metadata version: %d.%d, but the current version is: %d.%d",
142+
get_major(metaVersion),
143+
get_minor(metaVersion),
144+
get_major(CURRENT_METADATA_VERSION),
145+
get_minor(CURRENT_METADATA_VERSION));
146+
}
147+
stream.seekg(currentStreamPos, std::ios::beg);
148+
return storedMeta;
149+
}
150+
151+
std::unique_ptr<MetadataBase> read_metadata_from(std::istream& stream, const std::shared_ptr<ov::AlignedBuffer>& modelBuffer) {
152+
Logger logger("NPUPlugin", Logger::global().level());
153+
size_t magicBytesSize = MAGIC_BYTES.size();
154+
std::string blobMagicBytes;
155+
blobMagicBytes.resize(magicBytesSize);
94156

95-
std::stringstream metadataStream;
96-
metadataStream.write(reinterpret_cast<const char*>(&(*metadataIterator)),
97-
blob.end() - metadataIterator - sizeof(blobDataSize));
157+
size_t currentStreamPos = stream.tellg();
158+
size_t streamSize = modelBuffer->size();
159+
160+
blobMagicBytes.assign(reinterpret_cast<const char*>(modelBuffer->get_ptr(streamSize - magicBytesSize)), magicBytesSize);
161+
if (MAGIC_BYTES != blobMagicBytes) {
162+
logger.error("Blob is missing NPU metadata!");
163+
return nullptr;
164+
}
165+
166+
uint64_t blobDataSize;
167+
blobDataSize = *reinterpret_cast<uint64_t*>(modelBuffer->get_ptr(streamSize - magicBytesSize - sizeof(blobDataSize)));
98168

99169
uint32_t metaVersion;
100-
metadataStream.read(reinterpret_cast<char*>(&metaVersion), sizeof(metaVersion));
170+
metaVersion = *reinterpret_cast<uint32_t*>(modelBuffer->get_ptr(currentStreamPos + blobDataSize));
101171

102-
std::unique_ptr<MetadataBase> storedMeta;
103-
try {
104-
storedMeta = create_metadata(metaVersion);
105-
storedMeta->read(metadataStream);
106-
} catch(...) {
172+
auto storedMeta = create_metadata(metaVersion, currentStreamPos, blobDataSize);
173+
stream.seekg(blobDataSize + sizeof(metaVersion), std::ios::cur);
174+
if (storedMeta != nullptr) {
175+
storedMeta->read(stream);
176+
} else {
107177
logger.warning("Imported blob metadata version: %d.%d, but the current version is: %d.%d",
108178
get_major(metaVersion),
109179
get_minor(metaVersion),
@@ -121,4 +191,12 @@ void Metadata<METADATA_VERSION_1_0>::set_ov_version(const OpenvinoVersion& newVe
121191
_ovVersion = newVersion;
122192
}
123193

194+
// Returns the blob data size stored in this metadata object (`_blobDataSize`).
uint64_t Metadata<METADATA_VERSION_1_0>::get_blob_size() const {
195+
return _blobDataSize;
196+
}
197+
198+
// Returns the OV header offset stored in this metadata object (`_ovHeaderOffset`).
size_t Metadata<METADATA_VERSION_1_0>::get_ov_header_offset() const {
199+
return _ovHeaderOffset;
200+
}
201+
124202
} // namespace intel_npu

0 commit comments

Comments
 (0)