Skip to content

Commit

Permalink
Revert changes for CIP Optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
MirceaDan99 committed Jan 21, 2025
1 parent c647071 commit 1801088
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 76 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,50 +13,50 @@ namespace intel_npu {

class BlobContainer {
public:
BlobContainer() = default;
/**
* @brief Returns the address at the beginning of the blob.
*/
virtual const void* get_ptr() const = 0;

BlobContainer(std::vector<uint8_t> blob) : _blob(std::move(blob)) {}
/**
* @brief Size of the blob.
*/
virtual size_t size() const = 0;

virtual const void* get_ptr() const {
return _blob.data();
}
/**
* @brief Returns true if the blob can be deallocated from memory, false otherwise.
*/
virtual bool release_from_memory() = 0;

virtual size_t size() const {
return _blob.size();
}
virtual ~BlobContainer() = default;
};

virtual bool release_from_memory() const {
if (_shouldDeallocate) {
_blob.clear();
_blob.shrink_to_fit();
return true;
}
_shouldDeallocate = true;
return false;
}
class BlobContainerVector : public BlobContainer {
public:
BlobContainerVector(std::vector<uint8_t> blob) : _blob(std::move(blob)) {}

virtual const std::vector<uint8_t>& get_blob() const {
// when underlying blob object was accessed,
// prevent deallocation on next `release_from_memory` call
_shouldDeallocate = false;
return _blob;
const void* get_ptr() const override {
return reinterpret_cast<const void*>(_blob.data());
}

virtual ~BlobContainer() = default;
size_t size() const override {
return _blob.size();
}

protected:
mutable std::vector<uint8_t> _blob;
bool release_from_memory() override {
_blob.clear();
_blob.shrink_to_fit();
return true;
}

private:
mutable bool _shouldDeallocate = true;
std::vector<uint8_t> _blob;
};

class BlobContainerAlignedBuffer : public BlobContainer {
public:
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO,
size_t ovHeaderOffset,
uint64_t blobSize)
: _size(blobSize),
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO, size_t ovHeaderOffset, uint64_t size)
: _size(size),
_ovHeaderOffset(ovHeaderOffset),
_blobSO(blobSO) {}

Expand All @@ -68,19 +68,10 @@ class BlobContainerAlignedBuffer : public BlobContainer {
return _size;
}

bool release_from_memory() const override {
BlobContainer::release_from_memory();
bool release_from_memory() override {
return false;
}

const std::vector<uint8_t>& get_blob() const override {
BlobContainer::release_from_memory();
_blob.resize(_size);
_blob.assign(reinterpret_cast<const uint8_t*>(this->get_ptr()),
reinterpret_cast<const uint8_t*>(this->get_ptr()) + _size);
return _blob;
}

private:
uint64_t _size;
size_t _ovHeaderOffset;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con

_logger.debug("compile start");
auto networkDesc = _compiler->compile(model, config);
auto blobPtr = std::make_unique<BlobContainer>(std::move(networkDesc.compiledNetwork));
auto blobPtr = std::make_unique<BlobContainerVector>(std::move(networkDesc.compiledNetwork));
_logger.debug("compile end");

ze_graph_handle_t graphHandle = nullptr;
Expand Down Expand Up @@ -110,9 +110,12 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::unique_ptr<BlobContain
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");

_logger.debug("parse start");
const auto& blob = blobPtr->get_blob();
auto networkMeta = _compiler->parse(blob, config);
blobPtr->release_from_memory();
std::vector<uint8_t> network(blobPtr->size());
network.assign(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()),
reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()) + blobPtr->size());
auto networkMeta = _compiler->parse(network, config);
network.clear();
network.shrink_to_fit();
_logger.debug("parse end");

ze_graph_handle_t graphHandle = nullptr;
Expand Down
13 changes: 4 additions & 9 deletions src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,10 @@ size_t PluginGraph::export_blob(std::ostream& stream) const {

std::vector<ov::ProfilingInfo> PluginGraph::process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const {
std::vector<ov::ProfilingInfo> profilingInfo;
const auto& blob = _blobPtr->get_blob();
try {
profilingInfo = _compiler->process_profiling_output(profData, blob, config);
} catch (const std::exception& ex) {
_logger.error(ex.what());
}
_blobPtr->release_from_memory();
return profilingInfo;
std::vector<uint8_t> blob(_blobPtr->size());
blob.assign(reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr()),
reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr()) + _blobPtr->size());
return _compiler->process_profiling_output(profData, blob, config);
}

void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const {
Expand Down
6 changes: 3 additions & 3 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -686,13 +686,13 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
auto localConfig = merge_configs(_globalConfig, localPropertiesMap);
update_log_level(localPropertiesMap);

/* const auto set_cache_dir = localConfig.get<CACHE_DIR>();
const auto set_cache_dir = localConfig.get<CACHE_DIR>();
if (!set_cache_dir.empty()) {
const auto compilerType = localConfig.get<COMPILER_TYPE>();
if (compilerType == ov::intel_npu::CompilerType::MLIR) {
OPENVINO_THROW("Option 'CACHE_DIR' is not supported with MLIR compiler type");
}
} */
}

const auto platform = _backends->getCompilationPlatform(localConfig.get<PLATFORM>(), localConfig.get<DEVICE_ID>());
auto device = _backends->getDevice(localConfig.get<DEVICE_ID>());
Expand Down Expand Up @@ -856,7 +856,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);

blobPtr = std::make_unique<BlobContainer>(std::move(blob));
blobPtr = std::make_unique<BlobContainerVector>(std::move(blob));
} else {
blobPtr = std::make_unique<BlobContainerAlignedBuffer>(modelBuffer, stream.tellg(), graphSize);
}
Expand Down
62 changes: 42 additions & 20 deletions src/plugins/intel_npu/tests/unit/npu/blob_container.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,22 @@

using namespace intel_npu;

using BlobContainerUnitTests = ::testing::Test;

namespace {
const char* dummyBlobHeader = "blobwillstartafterspace correctblob!";
const char* testCacheDir = "blob_container_test_cache_dir";
const char* testFileName = "blob_container_test.blob";
class BlobContainerUnitTests : public ::testing::Test {
protected:
void TearDown() override {
ov::util::iterate_files(testCacheDir, [](const std::string& file, bool is_dir) {
if (!is_dir) {
ov::test::utils::removeFile(file);
}
});
ov::test::utils::removeDir(testCacheDir);
ov::test::utils::removeFile(testFileName);
}

} // namespace
const char* dummyBlobHeader = "blobwillstartafterspace ";
const char* testCacheDir = "blob_container_test_cache_dir";
const char* testFileName = "blob_container_test.blob";
};

TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) {
auto core = std::make_shared<ov::CoreImpl>();
Expand Down Expand Up @@ -59,18 +67,26 @@ TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) {
auto inferRequest = compiledModel->create_infer_request();
inferRequest->infer();
OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info());
auto outputFile =
std::ofstream(std::filesystem::path(testCacheDir) / testFileName, std::ios::out | std::ios::binary);

auto testCacheDirPath = ov::util::Path(testCacheDir);
auto outputFile = std::ofstream(testCacheDirPath / testFileName, std::ios::out | std::ios::binary);
std::ostringstream blobStream;
OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile));
OV_ASSERT_NO_THROW(compiledModel->export_model(blobStream));

auto* compiledModelPtr = dynamic_cast<intel_npu::ICompiledModel*>(compiledModel._ptr.get());
OPENVINO_ASSERT(compiledModelPtr != nullptr);
const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container();
auto* blobContainerAlignedBufferPtr =
dynamic_cast<const intel_npu::BlobContainerAlignedBuffer*>(&blobContainer);
OPENVINO_ASSERT(blobContainerAlignedBufferPtr != nullptr, "Cached blob should be memory mapped!");

// Expect output stream with metadata to be larger than actual blob size
OPENVINO_ASSERT(outputFile.tellp() > 0 && blobContainer.size() > 0 &&
static_cast<size_t>(outputFile.tellp()) > blobContainer.size());
OPENVINO_ASSERT(blobStream.tellp() > 0 && blobContainer.size() > 0 &&
static_cast<size_t>(blobStream.tellp()) > blobContainer.size());
}
ov::test::utils::removeDir(testCacheDir);
}

TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForFStream) {
Expand Down Expand Up @@ -104,7 +120,6 @@ TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForFStream) {
dynamic_cast<const intel_npu::BlobContainerAlignedBuffer*>(&blobContainer);
OPENVINO_ASSERT(blobContainerAlignedBufferPtr == nullptr, "Cannot have memory mapped blob for std::fstream!");
}
ov::test::utils::removeFile(testFileName);
}

TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForSStream) {
Expand Down Expand Up @@ -161,35 +176,42 @@ TEST_F(BlobContainerUnitTests, isBlobHeaderHandledCorrectly) {
std::string parseDummyHeader;
std::string blob;
blobStream >> parseDummyHeader;
blobStream.get();

EXPECT_THAT(parseDummyHeader, testing::HasSubstr("blobwillstartafterspace"));
auto compiledModel =
core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)});
blobStream = {};

auto* compiledModelPtr = dynamic_cast<intel_npu::ICompiledModel*>(compiledModel._ptr.get());
OPENVINO_ASSERT(compiledModelPtr != nullptr);
const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container();
blob.assign(reinterpret_cast<const char*>(blobContainer.get_ptr()), blobContainer.size());
EXPECT_THAT(blob, testing::HasSubstr("correctblob!"));
ASSERT_EQ(blobStream.str().substr(std::strlen(dummyBlobHeader), blobContainer.size()), blob);
}

{
std::string parseDummyHeader;
std::string blob;
std::string referenceBlob;
auto inputFile = std::ifstream(testFileName, std::ios::in | std::ios::binary);
blobStream >> parseDummyHeader;
inputFile >> parseDummyHeader;
inputFile.get();

std::streampos currentPos = inputFile.tellg();
inputFile.seekg(0, std::ios::end);
std::streampos endPos = inputFile.tellg();
inputFile.seekg(currentPos, std::ios::beg);
referenceBlob.resize(endPos - currentPos);
inputFile.read(&referenceBlob[0], referenceBlob.size());
inputFile.seekg(currentPos, std::ios::beg);

EXPECT_THAT(parseDummyHeader, testing::HasSubstr("blobwillstartafterspace"));
auto compiledModel =
core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)});
core->import_model(inputFile, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)});

auto* compiledModelPtr = dynamic_cast<intel_npu::ICompiledModel*>(compiledModel._ptr.get());
OPENVINO_ASSERT(compiledModelPtr != nullptr);
const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container();
blob.assign(reinterpret_cast<const char*>(blobContainer.get_ptr()), blobContainer.size());
EXPECT_THAT(blob, testing::HasSubstr("correctblob!"));
referenceBlob.resize(blobContainer.size()); // exclude metadata
ASSERT_EQ(referenceBlob, blob);
}

ov::test::utils::removeFile(testFileName);
}

0 comments on commit 1801088

Please sign in to comment.