Skip to content

Commit

Permalink
Add fix for new CIP optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
MirceaDan99 committed Jan 17, 2025
1 parent 2f7ec86 commit 6de8d82
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,74 +13,78 @@ namespace intel_npu {

class BlobContainer {
public:
virtual void* get_ptr() = 0;
BlobContainer() = default;

virtual size_t size() const = 0;
BlobContainer(std::vector<uint8_t> blob) : _blob(std::move(blob)) {}

virtual bool release_from_memory() = 0;

virtual std::vector<uint8_t> get_ownership_blob() = 0;

virtual ~BlobContainer() = default;
};

class BlobContainerVector : public BlobContainer {
public:
BlobContainerVector(std::vector<uint8_t> blob) : _ownershipBlob(std::move(blob)) {}

void* get_ptr() override {
return reinterpret_cast<void*>(_ownershipBlob.data());
virtual const void* get_ptr() const {
return _blob.data();
}

size_t size() const override {
return _ownershipBlob.size();
virtual size_t size() const {
return _blob.size();
}

bool release_from_memory() override {
_ownershipBlob.clear();
_ownershipBlob.shrink_to_fit();
return true;
virtual bool release_from_memory() const {
if (_shouldDeallocate) {
_blob.clear();
_blob.shrink_to_fit();
return true;
}
_shouldDeallocate = true;
return false;
}

std::vector<uint8_t> get_ownership_blob() override {
return _ownershipBlob;
virtual const std::vector<uint8_t>& get_blob() const {
// when the underlying blob object was accessed,
// prevent deallocation on the next `release_from_memory` call
_shouldDeallocate = false;
return _blob;
}

virtual ~BlobContainer() = default;

protected:
mutable std::vector<uint8_t> _blob;

private:
std::vector<uint8_t> _ownershipBlob;
mutable bool _shouldDeallocate = true;
};

class BlobContainerAlignedBuffer : public BlobContainer {
public:
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO,
size_t ovHeaderOffset,
uint64_t blobSize)
: _blobSize(blobSize),
: _size(blobSize),
_ovHeaderOffset(ovHeaderOffset),
_ownershipBlob(blobSO) {}
_blobSO(blobSO) {}

void* get_ptr() override {
return _ownershipBlob->get_ptr(_ovHeaderOffset);
const void* get_ptr() const override {
return _blobSO->get_ptr(_ovHeaderOffset);
}

size_t size() const override {
return _blobSize;
return _size;
}

bool release_from_memory() override {
bool release_from_memory() const override {
BlobContainer::release_from_memory();
return false;
}

std::vector<uint8_t> get_ownership_blob() override {
std::vector<uint8_t> blob(_blobSize);
blob.assign(reinterpret_cast<const uint8_t*>(this->get_ptr()), reinterpret_cast<const uint8_t*>(this->get_ptr()) + this->size());
return blob;
const std::vector<uint8_t>& get_blob() const override {
BlobContainer::release_from_memory();
_blob.resize(_size);
_blob.assign(reinterpret_cast<const uint8_t*>(this->get_ptr()),
reinterpret_cast<const uint8_t*>(this->get_ptr()) + _size);
return _blob;
}

private:
uint64_t _blobSize;
uint64_t _size;
size_t _ovHeaderOffset;
std::shared_ptr<ov::AlignedBuffer> _ownershipBlob;
std::shared_ptr<ov::AlignedBuffer> _blobSO;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con

_logger.debug("compile start");
auto networkDesc = _compiler->compile(model, config);
auto blobPtr = std::make_unique<BlobContainerVector>(std::move(networkDesc.compiledNetwork));
auto blobPtr = std::make_unique<BlobContainer>(std::move(networkDesc.compiledNetwork));
_logger.debug("compile end");

ze_graph_handle_t graphHandle = nullptr;
Expand Down Expand Up @@ -110,8 +110,9 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::unique_ptr<BlobContain
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");

_logger.debug("parse start");
const auto& blob = blobPtr->get_ownership_blob();
const auto& blob = blobPtr->get_blob();
auto networkMeta = _compiler->parse(blob, config);
blobPtr->release_from_memory();
_logger.debug("parse end");

ze_graph_handle_t graphHandle = nullptr;
Expand Down
11 changes: 9 additions & 2 deletions src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,15 @@ size_t PluginGraph::export_blob(std::ostream& stream) const {

std::vector<ov::ProfilingInfo> PluginGraph::process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const {
const auto& blob = _blobPtr->get_ownership_blob();
return _compiler->process_profiling_output(profData, blob, config);
std::vector<ov::ProfilingInfo> profilingInfo;
const auto& blob = _blobPtr->get_blob();
try {
profilingInfo = _compiler->process_profiling_output(profData, blob, config);
} catch (const std::exception& ex) {
_logger.error(ex.what());
}
_blobPtr->release_from_memory();
return std::move(profilingInfo);
}

void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const {
Expand Down
6 changes: 3 additions & 3 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -636,13 +636,13 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
auto localConfig = merge_configs(_globalConfig, localPropertiesMap);
update_log_level(localPropertiesMap);

const auto set_cache_dir = localConfig.get<CACHE_DIR>();
/* const auto set_cache_dir = localConfig.get<CACHE_DIR>();
if (!set_cache_dir.empty()) {
const auto compilerType = localConfig.get<COMPILER_TYPE>();
if (compilerType == ov::intel_npu::CompilerType::MLIR) {
OPENVINO_THROW("Option 'CACHE_DIR' is not supported with MLIR compiler type");
}
}
} */

const auto platform = _backends->getCompilationPlatform(localConfig.get<PLATFORM>(), localConfig.get<DEVICE_ID>());
auto device = _backends->getDevice(localConfig.get<DEVICE_ID>());
Expand Down Expand Up @@ -806,7 +806,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);

blobPtr = std::make_unique<BlobContainerVector>(std::move(blob));
blobPtr = std::make_unique<BlobContainer>(std::move(blob));
} else {
blobPtr = std::make_unique<BlobContainerAlignedBuffer>(modelBuffer, stream.tellg(), graphSize);
}
Expand Down

0 comments on commit 6de8d82

Please sign in to comment.