Skip to content

Commit

Permalink
Revert changes for CIP Optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
MirceaDan99 committed Jan 21, 2025
1 parent c647071 commit 1801088
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 76 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,50 +13,50 @@ namespace intel_npu {

class BlobContainer {
public:
BlobContainer() = default;
/**
* @brief Returns the address at the beginning of the blob.
*/
virtual const void* get_ptr() const = 0;

BlobContainer(std::vector<uint8_t> blob) : _blob(std::move(blob)) {}
/**
* @brief Size of the blob.
*/
virtual size_t size() const = 0;

virtual const void* get_ptr() const {
return _blob.data();
}
/**
* @brief Returns true if the blob can be deallocated from memory, false otherwise.
*/
virtual bool release_from_memory() = 0;

virtual size_t size() const {
return _blob.size();
}
virtual ~BlobContainer() = default;
};

virtual bool release_from_memory() const {
if (_shouldDeallocate) {
_blob.clear();
_blob.shrink_to_fit();
return true;
}
_shouldDeallocate = true;
return false;
}
class BlobContainerVector : public BlobContainer {
public:
BlobContainerVector(std::vector<uint8_t> blob) : _blob(std::move(blob)) {}

virtual const std::vector<uint8_t>& get_blob() const {
// when underlying blob object was accessed,
// prevent deallocation on next `release_from_memory` call
_shouldDeallocate = false;
return _blob;
const void* get_ptr() const override {
return reinterpret_cast<const void*>(_blob.data());
}

virtual ~BlobContainer() = default;
size_t size() const override {
return _blob.size();
}

protected:
mutable std::vector<uint8_t> _blob;
bool release_from_memory() override {
_blob.clear();
_blob.shrink_to_fit();
return true;
}

private:
mutable bool _shouldDeallocate = true;
std::vector<uint8_t> _blob;
};

class BlobContainerAlignedBuffer : public BlobContainer {
public:
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO,
size_t ovHeaderOffset,
uint64_t blobSize)
: _size(blobSize),
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO, size_t ovHeaderOffset, uint64_t size)
: _size(size),
_ovHeaderOffset(ovHeaderOffset),
_blobSO(blobSO) {}

Expand All @@ -68,19 +68,10 @@ class BlobContainerAlignedBuffer : public BlobContainer {
return _size;
}

bool release_from_memory() const override {
BlobContainer::release_from_memory();
bool release_from_memory() override {
return false;
}

const std::vector<uint8_t>& get_blob() const override {
BlobContainer::release_from_memory();
_blob.resize(_size);
_blob.assign(reinterpret_cast<const uint8_t*>(this->get_ptr()),
reinterpret_cast<const uint8_t*>(this->get_ptr()) + _size);
return _blob;
}

private:
uint64_t _size;
size_t _ovHeaderOffset;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con

_logger.debug("compile start");
auto networkDesc = _compiler->compile(model, config);
auto blobPtr = std::make_unique<BlobContainer>(std::move(networkDesc.compiledNetwork));
auto blobPtr = std::make_unique<BlobContainerVector>(std::move(networkDesc.compiledNetwork));
_logger.debug("compile end");

ze_graph_handle_t graphHandle = nullptr;
Expand Down Expand Up @@ -110,9 +110,12 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::unique_ptr<BlobContain
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");

_logger.debug("parse start");
const auto& blob = blobPtr->get_blob();
auto networkMeta = _compiler->parse(blob, config);
blobPtr->release_from_memory();
std::vector<uint8_t> network(blobPtr->size());
network.assign(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()),
reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()) + blobPtr->size());
auto networkMeta = _compiler->parse(network, config);
network.clear();
network.shrink_to_fit();
_logger.debug("parse end");

ze_graph_handle_t graphHandle = nullptr;
Expand Down
13 changes: 4 additions & 9 deletions src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,10 @@ size_t PluginGraph::export_blob(std::ostream& stream) const {

std::vector<ov::ProfilingInfo> PluginGraph::process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const {
std::vector<ov::ProfilingInfo> profilingInfo;
const auto& blob = _blobPtr->get_blob();
try {
profilingInfo = _compiler->process_profiling_output(profData, blob, config);
} catch (const std::exception& ex) {
_logger.error(ex.what());
}
_blobPtr->release_from_memory();
return profilingInfo;
std::vector<uint8_t> blob(_blobPtr->size());
blob.assign(reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr()),
reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr()) + _blobPtr->size());
return _compiler->process_profiling_output(profData, blob, config);
}

void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const {
Expand Down
6 changes: 3 additions & 3 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -686,13 +686,13 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
auto localConfig = merge_configs(_globalConfig, localPropertiesMap);
update_log_level(localPropertiesMap);

/* const auto set_cache_dir = localConfig.get<CACHE_DIR>();
const auto set_cache_dir = localConfig.get<CACHE_DIR>();
if (!set_cache_dir.empty()) {
const auto compilerType = localConfig.get<COMPILER_TYPE>();
if (compilerType == ov::intel_npu::CompilerType::MLIR) {
OPENVINO_THROW("Option 'CACHE_DIR' is not supported with MLIR compiler type");
}
} */
}

const auto platform = _backends->getCompilationPlatform(localConfig.get<PLATFORM>(), localConfig.get<DEVICE_ID>());
auto device = _backends->getDevice(localConfig.get<DEVICE_ID>());
Expand Down Expand Up @@ -856,7 +856,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);

blobPtr = std::make_unique<BlobContainer>(std::move(blob));
blobPtr = std::make_unique<BlobContainerVector>(std::move(blob));
} else {
blobPtr = std::make_unique<BlobContainerAlignedBuffer>(modelBuffer, stream.tellg(), graphSize);
}
Expand Down
62 changes: 42 additions & 20 deletions src/plugins/intel_npu/tests/unit/npu/blob_container.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,22 @@

using namespace intel_npu;

using BlobContainerUnitTests = ::testing::Test;

namespace {
const char* dummyBlobHeader = "blobwillstartafterspace correctblob!";
const char* testCacheDir = "blob_container_test_cache_dir";
const char* testFileName = "blob_container_test.blob";
class BlobContainerUnitTests : public ::testing::Test {
protected:
void TearDown() override {
ov::util::iterate_files(testCacheDir, [](const std::string& file, bool is_dir) {
if (!is_dir) {
ov::test::utils::removeFile(file);
}
});
ov::test::utils::removeDir(testCacheDir);
ov::test::utils::removeFile(testFileName);
}

} // namespace
const char* dummyBlobHeader = "blobwillstartafterspace ";
const char* testCacheDir = "blob_container_test_cache_dir";
const char* testFileName = "blob_container_test.blob";
};

TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) {
auto core = std::make_shared<ov::CoreImpl>();
Expand Down Expand Up @@ -59,18 +67,26 @@ TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) {
auto inferRequest = compiledModel->create_infer_request();
inferRequest->infer();
OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info());
auto outputFile =
std::ofstream(std::filesystem::path(testCacheDir) / testFileName, std::ios::out | std::ios::binary);

auto testCacheDirPath = ov::util::Path(testCacheDir);
auto outputFile = std::ofstream(testCacheDirPath / testFileName, std::ios::out | std::ios::binary);
std::ostringstream blobStream;
OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile));
OV_ASSERT_NO_THROW(compiledModel->export_model(blobStream));

auto* compiledModelPtr = dynamic_cast<intel_npu::ICompiledModel*>(compiledModel._ptr.get());
OPENVINO_ASSERT(compiledModelPtr != nullptr);
const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container();
auto* blobContainerAlignedBufferPtr =
dynamic_cast<const intel_npu::BlobContainerAlignedBuffer*>(&blobContainer);
OPENVINO_ASSERT(blobContainerAlignedBufferPtr != nullptr, "Cached blob should be memory mapped!");

// Expect output stream with metadata to be larger than actual blob size
OPENVINO_ASSERT(outputFile.tellp() > 0 && blobContainer.size() > 0 &&
static_cast<size_t>(outputFile.tellp()) > blobContainer.size());
OPENVINO_ASSERT(blobStream.tellp() > 0 && blobContainer.size() > 0 &&
static_cast<size_t>(blobStream.tellp()) > blobContainer.size());
}
ov::test::utils::removeDir(testCacheDir);
}

TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForFStream) {
Expand Down Expand Up @@ -104,7 +120,6 @@ TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForFStream) {
dynamic_cast<const intel_npu::BlobContainerAlignedBuffer*>(&blobContainer);
OPENVINO_ASSERT(blobContainerAlignedBufferPtr == nullptr, "Cannot have memory mapped blob for std::fstream!");
}
ov::test::utils::removeFile(testFileName);
}

TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForSStream) {
Expand Down Expand Up @@ -161,35 +176,42 @@ TEST_F(BlobContainerUnitTests, isBlobHeaderHandledCorrectly) {
std::string parseDummyHeader;
std::string blob;
blobStream >> parseDummyHeader;
blobStream.get();

EXPECT_THAT(parseDummyHeader, testing::HasSubstr("blobwillstartafterspace"));
auto compiledModel =
core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)});
blobStream = {};

auto* compiledModelPtr = dynamic_cast<intel_npu::ICompiledModel*>(compiledModel._ptr.get());
OPENVINO_ASSERT(compiledModelPtr != nullptr);
const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container();
blob.assign(reinterpret_cast<const char*>(blobContainer.get_ptr()), blobContainer.size());
EXPECT_THAT(blob, testing::HasSubstr("correctblob!"));
ASSERT_EQ(blobStream.str().substr(std::strlen(dummyBlobHeader), blobContainer.size()), blob);
}

{
std::string parseDummyHeader;
std::string blob;
std::string referenceBlob;
auto inputFile = std::ifstream(testFileName, std::ios::in | std::ios::binary);
blobStream >> parseDummyHeader;
inputFile >> parseDummyHeader;
inputFile.get();

std::streampos currentPos = inputFile.tellg();
inputFile.seekg(0, std::ios::end);
std::streampos endPos = inputFile.tellg();
inputFile.seekg(currentPos, std::ios::beg);
referenceBlob.resize(endPos - currentPos);
inputFile.read(&referenceBlob[0], referenceBlob.size());
inputFile.seekg(currentPos, std::ios::beg);

EXPECT_THAT(parseDummyHeader, testing::HasSubstr("blobwillstartafterspace"));
auto compiledModel =
core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)});
core->import_model(inputFile, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)});

auto* compiledModelPtr = dynamic_cast<intel_npu::ICompiledModel*>(compiledModel._ptr.get());
OPENVINO_ASSERT(compiledModelPtr != nullptr);
const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container();
blob.assign(reinterpret_cast<const char*>(blobContainer.get_ptr()), blobContainer.size());
EXPECT_THAT(blob, testing::HasSubstr("correctblob!"));
referenceBlob.resize(blobContainer.size()); // exclude metadata
ASSERT_EQ(referenceBlob, blob);
}

ov::test::utils::removeFile(testFileName);
}

0 comments on commit 1801088

Please sign in to comment.