Skip to content

Commit 8c9d4be

Browse files
authored
[CPU] Add interface to release compiled model internal memory (#26390)
### Details: This PR introduces an `ov::CompiledModel`-level interface that allows releasing the memory allocated by the compiled model. In this PR, the interface is supported only by the CPU plugin. ### Tickets: - CVS-145873
1 parent ad3f51b commit 8c9d4be

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+1429
-825
lines changed

src/inference/dev_api/openvino/runtime/icompiled_model.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,12 @@ class OPENVINO_RUNTIME_API ICompiledModel : public std::enable_shared_from_this<
134134
*/
135135
ov::SoPtr<ov::IRemoteContext> get_context() const;
136136

137+
/**
138+
* @brief Release intermediate memory
139+
*
140+
*/
141+
virtual void release_memory();
142+
137143
virtual ~ICompiledModel() = default;
138144

139145
private:

src/inference/include/openvino/runtime/compiled_model.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,15 @@ class OPENVINO_RUNTIME_API CompiledModel {
200200
return get_property(property.name()).template as<T>();
201201
}
202202

203+
/**
204+
* @brief Release intermediate memory.
205+
*
206+
* This method forces the compiled model to release memory allocated for intermediate structures, e.g., caches,
207+
* tensors, temporary buffers, etc., when possible.
208+
*
209+
*/
210+
void release_memory();
211+
203212
/**
204213
* @brief Returns pointer to device-specific shared context
205214
* on a remote accelerator device that was used to create this CompiledModel.

src/inference/src/cpp/compiled_model.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,10 @@ Any CompiledModel::get_property(const std::string& name) const {
145145
});
146146
}
147147

148+
void CompiledModel::release_memory() {
149+
OV_COMPILED_MODEL_CALL_STATEMENT(_impl->release_memory());
150+
}
151+
148152
RemoteContext CompiledModel::get_context() const {
149153
OV_COMPILED_MODEL_CALL_STATEMENT({
150154
auto ctx = _impl->get_context();

src/inference/src/dev/icompiled_model.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,3 +147,7 @@ ov::SoPtr<ov::IRemoteContext> ov::ICompiledModel::get_context() const {
147147
void ov::ICompiledModel::set_model_shared_object(ov::Model& model, const std::shared_ptr<void>& shared_object) {
148148
model.m_shared_object = shared_object;
149149
}
150+
151+
void ov::ICompiledModel::release_memory() {
152+
// nothing to do
153+
}

src/plugins/intel_cpu/src/compiled_model.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,5 +342,13 @@ void CompiledModel::export_model(std::ostream& modelStream) const {
342342
serializer << m_model;
343343
}
344344

345+
void CompiledModel::release_memory() {
346+
for (auto&& graph : m_graphs) {
347+
GraphGuard::Lock graph_lock{graph};
348+
auto ctx = graph_lock._graph.getGraphContext();
349+
ctx->getNetworkMemoryControl()->releaseMemory();
350+
}
351+
}
352+
345353
} // namespace intel_cpu
346354
} // namespace ov

src/plugins/intel_cpu/src/compiled_model.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ class CompiledModel : public ov::ICompiledModel {
4949
"Set property to Core::compile_model during compilation");
5050
};
5151

52+
void release_memory() override;
53+
5254
private:
5355
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
5456
friend class SyncInferRequest;

0 commit comments

Comments
 (0)