From 6ad934bd3fd4e0780e71150dbff157b873077e5e Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov 
Date: Tue, 17 Dec 2024 13:25:10 +0400
Subject: [PATCH 01/44] Base impl

Signed-off-by: Vladimir Paramuzov 
---
 .../tests/unit/module_tests/config_common.cpp |  77 +++++++++
 .../tests/unit/module_tests/config_common.hpp | 127 ++++++++++++++
 .../tests/unit/module_tests/config_gpu.cpp    | 160 ++++++++++++++++++
 .../tests/unit/module_tests/config_gpu.hpp    |  48 ++++++
 .../module_tests/config_gpu_debug_options.inl |  31 ++++
 .../config_gpu_debug_properties.hpp           |  42 +++++
 .../unit/module_tests/config_gpu_options.inl  |  50 ++++++
 .../tests/unit/module_tests/device_test.cpp   |  41 +++++
 8 files changed, 576 insertions(+)
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl

diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp
new file mode 100644
index 00000000000000..06625d33c40307
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp
@@ -0,0 +1,77 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "config_common.hpp"
+#include "openvino/core/except.hpp"
+
+
+namespace ov {
+
+void PluginConfig::set_property(const AnyMap& config) {
+    for (auto& kv : config) {
+        auto& name = kv.first;
+        auto& val = kv.second;
+
+        const auto& known_options = m_options_map;
+        auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
+        OPENVINO_ASSERT(it != known_options.end());
+
+        it->second->set_any(val);
+    }
+}
+
+ov::Any PluginConfig::get_property(const std::string& name) const {
+    const auto& known_options = m_options_map;
+    auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
+    OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
+
+    return it->second->get_any();
+}
+
+void PluginConfig::set_user_property(const AnyMap& config) {
+    for (auto& kv : config) {
+        auto& name = kv.first;
+        auto& val = kv.second;
+
+        const auto& known_options = m_options_map;
+        auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
+        OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
+        OPENVINO_ASSERT(it->second->is_valid_value(val), "Invalid value: ", val.as<std::string>(), " for property: ", name);
+
+        user_properties[name] = val;
+    }
+}
+
+void PluginConfig::finalize(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) {
+    // Copy internal properties before applying hints to ensure that
+    // a property set by hint won't be overridden by a value in user config.
+    // E.g. num_streams=AUTO && hint=THROUGHPUT
+    // If we apply hints first and then copy all values from user config to internal one,
+    // then we'll get num_streams=AUTO in the final config while some integer number is expected.
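+    //
+    // Illustrative flow (a sketch only; `cfg` stands for any PluginConfig-derived object):
+    //   cfg.set_user_property({ov::num_streams(ov::streams::AUTO),
+    //                          ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
+    //   cfg.finalize(context, {});  // user values are copied into m_options_map first,
+    //                               // then finalize_impl() resolves AUTO to a concrete count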
+ for (const auto& prop : user_properties) { + auto& option = m_options_map.at(prop.first); + option->set_any(prop.second); + } + + finalize_impl(context, rt_info); +} + +std::string PluginConfig::to_string() const { + std::stringstream s; + + s << "-----------------------------------------\n"; + s << "PROPERTIES:\n"; + + for (const auto& option : m_options_map) { + s << "\t" << option.first << ":" << option.second->get_any().as() << std::endl; + } + s << "USER PROPERTIES:\n"; + for (const auto& user_prop : user_properties) { + s << "\t" << user_prop.first << ": " << user_prop.second.as() << std::endl; + } + + return s.str(); +} + +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp new file mode 100644 index 00000000000000..3a6a2bc0d8de51 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp @@ -0,0 +1,127 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "openvino/runtime/iremote_context.hpp" +#include "openvino/runtime/properties.hpp" +#include "openvino/core/except.hpp" + +#ifndef COUNT_N + #define COUNT_N(_1, _2, _3, _4, _5, N, ...) N +#endif + +#ifndef COUNT + #define COUNT(...) EXPAND(COUNT_N(__VA_ARGS__, 5, 4, 3, 2, 1)) +#endif + +#ifndef CAT + #define CAT(a, b) a ## b +#endif + +#ifndef EXPAND + #define EXPAND(N) N +#endif + +#define GET_EXCEPT_LAST_IMPL(N, ...) CAT(GET_EXCEPT_LAST_IMPL_, N)(__VA_ARGS__) +#define GET_EXCEPT_LAST_IMPL_2(_0, _1) _0 +#define GET_EXCEPT_LAST_IMPL_3(_0, _1, _2) _0, _1 +#define GET_EXCEPT_LAST_IMPL_4(_0, _1, _2, _3) _0, _1, _2 + +#define GET_EXCEPT_LAST(...) EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) + +namespace ov { + + +struct ConfigOptionBase { + explicit ConfigOptionBase() {} + virtual ~ConfigOptionBase() = default; + + virtual void set_any(const ov::Any any) = 0; + virtual ov::Any get_any() const = 0; + virtual bool is_valid_value(ov::Any val) = 0; +}; + +template +struct ConfigOption : public ConfigOptionBase { + ConfigOption(const T& default_val, std::function validator = nullptr) + : ConfigOptionBase(), value(default_val), validator(validator) {} + T value; + std::function validator; + + void set_any(const ov::Any any) override { + if (validator) + OPENVINO_ASSERT(validator(any.as()), "Invalid value: ", any.as()); + value = any.as(); + } + + ov::Any get_any() const override { + return ov::Any(value); + } + + bool is_valid_value(ov::Any val) override { + try { + return validator ? validator(val.as()) : true; + } catch (std::exception&) { + return false; + } + + } +}; + +class PluginConfig { +public: + PluginConfig() {} + PluginConfig(std::initializer_list values) : PluginConfig() { set_property(ov::AnyMap(values)); } + explicit PluginConfig(const ov::AnyMap& properties) : PluginConfig() { set_property(properties); } + explicit PluginConfig(const ov::AnyMap::value_type& property) : PluginConfig() { set_property(property); } + + void set_property(const ov::AnyMap& properties); + Any get_property(const std::string& name) const; + void set_user_property(const ov::AnyMap& properties); + + template + util::EnableIfAllStringAny set_property(Properties&&... properties) { + set_property(ov::AnyMap{std::forward(properties)...}); + } + + template + util::EnableIfAllStringAny set_user_property(Properties&&... 
properties) { + set_user_property(ov::AnyMap{std::forward(properties)...}); + } + + template + T get_property(const ov::Property& property) const { + OPENVINO_ASSERT(m_options_map.find(property.name()) != m_options_map.end(), "Property not found: ", property.name()); + return static_cast*>(m_options_map.at(property.name()))->value; + } + + std::string to_string() const; + + void finalize(std::shared_ptr context, const ov::RTMap& rt_info); + virtual void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) = 0; + +protected: + template + bool is_set_by_user(const ov::Property& property) const { + return user_properties.find(property.name()) != user_properties.end(); + } + + template + void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { + if (!is_set_by_user(property)) { + auto rt_info_val = rt_info.find(property.name()); + if (rt_info_val != rt_info.end()) { + set_user_property(property(rt_info_val->second.template as())); + } + } + } + std::unordered_map m_options_map; + ov::AnyMap user_properties; + using OptionMapEntry = decltype(m_options_map)::value_type; +}; + +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp new file mode 100644 index 00000000000000..b3c81da368ae68 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp @@ -0,0 +1,160 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "config_gpu.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" +#include "config_gpu_debug_properties.hpp" + + +namespace ov { +namespace intel_gpu { + +NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { + #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) 
\ + m_options_map[PropertyNamespace::PropertyVar.name()] = &PropertyVar; + + OV_CONFIG_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") + #include "config_gpu_options.inl" + #include "config_gpu_debug_options.inl" + + #undef OV_CONFIG_OPTION +} + +void NewExecutionConfig::finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) { + const auto& device_info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + apply_user_properties(device_info); + apply_rt_info(device_info, rt_info); +} + +void NewExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { + if (is_set_by_user(ov::hint::execution_mode)) { + const auto mode = get_property(ov::hint::execution_mode); + if (!is_set_by_user(ov::hint::inference_precision)) { + if (mode == ov::hint::ExecutionMode::ACCURACY) { + set_property(ov::hint::inference_precision(ov::element::undefined)); + } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) { + if (info.supports_fp16) + set_property(ov::hint::inference_precision(ov::element::f16)); + else + set_property(ov::hint::inference_precision(ov::element::f32)); + } + } + } +} + +void NewExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { + if (is_set_by_user(ov::hint::performance_mode)) { + const auto mode = get_property(ov::hint::performance_mode); + if (!is_set_by_user(ov::num_streams)) { + if (mode == ov::hint::PerformanceMode::LATENCY) { + set_property(ov::num_streams(1)); + } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { + set_property(ov::num_streams(ov::streams::AUTO)); + } + } + } + + if (get_property(ov::num_streams) == ov::streams::AUTO) { + int32_t n_streams = std::max(info.num_ccs, 2); + set_property(ov::num_streams(n_streams)); + } + + if (get_property(ov::internal::exclusive_async_requests)) { + set_property(ov::num_streams(1)); + } + + // Allow kernels reuse only for single-stream scenarios + if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { + if (get_property(ov::num_streams) != 1) { + set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); + } + } +} + +void NewExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { + if (is_set_by_user(ov::hint::model_priority)) { + const auto priority = get_property(ov::hint::model_priority); + if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { + set_property(ov::intel_gpu::hint::queue_priority(priority)); + } + } +} + +void NewExecutionConfig::apply_debug_options(const cldnn::device_info& info) { + // GPU_DEBUG_GET_INSTANCE(debug_config); + // GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { + // set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); + // } + + // GPU_DEBUG_IF(debug_config->serialize_compile == 1) { + // set_property(ov::compilation_num_threads(1)); + // } + + // GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + // GPU_DEBUG_COUT << "[WARNING] ov::enable_profiling property was forced because of enabled OV_GPU_DumpProfilingData debug option\n"; + // set_property(ov::enable_profiling(true)); + // } + + // GPU_DEBUG_IF(debug_config->disable_dynamic_impl == 1) { + // set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); + // } + + // GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { + // if (debug_config->dynamic_quantize_group_size == -1) + // set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); + // else + // 
set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size)); + // } + + // GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) { + // GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) { + // set_property(ov::hint::kv_cache_precision(ov::element::i8)); + // } else { + // set_property(ov::hint::kv_cache_precision(ov::element::undefined)); + // } + // } +} + +void NewExecutionConfig::apply_hints(const cldnn::device_info& info) { + apply_execution_hints(info); + apply_performance_hints(info); + apply_priority_hints(info); + apply_debug_options(info); +} + +void NewExecutionConfig::apply_user_properties(const cldnn::device_info& info) { + apply_hints(info); + if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { + set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); + } + if (info.supports_immad) { + set_property(ov::intel_gpu::use_onednn(true)); + } + if (get_property(ov::intel_gpu::use_onednn)) { + set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + } + + // Enable KV-cache compression by default for non-systolic platforms + if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { + set_property(ov::hint::kv_cache_precision(ov::element::i8)); + } + + // Enable dynamic quantization by default for non-systolic platforms + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + set_property(ov::hint::dynamic_quantization_group_size(32)); + } +} + +void NewExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) { + if (!info.supports_immad) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); + } + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp new file mode 100644 index 00000000000000..69b9c321863c03 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "config_common.hpp" +#include "intel_gpu/runtime/device_info.hpp" +#include "intel_gpu/runtime/utils.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include "config_gpu_debug_properties.hpp" +#include + +namespace ov { +namespace intel_gpu { + +struct NewExecutionConfig : public ov::PluginConfig { + NewExecutionConfig(); + + #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ + ConfigOption PropertyVar = \ + ConfigOption(GET_EXCEPT_LAST(__VA_ARGS__)); + + + #include "config_gpu_options.inl" + #include "config_gpu_debug_options.inl" + + #undef OV_CONFIG_OPTION + + void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) override; + +protected: + // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call + // So this method should be called after setting all user properties, but before apply_user_properties() call. 
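+    // Illustrative call order implied by the note above (sketch only; names from this patch):
+    //   config.set_user_property(user_props);          // 1. all user properties first
+    //   config.apply_rt_info(device_info, rt_info);    // 2. then model runtime info
+    //   config.apply_user_properties(device_info);     // 3. finally resolve hints and defaults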
+ void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info); + + void apply_user_properties(const cldnn::device_info& info); + void apply_hints(const cldnn::device_info& info); + void apply_execution_hints(const cldnn::device_info& info); + void apply_performance_hints(const cldnn::device_info& info); + void apply_priority_hints(const cldnn::device_info& info); + void apply_debug_options(const cldnn::device_info& info); +}; + + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl new file mode 100644 index 00000000000000..687475f67f287c --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl @@ -0,0 +1,31 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef GPU_DEBUG_CONFIG +OV_CONFIG_OPTION(ov::intel_gpu, verbose, false, "Enable") +OV_CONFIG_OPTION(ov::intel_gpu, help, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_usm, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_onednn_post_ops, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, dump_profiling_data, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, dump_graphs, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, dump_sources, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, dump_tensors, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, dump_memory_pool, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, dump_iterations, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, host_time_profiling, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "") +OV_CONFIG_OPTION(ov::intel_gpu, impls_cache_capacity, 0, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_async_compilation, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_shape_agnostic_impls, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, use_usm_host, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, enable_kv_cache_compression, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "") + +#endif diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp new file mode 100644 index 00000000000000..f3ff878ce07740 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/runtime/properties.hpp" + +#ifdef GPU_DEBUG_CONFIG + +namespace ov { +namespace intel_gpu { + +static constexpr Property verbose{"VERBOSE"}; +static constexpr Property help{"HELP"}; +static constexpr Property disable_usm{"DISABLE_USM"}; +static constexpr Property disable_onednn_post_ops{"DISABLE_ONEDNN_POST_OPS"}; +static constexpr Property dump_profiling_data{"DUMP_PROFILING_DATA"}; +// static constexpr Property dump_graphs{"DUMP_GRAPHS"}; +static constexpr Property dump_sources{"DUMP_SOURCES"}; +static constexpr Property dump_tensors{"DUMP_TENSORS"}; +static constexpr Property 
dump_memory_pool{"DUMP_MEMORY_POOL"}; +static constexpr Property dump_iterations{"DUMP_ITERATIONS"}; +static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; +// static constexpr Property max_kernels_per_batch{"MAX_KERNELS_PER_BATCH"}; +static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; +static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; +static constexpr Property disable_shape_agnostic_impls{"DISABLE_SHAPE_AGNOSTIC_IMPLS"}; +static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; +static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; +static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; +static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property use_usm_host{"USE_USM_HOST"}; +static constexpr Property enable_kv_cache_compression{"ENABLE_KV_CACHE_COMPRESSION"}; +static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; +static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; +static constexpr Property load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; + +} // namespace intel_gpu +} // namespace ov + +#endif // GPU_DEBUG_CONFIG diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl new file mode 100644 index 00000000000000..b3aa12dc75c49b --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl @@ -0,0 +1,50 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Namespace, property name, default value, [validator], description +OV_CONFIG_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") +OV_CONFIG_OPTION(ov::device, id, "0", "ID of the current device") +OV_CONFIG_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty") +OV_CONFIG_OPTION(ov, num_streams, 1, "Number of streams to be used for inference") +OV_CONFIG_OPTION(ov, compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that supports parallelism") +OV_CONFIG_OPTION(ov::hint, inference_precision, ov::element::f16, + [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision") +OV_CONFIG_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact number of threads used for model compilton and inference as well as device queue settings") +OV_CONFIG_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc") +OV_CONFIG_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. 
Performance mode allows unsafe optimizations that may reduce the model accuracy") +OV_CONFIG_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application") +OV_CONFIG_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not") + +OV_CONFIG_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks") +OV_CONFIG_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level") +OV_CONFIG_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property") +OV_CONFIG_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution") +OV_CONFIG_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling") +OV_CONFIG_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable winograd convolution implementation if available") +OV_CONFIG_OPTION(ov::internal, exclusive_async_requests, false, "") +OV_CONFIG_OPTION(ov::internal, query_model_ratio, 1.0f, "") +OV_CONFIG_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache") +OV_CONFIG_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model") +OV_CONFIG_OPTION(ov::hint, dynamic_quantization_group_size, 0, "") +OV_CONFIG_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "") +OV_CONFIG_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") +OV_CONFIG_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") +OV_CONFIG_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") + +OV_CONFIG_OPTION(ov::intel_gpu, nv12_two_inputs, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, config_file, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, enable_lp_transformations, false, "") + +OV_CONFIG_OPTION(ov::intel_gpu, max_dynamic_batch, 1, "") +OV_CONFIG_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "") +OV_CONFIG_OPTION(ov::intel_gpu, optimize_data, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, enable_memory_pool, true, "") +OV_CONFIG_OPTION(ov::intel_gpu, allow_static_input_reorder, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, custom_outputs, std::vector{}, "") +OV_CONFIG_OPTION(ov::intel_gpu, force_implementations, ImplForcingMap{}, "") +OV_CONFIG_OPTION(ov::intel_gpu, partial_build_program, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, use_only_static_kernels_for_dynamic_shape, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "") +OV_CONFIG_OPTION(ov::intel_gpu, use_onednn, false, "") diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp index b27275d0f03d99..b296242905b958 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp @@ -2,6 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/execution_config.hpp" +#include "module_tests/config_gpu.hpp" +#include 
"openvino/runtime/properties.hpp" #include "test_utils.h" #include "intel_gpu/runtime/device.hpp" #include "runtime/ocl/ocl_device_detector.hpp" @@ -101,3 +104,41 @@ TEST(devices_test, sort_order_three_vendors) { ASSERT_EQ(expected_devices_order, actual_devices_order); } + +// class Test { +// public: +// int i; +// constexpr Test(int i) : i(i) {} +// }; + +// constexpr const Test test1(1); +// constexpr const Test test2(2); + +// template +// int get_prop() { +// static_assert(false, "FAIL"); +// } + +// template class prop, typename T, ov::PropertyMutability mutability> +// T get_prop() { +// static_assert(false, "FAIL"); +// } + + +TEST(config_test, basic) { + ov::intel_gpu::NewExecutionConfig cfg; + std::cerr << cfg.to_string(); + + cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); + cfg.set_property(ov::hint::inference_precision(ov::element::f32)); + + std::cerr << "PROF: " << cfg.enable_profiling.value << std::endl; + + std::cerr << cfg.to_string(); + + std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; + std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; + +// std::cerr << get_prop() << std::endl; +// std::cerr << get_prop() << std::endl; +} From e5a286cd59828f0fbbd8d5c00799ab13477dc20f Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 17 Dec 2024 14:19:13 +0400 Subject: [PATCH 02/44] make it common Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp} | 30 +++++++++--- .../src/dev/plugin_config.cpp} | 5 +- .../intel_gpu/runtime/internal_properties.hpp | 24 ++++++++- .../intel_gpu/runtime/options_debug.inl} | 4 +- .../intel_gpu/runtime/options_release.inl} | 0 .../intel_gpu/runtime/plugin_config.hpp} | 13 ++--- .../runtime/plugin_config.cpp} | 49 ++++--------------- .../config_gpu_debug_properties.hpp | 42 ---------------- .../tests/unit/module_tests/config_test.cpp | 28 +++++++++++ .../tests/unit/module_tests/device_test.cpp | 41 ---------------- 10 files changed, 96 insertions(+), 140 deletions(-) rename src/{plugins/intel_gpu/tests/unit/module_tests/config_common.hpp => inference/dev_api/openvino/runtime/plugin_config.hpp} (74%) rename src/{plugins/intel_gpu/tests/unit/module_tests/config_common.cpp => inference/src/dev/plugin_config.cpp} (93%) rename src/plugins/intel_gpu/{tests/unit/module_tests/config_gpu_debug_options.inl => include/intel_gpu/runtime/options_debug.inl} (91%) rename src/plugins/intel_gpu/{tests/unit/module_tests/config_gpu_options.inl => include/intel_gpu/runtime/options_release.inl} (100%) rename src/plugins/intel_gpu/{tests/unit/module_tests/config_gpu.hpp => include/intel_gpu/runtime/plugin_config.hpp} (84%) rename src/plugins/intel_gpu/{tests/unit/module_tests/config_gpu.cpp => src/runtime/plugin_config.cpp} (71%) delete mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp similarity index 74% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp rename to src/inference/dev_api/openvino/runtime/plugin_config.hpp index 3a6a2bc0d8de51..acccd0bf343604 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -5,7 +5,7 @@ #pragma once #include -#include +#include #include 
"openvino/runtime/iremote_context.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/core/except.hpp" @@ -35,7 +35,6 @@ namespace ov { - struct ConfigOptionBase { explicit ConfigOptionBase() {} virtual ~ConfigOptionBase() = default; @@ -50,7 +49,6 @@ struct ConfigOption : public ConfigOptionBase { ConfigOption(const T& default_val, std::function validator = nullptr) : ConfigOptionBase(), value(default_val), validator(validator) {} T value; - std::function validator; void set_any(const ov::Any any) override { if (validator) @@ -68,11 +66,29 @@ struct ConfigOption : public ConfigOptionBase { } catch (std::exception&) { return false; } - } + +private: + std::function validator; }; -class PluginConfig { +// Base class for configuration of plugins +// Implementation should provide a list of properties with default values and validators (optional) +// For the sake of efficiency, we expect that plugin properties are defined as class members of the derived class +// and accessed directly in the plugin's code (i.e. w/o get_property()/set_property() calls) +// get/set property members are provided to handle external property access +// The class provides a helpers to read the properties from configuration file and from environment variables +// +// Expected order of properties resolution: +// 1. Assign default value for each property per device +// 2. Save user properties passed via Core::set_property() call to user_properties +// 3. Save user properties passed via Core::compile_model() call to user_properties +// 4. Apply RT info properties to user_properties if they were not set by user +// 5. Read and apply properties from the config file as user_properties +// 6. Read and apply properties from the the environment variables as user_properties +// 7. Apply user_properties to actual plugin properties +// 8. 
Update dependant properties if they were not set by user either way +class OPENVINO_RUNTIME_API PluginConfig { public: PluginConfig() {} PluginConfig(std::initializer_list values) : PluginConfig() { set_property(ov::AnyMap(values)); } @@ -119,7 +135,9 @@ class PluginConfig { } } } - std::unordered_map m_options_map; + std::map m_options_map; + + // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info ov::AnyMap user_properties; using OptionMapEntry = decltype(m_options_map)::value_type; }; diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp b/src/inference/src/dev/plugin_config.cpp similarity index 93% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp rename to src/inference/src/dev/plugin_config.cpp index 06625d33c40307..9f169c07663a40 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "config_common.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/core/except.hpp" @@ -55,6 +55,9 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R } finalize_impl(context, rt_info); + + // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization + user_properties.clear(); } std::string PluginConfig::to_string() const { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index 765333e971842e..ddbb260647b287 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -46,7 +46,6 @@ static constexpr Property allow_static_input_reord static constexpr Property partial_build_program{"GPU_PARTIAL_BUILD"}; static constexpr Property allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"}; static constexpr Property use_only_static_kernels_for_dynamic_shape{"GPU_USE_ONLY_STATIC_KERNELS_FOR_DYNAMIC_SHAPE"}; -static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; static constexpr Property config_file{"CONFIG_FILE"}; @@ -57,6 +56,29 @@ static constexpr Property buffers_preallocation_r static constexpr Property max_kernels_per_batch{"GPU_MAX_KERNELS_PER_BATCH"}; static constexpr Property use_onednn{"USE_ONEDNN"}; +static constexpr Property help{"HELP"}; +static constexpr Property verbose{"VERBOSE"}; +static constexpr Property disable_usm{"DISABLE_USM"}; +static constexpr Property disable_onednn_post_ops{"DISABLE_ONEDNN_POST_OPS"}; +static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; +static constexpr Property dump_profiling_data{"DUMP_PROFILING_DATA"}; +static constexpr Property dump_sources{"DUMP_SOURCES"}; +static constexpr Property dump_tensors{"DUMP_TENSORS"}; +static constexpr Property dump_memory_pool{"DUMP_MEMORY_POOL"}; +static constexpr Property dump_iterations{"DUMP_ITERATIONS"}; +static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; +static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; +static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; +static constexpr Property 
disable_shape_agnostic_impls{"DISABLE_SHAPE_AGNOSTIC_IMPLS"}; +static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; +static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; +static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; +static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property use_usm_host{"USE_USM_HOST"}; +static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; +static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; +static constexpr Property load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; + } // namespace ov::intel_gpu namespace cldnn { diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl similarity index 91% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl rename to src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl index 687475f67f287c..62548a7abb17fd 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl @@ -3,7 +3,8 @@ // #ifdef GPU_DEBUG_CONFIG -OV_CONFIG_OPTION(ov::intel_gpu, verbose, false, "Enable") + +OV_CONFIG_OPTION(ov::intel_gpu, verbose, 0, "Enable") OV_CONFIG_OPTION(ov::intel_gpu, help, false, "") OV_CONFIG_OPTION(ov::intel_gpu, disable_usm, false, "") OV_CONFIG_OPTION(ov::intel_gpu, disable_onednn_post_ops, false, "") @@ -23,7 +24,6 @@ OV_CONFIG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "") OV_CONFIG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "") OV_CONFIG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "") OV_CONFIG_OPTION(ov::intel_gpu, use_usm_host, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, enable_kv_cache_compression, false, "") OV_CONFIG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "") OV_CONFIG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "") OV_CONFIG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "") diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl similarity index 100% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl rename to src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp similarity index 84% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp rename to src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index 69b9c321863c03..51e72da8be5923 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -4,12 +4,10 @@ #pragma once -#include "config_common.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "intel_gpu/runtime/device_info.hpp" -#include "intel_gpu/runtime/utils.hpp" #include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/runtime/internal_properties.hpp" -#include "config_gpu_debug_properties.hpp" #include namespace ov { @@ -22,15 +20,14 @@ struct NewExecutionConfig : public ov::PluginConfig { ConfigOption PropertyVar = \ ConfigOption(GET_EXCEPT_LAST(__VA_ARGS__)); - - #include "config_gpu_options.inl" - #include 
"config_gpu_debug_options.inl" + #include "options_release.inl" + #include "options_debug.inl" #undef OV_CONFIG_OPTION void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) override; -protected: +private: // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call // So this method should be called after setting all user properties, but before apply_user_properties() call. void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info); @@ -40,7 +37,7 @@ struct NewExecutionConfig : public ov::PluginConfig { void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - void apply_debug_options(const cldnn::device_info& info); + void read_debug_options(const cldnn::device_info& info); }; diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp similarity index 71% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp rename to src/plugins/intel_gpu/src/runtime/plugin_config.cpp index b3c81da368ae68..0a3c49e6387104 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -2,11 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "config_gpu.hpp" +#include "intel_gpu/runtime/plugin_config.hpp" #include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/core/any.hpp" #include "openvino/runtime/internal_properties.hpp" #include "intel_gpu/runtime/internal_properties.hpp" -#include "config_gpu_debug_properties.hpp" namespace ov { @@ -16,15 +16,15 @@ NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) 
\ m_options_map[PropertyNamespace::PropertyVar.name()] = &PropertyVar; - OV_CONFIG_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") - #include "config_gpu_options.inl" - #include "config_gpu_debug_options.inl" + #include "intel_gpu/runtime/options_release.inl" + #include "intel_gpu/runtime/options_debug.inl" #undef OV_CONFIG_OPTION } void NewExecutionConfig::finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) { const auto& device_info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + read_debug_options(device_info); apply_user_properties(device_info); apply_rt_info(device_info, rt_info); } @@ -83,46 +83,17 @@ void NewExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { } } -void NewExecutionConfig::apply_debug_options(const cldnn::device_info& info) { - // GPU_DEBUG_GET_INSTANCE(debug_config); - // GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - // set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); - // } - - // GPU_DEBUG_IF(debug_config->serialize_compile == 1) { - // set_property(ov::compilation_num_threads(1)); - // } - - // GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - // GPU_DEBUG_COUT << "[WARNING] ov::enable_profiling property was forced because of enabled OV_GPU_DumpProfilingData debug option\n"; - // set_property(ov::enable_profiling(true)); - // } - - // GPU_DEBUG_IF(debug_config->disable_dynamic_impl == 1) { - // set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); - // } - - // GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - // if (debug_config->dynamic_quantize_group_size == -1) - // set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); - // else - // set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size)); - // } - - // GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) { - // GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) { - // set_property(ov::hint::kv_cache_precision(ov::element::i8)); - // } else { - // set_property(ov::hint::kv_cache_precision(ov::element::undefined)); - // } - // } +void NewExecutionConfig::read_debug_options(const cldnn::device_info& info) { + ov::AnyMap config_properties; + set_user_property(config_properties); + ov::AnyMap env_properties; + set_user_property(env_properties); } void NewExecutionConfig::apply_hints(const cldnn::device_info& info) { apply_execution_hints(info); apply_performance_hints(info); apply_priority_hints(info); - apply_debug_options(info); } void NewExecutionConfig::apply_user_properties(const cldnn::device_info& info) { diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp deleted file mode 100644 index f3ff878ce07740..00000000000000 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/runtime/properties.hpp" - -#ifdef GPU_DEBUG_CONFIG - -namespace ov { -namespace intel_gpu { - -static constexpr Property verbose{"VERBOSE"}; -static constexpr Property help{"HELP"}; -static constexpr Property disable_usm{"DISABLE_USM"}; -static constexpr Property disable_onednn_post_ops{"DISABLE_ONEDNN_POST_OPS"}; -static constexpr Property 
dump_profiling_data{"DUMP_PROFILING_DATA"}; -// static constexpr Property dump_graphs{"DUMP_GRAPHS"}; -static constexpr Property dump_sources{"DUMP_SOURCES"}; -static constexpr Property dump_tensors{"DUMP_TENSORS"}; -static constexpr Property dump_memory_pool{"DUMP_MEMORY_POOL"}; -static constexpr Property dump_iterations{"DUMP_ITERATIONS"}; -static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; -// static constexpr Property max_kernels_per_batch{"MAX_KERNELS_PER_BATCH"}; -static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; -static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; -static constexpr Property disable_shape_agnostic_impls{"DISABLE_SHAPE_AGNOSTIC_IMPLS"}; -static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; -static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; -static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; -static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; -static constexpr Property use_usm_host{"USE_USM_HOST"}; -static constexpr Property enable_kv_cache_compression{"ENABLE_KV_CACHE_COMPRESSION"}; -static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; -static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; -static constexpr Property load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; - -} // namespace intel_gpu -} // namespace ov - -#endif // GPU_DEBUG_CONFIG diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp new file mode 100644 index 00000000000000..a1bb0ac8b6e6a0 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp @@ -0,0 +1,28 @@ +// Copyright (C) 2022-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/runtime/plugin_config.hpp" +#include "openvino/runtime/properties.hpp" +#include "test_utils.h" + +using namespace cldnn; +using namespace ::tests; + +TEST(config_test, basic) { + ov::intel_gpu::NewExecutionConfig cfg; + std::cerr << cfg.to_string(); + + cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); + cfg.set_property(ov::hint::inference_precision(ov::element::f32)); + + std::cerr << "PROF: " << cfg.enable_profiling.value << std::endl; + + std::cerr << cfg.to_string(); + + std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; + std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; + +// std::cerr << get_prop() << std::endl; +// std::cerr << get_prop() << std::endl; +} diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp index b296242905b958..b27275d0f03d99 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp @@ -2,9 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "intel_gpu/runtime/execution_config.hpp" -#include "module_tests/config_gpu.hpp" -#include "openvino/runtime/properties.hpp" #include "test_utils.h" #include "intel_gpu/runtime/device.hpp" #include "runtime/ocl/ocl_device_detector.hpp" @@ -104,41 +101,3 @@ TEST(devices_test, sort_order_three_vendors) { ASSERT_EQ(expected_devices_order, actual_devices_order); } - -// class Test { -// public: -// int i; -// constexpr Test(int i) : i(i) {} -// }; - -// constexpr const Test test1(1); 
-// constexpr const Test test2(2); - -// template -// int get_prop() { -// static_assert(false, "FAIL"); -// } - -// template class prop, typename T, ov::PropertyMutability mutability> -// T get_prop() { -// static_assert(false, "FAIL"); -// } - - -TEST(config_test, basic) { - ov::intel_gpu::NewExecutionConfig cfg; - std::cerr << cfg.to_string(); - - cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); - cfg.set_property(ov::hint::inference_precision(ov::element::f32)); - - std::cerr << "PROF: " << cfg.enable_profiling.value << std::endl; - - std::cerr << cfg.to_string(); - - std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; - std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; - -// std::cerr << get_prop() << std::endl; -// std::cerr << get_prop() << std::endl; -} From 67fb59ce1face7c4156177d013625766c76cfc7b Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 17 Dec 2024 17:14:52 +0400 Subject: [PATCH 03/44] env and config Signed-off-by: Vladimir Paramuzov --- .../intel_gpu/runtime/execution_config.hpp | 10 +-- .../intel_gpu/runtime/plugin_config.hpp | 9 +- .../src/runtime/execution_config.cpp | 51 +++++++---- .../intel_gpu/src/runtime/plugin_config.cpp | 86 ++++++++----------- .../tests/unit/module_tests/config_test.cpp | 4 + 5 files changed, 82 insertions(+), 78 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 96e09605eaa998..924f6cf5d42a40 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -56,12 +56,12 @@ class PropertyTypeValidator : public BaseValidator { } }; -class ExecutionConfig { +class OldExecutionConfig { public: - ExecutionConfig(); - ExecutionConfig(std::initializer_list values) : ExecutionConfig() { set_property(ov::AnyMap(values)); } - explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } - explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } + OldExecutionConfig(); + OldExecutionConfig(std::initializer_list values) : OldExecutionConfig() { set_property(ov::AnyMap(values)); } + explicit OldExecutionConfig(const ov::AnyMap& properties) : OldExecutionConfig() { set_property(properties); } + explicit OldExecutionConfig(const ov::AnyMap::value_type& property) : OldExecutionConfig() { set_property(property); } void set_default(); void set_property(const ov::AnyMap& properties); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index 51e72da8be5923..6ea8f4e107bfc8 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -25,21 +25,16 @@ struct NewExecutionConfig : public ov::PluginConfig { #undef OV_CONFIG_OPTION - void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) override; + void finalize_impl(std::shared_ptr context) override; + void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; private: - // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call - // So this method should be called after setting all user properties, but before 
apply_user_properties() call. - void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info); - void apply_user_properties(const cldnn::device_info& info); void apply_hints(const cldnn::device_info& info); void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - void read_debug_options(const cldnn::device_info& info); }; - } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 7d2a9d5f90fc8b..5362f8bccb531c 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -11,7 +11,7 @@ namespace ov::intel_gpu { -ExecutionConfig::ExecutionConfig() { +OldExecutionConfig::OldExecutionConfig() { set_default(); } @@ -33,7 +33,7 @@ class PerformanceModeValidator : public BaseValidator { } }; -void ExecutionConfig::set_default() { +void OldExecutionConfig::set_default() { register_property( std::make_tuple(ov::device::id, "0"), std::make_tuple(ov::enable_profiling, false), @@ -86,13 +86,13 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::intel_gpu::use_onednn, false)); } -void ExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { +void OldExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { property_validators[property.first] = validator; supported_properties[property.first] = visibility; internal_properties[property.first] = property.second; } -void ExecutionConfig::set_property(const AnyMap& config) { +void OldExecutionConfig::set_property(const AnyMap& config) { for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; @@ -102,18 +102,18 @@ void ExecutionConfig::set_property(const AnyMap& config) { } } -bool ExecutionConfig::is_supported(const std::string& name) const { +bool OldExecutionConfig::is_supported(const std::string& name) const { bool supported = supported_properties.find(name) != supported_properties.end(); bool has_validator = property_validators.find(name) != property_validators.end(); return supported && has_validator; } -bool ExecutionConfig::is_set_by_user(const std::string& name) const { +bool OldExecutionConfig::is_set_by_user(const std::string& name) const { return user_properties.find(name) != user_properties.end(); } -void ExecutionConfig::set_user_property(const AnyMap& config) { +void OldExecutionConfig::set_user_property(const AnyMap& config) { for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; @@ -125,7 +125,7 @@ void ExecutionConfig::set_user_property(const AnyMap& config) { } } -Any ExecutionConfig::get_property(const std::string& name) const { +Any OldExecutionConfig::get_property(const std::string& name) const { if (user_properties.find(name) != user_properties.end()) { return user_properties.at(name); } @@ -134,7 +134,7 @@ Any ExecutionConfig::get_property(const std::string& name) const { return internal_properties.at(name); } -void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { +void OldExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::execution_mode)) { const auto mode = get_property(ov::hint::execution_mode); if (!is_set_by_user(ov::hint::inference_precision)) { @@ 
-150,7 +150,7 @@ void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) {
     }
 }
 
-void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) {
+void OldExecutionConfig::apply_performance_hints(const cldnn::device_info& info) {
     if (is_set_by_user(ov::hint::performance_mode)) {
         const auto mode = get_property(ov::hint::performance_mode);
         if (!is_set_by_user(ov::num_streams)) {
@@ -179,7 +179,7 @@ void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) {
     }
 }
 
-void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) {
+void OldExecutionConfig::apply_priority_hints(const cldnn::device_info& info) {
     if (is_set_by_user(ov::hint::model_priority)) {
         const auto priority = get_property(ov::hint::model_priority);
         if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) {
@@ -188,7 +188,7 @@ void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) {
     }
 }
 
-void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) {
+void OldExecutionConfig::apply_debug_options(const cldnn::device_info& info) {
     GPU_DEBUG_GET_INSTANCE(debug_config);
     GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) {
         set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs));
@@ -223,16 +223,33 @@ void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) {
     }
 }
 
-void ExecutionConfig::apply_hints(const cldnn::device_info& info) {
+void OldExecutionConfig::apply_hints(const cldnn::device_info& info) {
     apply_execution_hints(info);
     apply_performance_hints(info);
     apply_priority_hints(info);
     apply_debug_options(info);
 }
 
-void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
-    if (finalized)
+void OldExecutionConfig::update_specific_default_properties(const cldnn::device_info& info) {
+    // These default properties should be set once.
+    if (specific_default_properties_is_set)
         return;
+    specific_default_properties_is_set = true;
+
+    // Enable KV-cache compression by default for non-systolic platforms MFDNN-11755
+    if (get_property(ov::hint::kv_cache_precision) == ov::element::undefined && !info.supports_immad) {
+        set_property(ov::hint::kv_cache_precision(ov::element::i8));
+    }
+
+    // Enable dynamic quantization by default for non-systolic platforms
+    if (get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) {
+        set_property(ov::hint::dynamic_quantization_group_size(32));
+    }
+}
+
+void OldExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
+    // Update device-specific default properties; called once, before internal_properties is updated.
+    update_specific_default_properties(info);
     // Copy internal properties before applying hints to ensure that
     // a property set by hint won't be overriden by a value in user config. 
@@ -283,7 +300,7 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { user_properties.clear(); } -void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info, const bool is_llm) { +void OldExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info, const bool is_llm) { if (!info.supports_immad) { apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); } @@ -292,7 +309,7 @@ void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RT apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); } -std::string ExecutionConfig::to_string() const { +std::string OldExecutionConfig::to_string() const { std::stringstream s; s << "internal properties:\n"; for (auto& kv : internal_properties) { diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp index 0a3c49e6387104..9aa975d83923a3 100644 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -22,11 +22,43 @@ NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { #undef OV_CONFIG_OPTION } -void NewExecutionConfig::finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) { - const auto& device_info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); - read_debug_options(device_info); - apply_user_properties(device_info); - apply_rt_info(device_info, rt_info); +void NewExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + if (!info.supports_immad) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); + } + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); +} + +void NewExecutionConfig::finalize_impl(std::shared_ptr context) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + apply_hints(info); + if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { + set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); + } + if (info.supports_immad) { + set_property(ov::intel_gpu::use_onednn(true)); + } + if (get_property(ov::intel_gpu::use_onednn)) { + set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + } + + // Enable KV-cache compression by default for non-systolic platforms + if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { + set_property(ov::hint::kv_cache_precision(ov::element::i8)); + } + + // Enable dynamic quantization by default for non-systolic platforms + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + set_property(ov::hint::dynamic_quantization_group_size(32)); + } +} + +void NewExecutionConfig::apply_hints(const cldnn::device_info& info) { + apply_execution_hints(info); + apply_performance_hints(info); + apply_priority_hints(info); } void NewExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { @@ -83,49 +115,5 @@ void NewExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { } } -void NewExecutionConfig::read_debug_options(const cldnn::device_info& info) { - ov::AnyMap config_properties; - set_user_property(config_properties); - ov::AnyMap env_properties; - set_user_property(env_properties); -} - -void 
NewExecutionConfig::apply_hints(const cldnn::device_info& info) { - apply_execution_hints(info); - apply_performance_hints(info); - apply_priority_hints(info); -} - -void NewExecutionConfig::apply_user_properties(const cldnn::device_info& info) { - apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); - } - if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); - } - if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } - - // Enable KV-cache compression by default for non-systolic platforms - if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - - // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } -} - -void NewExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) { - if (!info.supports_immad) { - apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - } - apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); -} - } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp index a1bb0ac8b6e6a0..930128ef53bff6 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/plugin/remote_context.hpp" #include "intel_gpu/runtime/plugin_config.hpp" #include "openvino/runtime/properties.hpp" #include "test_utils.h" @@ -23,6 +24,9 @@ TEST(config_test, basic) { std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; + auto ctx = std::make_shared("GPU", std::vector{ get_test_engine().get_device() }); + cfg.finalize(ctx, {}); + std::cerr << cfg.to_string(); // std::cerr << get_prop() << std::endl; // std::cerr << get_prop() << std::endl; } From e5f7cc350403c1b935de3e553b963d7a5a58e955 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 18 Dec 2024 16:28:20 +0400 Subject: [PATCH 04/44] Replace old config & fixes Signed-off-by: Vladimir Paramuzov --- src/inference/CMakeLists.txt | 2 +- .../openvino/runtime/plugin_config.hpp | 25 ++++- src/inference/src/dev/plugin_config.cpp | 106 +++++++++++++++++- .../intel_gpu/runtime/execution_config.hpp | 1 + .../intel_gpu/runtime/plugin_config.hpp | 6 + src/plugins/intel_gpu/src/graph/program.cpp | 13 ++- src/plugins/intel_gpu/src/plugin/plugin.cpp | 27 +++-- .../intel_gpu/src/runtime/plugin_config.cpp | 17 ++- 8 files changed, 170 insertions(+), 27 deletions(-) diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt index 22c28c2acde6e6..7e6cae62b85b67 100644 --- a/src/inference/CMakeLists.txt +++ b/src/inference/CMakeLists.txt @@ -87,7 +87,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE # for ov_plugins.hpp $,$>,${CMAKE_CURRENT_BINARY_DIR}/$,${CMAKE_CURRENT_BINARY_DIR}>) -target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util 
openvino::core::dev) +target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util openvino::core::dev nlohmann_json::nlohmann_json) ov_mark_target_as_cc(${TARGET_NAME}_obj) # OpenVINO Runtime is public API => need to mark this library as important for ABI free diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index acccd0bf343604..16985d70b2841a 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -62,7 +62,8 @@ struct ConfigOption : public ConfigOptionBase { bool is_valid_value(ov::Any val) override { try { - return validator ? validator(val.as()) : true; + auto v = val.as(); + return validator ? validator(v) : true; } catch (std::exception&) { return false; } @@ -74,6 +75,7 @@ struct ConfigOption : public ConfigOptionBase { // Base class for configuration of plugins // Implementation should provide a list of properties with default values and validators (optional) +// and prepare a map string property name -> ConfigOptionBase pointer // For the sake of efficiency, we expect that plugin properties are defined as class members of the derived class // and accessed directly in the plugin's code (i.e. w/o get_property()/set_property() calls) // get/set property members are provided to handle external property access @@ -91,9 +93,14 @@ struct ConfigOption : public ConfigOptionBase { class OPENVINO_RUNTIME_API PluginConfig { public: PluginConfig() {} - PluginConfig(std::initializer_list values) : PluginConfig() { set_property(ov::AnyMap(values)); } - explicit PluginConfig(const ov::AnyMap& properties) : PluginConfig() { set_property(properties); } - explicit PluginConfig(const ov::AnyMap::value_type& property) : PluginConfig() { set_property(property); } + virtual ~PluginConfig() = default; + + // Disable copy and move as we need to setup m_options_map properly and ensure that + // values are a part of current config object + PluginConfig(const PluginConfig& other) = delete; + PluginConfig& operator=(const PluginConfig& other) = delete; + PluginConfig(PluginConfig&& other) = delete; + PluginConfig& operator=(PluginConfig&& other) = delete; void set_property(const ov::AnyMap& properties); Any get_property(const std::string& name) const; @@ -118,9 +125,12 @@ class OPENVINO_RUNTIME_API PluginConfig { std::string to_string() const; void finalize(std::shared_ptr context, const ov::RTMap& rt_info); - virtual void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) = 0; protected: + virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {} + virtual void apply_debug_options(std::shared_ptr context); + virtual void finalize_impl(std::shared_ptr context) {} + template bool is_set_by_user(const ov::Property& property) const { return user_properties.find(property.name()) != user_properties.end(); @@ -135,6 +145,11 @@ class OPENVINO_RUNTIME_API PluginConfig { } } } + + ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; + ov::AnyMap read_env(const std::vector& prefixes) const; + void cleanup_unsupported(ov::AnyMap& config) const; + std::map m_options_map; // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 9f169c07663a40..c3ac86e05ba04b 100644 --- 
a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -1,10 +1,20 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "openvino/runtime/plugin_config.hpp" +#include "openvino/core/any.hpp" #include "openvino/core/except.hpp" +#include "openvino/runtime/device_id_parser.hpp" +#include "openvino/util/common_util.hpp" +#include "openvino/util/env_util.hpp" +#include +#ifdef JSON_HEADER +# include +#else +# include +#endif namespace ov { @@ -15,7 +25,8 @@ void PluginConfig::set_property(const AnyMap& config) { const auto& known_options = m_options_map; auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); - OPENVINO_ASSERT(it != known_options.end()); + OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); it->second->set_any(val); } @@ -25,6 +36,7 @@ ov::Any PluginConfig::get_property(const std::string& name) const { const auto& known_options = m_options_map; auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); return it->second->get_any(); } @@ -37,6 +49,7 @@ void PluginConfig::set_user_property(const AnyMap& config) { const auto& known_options = m_options_map; auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); OPENVINO_ASSERT(it->second->is_valid_value(val), "Invalid value: ", val.as(), " for property: ", name); user_properties[name] = val; @@ -44,6 +57,8 @@ void PluginConfig::set_user_property(const AnyMap& config) { } void PluginConfig::finalize(std::shared_ptr context, const ov::RTMap& rt_info) { + apply_rt_info(context, rt_info); + apply_debug_options(context); // Copy internal properties before applying hints to ensure that // a property set by hint won't be overriden by a value in user config. 
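
Besides the hint ordering this comment explains, apply_debug_options() above also pulls properties from config.json and from OV_*-prefixed environment variables; the boolean-parsing rule that read_env() applies in the hunks below can be sketched as follows ("0/false/off/no" and "1/true/on/yes", lowercased, are the only accepted spellings; an unset variable is skipped). parse_bool_env and the variable name in main() are illustrative helpers, not plugin API:

#include <cctype>
#include <cstdlib>
#include <iostream>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>

static std::optional<bool> parse_bool_env(const char* var_name) {
    const char* raw = std::getenv(var_name);
    if (raw == nullptr || *raw == '\0')
        return std::nullopt;  // unset -> option is left untouched

    std::string val;
    for (const char* p = raw; *p; ++p)
        val += static_cast<char>(std::tolower(static_cast<unsigned char>(*p)));

    static const std::set<std::string> off = {"0", "false", "off", "no"};
    static const std::set<std::string> on = {"1", "true", "on", "yes"};
    if (off.count(val)) return false;
    if (on.count(val)) return true;
    throw std::runtime_error("Unexpected value for boolean property: " + val);
}

int main() {
    if (auto v = parse_bool_env("OV_ENABLE_PROFILING"))  // illustrative name
        std::cout << "parsed: " << *v << "\n";
    else
        std::cout << "not set\n";
}
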
// E.g num_streams=AUTO && hint=THROUGHPUT @@ -54,12 +69,95 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R option->set_any(prop.second); } - finalize_impl(context, rt_info); + finalize_impl(context); // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization user_properties.clear(); } +void PluginConfig::apply_debug_options(std::shared_ptr context) { + ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); + cleanup_unsupported(config_properties); + set_user_property(config_properties); + ov::AnyMap env_properties = read_env({"OV_"}); + set_user_property(env_properties); +} + +ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { + ov::AnyMap config; + + std::ifstream ifs(filename); + if (!ifs.is_open()) { + return config; + } + + nlohmann::json json_config; + try { + ifs >> json_config; + } catch (const std::exception& e) { + return config; + } + + DeviceIDParser parser(target_device_name); + for (auto item = json_config.cbegin(), end = json_config.cend(); item != end; ++item) { + const std::string& device_name = item.key(); + if (DeviceIDParser(device_name).get_device_name() != parser.get_device_name()) + continue; + + const auto& item_value = item.value(); + for (auto option = item_value.cbegin(), item_value_end = item_value.cend(); option != item_value_end; ++option) { + config[option.key()] = option.value().get(); + } + } + + return config; +} + +ov::AnyMap PluginConfig::read_env(const std::vector& prefixes) const { + ov::AnyMap config; + + for (auto& kv : m_options_map) { + for (auto& prefix : prefixes) { + auto var_name = prefix + kv.first; + const auto& val = ov::util::getenv_string(var_name.c_str()); + + if (!val.empty()) { + if (dynamic_cast*>(kv.second) != nullptr) { + const std::set off = {"0", "false", "off", "no"}; + const std::set on = {"1", "true", "on", "yes"}; + + const auto& val_lower = ov::util::to_lower(val); + if (off.count(val_lower)) { + config[kv.first] = false; + } else if (on.count(val_lower)) { + config[kv.first] = true; + } else { + OPENVINO_THROW("Unexpected value for boolean property: ", val); + } + } else { + config[kv.first] = val; + } + break; + } + } + } + + return config; +} + +void PluginConfig::cleanup_unsupported(ov::AnyMap& config) const { + for (auto it = config.begin(); it != config.end();) { + const auto& known_options = m_options_map; + auto& name = it->first; + auto opt_it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); + if (opt_it == known_options.end()) { + it = config.erase(it); + } else { + ++it; + } + } +} + std::string PluginConfig::to_string() const { std::stringstream s; @@ -67,7 +165,7 @@ std::string PluginConfig::to_string() const { s << "PROPERTIES:\n"; for (const auto& option : m_options_map) { - s << "\t" << option.first << ":" << option.second->get_any().as() << std::endl; + s << "\t" << option.first << ": " << option.second->get_any().as() << std::endl; } s << "USER PROPERTIES:\n"; for (const auto& user_prop : user_properties) { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 924f6cf5d42a40..6e39e0b0b6a822 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -6,6 
+6,7 @@ #include "intel_gpu/runtime/internal_properties.hpp" #include "intel_gpu/runtime/device.hpp" +#include "intel_gpu/runtime/plugin_config.hpp" namespace ov::intel_gpu { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index 6ea8f4e107bfc8..5931a60ffae37a 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -15,6 +15,12 @@ namespace intel_gpu { struct NewExecutionConfig : public ov::PluginConfig { NewExecutionConfig(); + NewExecutionConfig(std::initializer_list values) : NewExecutionConfig() { set_property(ov::AnyMap(values)); } + explicit NewExecutionConfig(const ov::AnyMap& properties) : NewExecutionConfig() { set_property(properties); } + explicit NewExecutionConfig(const ov::AnyMap::value_type& property) : NewExecutionConfig() { set_property(property); } + + NewExecutionConfig(const NewExecutionConfig& other); + NewExecutionConfig& operator=(const NewExecutionConfig& other); #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ ConfigOption PropertyVar = \ diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index c3de17f8a196d3..800ac5ce997d6c 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -3,6 +3,7 @@ // #include "impls/registry/implementation_manager.hpp" +#include "intel_gpu/plugin/remote_context.hpp" #include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/core/type.hpp" #include "openvino/runtime/system_conf.hpp" @@ -162,7 +163,8 @@ program::program(engine& engine_ref, program_node::reset_unique_id(); if (no_optimizations) { init_graph(); - _config.apply_user_properties(_engine.get_device_info()); + auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); + _config.finalize(ctx, {}); } else { build_program(is_internal); if (_is_body_program) { @@ -198,7 +200,8 @@ program::program(engine& engine_ref, _task_executor(std::move(task_executor)), processing_order(), is_internal(is_internal) { - _config.apply_user_properties(_engine.get_device_info()); + auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); + _config.finalize(ctx, {}); init_primitives(); init_program(); prepare_nodes(nodes); @@ -211,7 +214,8 @@ program::program(engine& engine, const ExecutionConfig& config) _config(config), processing_order() { init_primitives(); - _config.apply_user_properties(_engine.get_device_info()); + auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); + _config.finalize(ctx, {}); new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); _layout_optimizer = std::make_unique(); } @@ -498,7 +502,8 @@ void program::set_options() { void program::build_program(bool is_internal) { init_graph(); - _config.apply_user_properties(_engine.get_device_info()); + auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); + _config.finalize(ctx, {}); { pre_optimize_graph(is_internal); } run_graph_compilation(); { post_optimize_graph(is_internal); } diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index a99ac1e534312b..63c98b8c756845 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -25,6 +25,7 @@ #include "intel_gpu/runtime/device_query.hpp" #include 
"intel_gpu/runtime/execution_config.hpp" #include "intel_gpu/runtime/itt.hpp" +#include "openvino/core/any.hpp" #include "openvino/core/deprecated.hpp" #include "openvino/op/gather.hpp" #include "openvino/op/concat.hpp" @@ -56,6 +57,16 @@ using Time = std::chrono::high_resolution_clock; namespace ov::intel_gpu { +namespace { + +ov::RTMap get_rt_info(const ov::Model& model) { + if (model.has_rt_info("runtime_options")) + return model.get_rt_info("runtime_options"); + return {}; +} + +} // namespace + #define FACTORY_DECLARATION(op_version, op_name) \ void __register ## _ ## op_name ## _ ## op_version(); @@ -220,9 +231,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(context->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); - config.apply_user_properties(context->get_engine().get_device_info()); + config.finalize(context, get_rt_info(*model)); set_cache_info(model, config); @@ -242,11 +251,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(context_impl->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); - config.apply_user_properties(context_impl->get_engine().get_device_info()); - + config.finalize(context_impl, get_rt_info(*model)); set_cache_info(model, config); auto transformed_model = clone_and_transform_model(model, config, context_impl); @@ -313,9 +318,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(ctx->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); - config.apply_user_properties(ctx->get_engine().get_device_info()); + config.finalize(ctx, get_rt_info(*model)); ProgramBuilder prog(ctx->get_engine(), config); @@ -370,7 +373,7 @@ std::shared_ptr Plugin::import_model(std::istream& model, ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(_orig_config); - config.apply_user_properties(context_impl->get_engine().get_device_info()); + config.finalize(context_impl, {}); ov::CacheMode cache_mode = config.get_property(ov::cache_mode); ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks); diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp index 9aa975d83923a3..330d3ed40c2175 100644 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -22,6 +22,21 @@ NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { #undef OV_CONFIG_OPTION } +NewExecutionConfig::NewExecutionConfig(const NewExecutionConfig& other) : NewExecutionConfig() { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + 
m_options_map.at(kv.first)->set_any(kv.second->get_any()); + } +} + +NewExecutionConfig& NewExecutionConfig::operator=(const NewExecutionConfig& other) { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); + } + return *this; +} + void NewExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); if (!info.supports_immad) { From 90572bd4e0a1de851cbd5a06a6242ef3b861c90d Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 18 Dec 2024 17:34:47 +0400 Subject: [PATCH 05/44] prefix for config members and unit tests Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 3 + src/inference/src/dev/plugin_config.cpp | 9 +- src/inference/tests/unit/config_test.cpp | 202 ++++++++++++++++++ .../intel_gpu/runtime/plugin_config.hpp | 2 +- .../intel_gpu/src/runtime/plugin_config.cpp | 2 +- .../tests/unit/module_tests/config_test.cpp | 3 +- 6 files changed, 215 insertions(+), 6 deletions(-) create mode 100644 src/inference/tests/unit/config_test.cpp diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 16985d70b2841a..78f8da4fe61ca2 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -118,6 +118,9 @@ class OPENVINO_RUNTIME_API PluginConfig { template T get_property(const ov::Property& property) const { + if (is_set_by_user(property)) { + return user_properties.at(property.name()).template as(); + } OPENVINO_ASSERT(m_options_map.find(property.name()) != m_options_map.end(), "Property not found: ", property.name()); return static_cast*>(m_options_map.at(property.name()))->value; } diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index c3ac86e05ba04b..c4489cdc1bc69f 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -76,9 +76,12 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R } void PluginConfig::apply_debug_options(std::shared_ptr context) { - ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); - cleanup_unsupported(config_properties); - set_user_property(config_properties); + if (context) { + ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); + cleanup_unsupported(config_properties); + set_user_property(config_properties); + } + ov::AnyMap env_properties = read_env({"OV_"}); set_user_property(env_properties); } diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp new file mode 100644 index 00000000000000..db832247dd2bd6 --- /dev/null +++ b/src/inference/tests/unit/config_test.cpp @@ -0,0 +1,202 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/core/any.hpp" +#include "openvino/runtime/plugin_config.hpp" + +#include +#include + +#include "common_test_utils/common_utils.hpp" + +using namespace ::testing; +using namespace ov; + +static constexpr Property unsupported_property{"UNSUPPORTED_PROPERTY"}; +static constexpr Property bool_property{"BOOL_PROPERTY"}; +static constexpr Property int_property{"INT_PROPERTY"}; +static constexpr Property high_level_property{"HIGH_LEVEL_PROPERTY"}; +static constexpr 
Property low_level_property{"LOW_LEVEL_PROPERTY"}; + + +struct EmptyTestConfig : public ov::PluginConfig { + std::vector get_supported_properties() const { + std::vector supported_properties; + for (const auto& kv : m_options_map) { + supported_properties.push_back(kv.first); + } + return supported_properties; + } +}; + +struct NotEmptyTestConfig : public ov::PluginConfig { + NotEmptyTestConfig() { + m_options_map[bool_property.name()] = &m_bool_property; + m_options_map[int_property.name()] = &m_int_property; + m_options_map[high_level_property.name()] = &m_high_level_property; + m_options_map[low_level_property.name()] = &m_low_level_property; + } + + NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); + } + } + + ConfigOption m_bool_property = ConfigOption(true); + ConfigOption m_int_property = ConfigOption(-1); + ConfigOption m_high_level_property = ConfigOption(""); + ConfigOption m_low_level_property = ConfigOption(""); + + std::vector get_supported_properties() const { + std::vector supported_properties; + for (const auto& kv : m_options_map) { + supported_properties.push_back(kv.first); + } + return supported_properties; + } + + void finalize_impl(std::shared_ptr context) override { + if (!is_set_by_user(low_level_property)) { + m_low_level_property.value = m_high_level_property.value; + } + } + + void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override { + apply_rt_info_property(high_level_property, rt_info); + } + + using ov::PluginConfig::is_set_by_user; +}; + +TEST(plugin_config, can_create_empty_config) { + ASSERT_NO_THROW( + EmptyTestConfig cfg; + ASSERT_EQ(cfg.get_supported_properties().size(), 0); + ); +} + +TEST(plugin_config, can_create_not_empty_config) { + ASSERT_NO_THROW( + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_supported_properties().size(), 4); + ); +} + +TEST(plugin_config, can_set_get_property) { + NotEmptyTestConfig cfg; + ASSERT_NO_THROW(cfg.get_property(bool_property)); + ASSERT_EQ(cfg.get_property(bool_property), true); + ASSERT_NO_THROW(cfg.set_property(bool_property(false))); + ASSERT_EQ(cfg.get_property(bool_property), false); + + ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); + ASSERT_EQ(cfg.get_property(bool_property), true); +} + +TEST(plugin_config, throw_for_unsupported_property) { + NotEmptyTestConfig cfg; + ASSERT_ANY_THROW(cfg.get_property(unsupported_property)); + ASSERT_ANY_THROW(cfg.set_property(unsupported_property(10.0f))); + ASSERT_ANY_THROW(cfg.set_user_property(unsupported_property(10.0f))); +} + +TEST(plugin_config, can_direct_access_to_properties) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.m_bool_property.value, cfg.get_property(bool_property)); + ASSERT_NO_THROW(cfg.set_property(bool_property(false))); + ASSERT_EQ(cfg.m_bool_property.value, cfg.get_property(bool_property)); + ASSERT_EQ(cfg.m_bool_property.value, false); + + ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); + ASSERT_EQ(cfg.m_bool_property.value, false); // user property doesn't impact member value until finalize() is called + + cfg.m_bool_property.value = true; + ASSERT_EQ(cfg.get_property(bool_property), true); +} + +TEST(plugin_config, finalization_updates_member) { + NotEmptyTestConfig cfg; + ASSERT_NO_THROW(cfg.set_user_property(bool_property(false))); + ASSERT_EQ(cfg.m_bool_property.value, true); // user property doesn't impact member value 
until finalize() is called
+
+    cfg.finalize(nullptr, {});
+
+    ASSERT_EQ(cfg.m_bool_property.value, false); // now the value has changed
+}
+
+TEST(plugin_config, get_property_before_finalization_returns_user_property_if_set) {
+    NotEmptyTestConfig cfg;
+
+    ASSERT_EQ(cfg.get_property(bool_property), true); // default value
+    ASSERT_EQ(cfg.m_bool_property.value, true); // default value
+
+    cfg.m_bool_property.value = false; // update member directly
+    ASSERT_EQ(cfg.get_property(bool_property), false); // OK, return the class member value as no user property was set
+
+    ASSERT_NO_THROW(cfg.set_user_property(bool_property(true)));
+    ASSERT_TRUE(cfg.is_set_by_user(bool_property));
+    ASSERT_EQ(cfg.get_property(bool_property), true); // now user property value is returned
+    ASSERT_EQ(cfg.m_bool_property.value, false); // but class member is not updated
+
+    cfg.finalize(nullptr, {});
+    ASSERT_EQ(cfg.get_property(bool_property), cfg.m_bool_property.value); // equal after finalization
+    ASSERT_FALSE(cfg.is_set_by_user(bool_property)); // and user property is cleared
+}
+
+TEST(plugin_config, finalization_updates_dependent_properties) {
+    NotEmptyTestConfig cfg;
+
+    cfg.set_user_property(high_level_property("value1"));
+    ASSERT_TRUE(cfg.is_set_by_user(high_level_property));
+    ASSERT_FALSE(cfg.is_set_by_user(low_level_property));
+
+    cfg.finalize(nullptr, {});
+    ASSERT_EQ(cfg.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg.m_low_level_property.value, "value1");
+    ASSERT_FALSE(cfg.is_set_by_user(high_level_property));
+    ASSERT_FALSE(cfg.is_set_by_user(low_level_property));
+}
+
+TEST(plugin_config, can_set_property_from_rt_info) {
+    NotEmptyTestConfig cfg;
+
+    RTMap rt_info = {
+        {high_level_property.name(), "value1"},
+        {int_property.name(), 10} // int_property is not applied from rt info
+    };
+
+    // default values
+    ASSERT_EQ(cfg.m_high_level_property.value, "");
+    ASSERT_EQ(cfg.m_low_level_property.value, "");
+    ASSERT_EQ(cfg.m_int_property.value, -1);
+
+    cfg.finalize(nullptr, rt_info);
+
+    ASSERT_EQ(cfg.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg.m_low_level_property.value, "value1"); // dependent property is updated too
+    ASSERT_EQ(cfg.m_int_property.value, -1); // still default
+}
+
+TEST(plugin_config, can_copy_config) {
+    NotEmptyTestConfig cfg1;
+
+    cfg1.m_high_level_property.value = "value1";
+    cfg1.m_low_level_property.value = "value2";
+    cfg1.m_int_property.value = 1;
+    cfg1.set_user_property(bool_property(false));
+
+    NotEmptyTestConfig cfg2 = cfg1;
+    ASSERT_EQ(cfg2.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg2.m_low_level_property.value, "value2");
+    ASSERT_EQ(cfg2.m_int_property.value, 1);
+    ASSERT_EQ(cfg2.get_property(bool_property), false); // ensure user properties are copied too
+
+    // check that cfg1 modification doesn't impact a copy
+    cfg1.set_property(high_level_property("value3"));
+    cfg1.m_int_property.value = 3;
+    ASSERT_EQ(cfg2.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg2.m_int_property.value, 1);
+}
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp
index 5931a60ffae37a..f18b32cd8b7cbb 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp
@@ -23,7 +23,7 @@ struct NewExecutionConfig : public ov::PluginConfig {
     NewExecutionConfig& operator=(const NewExecutionConfig& other);
 
 #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...)
\ - ConfigOption PropertyVar = \ + ConfigOption m_ ## PropertyVar = \ ConfigOption(GET_EXCEPT_LAST(__VA_ARGS__)); #include "options_release.inl" diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp index 330d3ed40c2175..8f4319734d3e9f 100644 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -14,7 +14,7 @@ namespace intel_gpu { NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ - m_options_map[PropertyNamespace::PropertyVar.name()] = &PropertyVar; + m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; #include "intel_gpu/runtime/options_release.inl" #include "intel_gpu/runtime/options_debug.inl" diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp index 930128ef53bff6..b14c5b0bf4623d 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp @@ -14,10 +14,11 @@ TEST(config_test, basic) { ov::intel_gpu::NewExecutionConfig cfg; std::cerr << cfg.to_string(); + std::cerr << cfg.get_property("PERFORMANCE_HINT").as(); cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); cfg.set_property(ov::hint::inference_precision(ov::element::f32)); - std::cerr << "PROF: " << cfg.enable_profiling.value << std::endl; + std::cerr << "PROF: " << cfg.m_enable_profiling.value << std::endl; std::cerr << cfg.to_string(); From f87d45b004bbdc8e706bfccc5713435a2e491bca Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 19 Dec 2024 14:25:26 +0400 Subject: [PATCH 06/44] added visibility for options Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 50 ++++++++++++- src/inference/src/dev/plugin_config.cpp | 43 ++++++----- src/inference/tests/unit/config_test.cpp | 45 +++++++++--- .../include/intel_gpu/graph/program.hpp | 4 +- .../intel_gpu/runtime/internal_properties.hpp | 8 +-- .../include/intel_gpu/runtime/options.inl | 71 +++++++++++++++++++ .../intel_gpu/runtime/options_debug.inl | 31 -------- .../intel_gpu/runtime/options_release.inl | 50 ------------- .../intel_gpu/runtime/plugin_config.hpp | 9 +-- .../intel_gpu/src/plugin/ops/condition.cpp | 1 - src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 1 - .../src/runtime/execution_config.cpp | 7 -- .../intel_gpu/src/runtime/plugin_config.cpp | 8 +-- .../tests/unit/fusions/gemm_fusion_test.cpp | 1 - .../tests/unit/fusions/loop_fusion_test.cpp | 1 - .../tests/unit/test_cases/crop_gpu_test.cpp | 1 - .../tests/unit/test_cases/loop_gpu_test.cpp | 5 +- 17 files changed, 189 insertions(+), 147 deletions(-) create mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 78f8da4fe61ca2..36b6765849ee8e 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -33,8 +33,40 @@ #define GET_EXCEPT_LAST(...) 
EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) +#define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ + ConfigOption m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; + +#define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) \ + m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; + +#define OV_CONFIG_RELEASE_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__) + +#define OV_CONFIG_RELEASE_INTERNAL_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE_INTERNAL, __VA_ARGS__) + +#define OV_CONFIG_DEBUG_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) + namespace ov { +enum class OptionVisibility { + RELEASE = 0, // Option can be set for any build type via public interface, environment and config file + RELEASE_INTERNAL = 1, // Option can be set for any build type via environment and config file only + DEBUG = 2, // Option can be set for debug builds only via environment and config file +}; + +inline std::ostream& operator<<(std::ostream& os, const OptionVisibility& visibility) { + switch (visibility) { + case OptionVisibility::RELEASE: os << "RELEASE"; break; + case OptionVisibility::RELEASE_INTERNAL: os << "RELEASE_INTERNAL"; break; + case OptionVisibility::DEBUG: os << "DEBUG"; break; + default: os << "UNKNOWN"; break; + } + + return os; +} + struct ConfigOptionBase { explicit ConfigOptionBase() {} virtual ~ConfigOptionBase() = default; @@ -42,13 +74,15 @@ struct ConfigOptionBase { virtual void set_any(const ov::Any any) = 0; virtual ov::Any get_any() const = 0; virtual bool is_valid_value(ov::Any val) = 0; + virtual OptionVisibility get_visibility() const = 0; }; -template +template struct ConfigOption : public ConfigOptionBase { ConfigOption(const T& default_val, std::function validator = nullptr) : ConfigOptionBase(), value(default_val), validator(validator) {} T value; + constexpr static const auto visibility = visibility_; void set_any(const ov::Any any) override { if (validator) @@ -69,6 +103,10 @@ struct ConfigOption : public ConfigOptionBase { } } + OptionVisibility get_visibility() const override { + return visibility; + } + private: std::function validator; }; @@ -139,6 +177,14 @@ class OPENVINO_RUNTIME_API PluginConfig { return user_properties.find(property.name()) != user_properties.end(); } + ConfigOptionBase* get_option_ptr(const std::string& name) const { + auto it = m_options_map.find(name); + OPENVINO_ASSERT(it != m_options_map.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); + + return it->second; + } + template void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { if (!is_set_by_user(property)) { @@ -149,6 +195,8 @@ class OPENVINO_RUNTIME_API PluginConfig { } } + void set_user_property(const ov::AnyMap& properties, const std::vector& allowed_visibility); + ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; ov::AnyMap read_env(const std::vector& prefixes) const; void cleanup_unsupported(ov::AnyMap& config) const; diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index c4489cdc1bc69f..cfc48745f677f5 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ 
b/src/inference/src/dev/plugin_config.cpp
@@ -23,34 +23,31 @@ void PluginConfig::set_property(const AnyMap& config) {
         auto& name = kv.first;
         auto& val = kv.second;
 
-        const auto& known_options = m_options_map;
-        auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
-        OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
-        OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name);
-
-        it->second->set_any(val);
+        auto option = get_option_ptr(name);
+        option->set_any(val);
     }
 }
 
 ov::Any PluginConfig::get_property(const std::string& name) const {
-    const auto& known_options = m_options_map;
-    auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
-    OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
-    OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name);
-
-    return it->second->get_any();
+    auto option = get_option_ptr(name);
+    return option->get_any();
 }
 
 void PluginConfig::set_user_property(const AnyMap& config) {
+    static std::vector<OptionVisibility> allowed_visibility = {OptionVisibility::RELEASE};
+    set_user_property(config, allowed_visibility);
+}
+
+void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector<OptionVisibility>& allowed_visibility) {
     for (auto& kv : config) {
         auto& name = kv.first;
         auto& val = kv.second;
 
-        const auto& known_options = m_options_map;
-        auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
-        OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
-        OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name);
-        OPENVINO_ASSERT(it->second->is_valid_value(val), "Invalid value: ", val.as<std::string>(), " for property: ", name);
+        auto option = get_option_ptr(name);
+        if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) {
+            OPENVINO_THROW("Unknown property: ", name);
+        }
+        OPENVINO_ASSERT(option->is_valid_value(val), "Invalid value: ", val.as<std::string>(), " for property: ", name);
 
         user_properties[name] = val;
@@ -76,14 +73,22 @@ void PluginConfig::finalize(std::shared_ptr<IRemoteContext> context, const ov::R
 }
 
 void PluginConfig::apply_debug_options(std::shared_ptr<IRemoteContext> context) {
+    static std::vector<OptionVisibility> allowed_visibility = {
+        OptionVisibility::RELEASE,
+        OptionVisibility::RELEASE_INTERNAL,
+#ifdef ENABLE_DEBUG_CAPS
+        OptionVisibility::DEBUG
+#endif
+    };
+
     if (context) {
         ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name());
         cleanup_unsupported(config_properties);
-        set_user_property(config_properties);
+        set_user_property(config_properties, allowed_visibility);
     }
 
     ov::AnyMap env_properties = read_env({"OV_"});
-    set_user_property(env_properties);
+    set_user_property(env_properties, allowed_visibility);
 }
 
 ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const {
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index db832247dd2bd6..0feeef707a2779 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -8,8 +8,6 @@
 #include <gtest/gtest.h>
 #include <gmock/gmock.h>
 
-#include "common_test_utils/common_utils.hpp"
-
 using namespace ::testing;
 using namespace ov;
 
@@ -18,6 +16,8 @@ static constexpr Property<bool> bool_property{"BOOL_PROP
 static constexpr Property<int> int_property{"INT_PROPERTY"};
 static constexpr
Property high_level_property{"HIGH_LEVEL_PROPERTY"}; static constexpr Property low_level_property{"LOW_LEVEL_PROPERTY"}; +static constexpr Property release_internal_property{"RELEASE_INTERNAL_PROPERTY"}; +static constexpr Property debug_property{"DEBUG_PROPERTY"}; struct EmptyTestConfig : public ov::PluginConfig { @@ -32,10 +32,15 @@ struct EmptyTestConfig : public ov::PluginConfig { struct NotEmptyTestConfig : public ov::PluginConfig { NotEmptyTestConfig() { - m_options_map[bool_property.name()] = &m_bool_property; - m_options_map[int_property.name()] = &m_int_property; - m_options_map[high_level_property.name()] = &m_high_level_property; - m_options_map[low_level_property.name()] = &m_low_level_property; + #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") + OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") + OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") + OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "") + OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "") + OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "") + #undef OV_CONFIG_OPTION + } NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() { @@ -45,10 +50,14 @@ struct NotEmptyTestConfig : public ov::PluginConfig { } } - ConfigOption m_bool_property = ConfigOption(true); - ConfigOption m_int_property = ConfigOption(-1); - ConfigOption m_high_level_property = ConfigOption(""); - ConfigOption m_low_level_property = ConfigOption(""); + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") + OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") + OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") + OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "") + OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "") + OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "") + #undef OV_CONFIG_OPTION std::vector get_supported_properties() const { std::vector supported_properties; @@ -68,6 +77,7 @@ struct NotEmptyTestConfig : public ov::PluginConfig { apply_rt_info_property(high_level_property, rt_info); } + using ov::PluginConfig::get_option_ptr; using ov::PluginConfig::is_set_by_user; }; @@ -81,7 +91,7 @@ TEST(plugin_config, can_create_empty_config) { TEST(plugin_config, can_create_not_empty_config) { ASSERT_NO_THROW( NotEmptyTestConfig cfg; - ASSERT_EQ(cfg.get_supported_properties().size(), 4); + ASSERT_EQ(cfg.get_supported_properties().size(), 6); ); } @@ -200,3 +210,16 @@ TEST(plugin_config, can_copy_config) { ASSERT_EQ(cfg2.m_high_level_property.value, "value1"); ASSERT_EQ(cfg2.m_int_property.value, 1); } + +TEST(plugin_config, set_user_property_throw_for_non_release_options) { + NotEmptyTestConfig cfg; + ASSERT_ANY_THROW(cfg.set_user_property(release_internal_property(10))); + ASSERT_ANY_THROW(cfg.set_user_property(debug_property(10))); +} + +TEST(plugin_config, visibility_is_correct) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_option_ptr(release_internal_property.name())->get_visibility(), OptionVisibility::RELEASE_INTERNAL); + ASSERT_EQ(cfg.get_option_ptr(debug_property.name())->get_visibility(), OptionVisibility::DEBUG); + ASSERT_EQ(cfg.get_option_ptr(int_property.name())->get_visibility(), OptionVisibility::RELEASE); +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index c775537a514dde..bec721ad3938a6 100644 --- 
a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -309,8 +309,8 @@ struct program { std::vector allocating_order; std::unique_ptr pm; std::unique_ptr _layout_optimizer; - bool is_internal; - bool _is_body_program; + bool is_internal = false; + bool _is_body_program = false; // if subgraph can be optimized if it consists of only inputs and corresponding outputs bool _can_be_optimized; std::unique_ptr _impls_cache; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index ddbb260647b287..77a00294e8076f 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -45,21 +45,19 @@ static constexpr Property optimize_data{"GPU_OPTIM static constexpr Property allow_static_input_reorder{"GPU_ALLOW_STATIC_INPUT_REORDER"}; static constexpr Property partial_build_program{"GPU_PARTIAL_BUILD"}; static constexpr Property allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"}; -static constexpr Property use_only_static_kernels_for_dynamic_shape{"GPU_USE_ONLY_STATIC_KERNELS_FOR_DYNAMIC_SHAPE"}; static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; static constexpr Property config_file{"CONFIG_FILE"}; static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; -static constexpr Property max_dynamic_batch{"DYN_BATCH_LIMIT"}; -static constexpr Property nv12_two_inputs{"GPU_NV12_TWO_INPUTS"}; static constexpr Property buffers_preallocation_ratio{"GPU_BUFFERS_PREALLOCATION_RATIO"}; static constexpr Property max_kernels_per_batch{"GPU_MAX_KERNELS_PER_BATCH"}; static constexpr Property use_onednn{"USE_ONEDNN"}; static constexpr Property help{"HELP"}; static constexpr Property verbose{"VERBOSE"}; +static constexpr Property log_to_file{"LOG_TO_FILE"}; static constexpr Property disable_usm{"DISABLE_USM"}; -static constexpr Property disable_onednn_post_ops{"DISABLE_ONEDNN_POST_OPS"}; +static constexpr Property disable_onednn_post_ops_opt{"DISABLE_ONEDNN_POST_OPS_OPT"}; static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; static constexpr Property dump_profiling_data{"DUMP_PROFILING_DATA"}; static constexpr Property dump_sources{"DUMP_SOURCES"}; @@ -69,11 +67,11 @@ static constexpr Property dump_iteratio static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; -static constexpr Property disable_shape_agnostic_impls{"DISABLE_SHAPE_AGNOSTIC_IMPLS"}; static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property disable_fake_alignment{"DISABLE_FAKE_ALIGNMENT"}; static constexpr Property use_usm_host{"USE_USM_HOST"}; static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl 
b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
new file mode 100644
index 00000000000000..d5da1edf81bd69
--- /dev/null
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
@@ -0,0 +1,71 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// Namespace, property name, default value, [validator], description
+OV_CONFIG_RELEASE_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin")
+OV_CONFIG_RELEASE_OPTION(ov::device, id, "0", "ID of the current device")
+OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty")
+OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Number of streams to be used for inference")
+OV_CONFIG_RELEASE_OPTION(ov, compilation_num_threads, std::max(1, static_cast<int>(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that support parallelism")
+OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16,
+        [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision")
+OV_CONFIG_RELEASE_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact the number of threads used for model compilation and inference as well as device queue settings")
+OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc.")
+OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model.
Performance mode allows unsafe optimizations that may reduce the model accuracy") +OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application") +OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not") + +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable winograd convolution implementation if available") +OV_CONFIG_RELEASE_OPTION(ov::internal, exclusive_async_requests, false, "") +OV_CONFIG_RELEASE_OPTION(ov::internal, query_model_ratio, 1.0f, "") +OV_CONFIG_RELEASE_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache") +OV_CONFIG_RELEASE_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model") +OV_CONFIG_RELEASE_OPTION(ov::hint, dynamic_quantization_group_size, 0, "") +OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") +OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") +OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") + +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_lp_transformations, false, "Enable/Disable Low precision transformations set") + +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. 
May be in-order or out-of-order")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, optimize_data, false, "Enable/Disable data flow optimizations for cldnn::program")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_memory_pool, true, "Enable/Disable memory pool usage")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_static_input_reorder, false, "Controls if weights tensors can be reordered during model compilation to a more friendly layout for a specific kernel")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, custom_outputs, std::vector<std::string>{}, "List of output primitive names")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, force_implementations, ImplForcingMap{}, "Specifies the list of forced implementations for the primitives")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, partial_build_program, false, "Early exit from model compilation process which allows faster execution graph dumping")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "Switch between new and old shape inference flow. Shall be removed soon")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "Threshold for the preallocation feature in case it uses the ratio policy")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disable onednn usage for particular model/platform")
+
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher the value, the more verbose the output. 0 - Disabled, 4 - Maximum verbosity")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, help, false, "Print help message for all config options")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable optimization pass for onednn post-ops")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data, "", "Save csv file with per-stage and per-primitive profiling data to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs, "", "Save intermediate graph representations during model compilation pipeline to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources, "", "Save generated sources for each kernel to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, "", "Save intermediate in/out tensors of each primitive to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, "", "Save csv file with memory pool info to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, "", "Space-separated list of iterations where other dump options should be enabled")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measure and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into a batch for more efficient OCL compilation")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of the LRU implementations cache created for each program object for dynamic models")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asynchronously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected
during compilation") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime inplace optimizations for operations like concat and crop") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "Disable fusions of operations as post-ops/fused-ops") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "Disable pass which merges QKV projections into single MatMul") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fake alignment feature which tries to keep gpu friendly memory alignment for arbitrary tensor shapes") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usage for all allocations") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "Preallocation setting") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "List of layers to load raw binary") diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl deleted file mode 100644 index 62548a7abb17fd..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#ifdef GPU_DEBUG_CONFIG - -OV_CONFIG_OPTION(ov::intel_gpu, verbose, 0, "Enable") -OV_CONFIG_OPTION(ov::intel_gpu, help, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_usm, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_onednn_post_ops, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_profiling_data, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_graphs, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_sources, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_tensors, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_memory_pool, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_iterations, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, host_time_profiling, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "") -OV_CONFIG_OPTION(ov::intel_gpu, impls_cache_capacity, 0, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_async_compilation, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_shape_agnostic_impls, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, use_usm_host, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "") - -#endif diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl deleted file mode 100644 index b3aa12dc75c49b..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -// Namespace, property name, default value, [validator], description -OV_CONFIG_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") -OV_CONFIG_OPTION(ov::device, id, "0", "ID of the 
current device") -OV_CONFIG_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty") -OV_CONFIG_OPTION(ov, num_streams, 1, "Number of streams to be used for inference") -OV_CONFIG_OPTION(ov, compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that supports parallelism") -OV_CONFIG_OPTION(ov::hint, inference_precision, ov::element::f16, - [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision") -OV_CONFIG_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact number of threads used for model compilton and inference as well as device queue settings") -OV_CONFIG_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc") -OV_CONFIG_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy") -OV_CONFIG_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application") -OV_CONFIG_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not") - -OV_CONFIG_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks") -OV_CONFIG_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level") -OV_CONFIG_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property") -OV_CONFIG_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution") -OV_CONFIG_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling") -OV_CONFIG_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable winograd convolution implementation if available") -OV_CONFIG_OPTION(ov::internal, exclusive_async_requests, false, "") -OV_CONFIG_OPTION(ov::internal, query_model_ratio, 1.0f, "") -OV_CONFIG_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache") -OV_CONFIG_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model") -OV_CONFIG_OPTION(ov::hint, dynamic_quantization_group_size, 0, "") -OV_CONFIG_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "") -OV_CONFIG_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") -OV_CONFIG_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") -OV_CONFIG_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") - -OV_CONFIG_OPTION(ov::intel_gpu, nv12_two_inputs, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, config_file, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, enable_lp_transformations, false, "") - -OV_CONFIG_OPTION(ov::intel_gpu, max_dynamic_batch, 1, "") 
-OV_CONFIG_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "") -OV_CONFIG_OPTION(ov::intel_gpu, optimize_data, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, enable_memory_pool, true, "") -OV_CONFIG_OPTION(ov::intel_gpu, allow_static_input_reorder, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, custom_outputs, std::vector{}, "") -OV_CONFIG_OPTION(ov::intel_gpu, force_implementations, ImplForcingMap{}, "") -OV_CONFIG_OPTION(ov::intel_gpu, partial_build_program, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, use_only_static_kernels_for_dynamic_shape, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "") -OV_CONFIG_OPTION(ov::intel_gpu, use_onednn, false, "") diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index f18b32cd8b7cbb..19a3c1e468e28c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -22,13 +22,8 @@ struct NewExecutionConfig : public ov::PluginConfig { NewExecutionConfig(const NewExecutionConfig& other); NewExecutionConfig& operator=(const NewExecutionConfig& other); - #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ - ConfigOption m_ ## PropertyVar = \ - ConfigOption(GET_EXCEPT_LAST(__VA_ARGS__)); - - #include "options_release.inl" - #include "options_debug.inl" - + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_OPTION void finalize_impl(std::shared_ptr context) override; diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 5c797b622aa28b..825b3ddfbc7282 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -27,7 +27,6 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); } } - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 3e052c134390ae..6e18486ee1d738 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -299,7 +299,6 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr( - std::make_tuple(ov::intel_gpu::max_dynamic_batch, 1), std::make_tuple(ov::intel_gpu::queue_type, QueueTypes::out_of_order), std::make_tuple(ov::intel_gpu::optimize_data, false), std::make_tuple(ov::intel_gpu::enable_memory_pool, true), @@ -80,7 +78,6 @@ void OldExecutionConfig::set_default() { std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}), std::make_tuple(ov::intel_gpu::partial_build_program, false), std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false), - std::make_tuple(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape, false), std::make_tuple(ov::intel_gpu::buffers_preallocation_ratio, 1.1f), std::make_tuple(ov::intel_gpu::max_kernels_per_batch, 8), std::make_tuple(ov::intel_gpu::use_onednn, false)); @@ -203,10 +200,6 
@@ void OldExecutionConfig::apply_debug_options(const cldnn::device_info& info) { set_property(ov::enable_profiling(true)); } - GPU_DEBUG_IF(debug_config->disable_dynamic_impl == 1) { - set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); - } - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { if (debug_config->dynamic_quantize_group_size == -1) set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp index 8f4319734d3e9f..5eff06155280b1 100644 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -13,12 +13,8 @@ namespace ov { namespace intel_gpu { NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { - #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ - m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; - - #include "intel_gpu/runtime/options_release.inl" - #include "intel_gpu/runtime/options_debug.inl" - + #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_OPTION } diff --git a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp index 1fa303656f80a5..a32dd526f19250 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp @@ -339,7 +339,6 @@ TEST_P(gemm_2in_add, eltwise_postop_dynamic) { if (engine.get_device_info().supports_immad) { ov::intel_gpu::ImplementationDesc gemmv_impl = { cldnn::format::type::any, "", impl_types::onednn }; cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm_prim", gemmv_impl } })); - cfg_fused.set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); } auto add_data_layout = get_output_layout(p); diff --git a/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp index 1fbd1c096e7c6e..0b8c1b153c8f5d 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp @@ -55,7 +55,6 @@ program::ptr build_program(engine& engine, ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); return program::build_program(engine, body_topology, config, false, false, true); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp index 9e92f2ebbfd293..0ff21d1ed5f3dc 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp @@ -1317,7 +1317,6 @@ TEST_P(crop_gpu_dynamic, i32_in2x3x2x2_crop_offsets) { } } } - config2.set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); network network2(engine, topology, config2); // run with static kernel network2.set_input_data("input", input); auto outputs2 = network2.execute(); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp 
b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp index 4939630fab3c57..16c35e04aa3f17 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp @@ -55,7 +55,6 @@ static program::ptr build_program(engine& engine, ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer)); return program::build_program(engine, body_topology, config, false, false, true); @@ -837,7 +836,7 @@ static void test_loop_gpu_multiple_shapes(ov::PartialShape body_input_layout, permute("permute1", input_info("input_origin"), {0, 1, 2, 3}), concatenation("input1", {input_info("permute1"), input_info("input_origin")}, 0), loop("loop", - {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input1"), input_info("input2")}, + {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input1"), input_info("input2")}, body_program, trip_count_id, initial_condition_id, actual_iteration_count_id, input_primitive_maps, output_primitive_maps, back_edges, num_iterations, body_current_iteration_id, body_execution_condition_id, 2), @@ -1105,7 +1104,7 @@ static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape bod auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true); auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx}); - + std::vector body_input_layouts; for (size_t i = 0; i < body_input_layout.size(); i++) { From 4017ffb3fcf78f262849d83d81b79c8c6ca7db39 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 19 Dec 2024 14:45:22 +0400 Subject: [PATCH 07/44] remove old config Signed-off-by: Vladimir Paramuzov --- .../intel_gpu/runtime/execution_config.hpp | 171 ++--------- .../intel_gpu/runtime/plugin_config.hpp | 41 --- .../src/runtime/execution_config.cpp | 290 +++--------------- .../intel_gpu/src/runtime/plugin_config.cpp | 130 -------- .../tests/unit/module_tests/config_test.cpp | 33 -- 5 files changed, 68 insertions(+), 597 deletions(-) delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp delete mode 100644 src/plugins/intel_gpu/src/runtime/plugin_config.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 6e39e0b0b6a822..b21d0c91b1d924 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -1,178 +1,43 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once +#include "openvino/runtime/plugin_config.hpp" +#include "intel_gpu/runtime/device_info.hpp" #include "intel_gpu/runtime/internal_properties.hpp" -#include "intel_gpu/runtime/device.hpp" -#include "intel_gpu/runtime/plugin_config.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include namespace ov::intel_gpu { -enum class PropertyVisibility { - INTERNAL = 0, - PUBLIC = 1 -}; - -inline std::ostream& 
operator<<(std::ostream& os, const PropertyVisibility& visibility) { - switch (visibility) { - case PropertyVisibility::PUBLIC: os << "PUBLIC"; break; - case PropertyVisibility::INTERNAL: os << "INTERNAL"; break; - default: os << "UNKNOWN"; break; - } +struct ExecutionConfig : public ov::PluginConfig { + ExecutionConfig(); + ExecutionConfig(std::initializer_list values) : ExecutionConfig() { set_property(ov::AnyMap(values)); } + explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } + explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } - return os; -} + ExecutionConfig(const ExecutionConfig& other); + ExecutionConfig& operator=(const ExecutionConfig& other); -class BaseValidator { -public: - using Ptr = std::shared_ptr; - virtual ~BaseValidator() = default; - virtual bool is_valid(const ov::Any& v) const = 0; -}; + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION -class FuncValidator : public BaseValidator { -public: -explicit FuncValidator(std::function func) : m_func(func) { } - bool is_valid(const ov::Any& v) const override { - return m_func(v); - } + void finalize_impl(std::shared_ptr context) override; + void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; private: - std::function m_func; -}; - -// PropertyTypeValidator ensures that value can be converted to given property type -template -class PropertyTypeValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - try { - v.as(); - return true; - } catch (ov::Exception&) { - return false; - } - } -}; - -class OldExecutionConfig { -public: - OldExecutionConfig(); - OldExecutionConfig(std::initializer_list values) : OldExecutionConfig() { set_property(ov::AnyMap(values)); } - explicit OldExecutionConfig(const ov::AnyMap& properties) : OldExecutionConfig() { set_property(properties); } - explicit OldExecutionConfig(const ov::AnyMap::value_type& property) : OldExecutionConfig() { set_property(property); } - - void set_default(); - void set_property(const ov::AnyMap& properties); - void set_user_property(const ov::AnyMap& properties); - Any get_property(const std::string& name) const; - bool is_set_by_user(const std::string& name) const; - bool is_supported(const std::string& name) const; - void register_property_impl(const std::pair& propertiy, PropertyVisibility visibility, BaseValidator::Ptr validator); - - template ::type = true> - void register_property_impl() { } - - template - void register_property_impl(const std::tuple, ValueT>& property, PropertyInitializer&&... properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared>()); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } - - template - typename std::enable_if::value, void>::type - register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... 
properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property))); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } - - template - typename std::enable_if, ValidatorT>::value, void>::type - register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property))); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } - - template - void register_property(PropertyInitializer&&... properties) { - register_property_impl(properties...); - } - - template - util::EnableIfAllStringAny set_property(Properties&&... properties) { - set_property(ov::AnyMap{std::forward(properties)...}); - } - - template - util::EnableIfAllStringAny set_user_property(Properties&&... properties) { - set_user_property(ov::AnyMap{std::forward(properties)...}); - } - - template - bool is_set_by_user(const ov::Property& property) const { - return is_set_by_user(property.name()); - } - - template - T get_property(const ov::Property& property) const { - return get_property(property.name()).template as(); - } - void apply_user_properties(const cldnn::device_info& info); - - // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call - // So this method should be called after setting all user properties, but before apply_user_properties() call. - void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info, const bool is_llm); - - std::string to_string() const; - -protected: void apply_hints(const cldnn::device_info& info); void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - void apply_debug_options(const cldnn::device_info& info); - - template - void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { - if (!is_set_by_user(property)) { - auto rt_info_val = rt_info.find(property.name()); - if (rt_info_val != rt_info.end()) { - set_user_property(property(rt_info_val->second.template as())); - } - } - } - -private: - ov::AnyMap internal_properties; - ov::AnyMap user_properties; - - std::map supported_properties; - std::map property_validators; - - bool finalized = false; }; } // namespace ov::intel_gpu namespace cldnn { using ov::intel_gpu::ExecutionConfig; -} // namespace cldnn +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp deleted file mode 100644 index 19a3c1e468e28c..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/runtime/plugin_config.hpp" -#include "intel_gpu/runtime/device_info.hpp" -#include "intel_gpu/runtime/internal_properties.hpp" -#include "openvino/runtime/internal_properties.hpp" -#include - -namespace ov { -namespace intel_gpu { - -struct NewExecutionConfig : public ov::PluginConfig { - NewExecutionConfig(); - NewExecutionConfig(std::initializer_list values) : NewExecutionConfig() { 
set_property(ov::AnyMap(values)); } - explicit NewExecutionConfig(const ov::AnyMap& properties) : NewExecutionConfig() { set_property(properties); } - explicit NewExecutionConfig(const ov::AnyMap::value_type& property) : NewExecutionConfig() { set_property(property); } - - NewExecutionConfig(const NewExecutionConfig& other); - NewExecutionConfig& operator=(const NewExecutionConfig& other); - - #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) - #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION - - void finalize_impl(std::shared_ptr context) override; - void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; - -private: - void apply_user_properties(const cldnn::device_info& info); - void apply_hints(const cldnn::device_info& info); - void apply_execution_hints(const cldnn::device_info& info); - void apply_performance_hints(const cldnn::device_info& info); - void apply_priority_hints(const cldnn::device_info& info); -}; - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index e700c64b2979e1..9a0d0028201b03 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -1,137 +1,77 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "intel_gpu/runtime/execution_config.hpp" -#include "intel_gpu/runtime/debug_configuration.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/core/any.hpp" #include "openvino/runtime/internal_properties.hpp" -#include "openvino/runtime/properties.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" -#include namespace ov::intel_gpu { -OldExecutionConfig::OldExecutionConfig() { - set_default(); +ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { + #define OV_CONFIG_OPTION(...) 
OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION } -class InferencePrecisionValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - auto precision = v.as(); - return precision == ov::element::f16 || precision == ov::element::f32 || precision == ov::element::undefined; +ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); } -}; - -class PerformanceModeValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - auto mode = v.as(); - return mode == ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT || - mode == ov::hint::PerformanceMode::THROUGHPUT || - mode == ov::hint::PerformanceMode::LATENCY; - } -}; - -void OldExecutionConfig::set_default() { - register_property( - std::make_tuple(ov::device::id, "0"), - std::make_tuple(ov::enable_profiling, false), - std::make_tuple(ov::cache_dir, ""), - std::make_tuple(ov::num_streams, 1), - std::make_tuple(ov::compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency()))), - std::make_tuple(ov::hint::inference_precision, ov::element::f16, InferencePrecisionValidator()), - std::make_tuple(ov::hint::model_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY, PerformanceModeValidator()), - std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::PERFORMANCE), - std::make_tuple(ov::hint::num_requests, 0), - std::make_tuple(ov::hint::enable_cpu_pinning, false), - std::make_tuple(ov::hint::enable_cpu_reservation, false), - - std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::queue_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::enable_sdpa_optimization, true), - std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true), - std::make_tuple(ov::intel_gpu::disable_winograd_convolution, false), - std::make_tuple(ov::internal::exclusive_async_requests, false), - std::make_tuple(ov::internal::query_model_ratio, 1.0f), - std::make_tuple(ov::cache_mode, ov::CacheMode::OPTIMIZE_SPEED), - std::make_tuple(ov::cache_encryption_callbacks, EncryptionCallbacks{}), - std::make_tuple(ov::hint::dynamic_quantization_group_size, 0), - std::make_tuple(ov::hint::kv_cache_precision, ov::element::f16), - std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false), - std::make_tuple(ov::weights_path, ""), - std::make_tuple(ov::hint::activations_scale_factor, -1.f), - - // Legacy API properties - std::make_tuple(ov::intel_gpu::config_file, ""), - std::make_tuple(ov::intel_gpu::enable_lp_transformations, false)); - - register_property( - std::make_tuple(ov::intel_gpu::queue_type, QueueTypes::out_of_order), - std::make_tuple(ov::intel_gpu::optimize_data, false), - std::make_tuple(ov::intel_gpu::enable_memory_pool, true), - std::make_tuple(ov::intel_gpu::allow_static_input_reorder, false), - std::make_tuple(ov::intel_gpu::custom_outputs, std::vector{}), - std::make_tuple(ov::intel_gpu::dump_graphs, ""), - std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}), - std::make_tuple(ov::intel_gpu::partial_build_program, false), - 
std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false), - std::make_tuple(ov::intel_gpu::buffers_preallocation_ratio, 1.1f), - std::make_tuple(ov::intel_gpu::max_kernels_per_batch, 8), - std::make_tuple(ov::intel_gpu::use_onednn, false)); } -void OldExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { - property_validators[property.first] = validator; - supported_properties[property.first] = visibility; - internal_properties[property.first] = property.second; -} - -void OldExecutionConfig::set_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; - OPENVINO_ASSERT(is_supported(kv.first), "[GPU] Attempt to set property ", name, " (", val.as(), ") which was not registered!\n"); - OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": ", val.as()); - internal_properties[name] = val; +ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); } + return *this; } -bool OldExecutionConfig::is_supported(const std::string& name) const { - bool supported = supported_properties.find(name) != supported_properties.end(); - bool has_validator = property_validators.find(name) != property_validators.end(); - - return supported && has_validator; -} - -bool OldExecutionConfig::is_set_by_user(const std::string& name) const { - return user_properties.find(name) != user_properties.end(); +void ExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + if (!info.supports_immad) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); + } + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); } -void OldExecutionConfig::set_user_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; - bool supported = is_supported(name) && supported_properties.at(name) == PropertyVisibility::PUBLIC; - OPENVINO_ASSERT(supported, "[GPU] Attempt to set user property ", name, " (", val.as(), ") which was not registered or internal!\n"); - OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": `", val.as(), "`"); +void ExecutionConfig::finalize_impl(std::shared_ptr context) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + apply_hints(info); + if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { + set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); + } + if (info.supports_immad) { + set_property(ov::intel_gpu::use_onednn(true)); + } + if (get_property(ov::intel_gpu::use_onednn)) { + set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + } - user_properties[kv.first] = kv.second; + // Enable KV-cache compression by default for non-systolic platforms + if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { + set_property(ov::hint::kv_cache_precision(ov::element::i8)); } -} -Any OldExecutionConfig::get_property(const std::string& name) const { - if (user_properties.find(name) != user_properties.end()) { - return 
user_properties.at(name); + // Enable dynamic quantization by default for non-systolic platforms + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + set_property(ov::hint::dynamic_quantization_group_size(32)); } +} - OPENVINO_ASSERT(internal_properties.find(name) != internal_properties.end(), "[GPU] Can't get internal property with name ", name); - return internal_properties.at(name); +void ExecutionConfig::apply_hints(const cldnn::device_info& info) { + apply_execution_hints(info); + apply_performance_hints(info); + apply_priority_hints(info); } -void OldExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { +void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::execution_mode)) { const auto mode = get_property(ov::hint::execution_mode); if (!is_set_by_user(ov::hint::inference_precision)) { @@ -147,7 +87,7 @@ void OldExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { } } -void OldExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { +void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::performance_mode)) { const auto mode = get_property(ov::hint::performance_mode); if (!is_set_by_user(ov::num_streams)) { @@ -176,7 +116,7 @@ void OldExecutionConfig::apply_performance_hints(const cldnn::device_info& info) } } -void OldExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { +void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::model_priority)) { const auto priority = get_property(ov::hint::model_priority); if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { @@ -185,134 +125,4 @@ void OldExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { } } -void OldExecutionConfig::apply_debug_options(const cldnn::device_info& info) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); - } - - GPU_DEBUG_IF(debug_config->serialize_compile == 1) { - set_property(ov::compilation_num_threads(1)); - } - - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - GPU_DEBUG_COUT << "[WARNING] ov::enable_profiling property was forced because of enabled OV_GPU_DumpProfilingData debug option\n"; - set_property(ov::enable_profiling(true)); - } - - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - if (debug_config->dynamic_quantize_group_size == -1) - set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); - else - set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size)); - } - - GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) { - GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } else { - set_property(ov::hint::kv_cache_precision(ov::element::undefined)); - } - } -} - -void OldExecutionConfig::apply_hints(const cldnn::device_info& info) { - apply_execution_hints(info); - apply_performance_hints(info); - apply_priority_hints(info); - apply_debug_options(info); -} - -void ExecutionConfig::update_specific_default_properties(const cldnn::device_info& info) { - // These default properties should be set once. 
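// ----------------------------------------------------------------------------
// [Editor's note: illustration only, not part of the patch] The deleted flow here
// (update_specific_default_properties + apply_user_properties) is subsumed by
// finalize()/finalize_impl() in the new config. A minimal sketch of the ordering
// concern it handles, written against the new API from this series (context is
// assumed to be a GPU remote context, as in the deleted config_test.cpp):
//
//   ExecutionConfig config;
//   config.set_user_property(ov::num_streams(ov::streams::AUTO),
//                            ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
//   config.finalize(context, {});
//
// finalize() first copies the user-set num_streams=AUTO into the internal
// options and only then runs the hints, so apply_performance_hints() can resolve
// AUTO into a concrete stream count:
//
//   if (get_property(ov::num_streams) == ov::streams::AUTO) {
//       int32_t n_streams = std::max(info.num_ccs, 2);
//       set_property(ov::num_streams(n_streams));
//   }
//
// Applying the hints before copying user values would leave num_streams=AUTO in
// the finalized config while an integer is expected, which is exactly the case
// the "Copy internal properties before applying hints" comment in the deleted
// apply_user_properties() below describes.
// ----------------------------------------------------------------------------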
- if (specific_default_properties_is_set) - return; - specific_default_properties_is_set = true; - - // Enable KV-cache compression by default for non-systolic platforms MFDNN-11755 - if (get_property(ov::hint::kv_cache_precision) == ov::element::undefined && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - - // Enable dynamic quantization by default for non-systolic platforms - if (get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } -} - -void OldExecutionConfig::apply_user_properties(const cldnn::device_info& info) { - // Update specific default properties, call once before internal_properties updated. - update_specific_default_properties(info); - - // Copy internal properties before applying hints to ensure that - // a property set by hint won't be overriden by a value in user config. - // E.g num_streams=AUTO && hint=THROUGHPUT - // If we apply hints first and then copy all values from user config to internal one, - // then we'll get num_streams=AUTO in final config while some integer number is expected. - for (auto& kv : user_properties) { - internal_properties[kv.first] = kv.second; - } - apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); - } - if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); - } - if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } - if (!is_set_by_user(ov::hint::enable_cpu_reservation)) { - if (get_property(ov::hint::enable_cpu_pinning)) { - set_property(ov::hint::enable_cpu_reservation(true)); - } - } - if (get_property(ov::hint::enable_cpu_reservation)) { - if (!is_set_by_user(ov::hint::enable_cpu_pinning)) { - set_property(ov::hint::enable_cpu_pinning(true)); - } - } - - if (!is_set_by_user(ov::hint::kv_cache_precision) || get_property(ov::hint::kv_cache_precision) == ov::element::undefined) { - if (info.supports_immad) { // MFDNN-11755 - set_property(ov::hint::kv_cache_precision(get_property(ov::hint::inference_precision))); - } else { - // Enable KV-cache compression by default for non-systolic platforms only - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - } - - // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && - get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } - - finalized = true; - - user_properties.clear(); -} - -void OldExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info, const bool is_llm) { - if (!info.supports_immad) { - apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); - } - if (!is_llm) - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); -} - -std::string OldExecutionConfig::to_string() const { - std::stringstream s; - s << "internal properties:\n"; - for (auto& kv : internal_properties) { - s << "\t" << kv.first << ": " << kv.second.as() << std::endl; - } - s << "user properties:\n"; - for (auto& kv : user_properties) { - s << "\t" << kv.first << ": " << kv.second.as() << std::endl; - } - return s.str(); -} - } // namespace 
ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp deleted file mode 100644 index 5eff06155280b1..00000000000000 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "intel_gpu/runtime/plugin_config.hpp" -#include "intel_gpu/plugin/remote_context.hpp" -#include "openvino/core/any.hpp" -#include "openvino/runtime/internal_properties.hpp" -#include "intel_gpu/runtime/internal_properties.hpp" - - -namespace ov { -namespace intel_gpu { - -NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { - #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) - #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION -} - -NewExecutionConfig::NewExecutionConfig(const NewExecutionConfig& other) : NewExecutionConfig() { - user_properties = other.user_properties; - for (const auto& kv : other.m_options_map) { - m_options_map.at(kv.first)->set_any(kv.second->get_any()); - } -} - -NewExecutionConfig& NewExecutionConfig::operator=(const NewExecutionConfig& other) { - user_properties = other.user_properties; - for (const auto& kv : other.m_options_map) { - m_options_map.at(kv.first)->set_any(kv.second->get_any()); - } - return *this; -} - -void NewExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { - const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); - if (!info.supports_immad) { - apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - } - apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); -} - -void NewExecutionConfig::finalize_impl(std::shared_ptr context) { - const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); - apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); - } - if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); - } - if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } - - // Enable KV-cache compression by default for non-systolic platforms - if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - - // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } -} - -void NewExecutionConfig::apply_hints(const cldnn::device_info& info) { - apply_execution_hints(info); - apply_performance_hints(info); - apply_priority_hints(info); -} - -void NewExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::execution_mode)) { - const auto mode = get_property(ov::hint::execution_mode); - if (!is_set_by_user(ov::hint::inference_precision)) { - if (mode == ov::hint::ExecutionMode::ACCURACY) { - set_property(ov::hint::inference_precision(ov::element::undefined)); - } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) { - if (info.supports_fp16) - set_property(ov::hint::inference_precision(ov::element::f16)); - else - 
set_property(ov::hint::inference_precision(ov::element::f32)); - } - } - } -} - -void NewExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::performance_mode)) { - const auto mode = get_property(ov::hint::performance_mode); - if (!is_set_by_user(ov::num_streams)) { - if (mode == ov::hint::PerformanceMode::LATENCY) { - set_property(ov::num_streams(1)); - } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { - set_property(ov::num_streams(ov::streams::AUTO)); - } - } - } - - if (get_property(ov::num_streams) == ov::streams::AUTO) { - int32_t n_streams = std::max(info.num_ccs, 2); - set_property(ov::num_streams(n_streams)); - } - - if (get_property(ov::internal::exclusive_async_requests)) { - set_property(ov::num_streams(1)); - } - - // Allow kernels reuse only for single-stream scenarios - if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { - if (get_property(ov::num_streams) != 1) { - set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); - } - } -} - -void NewExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::model_priority)) { - const auto priority = get_property(ov::hint::model_priority); - if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { - set_property(ov::intel_gpu::hint::queue_priority(priority)); - } - } -} - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp deleted file mode 100644 index b14c5b0bf4623d..00000000000000 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (C) 2022-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "intel_gpu/plugin/remote_context.hpp" -#include "intel_gpu/runtime/plugin_config.hpp" -#include "openvino/runtime/properties.hpp" -#include "test_utils.h" - -using namespace cldnn; -using namespace ::tests; - -TEST(config_test, basic) { - ov::intel_gpu::NewExecutionConfig cfg; - std::cerr << cfg.to_string(); - - std::cerr << cfg.get_property("PERFORMANCE_HINT").as(); - cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); - cfg.set_property(ov::hint::inference_precision(ov::element::f32)); - - std::cerr << "PROF: " << cfg.m_enable_profiling.value << std::endl; - - std::cerr << cfg.to_string(); - - std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; - std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; - - auto ctx = std::make_shared("GPU", std::vector{ get_test_engine().get_device() }); - cfg.finalize(ctx, {}); - std::cerr << cfg.to_string(); -// std::cerr << get_prop() << std::endl; -// std::cerr << get_prop() << std::endl; -} From 36f0bf6407c2d41bd0698bbe7df35b1f9824df4e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 19 Dec 2024 15:46:55 +0400 Subject: [PATCH 08/44] enhancements Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 25 ++++++++++-- src/inference/src/dev/plugin_config.cpp | 39 ++++++++++++------- src/inference/tests/unit/config_test.cpp | 2 +- .../intel_gpu/src/graph/fully_connected.cpp | 3 +- .../src/runtime/execution_config.cpp | 4 +- 5 files changed, 50 insertions(+), 23 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 36b6765849ee8e..769a4619b60fe8 100644 --- 
a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -107,6 +107,23 @@ struct ConfigOption : public ConfigOptionBase { return visibility; } + operator T() const { + return value; + } + + ConfigOption& operator=(const T& val) { + value = val; + return *this; + } + + bool operator==(const T& val) const { + return value == val; + } + + bool operator!=(const T& val) const { + return !(*this == val); + } + private: std::function validator; }; @@ -157,7 +174,7 @@ class OPENVINO_RUNTIME_API PluginConfig { template T get_property(const ov::Property& property) const { if (is_set_by_user(property)) { - return user_properties.at(property.name()).template as(); + return m_user_properties.at(property.name()).template as(); } OPENVINO_ASSERT(m_options_map.find(property.name()) != m_options_map.end(), "Property not found: ", property.name()); return static_cast*>(m_options_map.at(property.name()))->value; @@ -174,7 +191,7 @@ class OPENVINO_RUNTIME_API PluginConfig { template bool is_set_by_user(const ov::Property& property) const { - return user_properties.find(property.name()) != user_properties.end(); + return m_user_properties.find(property.name()) != m_user_properties.end(); } ConfigOptionBase* get_option_ptr(const std::string& name) const { @@ -195,7 +212,7 @@ class OPENVINO_RUNTIME_API PluginConfig { } } - void set_user_property(const ov::AnyMap& properties, const std::vector& allowed_visibility); + void set_user_property(const ov::AnyMap& properties, const std::vector& allowed_visibility, bool throw_on_error); ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; ov::AnyMap read_env(const std::vector& prefixes) const; @@ -204,7 +221,7 @@ class OPENVINO_RUNTIME_API PluginConfig { std::map m_options_map; // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info - ov::AnyMap user_properties; + ov::AnyMap m_user_properties; using OptionMapEntry = decltype(m_options_map)::value_type; }; diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index cfc48745f677f5..27d113a04a88cd 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -34,22 +34,31 @@ ov::Any PluginConfig::get_property(const std::string& name) const { } void PluginConfig::set_user_property(const AnyMap& config) { - static std::vector allowed_visibility = {OptionVisibility::RELEASE}; - set_user_property(config, allowed_visibility); + const static std::vector allowed_visibility = {OptionVisibility::RELEASE}; + const bool throw_on_error = true; + set_user_property(config, allowed_visibility, throw_on_error); } -void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector& allowed_visibility) { +void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector& allowed_visibility, bool throw_on_error) { for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; auto option = get_option_ptr(name); if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) { - OPENVINO_THROW("Unkown property: ", name); + if (throw_on_error) + OPENVINO_THROW("Unknown property: ", name); + else + continue; + } + if (!option->is_valid_value(val)) { + if (throw_on_error) + OPENVINO_THROW("Invalid value: ", val.as(), " for property: ", name); + else + continue; } - 
OPENVINO_ASSERT(option->is_valid_value(val), "Invalid value: ", val.as(), " for property: ", name); - user_properties[name] = val; + m_user_properties[name] = val; } } @@ -61,7 +70,7 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R // E.g num_streams=AUTO && hint=THROUGHPUT // If we apply hints first and then copy all values from user config to internal one, // then we'll get num_streams=AUTO in final config while some integer number is expected. - for (const auto& prop : user_properties) { + for (const auto& prop : m_user_properties) { auto& option = m_options_map.at(prop.first); option->set_any(prop.second); } @@ -69,7 +78,7 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R finalize_impl(context); // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization - user_properties.clear(); + m_user_properties.clear(); } void PluginConfig::apply_debug_options(std::shared_ptr context) { @@ -81,14 +90,17 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) #endif }; + const bool throw_on_error = false; + if (context) { ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); cleanup_unsupported(config_properties); - set_user_property(config_properties, allowed_visibility); + set_user_property(config_properties, allowed_visibility, throw_on_error); } ov::AnyMap env_properties = read_env({"OV_"}); - set_user_property(env_properties, allowed_visibility); + cleanup_unsupported(env_properties); + set_user_property(env_properties, allowed_visibility, throw_on_error); } ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { @@ -155,10 +167,9 @@ ov::AnyMap PluginConfig::read_env(const std::vector& prefixes) cons void PluginConfig::cleanup_unsupported(ov::AnyMap& config) const { for (auto it = config.begin(); it != config.end();) { - const auto& known_options = m_options_map; auto& name = it->first; - auto opt_it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); - if (opt_it == known_options.end()) { + auto opt_it = std::find_if(m_options_map.begin(), m_options_map.end(), [&](const OptionMapEntry& o) { return o.first == name; }); + if (opt_it == m_options_map.end()) { it = config.erase(it); } else { ++it; @@ -176,7 +187,7 @@ std::string PluginConfig::to_string() const { s << "\t" << option.first << ": " << option.second->get_any().as() << std::endl; } s << "USER PROPERTIES:\n"; - for (const auto& user_prop : user_properties) { + for (const auto& user_prop : m_user_properties) { s << "\t" << user_prop.first << ": " << user_prop.second.as() << std::endl; } diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index 0feeef707a2779..fa09be0616c8d4 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -44,7 +44,7 @@ struct NotEmptyTestConfig : public ov::PluginConfig { } NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() { - user_properties = other.user_properties; + m_user_properties = other.m_user_properties; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index 0c03916d45efcb..d635e87c7494d0 100644 --- 
a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -250,8 +250,7 @@ kernel_impl_params fully_connected_inst::get_fake_aligned_params(kernel_impl_par } } - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_fake_alignment) { + GPU_DEBUG_IF(orig_impl_param.get_program().get_config().m_disable_fake_alignment) { can_apply_fake_alignment = false; } diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 9a0d0028201b03..ff420a0e66c1b9 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -18,14 +18,14 @@ ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { } ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { - user_properties = other.user_properties; + m_user_properties = other.m_user_properties; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } } ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { - user_properties = other.user_properties; + m_user_properties = other.m_user_properties; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } From 4cb213e562cd33a7709f9db1e931152544c1713e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 23 Dec 2024 10:27:54 +0400 Subject: [PATCH 09/44] update behavior for set/get property. Add help message Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 36 +++-- src/inference/src/dev/plugin_config.cpp | 134 ++++++++++++++++-- src/inference/tests/unit/config_test.cpp | 32 ++--- .../include/intel_gpu/plugin/plugin.hpp | 1 - .../intel_gpu/runtime/execution_config.hpp | 3 +- .../include/intel_gpu/runtime/options.inl | 2 +- src/plugins/intel_gpu/src/graph/broadcast.cpp | 2 +- src/plugins/intel_gpu/src/graph/crop.cpp | 2 +- src/plugins/intel_gpu/src/graph/eltwise.cpp | 2 +- src/plugins/intel_gpu/src/graph/gather.cpp | 2 +- .../graph_optimizer/add_required_reorders.cpp | 2 +- .../graph_optimizer/build_implementations.cpp | 2 +- .../graph_optimizer/graph_initializations.cpp | 4 +- .../graph_optimizer/propagate_constants.cpp | 4 +- .../select_preferred_formats.cpp | 2 +- .../src/graph/impls/ocl/fully_connected.cpp | 2 +- .../impls/ocl/kernel_selector_helper.cpp | 4 +- .../src/graph/impls/ocl/kernels_cache.cpp | 10 +- .../impls/onednn/primitive_onednn_base.h | 8 +- .../impls/registry/implementation_manager.cpp | 2 +- .../registry/non_max_suppression_impls.cpp | 2 +- .../intel_gpu/src/graph/layout_optimizer.cpp | 2 +- src/plugins/intel_gpu/src/graph/network.cpp | 6 +- .../src/graph/non_max_suppression.cpp | 2 +- src/plugins/intel_gpu/src/graph/permute.cpp | 2 +- .../intel_gpu/src/graph/primitive_inst.cpp | 8 +- src/plugins/intel_gpu/src/graph/program.cpp | 45 +++--- .../src/graph/program_dump_graph.cpp | 2 +- src/plugins/intel_gpu/src/graph/reorder.cpp | 2 +- src/plugins/intel_gpu/src/graph/reshape.cpp | 2 +- .../src/graph/scatter_elements_update.cpp | 2 +- .../intel_gpu/src/graph/scatter_nd_update.cpp | 2 +- .../intel_gpu/src/graph/scatter_update.cpp | 2 +- src/plugins/intel_gpu/src/graph/select.cpp | 2 +- .../intel_gpu/src/graph/strided_slice.cpp | 2 +- .../intel_gpu/src/plugin/compiled_model.cpp | 20 +-- src/plugins/intel_gpu/src/plugin/graph.cpp | 20 ++- .../intel_gpu/src/plugin/ops/condition.cpp | 6 +- 
src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 4 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 49 +++---- .../intel_gpu/src/plugin/program_builder.cpp | 18 +-- .../src/plugin/sync_infer_request.cpp | 8 +- .../src/plugin/transformations_pipeline.cpp | 12 +- .../src/runtime/execution_config.cpp | 46 ++++-- .../intel_gpu/src/runtime/ocl/ocl_engine.cpp | 2 +- .../intel_gpu/src/runtime/ocl/ocl_stream.cpp | 8 +- src/plugins/intel_gpu/src/runtime/stream.cpp | 4 +- .../test_cases/fully_connected_gpu_test.cpp | 24 ++-- 48 files changed, 337 insertions(+), 223 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 769a4619b60fe8..a1bcab62b5d5fd 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -33,12 +33,26 @@ #define GET_EXCEPT_LAST(...) EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) +#define GET_LAST_IMPL(N, ...) CAT(GET_LAST_IMPL_, N)(__VA_ARGS__) +#define GET_LAST_IMPL_0(_0, ...) _0 +#define GET_LAST_IMPL_1(_0, _1, ...) _1 +#define GET_LAST_IMPL_2(_0, _1, _2, ...) _2 +#define GET_LAST_IMPL_3(_0, _1, _2, _3, ...) _3 +#define GET_LAST_IMPL_4(_0, _1, _2, _3, _4, ...) _4 +#define GET_LAST_IMPL_5(_0, _1, _2, _3, _4, _5, ...) _5 +#define GET_LAST_IMPL_6(_0, _1, _2, _3, _4, _5, _6, ...) _6 + +#define GET_LAST(...) GET_LAST_IMPL(COUNT(__VA_ARGS__), _, __VA_ARGS__ ,,,,,,,,,,,) + #define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ ConfigOption m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; #define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) \ m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; +#define OV_CONFIG_OPTION_HELP(PropertyNamespace, PropertyVar, Visibility, DefaultValue, ...) \ + { #PropertyNamespace "::" #PropertyVar, PropertyNamespace::PropertyVar.name(), GET_LAST(__VA_ARGS__)}, + #define OV_CONFIG_RELEASE_OPTION(PropertyNamespace, PropertyVar, ...) \ OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__) @@ -159,18 +173,12 @@ class OPENVINO_RUNTIME_API PluginConfig { void set_property(const ov::AnyMap& properties); Any get_property(const std::string& name) const; - void set_user_property(const ov::AnyMap& properties); template util::EnableIfAllStringAny set_property(Properties&&... properties) { set_property(ov::AnyMap{std::forward(properties)...}); } - template - util::EnableIfAllStringAny set_user_property(Properties&&... 
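GET_LAST is the counterpart of GET_EXCEPT_LAST: every OV_CONFIG_OPTION entry ends with a human-readable description, and OV_CONFIG_OPTION_HELP has to peel that final argument off a variadic list whose length varies (an optional validator may sit in between). A self-contained demonstration of the trick; COUNT/CAT/EXPAND are reproduced from earlier in this header so the snippet compiles on its own:

#include <iostream>

#define COUNT_N(_1, _2, _3, _4, _5, N, ...) N
#define EXPAND(N) N
#define COUNT(...) EXPAND(COUNT_N(__VA_ARGS__, 5, 4, 3, 2, 1))
#define CAT(a, b) a ## b

#define GET_LAST_IMPL(N, ...) CAT(GET_LAST_IMPL_, N)(__VA_ARGS__)
#define GET_LAST_IMPL_1(_0, _1, ...) _1
#define GET_LAST_IMPL_2(_0, _1, _2, ...) _2
#define GET_LAST_IMPL_3(_0, _1, _2, _3, ...) _3
#define GET_LAST(...) GET_LAST_IMPL(COUNT(__VA_ARGS__), _, __VA_ARGS__ ,,,,,,,,,,,)

int main() {
    // default value + description -> picks the description
    std::cout << GET_LAST(42, "description") << "\n";
    // default value + validator + description -> still the last argument
    std::cout << GET_LAST(42, "validator", "description") << "\n";
}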
properties) { - set_user_property(ov::AnyMap{std::forward(properties)...}); - } - template T get_property(const ov::Property& property) const { if (is_set_by_user(property)) { @@ -189,6 +197,7 @@ class OPENVINO_RUNTIME_API PluginConfig { virtual void apply_debug_options(std::shared_ptr context); virtual void finalize_impl(std::shared_ptr context) {} + template bool is_set_by_user(const ov::Property& property) const { return m_user_properties.find(property.name()) != m_user_properties.end(); @@ -207,12 +216,13 @@ class OPENVINO_RUNTIME_API PluginConfig { if (!is_set_by_user(property)) { auto rt_info_val = rt_info.find(property.name()); if (rt_info_val != rt_info.end()) { - set_user_property(property(rt_info_val->second.template as())); + set_property(property(rt_info_val->second.template as())); } } } - void set_user_property(const ov::AnyMap& properties, const std::vector& allowed_visibility, bool throw_on_error); + ov::Any get_property(const std::string& name, const std::vector& allowed_visibility) const; + void set_property(const ov::AnyMap& properties, const std::vector& allowed_visibility, bool throw_on_error); ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; ov::AnyMap read_env(const std::vector& prefixes) const; @@ -223,6 +233,16 @@ class OPENVINO_RUNTIME_API PluginConfig { // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info ov::AnyMap m_user_properties; using OptionMapEntry = decltype(m_options_map)::value_type; + + // property variable name, string name, default value, description + using OptionsDesc = std::vector>; + static OptionsDesc m_options_desc; + virtual const OptionsDesc& get_options_desc() const { static OptionsDesc empty; return empty; } + const std::string get_help_message(const std::string& name = "") const; + void print_help() const; + +private: + bool m_is_finalized = false; }; } // namespace ov diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 27d113a04a88cd..e1b09b76ad8235 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -8,7 +8,9 @@ #include "openvino/runtime/device_id_parser.hpp" #include "openvino/util/common_util.hpp" #include "openvino/util/env_util.hpp" +#include #include +#include #ifdef JSON_HEADER # include @@ -16,30 +18,63 @@ # include #endif -namespace ov { - -void PluginConfig::set_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; +#ifdef _WIN32 +#include +#else +#include +#include +#endif - auto option = get_option_ptr(name); - option->set_any(val); +namespace { +size_t get_terminal_width() { + const size_t default_width = 120; +#ifdef _WIN32 + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) { + return csbi.srWindow.Right - csbi.srWindow.Left + 1; + } else { + return default_width; + } +#else + struct winsize w; + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { + return w.ws_col; + } else { + return default_width; } +#endif // _WIN32 } +} + +namespace ov { ov::Any PluginConfig::get_property(const std::string& name) const { + const static std::vector allowed_visibility = {OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL}; + return get_property(name, allowed_visibility); +} + +ov::Any PluginConfig::get_property(const std::string& name, const std::vector& allowed_visibility) const { + if 
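The net effect of these declarations: set_user_property() is gone, and a single set_property() family filters by OptionVisibility, with m_is_finalized latching the config read-only. A condensed sketch of the intended semantics (simplified value types; the real code operates on ov::Any and ConfigOptionBase):

#include <algorithm>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

enum class OptionVisibility { RELEASE, RELEASE_INTERNAL, DEBUG };

struct OptionSketch {
    std::string value;
    OptionVisibility visibility;
};

struct ConfigSketch {
    std::map<std::string, OptionSketch> m_options_map;
    std::map<std::string, std::string> m_user_properties;
    bool m_is_finalized = false;

    void set_property(const std::string& name, const std::string& value,
                      const std::vector<OptionVisibility>& allowed, bool throw_on_error) {
        if (m_is_finalized)
            throw std::runtime_error("Setting property after config finalization is prohibited");
        auto it = m_options_map.find(name);
        bool ok = it != m_options_map.end() &&
                  std::find(allowed.begin(), allowed.end(), it->second.visibility) != allowed.end();
        if (!ok) {
            if (throw_on_error)
                throw std::runtime_error("Couldn't set unknown property: " + name);
            return;  // debug/env path: skip silently
        }
        m_user_properties[name] = value;
    }

    std::string get_property(const std::string& name) const {
        auto u = m_user_properties.find(name);
        if (u != m_user_properties.end())
            return u->second;  // user-set value wins until finalize() clears it
        return m_options_map.at(name).value;
    }
};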
(m_user_properties.find(name) != m_user_properties.end()) { + return m_user_properties.at(name); + } + auto option = get_option_ptr(name); + if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) { + OPENVINO_THROW("Couldn't get unknown property: ", name); + } + return option->get_any(); } -void PluginConfig::set_user_property(const AnyMap& config) { +void PluginConfig::set_property(const AnyMap& config) { const static std::vector allowed_visibility = {OptionVisibility::RELEASE}; const bool throw_on_error = true; - set_user_property(config, allowed_visibility, throw_on_error); + set_property(config, allowed_visibility, throw_on_error); } -void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector& allowed_visibility, bool throw_on_error) { +void PluginConfig::set_property(const ov::AnyMap& config, const std::vector& allowed_visibility, bool throw_on_error) { + OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited"); + for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; @@ -47,13 +82,13 @@ void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector auto option = get_option_ptr(name); if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) { if (throw_on_error) - OPENVINO_THROW("Unkown property: ", name); + OPENVINO_THROW("Couldn't set unknown property: ", name); else continue; } if (!option->is_valid_value(val)) { if (throw_on_error) - OPENVINO_THROW("Invalid value: ", val.as(), " for property: ", name); + OPENVINO_THROW("Invalid value: ", val.as(), " for property: ", name, "\nProperty description: ", get_help_message(name)); else continue; } @@ -79,6 +114,8 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization m_user_properties.clear(); + + m_is_finalized = true; } void PluginConfig::apply_debug_options(std::shared_ptr context) { @@ -95,12 +132,12 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) if (context) { ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); cleanup_unsupported(config_properties); - set_user_property(config_properties, allowed_visibility, throw_on_error); + set_property(config_properties, allowed_visibility, throw_on_error); } ov::AnyMap env_properties = read_env({"OV_"}); cleanup_unsupported(env_properties); - set_user_property(env_properties, allowed_visibility, throw_on_error); + set_property(env_properties, allowed_visibility, throw_on_error); } ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { @@ -194,4 +231,71 @@ std::string PluginConfig::to_string() const { return s.str(); } +void PluginConfig::print_help() const { + auto format_text = [](const std::string& cpp_name, const std::string& str_name, const std::string& desc, size_t max_name_width, size_t max_width) { + std::istringstream words(desc); + std::ostringstream formatted_text; + std::string word; + std::vector words_vec; + + while (words >> word) { + words_vec.push_back(word); + } + + size_t j = 0; + size_t count_of_desc_lines = (desc.length() + max_width - 1) / max_width; + for (size_t i = 0 ; i < std::max(2, count_of_desc_lines); i++) { + if (i == 0) { + formatted_text << std::left << std::setw(max_name_width) << 
cpp_name; + } else if (i == 1) { + formatted_text << std::left << std::setw(max_name_width) << str_name; + } else { + formatted_text << std::left << std::setw(max_name_width) << ""; + } + + formatted_text << " | "; + + size_t line_length = max_name_width + 3; + for (; j < words_vec.size();) { + line_length += words_vec[j].size() + 1; + if (line_length > max_width) { + break; + } else { + formatted_text << words_vec[j] << " "; + } + j++; + } + formatted_text << "\n"; + } + return formatted_text.str(); + }; + + const auto& options_desc = get_options_desc(); + std::stringstream ss; + auto max_name_length_item = std::max_element(options_desc.begin(), options_desc.end(), + [](const OptionsDesc::value_type& a, const OptionsDesc::value_type& b){ + return std::get<0>(a).size() < std::get<0>(b).size(); + }); + + const size_t max_name_width = static_cast(std::get<0>(*max_name_length_item).size() + std::get<1>(*max_name_length_item).size()); + const size_t terminal_width = get_terminal_width(); + ss << std::left << std::setw(max_name_width) << ("Option name") << " | " << " Description " << "\n"; + ss << std::left << std::setw(terminal_width) << std::setfill('-') << "" << "\n"; + for (auto& kv : options_desc) { + ss << format_text(std::get<0>(kv), std::get<1>(kv), std::get<2>(kv), max_name_width, terminal_width) << "\n"; + } + + std::cout << ss.str(); +} + +const std::string PluginConfig::get_help_message(const std::string& name) const { + const auto& options_desc = get_options_desc(); + auto it = std::find_if(options_desc.begin(), options_desc.end(), [&](const OptionsDesc::value_type& v) { return std::get<1>(v) == name; }); + if (it != options_desc.end()) { + return std::get<2>(*it); + } + + return ""; +} + } // namespace ov diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index fa09be0616c8d4..42b7fba115a273 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -101,35 +101,27 @@ TEST(plugin_config, can_set_get_property) { ASSERT_EQ(cfg.get_property(bool_property), true); ASSERT_NO_THROW(cfg.set_property(bool_property(false))); ASSERT_EQ(cfg.get_property(bool_property), false); - - ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); - ASSERT_EQ(cfg.get_property(bool_property), true); } TEST(plugin_config, throw_for_unsupported_property) { NotEmptyTestConfig cfg; ASSERT_ANY_THROW(cfg.get_property(unsupported_property)); ASSERT_ANY_THROW(cfg.set_property(unsupported_property(10.0f))); - ASSERT_ANY_THROW(cfg.set_user_property(unsupported_property(10.0f))); } TEST(plugin_config, can_direct_access_to_properties) { NotEmptyTestConfig cfg; - ASSERT_EQ(cfg.m_bool_property.value, cfg.get_property(bool_property)); - ASSERT_NO_THROW(cfg.set_property(bool_property(false))); - ASSERT_EQ(cfg.m_bool_property.value, cfg.get_property(bool_property)); - ASSERT_EQ(cfg.m_bool_property.value, false); + ASSERT_EQ(cfg.m_int_property.value, cfg.get_property(int_property)); + ASSERT_NO_THROW(cfg.set_property(int_property(1))); + ASSERT_EQ(cfg.m_int_property.value, -1); // user property doesn't impact member value until finalize() is called - ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); - ASSERT_EQ(cfg.m_bool_property.value, false); // user property doesn't impact member value until finalize() is called - - cfg.m_bool_property.value = true; - ASSERT_EQ(cfg.get_property(bool_property), true); + cfg.m_int_property.value = 2; + ASSERT_EQ(cfg.get_property(int_property), 1); // still 1 as user property was 
set previously } TEST(plugin_config, finalization_updates_member) { NotEmptyTestConfig cfg; - ASSERT_NO_THROW(cfg.set_user_property(bool_property(false))); + ASSERT_NO_THROW(cfg.set_property(bool_property(false))); ASSERT_EQ(cfg.m_bool_property.value, true); // user property doesn't impact member value until finalize() is called cfg.finalize(nullptr, {}); @@ -146,7 +138,7 @@ TEST(plugin_config, get_property_before_finalization_returns_user_property_if_se cfg.m_bool_property.value = false; // update member directly ASSERT_EQ(cfg.get_property(bool_property), false); // OK, return the class member value as no user property was set - ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); + ASSERT_NO_THROW(cfg.set_property(bool_property(true))); ASSERT_TRUE(cfg.is_set_by_user(bool_property)); ASSERT_EQ(cfg.get_property(bool_property), true); // now user property value is returned ASSERT_EQ(cfg.m_bool_property.value, false); // but class member is not updated @@ -159,7 +151,7 @@ TEST(plugin_config, get_property_before_finalization_returns_user_property_if_se TEST(plugin_config, finalization_updates_dependant_properties) { NotEmptyTestConfig cfg; - cfg.set_user_property(high_level_property("value1")); + cfg.set_property(high_level_property("value1")); ASSERT_TRUE(cfg.is_set_by_user(high_level_property)); ASSERT_FALSE(cfg.is_set_by_user(low_level_property)); @@ -196,7 +188,7 @@ TEST(plugin_config, can_copy_config) { cfg1.m_high_level_property.value = "value1"; cfg1.m_low_level_property.value = "value2"; cfg1.m_int_property.value = 1; - cfg1.set_user_property(bool_property(false)); + cfg1.set_property(bool_property(false)); NotEmptyTestConfig cfg2 = cfg1; ASSERT_EQ(cfg2.m_high_level_property.value, "value1"); @@ -211,10 +203,10 @@ TEST(plugin_config, can_copy_config) { ASSERT_EQ(cfg2.m_int_property.value, 1); } -TEST(plugin_config, set_user_property_throw_for_non_release_options) { +TEST(plugin_config, set_property_throw_for_non_release_options) { NotEmptyTestConfig cfg; - ASSERT_ANY_THROW(cfg.set_user_property(release_internal_property(10))); - ASSERT_ANY_THROW(cfg.set_user_property(debug_property(10))); + ASSERT_ANY_THROW(cfg.set_property(release_internal_property(10))); + ASSERT_ANY_THROW(cfg.set_property(debug_property(10))); } TEST(plugin_config, visibility_is_correct) { diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index 869d32825b8761..cec27660baa2b4 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -43,7 +43,6 @@ class Plugin : public ov::IPlugin { bool is_metric(const std::string& name) const; ov::Any get_metric(const std::string& name, const ov::AnyMap& arguments) const; - void set_cache_info(const std::shared_ptr& model, ExecutionConfig& properties) const; public: Plugin(); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index b21d0c91b1d924..b96a558171c203 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -25,10 +25,11 @@ struct ExecutionConfig : public ov::PluginConfig { #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_OPTION +protected: void finalize_impl(std::shared_ptr context) override; void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; + const 
ov::PluginConfig::OptionsDesc& get_options_desc() const override; -private: void apply_user_properties(const cldnn::device_info& info); void apply_hints(const cldnn::device_info& info); void apply_execution_hints(const cldnn::device_info& info); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index d5da1edf81bd69..1941aaec69b2bf 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -9,7 +9,7 @@ OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Number of streams to be used for inference") OV_CONFIG_RELEASE_OPTION(ov, compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that support parallelism") OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, - [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision") + [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision. Supported values: { f16, f32, undefined }") OV_CONFIG_RELEASE_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact number of threads used for model compilation and inference as well as device queue settings") OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc") OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy") diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp index 0f69379fa8e217..f2037c94979e0c 100644 --- a/src/plugins/intel_gpu/src/graph/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp @@ -149,7 +149,7 @@ void broadcast_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index d9f6ebd8b71872..85be51a788c842 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -278,7 +278,7 @@ void crop_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly.
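Each options.inl entry couples a default with an optional validator lambda, which both set_any() and is_valid_value() consult; the inference_precision entry above is the canonical example. A reduced sketch of the behavior (hypothetical names mirroring the ConfigOption template):

#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>

template <typename T>
struct ValidatedOption {
    T value;
    std::function<bool(const T&)> validator;

    void set(const T& v) {
        if (validator && !validator(v))
            throw std::invalid_argument("Invalid value for option");
        value = v;  // mirrors ConfigOption::set_any() after validation
    }
};

int main() {
    // Analogous to the inference_precision entry: a closed set of values.
    ValidatedOption<std::string> precision{
        "f16", [](const std::string& v) { return v == "f16" || v == "f32" || v == "undefined"; }};
    precision.set("f32");        // accepted
    try {
        precision.set("bf16");   // rejected by the validator
    } catch (const std::invalid_argument& e) {
        std::cout << e.what() << "\n";
    }
}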
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout()); diff --git a/src/plugins/intel_gpu/src/graph/eltwise.cpp b/src/plugins/intel_gpu/src/graph/eltwise.cpp index 7805cb575aed9a..491867dc206d31 100644 --- a/src/plugins/intel_gpu/src/graph/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/eltwise.cpp @@ -393,7 +393,7 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) : ""); } } else { - bool use_new_shape_infer = network.get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + bool use_new_shape_infer = network.get_config().m_allow_new_shape_infer; auto input0_pshape = node.get_input_pshape(0); for (size_t i = 1; i < inputs_count; ++i) { diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp index 2020f10015f916..e32a45ef00aab5 100644 --- a/src/plugins/intel_gpu/src/graph/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/gather.cpp @@ -150,7 +150,7 @@ void gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index 333afe18775e0b..0ef04576494665 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -161,7 +161,7 @@ bool add_required_reorders::test_format(cldnn::program_node& node, format reques } void add_required_reorders::run(program& p) { - bool optimize_data = p.get_config().get_property(ov::intel_gpu::optimize_data); + bool optimize_data = p.get_config().m_optimize_data; auto usr_itr = p.get_processing_order().begin(); while (usr_itr != p.get_processing_order().end()) { auto& usr = *usr_itr++; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp index 4c1b1008434144..999e103c3fe200 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp @@ -11,7 +11,7 @@ using namespace cldnn; void build_implementations::run(program& p) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "pass::build_implementations"); - if (p.get_config().get_property(ov::intel_gpu::partial_build_program)) { + if (p.get_config().m_partial_build_program) { return; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp index eec55260e2ea4b..daa3b5ddc41501 100644 --- 
a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp @@ -17,7 +17,7 @@ using namespace cldnn; namespace cldnn { void graph_initializations::set_outputs(program& p) { - auto custom_outputs = p.get_config().get_property(ov::intel_gpu::custom_outputs); + auto custom_outputs = p.get_config().m_custom_outputs.value; if (!custom_outputs.empty()) { for (auto const& output : custom_outputs) { OPENVINO_ASSERT(p.has_node(output), "not found custom output node in current cldnn::program: ", output); @@ -37,7 +37,7 @@ void graph_initializations::set_outputs(program& p) { void graph_initializations::run(program& p) { set_outputs(p); - auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations); + auto forcing_map = p.get_config().m_force_implementations.value; for (auto& kv : forcing_map) { if (p.has_node(kv.first)) { p.get_node(kv.first).set_forced_impl_type(kv.second.impl_type); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index 34fa9647ec99c3..cc04b3efe0913c 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -143,8 +143,8 @@ propagate_constants::calculate(engine& engine, return {}; ExecutionConfig cf_config = config; - cf_config.set_property(ov::intel_gpu::optimize_data(false)); - cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); + cf_config.m_optimize_data = false; + cf_config.m_custom_outputs = const_outputs; network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); std::map, std::shared_ptr>> weightless_cache_map; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index a4e6d989543837..9313544928b6b4 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -74,7 +74,7 @@ void select_preferred_formats::run(program& p) { } #endif // ENABLE_ONEDNN_FOR_GPU - auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations); + auto forcing_map = p.get_config().m_force_implementations.value; for (auto n : p.get_processing_order()) { n->recalc_output_layout(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index d7813c03d80f99..a22d23ca4dfed9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -203,7 +203,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { params.quantization = kernel_selector::QuantizationType::NONE; } - params.dynamic_quantization_group_size = impl_param.get_program().get_config().get_property(ov::hint::dynamic_quantization_group_size); + params.dynamic_quantization_group_size = impl_param.get_program().get_config().m_dynamic_quantization_group_size; return params; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 624db86e38342c..4fea0bbe644d63 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ 
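Note the shape of the propagate_constants change above: instead of round-tripping through the string-keyed set_property(ov::intel_gpu::optimize_data(false)), the copied config is mutated through its typed member (cf_config.m_optimize_data = false). A sketch of why the member form is attractive (illustrative types only, not the dev-API ones):

#include <iostream>

template <typename T>
struct ConfigOption {
    T value;
    ConfigOption& operator=(const T& v) { value = v; return *this; }
    operator const T&() const { return value; }
};

struct ExecConfigSketch {
    ConfigOption<bool> m_optimize_data{true};
};

int main() {
    ExecConfigSketch cfg;
    cfg.m_optimize_data = false;   // typed and checked at compile time,
                                   // no string lookup or Any conversion
    bool v = cfg.m_optimize_data;  // implicit conversion back to the raw type
    std::cout << std::boolalpha << v << "\n";
}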
b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -1204,13 +1204,13 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p params.engineInfo.ip_version = device_info.ip_version; params.engineInfo.arch = kernel_selector::gpu_arch(static_cast::type>(device_info.arch)); - auto impl_forcing = config.get_property(ov::intel_gpu::force_implementations); + auto impl_forcing = config.m_force_implementations.value; if (impl_forcing.count(param_info.desc->id) != 0) { params.forceImplementation = impl_forcing.at(param_info.desc->id).kernel_name; } - params.allowStaticInputReordering = config.get_property(ov::intel_gpu::optimize_data) || config.get_property(ov::intel_gpu::allow_static_input_reorder); + params.allowStaticInputReordering = config.m_optimize_data || config.m_allow_static_input_reorder; params.allowInputReordering = false; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 8d305a592e081b..50f1b1ed9ab879 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -111,7 +111,7 @@ namespace cldnn { std::mutex kernels_cache::_mutex; std::string kernels_cache::get_cache_path() const { - auto path = _config.get_property(ov::cache_dir); + auto path = _config.m_cache_dir.value; if (path.empty()) { return {}; } @@ -123,12 +123,12 @@ std::string kernels_cache::get_cache_path() const { } bool kernels_cache::is_cache_enabled() const { - if (!_config.get_property(ov::intel_gpu::allow_new_shape_infer) && - (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SPEED)) { + if (!_config.m_allow_new_shape_infer && + (_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SPEED)) { return false; } - return !_config.get_property(ov::cache_dir).empty(); + return !_config.m_cache_dir.value.empty(); } size_t kernels_cache::get_max_kernels_per_batch() const { @@ -136,7 +136,7 @@ size_t kernels_cache::get_max_kernels_per_batch() const { GPU_DEBUG_IF(debug_config->max_kernels_per_batch >= 1) { return static_cast(debug_config->max_kernels_per_batch); } - return _config.get_property(ov::intel_gpu::max_kernels_per_batch); + return _config.m_max_kernels_per_batch; } void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::vector* all_batches) const { diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 05a3dc5b2a9e4b..34910d5d73531d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -47,7 +47,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _attrs(attrs), _pd(pd) { - _enable_profiling = config.get_property(ov::enable_profiling); + _enable_profiling = config.m_enable_profiling; _scratchpad_md = _pd.scratchpad_desc(); @@ -70,7 +70,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _pd(), _prim() { - _enable_profiling = config.get_property(ov::enable_profiling); + _enable_profiling = config.m_enable_profiling; GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { _enable_profiling = true; @@ -318,7 +318,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { private: std::string get_cache_directory(const 
ExecutionConfig& config) const { - auto path = config.get_property(ov::cache_dir); + auto path = config.m_cache_dir.value; if (path.empty()) { return {}; } @@ -343,7 +343,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { void build_primitive(const ExecutionConfig& config) { auto cache_outpath = get_cache_directory(config); - if (!config.get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!config.m_allow_new_shape_infer) { cache_outpath = ""; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp index fdb2f151de8986..b135d9af73f31f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp @@ -58,7 +58,7 @@ std::unique_ptr ImplementationManager::create(const program_node if (auto impl = create_impl(node, params)) { update_impl(*impl, params); impl->set_node_params(node); - impl->can_share_kernels = node.get_program().get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse); + impl->can_share_kernels = node.get_program().get_config().m_enable_kernels_reuse; return impl; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp index d974b78f8e6d14..142b4b45fbf001 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp @@ -59,7 +59,7 @@ const std::vector>& Registry(scores_layout.get_partial_shape()[0].get_length()); const size_t kClassNum = static_cast(scores_layout.get_partial_shape()[1].get_length()); const size_t kNStreams = - static_cast(node.get_program().get_config().get_property(ov::streams::num)); + static_cast(node.get_program().get_config().m_num_streams.value); const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; return kKeyValue > 64; } diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 167b254a247637..84458b58279028 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -435,7 +435,7 @@ bool should_use_winograd_2x3_s1(const convolution_node& node, layout const& input_layout, layout const& weights_layout, bool output_size_handling_enabled) { - bool disable_winograd_conv = node.get_program().get_config().get_property(ov::intel_gpu::disable_winograd_convolution); + bool disable_winograd_conv = node.get_program().get_config().m_disable_winograd_convolution; if (disable_winograd_conv) return false; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index aa7c9a55775e6d..653ff2f57ee385 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -180,9 +180,9 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo , _memory_pool(new memory_pool(program->get_engine())) , _internal(is_internal) , _is_primary_stream(is_primary_stream) - , _enable_profiling(program->get_config().get_property(ov::enable_profiling)) + , _enable_profiling(program->get_config().m_enable_profiling) , _reset_arguments(true) - , _shape_predictor(new ShapePredictor(&program->get_engine(), 
program->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) { + , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().m_buffers_preallocation_ratio)) { if (!_internal) { net_id = get_unique_net_id(); } @@ -398,7 +398,7 @@ void network::calculate_weights_cache_capacity() { } // Sum all weights constants for each stream - required_mem_size += weights_const_size * _config.get_property(ov::streams::num); + required_mem_size += weights_const_size * _config.m_num_streams.value; // Add all other constants (shared between streams) required_mem_size += total_const_size - weights_const_size; diff --git a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp index 2732476a063f8f..725f3333a0f132 100644 --- a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp @@ -157,7 +157,7 @@ void non_max_suppression_gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[i]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[i].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[i] = {_network.get_engine().reinterpret_buffer(input_memory(i), _impl_params->get_output_layout(i))}; diff --git a/src/plugins/intel_gpu/src/graph/permute.cpp b/src/plugins/intel_gpu/src/graph/permute.cpp index bf87e78e4bbbc5..41252797f09eca 100644 --- a/src/plugins/intel_gpu/src/graph/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/permute.cpp @@ -146,7 +146,7 @@ void permute_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
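The weights-cache sizing above follows a simple rule: weight constants are duplicated per stream, everything else is shared once. For example, 100 MB of weight constants, 120 MB of constants in total, and m_num_streams = 2 gives 100 * 2 + (120 - 100) = 220 MB. As a sketch:

#include <cstddef>
#include <iostream>

// Sizing rule from network::calculate_weights_cache_capacity() above.
size_t required_mem_size(size_t weights_const_size, size_t total_const_size, size_t num_streams) {
    size_t required = weights_const_size * num_streams;  // weights duplicated per stream
    required += total_const_size - weights_const_size;   // other constants are shared
    return required;
}

int main() {
    std::cout << required_mem_size(100, 120, 2) << " MB\n";  // prints 220 MB
}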
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index bcf468cea97d03..2f2a755c675cf1 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -163,7 +163,7 @@ static memory::ptr get_memory_from_pool(engine& _engine, OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout without upper bound"); // Use layout with max tensor for dynamic shape with upper bound - if (_node.get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + if (_node.get_program().get_config().m_enable_memory_pool) { if (curr_memory != nullptr) pool.release_memory(curr_memory, _node.get_unique_id(), _node.id(), net_id); return pool.get_memory(layout, @@ -2042,7 +2042,7 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool : _network(network) , _node(&node) , _node_output_layout(node.get_output_layout()) - , _use_shared_kernels(node.get_program().get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)) + , _use_shared_kernels(node.get_program().get_config().m_enable_kernels_reuse) , _impl_params(node.get_kernel_impl_params()) , _impl(node.get_selected_impl() ? node.get_selected_impl()->clone() : nullptr) , _runtime_memory_dependencies(node.get_memory_dependencies()) @@ -2596,8 +2596,8 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() { ExecutionConfig subgraph_config{ ov::intel_gpu::allow_static_input_reorder(true), ov::intel_gpu::allow_new_shape_infer(true), - ov::enable_profiling(get_network().get_config().get_property(ov::enable_profiling)), - ov::intel_gpu::use_onednn(get_network().get_config().get_property(ov::intel_gpu::use_onednn)) + ov::enable_profiling(get_network().get_config().m_enable_profiling), + ov::intel_gpu::use_onednn(get_network().get_config().m_use_onednn) }; auto prog = program::build_program(get_network().get_engine(), t, diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 800ac5ce997d6c..a4068e408779c5 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -108,8 +108,8 @@ using namespace cldnn; using namespace ov::intel_gpu; static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) { - int streams = (num_streams > 0) ? num_streams : config.get_property(ov::compilation_num_threads); - auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority); + int streams = (num_streams > 0) ? 
num_streams : config.m_compilation_num_threads.value; + auto priority = config.m_host_task_priority; auto core_type = ov::hint::SchedulingCoreType::ANY_CORE; switch (priority) { case ov::hint::Priority::LOW: core_type = ov::hint::SchedulingCoreType::ECORE_ONLY; break; @@ -117,7 +117,7 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E case ov::hint::Priority::HIGH: core_type = ov::hint::SchedulingCoreType::PCORE_ONLY; break; default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority); } - bool enable_cpu_pinning = config.get_property(ov::hint::enable_cpu_pinning); + bool enable_cpu_pinning = config.m_enable_cpu_pinning; ov::threading::IStreamsExecutor::Config task_executor_config(tags, streams, @@ -216,7 +216,7 @@ program::program(engine& engine, const ExecutionConfig& config) init_primitives(); auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); _config.finalize(ctx, {}); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); + new_shape_infer = _config.m_allow_new_shape_infer; _layout_optimizer = std::make_unique(); } @@ -228,7 +228,7 @@ void program::init_program() { set_options(); pm = std::unique_ptr(new pass_manager(*this)); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); + new_shape_infer = _config.m_allow_new_shape_infer; if (_task_executor == nullptr) _task_executor = program::make_task_executor(_config); @@ -236,7 +236,7 @@ void program::init_program() { kernel_selector::KernelBase::get_db().get_batch_headers(), kernel_selector::KernelBase::get_db().get_cm_batch_headers())); - _kernels_cache->set_kernels_reuse(get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)); + _kernels_cache->set_kernels_reuse(get_config().m_enable_kernels_reuse); if (!_compilation_context) _compilation_context = program::make_compilation_context(_config); @@ -490,13 +490,13 @@ void program::set_options() { static std::atomic id_gen{0}; prog_id = ++id_gen; assert(prog_id != 0); - if (!_config.get_property(ov::intel_gpu::force_implementations).empty()) { - _config.set_property(ov::intel_gpu::optimize_data(true)); + if (!_config.m_force_implementations.value.empty()) { + _config.m_optimize_data = true; } GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - _config.set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); + _config.m_dump_graphs = debug_config->dump_graphs; } } @@ -534,8 +534,7 @@ void program::init_graph() { if (!node->is_type()) node->get_output_layouts(); if (node->is_type()) { - _config.set_property(ov::intel_gpu::use_onednn(true)); - _config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + _config.m_use_onednn = true; } } // Perform initial shape_of subgraphs markup @@ -554,7 +553,7 @@ void program::pre_optimize_graph(bool is_internal) { bool output_size_handling_enabled = analyze_output_size_handling_need(); - bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = _config.m_optimize_data; if (optimize_data) { apply_opt_pass(); } @@ -631,7 +630,7 @@ void program::post_optimize_graph(bool is_internal) { reorder_factory rf; - bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = _config.m_optimize_data; if (!is_internal) { apply_opt_pass(rf); @@ -639,7 +638,7 @@ void program::post_optimize_graph(bool is_internal) { apply_opt_pass(false, true); // TODO: do we need it at 
this place also? - auto partial_build = _config.get_property(ov::intel_gpu::partial_build_program); + auto partial_build = _config.m_partial_build_program; #ifdef GPU_DEBUG_CONFIG GPU_DEBUG_GET_INSTANCE(debug_config); if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) { @@ -658,7 +657,7 @@ void program::post_optimize_graph(bool is_internal) { // Recalculate processing order after all graph transformation to keep optimal primitives ordering // for OOO queue - if (_config.get_property(ov::intel_gpu::queue_type) == QueueTypes::out_of_order) + if (_config.m_queue_type == QueueTypes::out_of_order) get_processing_order().calculate_BFS_processing_order(); apply_opt_pass(); @@ -782,7 +781,7 @@ const std::vector& program::get_allocating_order(bool forced_updat } void program::prepare_memory_dependencies() { - if (!_config.get_property(ov::intel_gpu::enable_memory_pool)) + if (!_config.m_enable_memory_pool) return; for (auto& node : get_processing_order()) { node->add_memory_dependency(node->get_unique_id()); @@ -1414,7 +1413,7 @@ program::primitives_info program::get_current_stage_info() const { void program::save_pass_info(std::string pass_name) { // TODO: Directory path here can be probably changed to some bool flag - if (!_config.get_property(ov::intel_gpu::dump_graphs).empty()) + if (!_config.m_dump_graphs.value.empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } @@ -1442,7 +1441,7 @@ const program::primitives_info& program::get_primitives_info() const { return pr void program::apply_opt_pass(base_pass& pass) { pm->run(*this, pass); } void program::set_layout_optimizer_attributes(layout_optimizer& lo) { - lo.set_implementation_forcing(_config.get_property(ov::intel_gpu::force_implementations)); + lo.set_implementation_forcing(_config.m_force_implementations); // first pass to set layout optimization_attributes for topology @@ -1668,15 +1667,15 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1); #ifdef ENABLE_ONEDNN_FOR_GPU - bool enable_onednn_for_tests = get_config().get_property(ov::intel_gpu::optimize_data) || is_internal_program(); + bool enable_onednn_for_tests = get_config().m_optimize_data || is_internal_program(); auto& engine = get_engine(); if (engine.get_device_info().vendor_id == INTEL_VENDOR_ID && - get_config().get_property(ov::intel_gpu::queue_type) == QueueTypes::in_order && + get_config().m_queue_type == QueueTypes::in_order && enable_onednn_for_tests) { if (engine.get_device_info().supports_immad) { lo.add_all_onednn_impls_optimization_attribute(); } else { - if (get_config().get_property(ov::intel_gpu::use_onednn)) { + if (get_config().m_use_onednn) { lo.enable_onednn_for(); } } @@ -1890,8 +1889,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) { init_program(); std::shared_ptr mapped_memory = nullptr; - std::string weights_path = _config.get_property(ov::weights_path); - if (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && + std::string weights_path = _config.m_weights_path; + if (_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE && ov::util::validate_weights_path(weights_path)) { mapped_memory = ov::load_mmap_object(weights_path); } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index 6a09fcd10eb513..a14cbfd0139d29 100644 --- 
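program::load() above only memory-maps external weights when both conditions hold: the blob was produced with ov::CacheMode::OPTIMIZE_SIZE and ov::weights_path points at a usable file. A sketch of that gate (validate_weights_path() stands in for the ov::util helper; the returned string is a placeholder for the mapped object):

#include <fstream>
#include <memory>
#include <string>

enum class CacheMode { OPTIMIZE_SIZE, OPTIMIZE_SPEED };

// Assumed stand-in for ov::util::validate_weights_path()
bool validate_weights_path(const std::string& path) {
    return !path.empty() && std::ifstream(path).good();
}

std::shared_ptr<std::string> maybe_map_weights(CacheMode mode, const std::string& weights_path) {
    // Weightless caching: only OPTIMIZE_SIZE blobs reference external weights
    if (mode == CacheMode::OPTIMIZE_SIZE && validate_weights_path(weights_path)) {
        // real code: ov::load_mmap_object(weights_path)
        return std::make_shared<std::string>(weights_path);
    }
    return nullptr;  // weights are embedded in the blob (OPTIMIZE_SPEED)
}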
a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -157,7 +157,7 @@ void dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - auto path = config.get_property(ov::intel_gpu::dump_graphs); + auto path = config.m_dump_graphs.value; if (path.empty()) { return {}; } diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 76b15ca54bcff8..c3b7c472d59437 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -287,7 +287,7 @@ void reorder_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index fc874e29f70ac4..1250a73e881e9a 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -320,7 +320,7 @@ void reshape_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp index 316acef0e492e8..7ddea056745a53 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp @@ -75,7 +75,7 @@ void scatter_elements_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp index 2f109f83df428f..56c8bf6607c4a3 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp @@ -86,7 +86,7 @@ void scatter_nd_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_update.cpp index 4291ee67caa3ef..0d34b8f95e9f1a 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_update.cpp @@ -66,7 +66,7 @@ void scatter_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index 5435de5598bea0..35d1db8fc389b1 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -95,7 +95,7 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p 3, ""); - bool allow_new_shape_infer = network.get_program()->get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + bool allow_new_shape_infer = network.get_program()->get_config().m_allow_new_shape_infer; // Broadcast check is performed in ngraph shape infer of select when allow_new_shape_infer=true if (!allow_new_shape_infer) { if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) { diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index 007553b8a9d192..baff08bc8de82e 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -208,7 +208,7 @@ void strided_slice_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
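The same two-step pattern repeats across the broadcast, crop, gather, scatter_*, select, strided_slice and reorder hunks in this patch: when a can_be_optimized node owned a pool-backed output, that allocation is explicitly returned to the pool before the output is re-pointed at (a reinterpretation of) the input buffer. Schematically (simplified signatures):

#include <memory>
#include <vector>

struct Memory {};
struct MemoryPool {
    void release_memory(Memory* /*mem*/, int /*unique_id*/, int /*net_id*/) {}
};

// Sketch of the shared update_output_memory() pattern (simplified API).
void update_output_memory_sketch(std::vector<std::shared_ptr<Memory>>& outputs,
                                 MemoryPool& pool,
                                 bool enable_memory_pool,
                                 std::shared_ptr<Memory> reinterpreted_input) {
    // 1) Return the stale pool allocation, but only when pooling is on:
    //    with m_enable_memory_pool == false the old output was never pooled.
    if (!outputs.empty() && outputs[0] && enable_memory_pool)
        pool.release_memory(outputs[0].get(), /*unique_id*/ 0, /*net_id*/ 0);
    // 2) Alias the output to the (reinterpreted) input memory of the
    //    can_be_optimized node.
    outputs[0] = reinterpreted_input;
}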
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index d273ba2b1df6a6..8bd73dbdce7029 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -20,7 +20,7 @@ namespace ov::intel_gpu { namespace { std::shared_ptr create_task_executor(const std::shared_ptr& plugin, const ExecutionConfig& config) { - if (config.get_property(ov::internal::exclusive_async_requests)) { + if (config.m_exclusive_async_requests) { // exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with // the CPU behavior return plugin->get_executor_manager()->get_executor("GPU"); @@ -30,7 +30,7 @@ std::shared_ptr create_task_executor(const std::sh bool enable_cpu_reservation = config.get_property(ov::hint::enable_cpu_reservation); return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.get_property(ov::num_streams), + config.m_num_streams.value, 1, ov::hint::SchedulingCoreType::PCORE_ONLY, enable_cpu_reservation, @@ -38,7 +38,7 @@ std::shared_ptr create_task_executor(const std::sh } else { return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.get_property(ov::num_streams), + config.m_num_streams.value, 0, ov::hint::SchedulingCoreType::ANY_CORE, false, @@ -62,7 +62,7 @@ CompiledModel::CompiledModel(std::shared_ptr model, m_outputs(ov::ICompiledModel::outputs()), m_loaded_from_cache(false) { auto graph_base = std::make_shared(model, m_context, m_config, 0); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + for (uint16_t n = 0; n < m_config.m_num_streams.value; n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -157,7 +157,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, } auto graph_base = std::make_shared(ib, context, m_config, 0); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + for (uint16_t n = 0; n < m_config.m_num_streams.value; n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -179,8 +179,8 @@ std::shared_ptr CompiledModel::create_infer_request() co void CompiledModel::export_model(std::ostream& model) const { // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching // which requires the weights_path. 
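create_task_executor() above picks one of three executor shapes from the finalized config: exclusive_async_requests falls back to the shared "GPU" executor, CPU reservation pins one thread per stream to performance cores, and the default runs plain streams on any core. A sketch, together with the optimal_number_of_infer_requests derivation that follows in this diff (doubling for non-LATENCY hints is the plugin's heuristic; the "keep streams busy" rationale is assumed):

#include <iostream>

enum class SchedulingCoreType { ANY_CORE, PCORE_ONLY };
enum class PerformanceMode { LATENCY, THROUGHPUT };

struct ExecutorConfigSketch {
    int streams;
    int threads_per_stream;
    SchedulingCoreType core_type;
    bool cpu_reservation;
};

ExecutorConfigSketch make_executor_config(int num_streams, bool cpu_reservation) {
    if (cpu_reservation)
        return {num_streams, 1, SchedulingCoreType::PCORE_ONLY, true};
    return {num_streams, 0, SchedulingCoreType::ANY_CORE, false};
}

unsigned optimal_nireq(unsigned num_streams, PerformanceMode mode) {
    unsigned nr = num_streams;
    if (mode != PerformanceMode::LATENCY)
        nr *= 2;  // two requests per stream so the device queue is never starved
    return nr;
}

int main() {
    std::cout << optimal_nireq(2, PerformanceMode::THROUGHPUT) << "\n";  // prints 4
}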
- ov::CacheMode cache_mode = m_config.get_property(ov::cache_mode); - std::string weights_path = m_config.get_property(ov::weights_path); + ov::CacheMode cache_mode = m_config.m_cache_mode; + std::string weights_path = m_config.m_weights_path; if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) return; @@ -188,7 +188,7 @@ void CompiledModel::export_model(std::ostream& model) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); - const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks); + const ov::EncryptionCallbacks encryption_callbacks = m_config.m_cache_encryption_callbacks; // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty. const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; @@ -290,8 +290,8 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } else if (name == ov::loaded_from_cache) { return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache}; } else if (name == ov::optimal_number_of_infer_requests) { - unsigned int nr = m_config.get_property(ov::num_streams); - if (m_config.get_property(ov::hint::performance_mode) != ov::hint::PerformanceMode::LATENCY) + unsigned int nr = m_config.m_num_streams.value; + if (m_config.m_performance_mode != ov::hint::PerformanceMode::LATENCY) nr *= 2; return decltype(ov::optimal_number_of_infer_requests)::value_type {nr}; } else if (name == ov::execution_devices) { diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 6859728076fb6a..ab69fd5d4e26f8 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -86,13 +86,9 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context } } { - bool bool_prop_value; - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::partial_build_program(bool_prop_value)); - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::optimize_data(bool_prop_value)); - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::allow_new_shape_infer(bool_prop_value)); + ib >> m_config.m_partial_build_program.value; + ib >> m_config.m_optimize_data.value; + ib >> m_config.m_allow_new_shape_infer.value; } auto imported_prog = std::make_shared(get_engine(), m_config); @@ -177,7 +173,7 @@ void Graph::build(std::shared_ptr program) { auto external_queue = m_context->get_external_queue(); if (external_queue) { - OPENVINO_ASSERT(m_config.get_property(ov::num_streams) == 1, "[GPU] Throughput streams can't be used with shared queue!"); + OPENVINO_ASSERT(m_config.m_num_streams == 1, "[GPU] Throughput streams can't be used with shared queue!"); const auto &engine = program->get_engine(); m_network = std::make_shared(program, engine.create_stream(m_config, external_queue), m_stream_id); } else { @@ -209,7 +205,7 @@ bool Graph::use_external_queue() const { std::shared_ptr Graph::get_runtime_model(std::vector& primitives_info, bool filter_const_primitives) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::get_runtime_model"); - if (m_config.get_property(ov::enable_profiling)) { + if (m_config.m_enable_profiling) { try { // Update may throw an exception for step-by-step runtime graph dump, // since network->get_executed_primitives() method can't be called 
before network execution @@ -521,9 +517,9 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) { } } { - ob << m_config.get_property(ov::intel_gpu::partial_build_program); - ob << m_config.get_property(ov::intel_gpu::optimize_data); - ob << m_config.get_property(ov::intel_gpu::allow_new_shape_infer); + ob << m_config.m_partial_build_program.value; + ob << m_config.m_optimize_data.value; + ob << m_config.m_allow_new_shape_infer.value; } ob.set_stream(m_network->get_stream_ptr().get()); diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 825b3ddfbc7282..889a066aa74325 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -22,12 +22,12 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ auto config = p.get_config(); { - auto custom_outputs = config.get_property(ov::intel_gpu::custom_outputs); + auto custom_outputs = config.m_custom_outputs.value; if (!custom_outputs.empty()) { - config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); + config.m_custom_outputs = std::vector({}); } } - config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); + config.m_allow_new_shape_infer = op->is_dynamic() || p.use_new_shape_infer(); ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 6e18486ee1d738..5d808f4d041275 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -298,8 +298,8 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr("runtime_options"); - return {}; + rt_info = model.get_rt_info("runtime_options"); + + if (model.has_rt_info("__weights_path")) { + rt_info[ov::weights_path.name()] = model.get_rt_info("__weights_path"); + } + return rt_info; } } // namespace @@ -205,22 +210,6 @@ Plugin::Plugin() { m_compiled_model_runtime_properties["OV_VERSION"] = ov_version.buildNumber; } -void Plugin::set_cache_info(const std::shared_ptr& model, ExecutionConfig& config) const { - // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with - // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not - // using that mechanism. 
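The helper removed here is superseded by rt_info plumbing: get_rt_info() above folds the model's "__weights_path" into the "runtime_options" map, and ExecutionConfig::apply_rt_info() (see the execution_config.cpp hunk below) picks up ov::weights_path from it, keeping the same OPTIMIZE_SIZE-only guard. The resulting compile flow, condensed from this patch:

    ExecutionConfig config = m_configs_map.at(device_id);
    config.set_property(orig_config);               // user-supplied options
    config.finalize(context, get_rt_info(*model));  // hints and rt_info resolved in one place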
- if (config.get_property(ov::cache_mode) != ov::CacheMode::OPTIMIZE_SIZE) { - return; - } - - const auto& rt_info = model->get_rt_info(); - auto weights_path = rt_info.find("__weights_path"); - if (weights_path != rt_info.end()) { - ov::AnyMap weights_path_property{{"WEIGHTS_PATH", weights_path->second}}; - config.set_property(weights_path_property); - } -} - std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model"); std::string device_id = get_device_id(orig_config); @@ -230,11 +219,9 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); + config.set_property(orig_config); config.finalize(context, get_rt_info(*model)); - set_cache_info(model, config); - auto transformed_model = clone_and_transform_model(model, config, context); { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model::CreateCompiledModel"); @@ -252,7 +239,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< ExecutionConfig config = m_configs_map.at(device_id); config.finalize(context_impl, get_rt_info(*model)); - set_cache_info(model, config); auto transformed_model = clone_and_transform_model(model, config, context_impl); return std::make_shared(transformed_model, shared_from_this(), context_impl, config); @@ -282,7 +268,7 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params) void Plugin::set_property(const ov::AnyMap &config) { auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) { - config.set_user_property(user_config); + config.set_property(user_config); // Check that custom layers config can be loaded if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { CustomLayerMap custom_layers; @@ -317,12 +303,12 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& auto ctx = get_default_context(device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); + config.set_property(orig_config); config.finalize(ctx, get_rt_info(*model)); ProgramBuilder prog(ctx->get_engine(), config); - float query_model_ratio = config.get_property(ov::internal::query_model_ratio.name()).as(); + float query_model_ratio = config.m_query_model_ratio; auto supported = ov::get_supported_nodes(model, [&config,&ctx,this](std::shared_ptr& model) { @@ -372,11 +358,11 @@ std::shared_ptr Plugin::import_model(std::istream& model, } ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(_orig_config); + config.set_property(_orig_config); config.finalize(context_impl, {}); - ov::CacheMode cache_mode = config.get_property(ov::cache_mode); - ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks); + ov::CacheMode cache_mode = config.m_cache_mode; + ov::EncryptionCallbacks encryption_callbacks = config.m_cache_encryption_callbacks; const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; std::unique_ptr ib_ptr = @@ -393,9 +379,8 @@ std::shared_ptr Plugin::import_model(std::istream& model, return nullptr; } - std::string weights_path = config.get_property(ov::weights_path); - if (config.get_property(ov::cache_mode) == 
ov::CacheMode::OPTIMIZE_SIZE && - !ov::util::validate_weights_path(weights_path)) { + std::string weights_path = config.m_weights_path; + if (config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) { return nullptr; } @@ -695,7 +680,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); const auto& config = m_configs_map.at(device_id); - uint32_t n_streams = static_cast(config.get_property(ov::num_streams)); + uint32_t n_streams = static_cast(config.m_num_streams.value); uint64_t occupied_device_mem = 0; auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>(); auto occupied_usm_dev = statistic_result.find("usm_device_current"); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 2abc8bb65df6ac..a9f4fd17f2be2f 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -105,7 +105,7 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml"; CustomLayer::LoadFromFile(config_path, m_custom_layers, true); - auto custom_layers_config = m_config.get_property(ov::intel_gpu::config_file); + auto custom_layers_config = m_config.m_config_file.value; CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); auto ops = model->get_ordered_ops(); @@ -113,9 +113,9 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& // smaller # of kernels are built compared to static models. // So having smaller batch size is even better for dynamic model as we can do more parallel build. 
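Direct writes like the ones just below assign straight to the option object, which presupposes that ConfigOption is assignable from its value type. A sketch of the minimal shape these call sites rely on (assumed, not shown in this patch):

    template <typename T>
    struct ConfigOption {
        T value;
        // Allows m_config.m_max_kernels_per_batch = 4; without going through ov::Any.
        ConfigOption& operator=(const T& v) {
            value = v;
            return *this;
        }
    };

Reads use the public value field (m_num_streams.value and similar), so neither direction pays for a string lookup or an ov::Any round-trip.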
if (model->is_dynamic()) { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(4)); + m_config.m_max_kernels_per_batch = 4; } else { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(8)); + m_config.m_max_kernels_per_batch = 8; } m_program = build(ops, partial_build, is_inner_program); @@ -160,12 +160,12 @@ std::shared_ptr ProgramBuilder::build(const std::vectororigin_op_name = op.get_friendly_name(); prim->origin_op_type_name = op.get_type_name(); - if (this->m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + if (this->m_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE) { if (auto data_prim = dynamic_cast(prim.get())) { auto rt_info = op.get_rt_info(); @@ -340,7 +340,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrorigin_op_type_name = prim->type_string(); } - if (this->m_config.get_property(ov::enable_profiling) && should_profile) { + if (this->m_config.m_enable_profiling && should_profile) { profiling_ids.push_back(prim_id); init_profile_info(*prim); } diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 41b358b6d7faad..6d74722a5b435a 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -113,8 +113,8 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c : ov::ISyncInferRequest(compiled_model) , m_graph(compiled_model->get_graph(0)) , m_context(std::static_pointer_cast(compiled_model->get_context_impl())) - , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) - , m_enable_profiling(m_graph->get_config().get_property(ov::enable_profiling)) + , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().m_buffers_preallocation_ratio)) + , m_enable_profiling(m_graph->get_config().m_enable_profiling) , m_use_external_queue(m_graph->use_external_queue()) { GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { @@ -422,7 +422,7 @@ void SyncInferRequest::wait() { auto mem_shape = output_layout.get_shape(); // In case of old shape infer we need to shrink out tensor shape to avoid redudnant dimensions that occur due to rank extension // For new shape infer this shouldn't happen, thus remove that WA once we migrate to ngraph-based shape infer for all cases - if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!m_graph->get_config().m_allow_new_shape_infer) { OPENVINO_ASSERT(port.get_partial_shape().is_static(), "[GPU] Unexpected dynamic shape for legacy shape inference"); OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor"); mem_shape = port.get_shape(); @@ -895,7 +895,7 @@ std::vector SyncInferRequest::prepare_input(const std::string auto memory = device_tensor->get_memory(); // WA to extend shape to ranks expected by legacy shape infer. 
Remove after full migration to new shape infer - if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!m_graph->get_config().m_allow_new_shape_infer) { auto new_layout = memory->get_layout(); new_layout.set_partial_shape(m_graph->get_input_layouts().at(input_idx).get_shape()); memory = engine.reinterpret_buffer(*memory, new_layout); diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index f036afc0cd59ad..c9e7c155158448 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -289,7 +289,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const ov::element::TypeVector supported_woq_types = {ov::element::u8, ov::element::i8, ov::element::u4, ov::element::i4}; bool enableInt8; ov::element::Type infer_precision = ov::element::undefined; - bool unroll_loop = config.get_property(ov::intel_gpu::enable_loop_unrolling); + bool unroll_loop = config.m_enable_loop_unrolling; { ov::pass::Manager manager("Plugin:GPU"); auto pass_config = manager.get_pass_config(); @@ -302,7 +302,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); - enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && is_model_quantized; + enableInt8 = config.m_enable_lp_transformations && is_model_quantized; manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }, @@ -335,7 +335,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { }; // Add conversion from FP data types to infer precision if it's specified - infer_precision = config.get_property(ov::hint::inference_precision); + infer_precision = config.m_inference_precision; if (infer_precision != ov::element::undefined) { if (!fp_precision_supported(infer_precision)) infer_precision = fallback_precision; @@ -416,7 +416,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1); } - if (!config.get_property(ov::intel_gpu::hint::enable_sdpa_optimization)) + if (!config.m_enable_sdpa_optimization) return false; auto sdpa = ov::as_type_ptr(node); @@ -1032,7 +1032,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - auto kv_cache_compression_dt = config.get_property(ov::hint::kv_cache_precision); + auto kv_cache_compression_dt = config.m_kv_cache_precision; manager.register_pass(kv_cache_compression_dt, device_info.supports_immad); manager.register_pass(); @@ -1052,7 +1052,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { - auto dynamic_quantization_group_size = config.get_property(ov::hint::dynamic_quantization_group_size); + auto dynamic_quantization_group_size = config.m_dynamic_quantization_group_size; pass_config->set_callback([=](const_node_ptr& root) -> bool { for (size_t i = 0 ; i < root->get_input_node_shared_ptr(0)->get_output_size(); ++i) { if (root->get_input_node_shared_ptr(0)->get_output_element_type(i) == ov::element::Type_t::f32) { diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index ff420a0e66c1b9..587bf23a23d15c 100644 --- 
a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -7,6 +7,8 @@ #include "openvino/core/any.hpp" #include "openvino/runtime/internal_properties.hpp" #include "intel_gpu/runtime/internal_properties.hpp" +#include "openvino/runtime/plugin_config.hpp" +#include "openvino/runtime/properties.hpp" namespace ov::intel_gpu { @@ -39,29 +41,36 @@ void ExecutionConfig::apply_rt_info(std::shared_ptr context, con apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); } apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); + + // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with + // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not + // using that mechanism. + if (get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + apply_rt_info_property(ov::weights_path, rt_info); + } } void ExecutionConfig::finalize_impl(std::shared_ptr context) { const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); apply_hints(info); if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); + m_enable_lp_transformations = info.supports_imad || info.supports_immad; } if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); + m_use_onednn = true; } if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + m_queue_type = QueueTypes::in_order; } // Enable KV-cache compression by default for non-systolic platforms if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); + m_kv_cache_precision = ov::element::i8; } // Enable dynamic quantization by default for non-systolic platforms if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); + m_dynamic_quantization_group_size = 32; } } @@ -76,12 +85,12 @@ void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { const auto mode = get_property(ov::hint::execution_mode); if (!is_set_by_user(ov::hint::inference_precision)) { if (mode == ov::hint::ExecutionMode::ACCURACY) { - set_property(ov::hint::inference_precision(ov::element::undefined)); + m_inference_precision = ov::element::undefined; } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) { if (info.supports_fp16) - set_property(ov::hint::inference_precision(ov::element::f16)); + m_inference_precision = ov::element::f16; else - set_property(ov::hint::inference_precision(ov::element::f32)); + m_inference_precision = ov::element::f32; } } } @@ -92,26 +101,26 @@ void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { const auto mode = get_property(ov::hint::performance_mode); if (!is_set_by_user(ov::num_streams)) { if (mode == ov::hint::PerformanceMode::LATENCY) { - set_property(ov::num_streams(1)); + m_num_streams = 1; } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { - set_property(ov::num_streams(ov::streams::AUTO)); + m_num_streams = ov::streams::AUTO; } } } if (get_property(ov::num_streams) == ov::streams::AUTO) { int32_t n_streams = std::max(info.num_ccs, 2); - set_property(ov::num_streams(n_streams)); + m_num_streams = n_streams; } if (get_property(ov::internal::exclusive_async_requests)) { - 
set_property(ov::num_streams(1)); + m_num_streams = 1; } // Allow kernels reuse only for single-stream scenarios if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { if (get_property(ov::num_streams) != 1) { - set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); + m_enable_kernels_reuse = false; } } } @@ -120,9 +129,18 @@ void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::model_priority)) { const auto priority = get_property(ov::hint::model_priority); if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { - set_property(ov::intel_gpu::hint::queue_priority(priority)); + m_queue_priority = priority; } } } +const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const { + static ov::PluginConfig::OptionsDesc help_map { + #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION + }; + return help_map; +} + } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index 11fab0106bff83..b954f494abfe5a 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -64,7 +64,7 @@ void ocl_engine::create_onednn_engine(const ExecutionConfig& config) { auto casted = std::dynamic_pointer_cast(_device); OPENVINO_ASSERT(casted, "[GPU] Invalid device type stored in ocl_engine"); - std::string cache_dir = config.get_property(ov::cache_dir); + std::string cache_dir = config.m_cache_dir; if (cache_dir.empty()) { _onednn_engine = std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get())); } else { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp index e227c94c7dc06d..61844cd640ea41 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp @@ -189,22 +189,22 @@ void set_arguments_impl(ocl_kernel_type& kernel, } // namespace ocl_stream::ocl_stream(const ocl_engine &engine, const ExecutionConfig& config) - : stream(config.get_property(ov::intel_gpu::queue_type), stream::get_expected_sync_method(config)) + : stream(config.m_queue_type, stream::get_expected_sync_method(config)) , _engine(engine) { auto context = engine.get_cl_context(); auto device = engine.get_cl_device(); ocl::command_queues_builder queue_builder; - queue_builder.set_profiling(config.get_property(ov::enable_profiling)); + queue_builder.set_profiling(config.m_enable_profiling); queue_builder.set_out_of_order(m_queue_type == QueueTypes::out_of_order); OPENVINO_ASSERT(m_sync_method != SyncMethods::none || m_queue_type == QueueTypes::in_order, "[GPU] Unexpected sync method (none) is specified for out_of_order queue"); bool priorty_extensions = engine.extension_supported("cl_khr_priority_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_priority_mode(config.get_property(ov::intel_gpu::hint::queue_priority), priorty_extensions); + queue_builder.set_priority_mode(config.m_queue_priority, priorty_extensions); bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_throttle_mode(config.get_property(ov::intel_gpu::hint::queue_throttle), throttle_extensions); + 
queue_builder.set_throttle_mode(config.m_queue_throttle, throttle_extensions); bool queue_families_extension = engine.get_device_info().supports_queue_families; queue_builder.set_supports_queue_families(queue_families_extension); diff --git a/src/plugins/intel_gpu/src/runtime/stream.cpp b/src/plugins/intel_gpu/src/runtime/stream.cpp index 0516a551f19177..2043afb9f3869c 100644 --- a/src/plugins/intel_gpu/src/runtime/stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/stream.cpp @@ -20,8 +20,8 @@ QueueTypes stream::detect_queue_type(engine_types engine_type, void* queue_handl } SyncMethods stream::get_expected_sync_method(const ExecutionConfig& config) { - auto profiling = config.get_property(ov::enable_profiling); - auto queue_type = config.get_property(ov::intel_gpu::queue_type); + auto profiling = config.m_enable_profiling; + auto queue_type = config.m_queue_type; return profiling ? SyncMethods::events : queue_type == QueueTypes::out_of_order ? SyncMethods::barriers : SyncMethods::none; } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index 87d4c4ed7f0a2d..00fd36f16aaf9c 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1555,7 +1555,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_property(ov::hint::dynamic_quantization_group_size(32)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1643,7 +1643,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1669,7 +1669,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1753,7 +1753,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); 
network.set_input_data("input", input_mem); @@ -1780,9 +1780,9 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); if (is_dyn_quan) { - config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_property(ov::hint::dynamic_quantization_group_size(32)); } else { - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); } network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1923,7 +1923,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl = { in_layout.format, "", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim1", fc_impl }, { "fc_prim2", fc_impl } })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1952,7 +1952,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -2905,7 +2905,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -2931,7 +2931,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); @@ -3031,7 +3031,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bf_tiled", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -3057,7 +3057,7 @@ class fully_connected_gpu_tests: 
public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); From a899aca9403e1db5879f02a3ef44f4b143ebe519 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 23 Dec 2024 13:23:35 +0400 Subject: [PATCH 10/44] refactor Signed-off-by: Vladimir Paramuzov --- src/inference/dev_api/openvino/runtime/plugin_config.hpp | 3 --- src/inference/src/dev/plugin_config.cpp | 5 ++++- src/plugins/intel_gpu/src/graph/program.cpp | 5 ----- src/plugins/intel_gpu/src/plugin/program_builder.cpp | 1 + src/plugins/intel_gpu/src/runtime/execution_config.cpp | 7 +++++++ 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index a1bcab62b5d5fd..04e384cc26d35e 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -197,7 +197,6 @@ class OPENVINO_RUNTIME_API PluginConfig { virtual void apply_debug_options(std::shared_ptr context); virtual void finalize_impl(std::shared_ptr context) {} - template bool is_set_by_user(const ov::Property& property) const { return m_user_properties.find(property.name()) != m_user_properties.end(); @@ -236,12 +235,10 @@ class OPENVINO_RUNTIME_API PluginConfig { // property variable name, string name, default value, description using OptionsDesc = std::vector>; - static OptionsDesc m_options_desc; virtual const OptionsDesc& get_options_desc() const { static OptionsDesc empty; return empty; } const std::string get_help_message(const std::string& name = "") const; void print_help() const; -private: bool m_is_finalized = false; }; diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index e1b09b76ad8235..ca1c87cce1b659 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -98,6 +98,9 @@ void PluginConfig::set_property(const ov::AnyMap& config, const std::vector context, const ov::RTMap& rt_info) { + if (m_is_finalized) + return; + apply_rt_info(context, rt_info); apply_debug_options(context); // Copy internal properties before applying hints to ensure that @@ -122,8 +125,8 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) static std::vector allowed_visibility = { OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL, -#ifdef ENABLE_DEBUG_CAPS OptionVisibility::DEBUG +#ifdef ENABLE_DEBUG_CAPS #endif }; diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index a4068e408779c5..271a866e3b5cf2 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -493,11 +493,6 @@ void program::set_options() { if (!_config.m_force_implementations.value.empty()) { _config.m_optimize_data = true; } - - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - _config.m_dump_graphs = debug_config->dump_graphs; - } } void program::build_program(bool is_internal) { diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp 
b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index a9f4fd17f2be2f..297b9d42266efb 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -11,6 +11,7 @@ #include "openvino/op/loop.hpp" #include "openvino/op/search_sorted.hpp" #include "openvino/op/stft.hpp" +#include "openvino/runtime/properties.hpp" #include "ov_ops/dynamic_quantize.hpp" #include "intel_gpu/plugin/common_utils.hpp" diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 587bf23a23d15c..9885b075109e1a 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -21,6 +21,7 @@ ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { m_user_properties = other.m_user_properties; + m_is_finalized = other.m_is_finalized; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } @@ -28,6 +29,7 @@ ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { m_user_properties = other.m_user_properties; + m_is_finalized = other.m_is_finalized; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } @@ -51,6 +53,11 @@ void ExecutionConfig::apply_rt_info(std::shared_ptr context, con } void ExecutionConfig::finalize_impl(std::shared_ptr context) { + if (m_help) { + print_help(); + exit(-1); + } + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); apply_hints(info); if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { From b0b5ab271c4ae88d76f98239a97f3d5551fb48e8 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 23 Dec 2024 16:35:56 +0400 Subject: [PATCH 11/44] Hide config class members Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 16 ++++++ src/inference/src/dev/plugin_config.cpp | 14 ++++- .../intel_gpu/runtime/execution_config.hpp | 9 +++- src/plugins/intel_gpu/src/graph/broadcast.cpp | 2 +- src/plugins/intel_gpu/src/graph/crop.cpp | 2 +- src/plugins/intel_gpu/src/graph/eltwise.cpp | 2 +- .../intel_gpu/src/graph/fully_connected.cpp | 2 +- src/plugins/intel_gpu/src/graph/gather.cpp | 2 +- .../graph_optimizer/add_required_reorders.cpp | 2 +- .../graph_optimizer/build_implementations.cpp | 2 +- .../graph_optimizer/graph_initializations.cpp | 4 +- .../graph_optimizer/propagate_constants.cpp | 6 ++- .../select_preferred_formats.cpp | 2 +- .../src/graph/impls/ocl/fully_connected.cpp | 2 +- .../impls/ocl/kernel_selector_helper.cpp | 4 +- .../src/graph/impls/ocl/kernels_cache.cpp | 10 ++-- .../impls/onednn/primitive_onednn_base.h | 8 +-- .../impls/registry/implementation_manager.cpp | 2 +- .../registry/non_max_suppression_impls.cpp | 2 +- .../intel_gpu/src/graph/layout_optimizer.cpp | 2 +- src/plugins/intel_gpu/src/graph/network.cpp | 6 +-- .../src/graph/non_max_suppression.cpp | 2 +- src/plugins/intel_gpu/src/graph/permute.cpp | 2 +- .../intel_gpu/src/graph/primitive_inst.cpp | 8 +-- src/plugins/intel_gpu/src/graph/program.cpp | 54 ++++++++----------- .../src/graph/program_dump_graph.cpp | 2 +- src/plugins/intel_gpu/src/graph/reorder.cpp | 2 +- src/plugins/intel_gpu/src/graph/reshape.cpp | 2 +- .../src/graph/scatter_elements_update.cpp | 2 +- 
 .../intel_gpu/src/graph/scatter_nd_update.cpp |  2 +-
 .../intel_gpu/src/graph/scatter_update.cpp    |  2 +-
 src/plugins/intel_gpu/src/graph/select.cpp    |  2 +-
 .../intel_gpu/src/graph/strided_slice.cpp     |  2 +-
 .../intel_gpu/src/plugin/compiled_model.cpp   | 28 +++++-----
 src/plugins/intel_gpu/src/plugin/graph.cpp    | 16 ++----
 .../intel_gpu/src/plugin/ops/condition.cpp    | 11 ++--
 src/plugins/intel_gpu/src/plugin/ops/loop.cpp |  5 +-
 src/plugins/intel_gpu/src/plugin/plugin.cpp   | 12 ++---
 .../intel_gpu/src/plugin/program_builder.cpp  | 43 +++++++++------
 .../src/plugin/sync_infer_request.cpp         |  8 +--
 .../src/plugin/transformations_pipeline.cpp   | 12 ++---
 .../src/runtime/execution_config.cpp          | 13 ++++-
 .../intel_gpu/src/runtime/ocl/ocl_engine.cpp  |  2 +-
 .../intel_gpu/src/runtime/ocl/ocl_stream.cpp  |  8 +--
 src/plugins/intel_gpu/src/runtime/stream.cpp  |  4 +-
 45 files changed, 191 insertions(+), 154 deletions(-)

diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
index 04e384cc26d35e..9e566b216590cb 100644
--- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp
+++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
@@ -6,6 +6,7 @@
 #include
 #include

+#include "openvino/core/attribute_visitor.hpp"
 #include "openvino/runtime/iremote_context.hpp"
 #include "openvino/runtime/properties.hpp"
 #include "openvino/core/except.hpp"
@@ -47,6 +48,19 @@
 #define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \
     ConfigOption m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)};

+#define OV_CONFIG_DECLARE_GETTERS(PropertyNamespace, PropertyVar, Visibility, ...) \
+    const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const { \
+        if (m_is_finalized) { \
+            return m_ ## PropertyVar.value; \
+        } else { \
+            if (m_user_properties.find(PropertyNamespace::PropertyVar.name()) != m_user_properties.end()) { \
+                return m_user_properties.at(PropertyNamespace::PropertyVar.name()).as<decltype(PropertyNamespace::PropertyVar)::value_type>(); \
+            } else { \
+                return m_ ## PropertyVar.value; \
+            } \
+        } \
+    }
+
 #define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) \
     m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar;

@@ -192,6 +206,8 @@ class OPENVINO_RUNTIME_API PluginConfig {

     void finalize(std::shared_ptr context, const ov::RTMap& rt_info);

+    bool visit_attributes(ov::AttributeVisitor& visitor) const;
+
 protected:
     virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {}
     virtual void apply_debug_options(std::shared_ptr context);
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index ca1c87cce1b659..b21547f40a57df 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -67,7 +67,7 @@ ov::Any PluginConfig::get_property(const std::string& name, const std::vector
-    const static std::vector allowed_visibility = {OptionVisibility::RELEASE};
+    const static std::vector allowed_visibility = {OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL, OptionVisibility::DEBUG};
     const bool throw_on_error = true;
     set_property(config, allowed_visibility, throw_on_error);
 }
@@ -121,6 +121,18 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R
     m_is_finalized = true;
 }

+bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) const {
+    // for (const auto& prop : m_user_properties) {
+    //     visitor.on_attribute(prop.first + "__user", prop.second.as());
+    // }
+    // for (const auto& prop : m_options_map) {
+    //     visitor.on_attribute(prop.first + "__internal", prop.second->get_any().as());
+    // }
+    // visitor.on_attribute("is_finalized", m_is_finalized);
+
+    return true;
+}
+
 void PluginConfig::apply_debug_options(std::shared_ptr context) {
     static std::vector allowed_visibility = {
         OptionVisibility::RELEASE,
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp
index b96a558171c203..f8f639d8f67a25 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp
@@ -21,7 +21,10 @@ struct ExecutionConfig : public ov::PluginConfig {
     ExecutionConfig(const ExecutionConfig& other);
     ExecutionConfig& operator=(const ExecutionConfig& other);

-    #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__)
+    void finalize(cldnn::engine& engine);
+    using ov::PluginConfig::finalize;
+
+    #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__)
     #include "intel_gpu/runtime/options.inl"
     #undef OV_CONFIG_OPTION

@@ -35,6 +38,10 @@ struct ExecutionConfig : public ov::PluginConfig {
     void apply_execution_hints(const cldnn::device_info& info);
     void apply_performance_hints(const cldnn::device_info& info);
     void apply_priority_hints(const cldnn::device_info& info);
+
+    #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__)
+    #include "intel_gpu/runtime/options.inl"
+    #undef OV_CONFIG_OPTION
 };

 }  // namespace ov::intel_gpu
diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp
index f2037c94979e0c..95d060ef0b760e 100644
--- a/src/plugins/intel_gpu/src/graph/broadcast.cpp
+++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp
@@ -149,7 +149,7 @@ void broadcast_inst::update_output_memory() {
     // Can_be_optimized nodes are allocating from memory_pool too. In this case,
     // we need release the legacy output memory from memory pool explicitly.
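Before the long run of call-site updates that follows, it is worth seeing what one generated accessor actually does. Hand-expanding OV_CONFIG_DECLARE_GETTERS for a sample option (expansion written out here for illustration; ov::hint::inference_precision has value_type ov::element::Type):

    const ov::element::Type& get_inference_precision() const {
        if (m_is_finalized) {
            // After finalize(): hints are resolved, the internal value is authoritative.
            return m_inference_precision.value;
        }
        // Before finalize(): a pending user-set value takes precedence.
        if (m_user_properties.find(ov::hint::inference_precision.name()) != m_user_properties.end()) {
            return m_user_properties.at(ov::hint::inference_precision.name()).as<ov::element::Type>();
        }
        return m_inference_precision.value;
    }

This is why replacing the public members with get_xxx() calls below is not purely cosmetic: reads made before finalization now see user-set values instead of stale defaults.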
if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index 85be51a788c842..fec36fb9ff5c81 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -278,7 +278,7 @@ void crop_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout()); diff --git a/src/plugins/intel_gpu/src/graph/eltwise.cpp b/src/plugins/intel_gpu/src/graph/eltwise.cpp index 491867dc206d31..581f3f08dc120d 100644 --- a/src/plugins/intel_gpu/src/graph/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/eltwise.cpp @@ -393,7 +393,7 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) : ""); } } else { - bool use_new_shape_infer = network.get_config().m_allow_new_shape_infer; + bool use_new_shape_infer = network.get_config().get_allow_new_shape_infer(); auto input0_pshape = node.get_input_pshape(0); for (size_t i = 1; i < inputs_count; ++i) { diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index d635e87c7494d0..cc3956393c4103 100644 --- a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -250,7 +250,7 @@ kernel_impl_params fully_connected_inst::get_fake_aligned_params(kernel_impl_par } } - GPU_DEBUG_IF(orig_impl_param.get_program().get_config().m_disable_fake_alignment) { + GPU_DEBUG_IF(orig_impl_param.get_program().get_config().get_disable_fake_alignment()) { can_apply_fake_alignment = false; } diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp index e32a45ef00aab5..7d503faf8a2255 100644 --- a/src/plugins/intel_gpu/src/graph/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/gather.cpp @@ -150,7 +150,7 @@ void gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index 0ef04576494665..cffb4457000380 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -161,7 +161,7 @@ bool add_required_reorders::test_format(cldnn::program_node& node, format reques } void add_required_reorders::run(program& p) { - bool optimize_data = p.get_config().m_optimize_data; + bool optimize_data = p.get_config().get_optimize_data(); auto usr_itr = p.get_processing_order().begin(); while (usr_itr != p.get_processing_order().end()) { auto& usr = *usr_itr++; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp index 999e103c3fe200..ef4300c33bfea1 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp @@ -11,7 +11,7 @@ using namespace cldnn; void build_implementations::run(program& p) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "pass::build_implementations"); - if (p.get_config().m_partial_build_program) { + if (p.get_config().get_partial_build_program()) { return; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp index daa3b5ddc41501..6c38bce8dd9e31 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp @@ -17,7 +17,7 @@ using namespace cldnn; namespace cldnn { void graph_initializations::set_outputs(program& p) { - auto custom_outputs = p.get_config().m_custom_outputs.value; + auto custom_outputs = p.get_config().get_custom_outputs(); if (!custom_outputs.empty()) { for (auto const& output : custom_outputs) { OPENVINO_ASSERT(p.has_node(output), "not found custom output node in current cldnn::program: ", output); @@ -37,7 +37,7 @@ void graph_initializations::set_outputs(program& p) { void graph_initializations::run(program& p) { set_outputs(p); - auto forcing_map = p.get_config().m_force_implementations.value; + auto forcing_map = p.get_config().get_force_implementations(); for (auto& kv : forcing_map) { if (p.has_node(kv.first)) { p.get_node(kv.first).set_forced_impl_type(kv.second.impl_type); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index cc04b3efe0913c..c632019cf91b01 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "pass_manager.h" #include "program_node.h" #include "intel_gpu/runtime/engine.hpp" @@ -143,8 +144,9 @@ propagate_constants::calculate(engine& engine, return {}; 
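The cf_config.finalize(engine) call added just below uses the new engine-based overload declared in execution_config.hpp earlier in this patch. Its likely shape, inferred from the program.cpp hunks further down that stop building a remote context by hand before finalizing (a sketch under that assumption, not code shown verbatim in the patch):

    void ExecutionConfig::finalize(cldnn::engine& engine) {
        // Wrap the engine's device in a transient remote context and defer to the base class.
        auto ctx = std::make_shared<RemoteContextImpl>("GPU", std::vector<cldnn::device::ptr>{engine.get_device()});
        ov::PluginConfig::finalize(ctx, {});
    }

It lets passes like this one, which have an engine but no remote context, re-finalize a modified copy of the config.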
ExecutionConfig cf_config = config; - cf_config.m_optimize_data = false; - cf_config.m_custom_outputs = const_outputs; + cf_config.set_property(ov::intel_gpu::optimize_data(false)); + cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); + cf_config.finalize(engine); network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); std::map, std::shared_ptr>> weightless_cache_map; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index 9313544928b6b4..a5d98be69771a9 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -74,7 +74,7 @@ void select_preferred_formats::run(program& p) { } #endif // ENABLE_ONEDNN_FOR_GPU - auto forcing_map = p.get_config().m_force_implementations.value; + auto forcing_map = p.get_config().get_force_implementations(); for (auto n : p.get_processing_order()) { n->recalc_output_layout(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index a22d23ca4dfed9..b33a391dadea4c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -203,7 +203,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { params.quantization = kernel_selector::QuantizationType::NONE; } - params.dynamic_quantization_group_size = impl_param.get_program().get_config().m_dynamic_quantization_group_size; + params.dynamic_quantization_group_size = impl_param.get_program().get_config().get_dynamic_quantization_group_size(); return params; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 4fea0bbe644d63..9b73d9711a5a32 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -1204,13 +1204,13 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p params.engineInfo.ip_version = device_info.ip_version; params.engineInfo.arch = kernel_selector::gpu_arch(static_cast::type>(device_info.arch)); - auto impl_forcing = config.m_force_implementations.value; + auto impl_forcing = config.get_force_implementations(); if (impl_forcing.count(param_info.desc->id) != 0) { params.forceImplementation = impl_forcing.at(param_info.desc->id).kernel_name; } - params.allowStaticInputReordering = config.m_optimize_data || config.m_allow_static_input_reorder; + params.allowStaticInputReordering = config.get_optimize_data() || config.get_allow_static_input_reorder(); params.allowInputReordering = false; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 50f1b1ed9ab879..0d90dc31e691f9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -111,7 +111,7 @@ namespace cldnn { std::mutex kernels_cache::_mutex; std::string kernels_cache::get_cache_path() const { - auto path = _config.m_cache_dir.value; + auto path = _config.get_cache_dir(); if (path.empty()) { return {}; } @@ -123,12 +123,12 @@ std::string kernels_cache::get_cache_path() const { } bool 
kernels_cache::is_cache_enabled() const { - if (!_config.m_allow_new_shape_infer && - (_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SPEED)) { + if (!_config.get_allow_new_shape_infer() && + (_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SPEED)) { return false; } - return !_config.m_cache_dir.value.empty(); + return !_config.get_cache_dir().empty(); } size_t kernels_cache::get_max_kernels_per_batch() const { @@ -136,7 +136,7 @@ size_t kernels_cache::get_max_kernels_per_batch() const { GPU_DEBUG_IF(debug_config->max_kernels_per_batch >= 1) { return static_cast(debug_config->max_kernels_per_batch); } - return _config.m_max_kernels_per_batch; + return _config.get_max_kernels_per_batch(); } void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::vector* all_batches) const { diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 34910d5d73531d..4aca436bdd34d8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -47,7 +47,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _attrs(attrs), _pd(pd) { - _enable_profiling = config.m_enable_profiling; + _enable_profiling = config.get_enable_profiling(); _scratchpad_md = _pd.scratchpad_desc(); @@ -70,7 +70,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _pd(), _prim() { - _enable_profiling = config.m_enable_profiling; + _enable_profiling = config.get_enable_profiling(); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { _enable_profiling = true; @@ -318,7 +318,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { private: std::string get_cache_directory(const ExecutionConfig& config) const { - auto path = config.m_cache_dir.value; + auto path = config.get_cache_dir(); if (path.empty()) { return {}; } @@ -343,7 +343,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { void build_primitive(const ExecutionConfig& config) { auto cache_outpath = get_cache_directory(config); - if (!config.m_allow_new_shape_infer) { + if (!config.get_allow_new_shape_infer()) { cache_outpath = ""; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp index b135d9af73f31f..0ce180380f14b5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp @@ -58,7 +58,7 @@ std::unique_ptr ImplementationManager::create(const program_node if (auto impl = create_impl(node, params)) { update_impl(*impl, params); impl->set_node_params(node); - impl->can_share_kernels = node.get_program().get_config().m_enable_kernels_reuse; + impl->can_share_kernels = node.get_program().get_config().get_enable_kernels_reuse(); return impl; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp index 142b4b45fbf001..ce461632631d15 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp @@ -59,7 +59,7 @@ const std::vector>& 
Registry(scores_layout.get_partial_shape()[0].get_length()); const size_t kClassNum = static_cast(scores_layout.get_partial_shape()[1].get_length()); const size_t kNStreams = - static_cast(node.get_program().get_config().m_num_streams.value); + static_cast(node.get_program().get_config().get_num_streams()); const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; return kKeyValue > 64; } diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 84458b58279028..b64a4744360071 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -435,7 +435,7 @@ bool should_use_winograd_2x3_s1(const convolution_node& node, layout const& input_layout, layout const& weights_layout, bool output_size_handling_enabled) { - bool disable_winograd_conv = node.get_program().get_config().m_disable_winograd_convolution; + bool disable_winograd_conv = node.get_program().get_config().get_disable_winograd_convolution(); if (disable_winograd_conv) return false; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 653ff2f57ee385..1c61b2c3fba82c 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -180,9 +180,9 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo , _memory_pool(new memory_pool(program->get_engine())) , _internal(is_internal) , _is_primary_stream(is_primary_stream) - , _enable_profiling(program->get_config().m_enable_profiling) + , _enable_profiling(program->get_config().get_enable_profiling()) , _reset_arguments(true) - , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().m_buffers_preallocation_ratio)) { + , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_buffers_preallocation_ratio())) { if (!_internal) { net_id = get_unique_net_id(); } @@ -398,7 +398,7 @@ void network::calculate_weights_cache_capacity() { } // Sum all weights constants for each stream - required_mem_size += weights_const_size * _config.m_num_streams.value; + required_mem_size += weights_const_size * _config.get_num_streams(); // Add all other constants (shared between streams) required_mem_size += total_const_size - weights_const_size; diff --git a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp index 725f3333a0f132..df29ed36a7fd12 100644 --- a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp @@ -157,7 +157,7 @@ void non_max_suppression_gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
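For a concrete feel of the kKeyValue heuristic a few hunks up (values hypothetical):

    // kBatchNum = 4, kClassNum = 10, kNStreams = 2:
    // kKeyValue = 4 * std::min<size_t>(10, 8) * 2 = 64  -> 64 > 64 is false
    // raising kNStreams to 4 gives 128                  -> the predicate flips to true

The stream count scales the estimated amount of parallel work linearly, which is why it participates in the threshold at all; this commit only changes how it is read (get_num_streams() instead of the now-hidden m_num_streams member).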
if (static_cast(_outputs[i]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[i].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[i] = {_network.get_engine().reinterpret_buffer(input_memory(i), _impl_params->get_output_layout(i))}; diff --git a/src/plugins/intel_gpu/src/graph/permute.cpp b/src/plugins/intel_gpu/src/graph/permute.cpp index 41252797f09eca..c4fddfde978c2d 100644 --- a/src/plugins/intel_gpu/src/graph/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/permute.cpp @@ -146,7 +146,7 @@ void permute_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 2f2a755c675cf1..53932dfd91a466 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -163,7 +163,7 @@ static memory::ptr get_memory_from_pool(engine& _engine, OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout without upper bound"); // Use layout with max tensor for dynamic shape with upper bound - if (_node.get_program().get_config().m_enable_memory_pool) { + if (_node.get_program().get_config().get_enable_memory_pool()) { if (curr_memory != nullptr) pool.release_memory(curr_memory, _node.get_unique_id(), _node.id(), net_id); return pool.get_memory(layout, @@ -2042,7 +2042,7 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool : _network(network) , _node(&node) , _node_output_layout(node.get_output_layout()) - , _use_shared_kernels(node.get_program().get_config().m_enable_kernels_reuse) + , _use_shared_kernels(node.get_program().get_config().get_enable_kernels_reuse()) , _impl_params(node.get_kernel_impl_params()) , _impl(node.get_selected_impl() ? 
node.get_selected_impl()->clone() : nullptr) , _runtime_memory_dependencies(node.get_memory_dependencies()) @@ -2596,8 +2596,8 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() { ExecutionConfig subgraph_config{ ov::intel_gpu::allow_static_input_reorder(true), ov::intel_gpu::allow_new_shape_infer(true), - ov::enable_profiling(get_network().get_config().m_enable_profiling), - ov::intel_gpu::use_onednn(get_network().get_config().m_use_onednn) + ov::enable_profiling(get_network().get_config().get_enable_profiling()), + ov::intel_gpu::use_onednn(get_network().get_config().get_use_onednn()) }; auto prog = program::build_program(get_network().get_engine(), t, diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 271a866e3b5cf2..98c61b021477d5 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -108,8 +108,8 @@ using namespace cldnn; using namespace ov::intel_gpu; static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) { - int streams = (num_streams > 0) ? num_streams : config.m_compilation_num_threads.value; - auto priority = config.m_host_task_priority; + int streams = (num_streams > 0) ? num_streams : config.get_compilation_num_threads(); + auto priority = config.get_host_task_priority(); auto core_type = ov::hint::SchedulingCoreType::ANY_CORE; switch (priority) { case ov::hint::Priority::LOW: core_type = ov::hint::SchedulingCoreType::ECORE_ONLY; break; @@ -117,7 +117,7 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E case ov::hint::Priority::HIGH: core_type = ov::hint::SchedulingCoreType::PCORE_ONLY; break; default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority); } - bool enable_cpu_pinning = config.m_enable_cpu_pinning; + bool enable_cpu_pinning = config.get_enable_cpu_pinning(); ov::threading::IStreamsExecutor::Config task_executor_config(tags, streams, @@ -163,8 +163,7 @@ program::program(engine& engine_ref, program_node::reset_unique_id(); if (no_optimizations) { init_graph(); - auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); - _config.finalize(ctx, {}); + _config.finalize(_engine); } else { build_program(is_internal); if (_is_body_program) { @@ -200,8 +199,7 @@ program::program(engine& engine_ref, _task_executor(std::move(task_executor)), processing_order(), is_internal(is_internal) { - auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); - _config.finalize(ctx, {}); + _config.finalize(_engine); init_primitives(); init_program(); prepare_nodes(nodes); @@ -214,9 +212,8 @@ program::program(engine& engine, const ExecutionConfig& config) _config(config), processing_order() { init_primitives(); - auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); - _config.finalize(ctx, {}); - new_shape_infer = _config.m_allow_new_shape_infer; + _config.finalize(_engine); + new_shape_infer = _config.get_allow_new_shape_infer(); _layout_optimizer = std::make_unique(); } @@ -228,7 +225,7 @@ void program::init_program() { set_options(); pm = std::unique_ptr(new pass_manager(*this)); - new_shape_infer = _config.m_allow_new_shape_infer; + new_shape_infer = _config.get_allow_new_shape_infer(); if (_task_executor == nullptr) _task_executor = program::make_task_executor(_config); @@ -236,7 +233,7 @@ void program::init_program() { 
kernel_selector::KernelBase::get_db().get_batch_headers(), kernel_selector::KernelBase::get_db().get_cm_batch_headers())); - _kernels_cache->set_kernels_reuse(get_config().m_enable_kernels_reuse); + _kernels_cache->set_kernels_reuse(_config.get_enable_kernels_reuse()); if (!_compilation_context) _compilation_context = program::make_compilation_context(_config); @@ -490,15 +487,11 @@ void program::set_options() { static std::atomic id_gen{0}; prog_id = ++id_gen; assert(prog_id != 0); - if (!_config.m_force_implementations.value.empty()) { - _config.m_optimize_data = true; - } } void program::build_program(bool is_internal) { init_graph(); - auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); - _config.finalize(ctx, {}); + _config.finalize(_engine); { pre_optimize_graph(is_internal); } run_graph_compilation(); { post_optimize_graph(is_internal); } @@ -528,9 +521,6 @@ void program::init_graph() { for (auto& node : processing_order) { if (!node->is_type()) node->get_output_layouts(); - if (node->is_type()) { - _config.m_use_onednn = true; - } } // Perform initial shape_of subgraphs markup apply_opt_pass(); @@ -548,7 +538,7 @@ void program::pre_optimize_graph(bool is_internal) { bool output_size_handling_enabled = analyze_output_size_handling_need(); - bool optimize_data = _config.m_optimize_data; + bool optimize_data = _config.get_optimize_data(); if (optimize_data) { apply_opt_pass(); } @@ -625,7 +615,7 @@ void program::post_optimize_graph(bool is_internal) { reorder_factory rf; - bool optimize_data = _config.m_optimize_data; + bool optimize_data = _config.get_optimize_data(); if (!is_internal) { apply_opt_pass(rf); @@ -633,7 +623,7 @@ void program::post_optimize_graph(bool is_internal) { apply_opt_pass(false, true); // TODO: do we need it at this place also? 
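
Note that the force_implementations/optimize_data coupling deleted from set_options() above is not dropped: it reappears in ExecutionConfig::finalize_impl() later in this patch, so dependent options are now derived in one place when finalize() runs. A minimal usage sketch, assuming the API introduced by this patch (forcing_map is a hypothetical placeholder):

    ExecutionConfig cfg;
    cfg.set_property(ov::intel_gpu::force_implementations(forcing_map));
    cfg.finalize(engine);                      // runs finalize_impl() and hint resolution
    OPENVINO_ASSERT(cfg.get_optimize_data());  // implied by a non-empty forcing map
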
- auto partial_build = _config.m_partial_build_program; + auto partial_build = _config.get_partial_build_program(); #ifdef GPU_DEBUG_CONFIG GPU_DEBUG_GET_INSTANCE(debug_config); if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) { @@ -652,7 +642,7 @@ void program::post_optimize_graph(bool is_internal) { // Recalculate processing order after all graph transformation to keep optimal primitives ordering // for OOO queue - if (_config.m_queue_type == QueueTypes::out_of_order) + if (_config.get_queue_type() == QueueTypes::out_of_order) get_processing_order().calculate_BFS_processing_order(); apply_opt_pass(); @@ -776,7 +766,7 @@ const std::vector& program::get_allocating_order(bool forced_updat } void program::prepare_memory_dependencies() { - if (!_config.m_enable_memory_pool) + if (!_config.get_enable_memory_pool()) return; for (auto& node : get_processing_order()) { node->add_memory_dependency(node->get_unique_id()); @@ -1408,7 +1398,7 @@ program::primitives_info program::get_current_stage_info() const { void program::save_pass_info(std::string pass_name) { // TODO: Directory path here can be probably changed to some bool flag - if (!_config.m_dump_graphs.value.empty()) + if (!_config.get_dump_graphs().empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } @@ -1436,7 +1426,7 @@ const program::primitives_info& program::get_primitives_info() const { return pr void program::apply_opt_pass(base_pass& pass) { pm->run(*this, pass); } void program::set_layout_optimizer_attributes(layout_optimizer& lo) { - lo.set_implementation_forcing(_config.m_force_implementations); + lo.set_implementation_forcing(_config.get_force_implementations()); // first pass to set layout optimization_attributes for topology @@ -1662,15 +1652,15 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1); #ifdef ENABLE_ONEDNN_FOR_GPU - bool enable_onednn_for_tests = get_config().m_optimize_data || is_internal_program(); + bool enable_onednn_for_tests = get_config().get_optimize_data() || is_internal_program(); auto& engine = get_engine(); if (engine.get_device_info().vendor_id == INTEL_VENDOR_ID && - get_config().m_queue_type == QueueTypes::in_order && + get_config().get_queue_type() == QueueTypes::in_order && enable_onednn_for_tests) { if (engine.get_device_info().supports_immad) { lo.add_all_onednn_impls_optimization_attribute(); } else { - if (get_config().m_use_onednn) { + if (get_config().get_use_onednn()) { lo.enable_onednn_for(); } } @@ -1884,8 +1874,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) { init_program(); std::shared_ptr mapped_memory = nullptr; - std::string weights_path = _config.m_weights_path; - if (_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE && + std::string weights_path = _config.get_weights_path(); + if (_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE && ov::util::validate_weights_path(weights_path)) { mapped_memory = ov::load_mmap_object(weights_path); } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index a14cbfd0139d29..aeae56173f4fd0 100644 --- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -157,7 +157,7 @@ void dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - 
auto path = config.m_dump_graphs.value; + auto path = config.get_dump_graphs(); if (path.empty()) { return {}; } diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index c3b7c472d59437..75a4011eeefc25 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -287,7 +287,7 @@ void reorder_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index 1250a73e881e9a..b6e5b23a0f6476 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -320,7 +320,7 @@ void reshape_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp index 7ddea056745a53..a651baa50002fa 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp @@ -75,7 +75,7 @@ void scatter_elements_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp index 56c8bf6607c4a3..3dbec05dbbe3b3 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp @@ -86,7 +86,7 @@ void scatter_nd_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
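
These reorder/reshape/scatter hunks, together with the permute, non-max-suppression and strided_slice changes elsewhere in this patch, all touch the same recurring fragment of update_output_memory(): when the node is optimized out, any output still owned by the memory pool is released first, and the output is then re-aliased to the input buffer. Condensed, the shared pattern is:

    if (_outputs[0] && _node->get_program().get_config().get_enable_memory_pool()) {
        _network.get_memory_pool().release_memory(_outputs[0].get(),
                                                  _node->get_unique_id(), _node->id(), _network.get_id());
    }
    _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())};
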
if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_update.cpp index 0d34b8f95e9f1a..947507533796e0 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_update.cpp @@ -66,7 +66,7 @@ void scatter_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index 35d1db8fc389b1..a3b6ad9166c964 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -95,7 +95,7 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p 3, ""); - bool allow_new_shape_infer = network.get_program()->get_config().m_allow_new_shape_infer; + bool allow_new_shape_infer = network.get_program()->get_config().get_allow_new_shape_infer(); // Broadcast check is performed in ngraph shape infer of select when allow_new_shape_infer=true if (!allow_new_shape_infer) { if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) { diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index baff08bc8de82e..674e7649bc9820 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -208,7 +208,7 @@ void strided_slice_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 8bd73dbdce7029..3fc1439d56a727 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -20,17 +20,17 @@ namespace ov::intel_gpu { namespace { std::shared_ptr create_task_executor(const std::shared_ptr& plugin, const ExecutionConfig& config) { - if (config.m_exclusive_async_requests) { + if (config.get_exclusive_async_requests()) { // exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with // the CPU behavior return plugin->get_executor_manager()->get_executor("GPU"); - } else if (config.get_property(ov::hint::enable_cpu_pinning) || - config.get_property(ov::hint::enable_cpu_reservation)) { - bool enable_cpu_pinning = config.get_property(ov::hint::enable_cpu_pinning); - bool enable_cpu_reservation = config.get_property(ov::hint::enable_cpu_reservation); + } else if (config.get_enable_cpu_pinning() || + config.get_enable_cpu_reservation()) { + bool enable_cpu_pinning = config.get_enable_cpu_pinning(); + bool enable_cpu_reservation = config.get_enable_cpu_reservation(); return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.m_num_streams.value, + config.get_num_streams(), 1, ov::hint::SchedulingCoreType::PCORE_ONLY, enable_cpu_reservation, @@ -38,7 +38,7 @@ std::shared_ptr create_task_executor(const std::sh } else { return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.m_num_streams.value, + config.get_num_streams(), 0, ov::hint::SchedulingCoreType::ANY_CORE, false, @@ -62,7 +62,7 @@ CompiledModel::CompiledModel(std::shared_ptr model, m_outputs(ov::ICompiledModel::outputs()), m_loaded_from_cache(false) { auto graph_base = std::make_shared(model, m_context, m_config, 0); - for (uint16_t n = 0; n < m_config.m_num_streams.value; n++) { + for (uint16_t n = 0; n < m_config.get_num_streams(); n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -157,7 +157,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, } auto graph_base = std::make_shared(ib, context, m_config, 0); - for (uint16_t n = 0; n < m_config.m_num_streams.value; n++) { + for (uint16_t n = 0; n < m_config.get_num_streams(); n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -179,8 +179,8 @@ std::shared_ptr CompiledModel::create_infer_request() co void CompiledModel::export_model(std::ostream& model) const { // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching // which requires the weights_path. 
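
For context, create_task_executor() above selects between three executor setups; a sketch of the control flow only:

    if (config.get_exclusive_async_requests())
        return plugin->get_executor_manager()->get_executor("GPU");  // one shared executor, streams disabled
    // otherwise a CPUStreamsExecutor with get_num_streams() streams is built:
    // pinned/reserved on performance cores when cpu_pinning/cpu_reservation is
    // requested, unconstrained (ANY_CORE) otherwise.
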
- ov::CacheMode cache_mode = m_config.m_cache_mode; - std::string weights_path = m_config.m_weights_path; + ov::CacheMode cache_mode = m_config.get_cache_mode(); + std::string weights_path = m_config.get_weights_path(); if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) return; @@ -188,7 +188,7 @@ void CompiledModel::export_model(std::ostream& model) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); - const ov::EncryptionCallbacks encryption_callbacks = m_config.m_cache_encryption_callbacks; + const ov::EncryptionCallbacks encryption_callbacks = m_config.get_cache_encryption_callbacks(); // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty. const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; @@ -290,8 +290,8 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } else if (name == ov::loaded_from_cache) { return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache}; } else if (name == ov::optimal_number_of_infer_requests) { - unsigned int nr = m_config.m_num_streams.value; - if (m_config.m_performance_mode != ov::hint::PerformanceMode::LATENCY) + unsigned int nr = m_config.get_num_streams(); + if (m_config.get_performance_mode() != ov::hint::PerformanceMode::LATENCY) nr *= 2; return decltype(ov::optimal_number_of_infer_requests)::value_type {nr}; } else if (name == ov::execution_devices) { diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index ab69fd5d4e26f8..4f1f541b725090 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -85,11 +85,7 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context ib >> perfEntry.parentPrimitive; } } - { - ib >> m_config.m_partial_build_program.value; - ib >> m_config.m_optimize_data.value; - ib >> m_config.m_allow_new_shape_infer.value; - } + // ib >> m_config; auto imported_prog = std::make_shared(get_engine(), m_config); imported_prog->load(ib); @@ -173,7 +169,7 @@ void Graph::build(std::shared_ptr program) { auto external_queue = m_context->get_external_queue(); if (external_queue) { - OPENVINO_ASSERT(m_config.m_num_streams == 1, "[GPU] Throughput streams can't be used with shared queue!"); + OPENVINO_ASSERT(m_config.get_num_streams() == 1, "[GPU] Throughput streams can't be used with shared queue!"); const auto &engine = program->get_engine(); m_network = std::make_shared(program, engine.create_stream(m_config, external_queue), m_stream_id); } else { @@ -205,7 +201,7 @@ bool Graph::use_external_queue() const { std::shared_ptr Graph::get_runtime_model(std::vector& primitives_info, bool filter_const_primitives) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::get_runtime_model"); - if (m_config.m_enable_profiling) { + if (m_config.get_enable_profiling()) { try { // Update may throw an exception for step-by-step runtime graph dump, // since network->get_executed_primitives() method can't be called before network execution @@ -516,11 +512,7 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) { ob << perf_item.second.second.parentPrimitive; } } - { - ob << m_config.m_partial_build_program.value; - ob << m_config.m_optimize_data.value; - ob << m_config.m_allow_new_shape_infer.value; - } + // ob << m_config; 
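
The ov::optimal_number_of_infer_requests branch above boils down to one request per stream, doubled outside of latency mode so the next request can be prepared while the current one executes. For example, with num_streams == 2 under a THROUGHPUT hint:

    unsigned int nr = m_config.get_num_streams();  // 2
    if (m_config.get_performance_mode() != ov::hint::PerformanceMode::LATENCY)
        nr *= 2;                                   // reported value: 4
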
ob.set_stream(m_network->get_stream_ptr().get()); m_network->get_program()->save(ob); diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 889a066aa74325..20690957fea5e6 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/op/if.hpp" #include "intel_gpu/plugin/program_builder.hpp" #include "intel_gpu/primitives/condition.hpp" @@ -21,13 +22,9 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ << ", num inputs: " << op->get_input_size() << std::endl; auto config = p.get_config(); - { - auto custom_outputs = config.m_custom_outputs.value; - if (!custom_outputs.empty()) { - config.m_custom_outputs = std::vector({}); - } - } - config.m_allow_new_shape_infer = op->is_dynamic() || p.use_new_shape_infer(); + config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); + config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); + config.finalize(p.get_engine()); ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 5d808f4d041275..9665918f88d0be 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -298,8 +298,9 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr& ProgramBuilder prog(ctx->get_engine(), config); - float query_model_ratio = config.m_query_model_ratio; + float query_model_ratio = config.get_query_model_ratio(); auto supported = ov::get_supported_nodes(model, [&config,&ctx,this](std::shared_ptr& model) { @@ -361,8 +361,8 @@ std::shared_ptr Plugin::import_model(std::istream& model, config.set_property(_orig_config); config.finalize(context_impl, {}); - ov::CacheMode cache_mode = config.m_cache_mode; - ov::EncryptionCallbacks encryption_callbacks = config.m_cache_encryption_callbacks; + ov::CacheMode cache_mode = config.get_cache_mode(); + ov::EncryptionCallbacks encryption_callbacks = config.get_cache_encryption_callbacks(); const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; std::unique_ptr ib_ptr = @@ -379,8 +379,8 @@ std::shared_ptr Plugin::import_model(std::istream& model, return nullptr; } - std::string weights_path = config.m_weights_path; - if (config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) { + std::string weights_path = config.get_weights_path(); + if (config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) { return nullptr; } @@ -680,7 +680,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); const auto& config = m_configs_map.at(device_id); - uint32_t n_streams = static_cast(config.m_num_streams.value); + uint32_t n_streams = static_cast(config.get_num_streams()); uint64_t occupied_device_mem = 0; auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), 
options).as>(); auto occupied_usm_dev = statistic_result.find("usm_device_current"); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 297b9d42266efb..cd69e741e435de 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/split.hpp" @@ -106,19 +107,10 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml"; CustomLayer::LoadFromFile(config_path, m_custom_layers, true); - auto custom_layers_config = m_config.m_config_file.value; + auto custom_layers_config = m_config.get_config_file(); CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); auto ops = model->get_ordered_ops(); - // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, - // smaller # of kernels are built compared to static models. - // So having smaller batch size is even better for dynamic model as we can do more parallel build. - if (model->is_dynamic()) { - m_config.m_max_kernels_per_batch = 4; - } else { - m_config.m_max_kernels_per_batch = 8; - } - m_program = build(ops, partial_build, is_inner_program); } @@ -159,14 +151,33 @@ std::shared_ptr ProgramBuilder::build(const std::vectoris_dynamic()) { + is_dynamic = true; + break; + } + } if (is_inner_program) { - allow_new_shape_infer = (m_config.m_allow_new_shape_infer || allow_new_shape_infer); + allow_new_shape_infer = (m_config.get_allow_new_shape_infer() || allow_new_shape_infer); + } + + // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, + // smaller # of kernels are built compared to static models. + // So having smaller batch size is even better for dynamic model as we can do more parallel build. 
+    if (is_dynamic) {
+        m_config.set_property(ov::intel_gpu::max_kernels_per_batch(4));
+    } else {
+        m_config.set_property(ov::intel_gpu::max_kernels_per_batch(8));
+    }

-    m_config.m_partial_build_program = partial_build;
-    m_config.m_optimize_data = true;
-    m_config.m_allow_new_shape_infer = allow_new_shape_infer;
+    m_config.set_property(ov::intel_gpu::partial_build_program(partial_build));
+    m_config.set_property(ov::intel_gpu::optimize_data(true));
+    m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer));
+    //if (has_lstm)
+    m_config.set_property(ov::intel_gpu::use_onednn(true));
+    m_config.finalize(m_engine);

     prepare_build();
     {
@@ -310,7 +321,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr
         prim->origin_op_name = op.get_friendly_name();
         prim->origin_op_type_name = op.get_type_name();

-    if (this->m_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE) {
+    if (this->m_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) {
         if (auto data_prim = dynamic_cast<cldnn::data*>(prim.get())) {
             auto rt_info = op.get_rt_info();
@@ -341,7 +352,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr
         prim->origin_op_type_name = prim->type_string();
     }

-    if (this->m_config.m_enable_profiling && should_profile) {
+    if (this->m_config.get_enable_profiling() && should_profile) {
         profiling_ids.push_back(prim_id);
         init_profile_info(*prim);
     }
diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
index 6d74722a5b435a..7c3a1b0e452fa8 100644
--- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
+++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
@@ -113,8 +113,8 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c
     : ov::ISyncInferRequest(compiled_model)
     , m_graph(compiled_model->get_graph(0))
     , m_context(std::static_pointer_cast(compiled_model->get_context_impl()))
-    , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().m_buffers_preallocation_ratio))
-    , m_enable_profiling(m_graph->get_config().m_enable_profiling)
+    , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_buffers_preallocation_ratio()))
+    , m_enable_profiling(m_graph->get_config().get_enable_profiling())
     , m_use_external_queue(m_graph->use_external_queue()) {
     GPU_DEBUG_GET_INSTANCE(debug_config);
     GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) {
@@ -422,7 +422,7 @@ void SyncInferRequest::wait() {
         auto mem_shape = output_layout.get_shape();
         // In case of old shape infer we need to shrink out tensor shape to avoid redundant dimensions that occur due to rank extension
         // For new shape infer this shouldn't happen, thus remove that WA once we migrate to ngraph-based shape infer for all cases
-        if (!m_graph->get_config().m_allow_new_shape_infer) {
+        if (!m_graph->get_config().get_allow_new_shape_infer()) {
             OPENVINO_ASSERT(port.get_partial_shape().is_static(), "[GPU] Unexpected dynamic shape for legacy shape inference");
             OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor");
             mem_shape = port.get_shape();
@@ -895,7 +895,7 @@ std::vector SyncInferRequest::prepare_input(const std::string
     auto memory = device_tensor->get_memory();
     // WA to extend shape to ranks expected by legacy shape infer.
Remove after full migration to new shape infer - if (!m_graph->get_config().m_allow_new_shape_infer) { + if (!m_graph->get_config().get_allow_new_shape_infer()) { auto new_layout = memory->get_layout(); new_layout.set_partial_shape(m_graph->get_input_layouts().at(input_idx).get_shape()); memory = engine.reinterpret_buffer(*memory, new_layout); diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index c9e7c155158448..339ee80ba7a3eb 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -289,7 +289,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const ov::element::TypeVector supported_woq_types = {ov::element::u8, ov::element::i8, ov::element::u4, ov::element::i4}; bool enableInt8; ov::element::Type infer_precision = ov::element::undefined; - bool unroll_loop = config.m_enable_loop_unrolling; + bool unroll_loop = config.get_enable_loop_unrolling(); { ov::pass::Manager manager("Plugin:GPU"); auto pass_config = manager.get_pass_config(); @@ -302,7 +302,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); - enableInt8 = config.m_enable_lp_transformations && is_model_quantized; + enableInt8 = config.get_enable_lp_transformations() && is_model_quantized; manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }, @@ -335,7 +335,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { }; // Add conversion from FP data types to infer precision if it's specified - infer_precision = config.m_inference_precision; + infer_precision = config.get_inference_precision(); if (infer_precision != ov::element::undefined) { if (!fp_precision_supported(infer_precision)) infer_precision = fallback_precision; @@ -416,7 +416,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1); } - if (!config.m_enable_sdpa_optimization) + if (!config.get_enable_sdpa_optimization()) return false; auto sdpa = ov::as_type_ptr(node); @@ -1032,7 +1032,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - auto kv_cache_compression_dt = config.m_kv_cache_precision; + auto kv_cache_compression_dt = config.get_kv_cache_precision(); manager.register_pass(kv_cache_compression_dt, device_info.supports_immad); manager.register_pass(); @@ -1052,7 +1052,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { - auto dynamic_quantization_group_size = config.m_dynamic_quantization_group_size; + auto dynamic_quantization_group_size = config.get_dynamic_quantization_group_size(); pass_config->set_callback([=](const_node_ptr& root) -> bool { for (size_t i = 0 ; i < root->get_input_node_shared_ptr(0)->get_output_size(); ++i) { if (root->get_input_node_shared_ptr(0)->get_output_element_type(i) == ov::element::Type_t::f32) { diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 9885b075109e1a..529e0543f6de67 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -21,7 +21,7 @@ ExecutionConfig::ExecutionConfig() : ov::PluginConfig() 
{ ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { m_user_properties = other.m_user_properties; - m_is_finalized = other.m_is_finalized; + m_is_finalized = false; // copy is not automatically finalized for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } @@ -29,13 +29,18 @@ ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { m_user_properties = other.m_user_properties; - m_is_finalized = other.m_is_finalized; + m_is_finalized = false; // copy is not automatically finalized for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } return *this; } +void ExecutionConfig::finalize(cldnn::engine& engine) { + auto ctx = std::make_shared("GPU", std::vector{engine.get_device()}); + PluginConfig::finalize(ctx, {}); +} + void ExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); if (!info.supports_immad) { @@ -79,6 +84,10 @@ void ExecutionConfig::finalize_impl(std::shared_ptr context) { if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { m_dynamic_quantization_group_size = 32; } + + if (!get_force_implementations().empty()) { + m_optimize_data = true; + } } void ExecutionConfig::apply_hints(const cldnn::device_info& info) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index b954f494abfe5a..857283b9558647 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -64,7 +64,7 @@ void ocl_engine::create_onednn_engine(const ExecutionConfig& config) { auto casted = std::dynamic_pointer_cast(_device); OPENVINO_ASSERT(casted, "[GPU] Invalid device type stored in ocl_engine"); - std::string cache_dir = config.m_cache_dir; + std::string cache_dir = config.get_cache_dir(); if (cache_dir.empty()) { _onednn_engine = std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get())); } else { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp index 61844cd640ea41..bc01a8174292e4 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp @@ -189,22 +189,22 @@ void set_arguments_impl(ocl_kernel_type& kernel, } // namespace ocl_stream::ocl_stream(const ocl_engine &engine, const ExecutionConfig& config) - : stream(config.m_queue_type, stream::get_expected_sync_method(config)) + : stream(config.get_queue_type(), stream::get_expected_sync_method(config)) , _engine(engine) { auto context = engine.get_cl_context(); auto device = engine.get_cl_device(); ocl::command_queues_builder queue_builder; - queue_builder.set_profiling(config.m_enable_profiling); + queue_builder.set_profiling(config.get_enable_profiling()); queue_builder.set_out_of_order(m_queue_type == QueueTypes::out_of_order); OPENVINO_ASSERT(m_sync_method != SyncMethods::none || m_queue_type == QueueTypes::in_order, "[GPU] Unexpected sync method (none) is specified for out_of_order queue"); bool priorty_extensions = engine.extension_supported("cl_khr_priority_hints") && engine.extension_supported("cl_khr_create_command_queue"); - 
queue_builder.set_priority_mode(config.m_queue_priority, priorty_extensions); + queue_builder.set_priority_mode(config.get_queue_priority(), priorty_extensions); bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_throttle_mode(config.m_queue_throttle, throttle_extensions); + queue_builder.set_throttle_mode(config.get_queue_throttle(), throttle_extensions); bool queue_families_extension = engine.get_device_info().supports_queue_families; queue_builder.set_supports_queue_families(queue_families_extension); diff --git a/src/plugins/intel_gpu/src/runtime/stream.cpp b/src/plugins/intel_gpu/src/runtime/stream.cpp index 2043afb9f3869c..913d84d8f476f5 100644 --- a/src/plugins/intel_gpu/src/runtime/stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/stream.cpp @@ -20,8 +20,8 @@ QueueTypes stream::detect_queue_type(engine_types engine_type, void* queue_handl } SyncMethods stream::get_expected_sync_method(const ExecutionConfig& config) { - auto profiling = config.m_enable_profiling; - auto queue_type = config.m_queue_type; + auto profiling = config.get_enable_profiling(); + auto queue_type = config.get_queue_type(); return profiling ? SyncMethods::events : queue_type == QueueTypes::out_of_order ? SyncMethods::barriers : SyncMethods::none; } From f57e7a1661e12f30b64ff221b30c178e19587367 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 24 Dec 2024 09:38:23 +0400 Subject: [PATCH 12/44] Options visibility update Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 44 +++++++++++-------- src/inference/src/dev/plugin_config.cpp | 30 +++---------- src/inference/tests/unit/config_test.cpp | 25 +++++------ src/plugins/intel_gpu/src/plugin/plugin.cpp | 11 ++--- .../src/runtime/execution_config.cpp | 18 ++++---- .../unit/fusions/convolution_fusion_test.cpp | 2 +- .../fusions/fully_connected_fusion_test.cpp | 8 ++-- .../graph_manipulation_gpu_test.cpp | 2 +- .../passes/prepare_primitive_fusing_test.cpp | 2 +- .../remove_redundant_reorders_tests.cpp | 12 ++--- .../test_cases/concatenation_gpu_test.cpp | 6 +-- .../unit/test_cases/reorder_gpu_test.cpp | 2 +- .../tests/unit/test_utils/test_utils.cpp | 6 +-- 13 files changed, 79 insertions(+), 89 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 9e566b216590cb..36785d071bfc8e 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -78,12 +78,32 @@ namespace ov { -enum class OptionVisibility { - RELEASE = 0, // Option can be set for any build type via public interface, environment and config file - RELEASE_INTERNAL = 1, // Option can be set for any build type via environment and config file only - DEBUG = 2, // Option can be set for debug builds only via environment and config file +enum class OptionVisibility : uint8_t { + RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file + RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only + DEBUG = 1 << 2, // Option can be set for debug builds only via environment and config file +#ifdef ENABLE_DEBUG_CAPS + ANY = 0x07, // Any visibility is valid including DEBUG +#else + ANY = 0x03, // Any visibility is valid excluding DEBUG +#endif }; +inline OptionVisibility operator&(OptionVisibility a, 
OptionVisibility b) {
+    typedef std::underlying_type<OptionVisibility>::type underlying_type;
+    return static_cast<OptionVisibility>(static_cast<underlying_type>(a) & static_cast<underlying_type>(b));
+}
+
+inline OptionVisibility operator|(OptionVisibility a, OptionVisibility b) {
+    typedef std::underlying_type<OptionVisibility>::type underlying_type;
+    return static_cast<OptionVisibility>(static_cast<underlying_type>(a) | static_cast<underlying_type>(b));
+}
+
+inline OptionVisibility operator~(OptionVisibility a) {
+    typedef std::underlying_type<OptionVisibility>::type underlying_type;
+    return static_cast<OptionVisibility>(~static_cast<underlying_type>(a));
+}
+
 inline std::ostream& operator<<(std::ostream& os, const OptionVisibility& visibility) {
     switch (visibility) {
     case OptionVisibility::RELEASE: os << "RELEASE"; break;
@@ -185,23 +205,14 @@ class OPENVINO_RUNTIME_API PluginConfig {
     PluginConfig(PluginConfig&& other) = delete;
     PluginConfig& operator=(PluginConfig&& other) = delete;

-    void set_property(const ov::AnyMap& properties);
-    Any get_property(const std::string& name) const;
+    void set_property(const ov::AnyMap& properties, OptionVisibility allowed_visibility = OptionVisibility::ANY, bool throw_on_error = true);
+    Any get_property(const std::string& name, OptionVisibility allowed_visibility = OptionVisibility::ANY) const;

     template <typename... Properties>
     util::EnableIfAllStringAny<void, Properties...> set_property(Properties&&... properties) {
         set_property(ov::AnyMap{std::forward<Properties>(properties)...});
     }

-    template <typename T, PropertyMutability mutability>
-    T get_property(const ov::Property<T, mutability>& property) const {
-        if (is_set_by_user(property)) {
-            return m_user_properties.at(property.name()).template as<T>();
-        }
-        OPENVINO_ASSERT(m_options_map.find(property.name()) != m_options_map.end(), "Property not found: ", property.name());
-        return static_cast<ConfigOption<T>*>(m_options_map.at(property.name()))->value;
-    }
-
     std::string to_string() const;

     void finalize(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info);
@@ -236,9 +247,6 @@ class OPENVINO_RUNTIME_API PluginConfig {
         }
     }

-    ov::Any get_property(const std::string& name, const std::vector<OptionVisibility>& allowed_visibility) const;
-    void set_property(const ov::AnyMap& properties, const std::vector<OptionVisibility>& allowed_visibility, bool throw_on_error);
-
     ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const;
     ov::AnyMap read_env(const std::vector<std::string>& prefixes) const;
     void cleanup_unsupported(ov::AnyMap& config) const;
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index b21547f40a57df..bbfe88d8737f80 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -48,31 +48,21 @@ size_t get_terminal_width() {

 namespace ov {

-ov::Any PluginConfig::get_property(const std::string& name) const {
-    const static std::vector<OptionVisibility> allowed_visibility = {OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL};
-    return get_property(name, allowed_visibility);
-}
-
-ov::Any PluginConfig::get_property(const std::string& name, const std::vector<OptionVisibility>& allowed_visibility) const {
+ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility allowed_visibility) const {
     if (m_user_properties.find(name) != m_user_properties.end()) {
         return m_user_properties.at(name);
     }

     auto option = get_option_ptr(name);
-    if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) {
+    // The option's visibility bits must be fully contained in the allowed mask
+    if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
         OPENVINO_THROW("Couldn't get unknown property: ", name);
     }

     return option->get_any();
 }

-void PluginConfig::set_property(const AnyMap& config) {
-    const static std::vector<OptionVisibility> allowed_visibility =
{OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL, OptionVisibility::DEBUG};
-    const bool throw_on_error = true;
-    set_property(config, allowed_visibility, throw_on_error);
-}
-
-void PluginConfig::set_property(const ov::AnyMap& config, const std::vector<OptionVisibility>& allowed_visibility, bool throw_on_error) {
+void PluginConfig::set_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) {
     OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited");

     for (auto& kv : config) {
@@ -80,7 +70,7 @@ void PluginConfig::set_property(const ov::AnyMap& config, const std::vector<OptionVisibility>& allowed_visibility, bool throw_on_error)
-        if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) {
+        if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
             if (throw_on_error)
                 OPENVINO_THROW("Couldn't set unknown property: ", name);
             else
@@ -134,25 +124,17 @@ bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) const {
 }

 void PluginConfig::apply_debug_options(std::shared_ptr<IRemoteContext> context) {
-    static std::vector<OptionVisibility> allowed_visibility = {
-        OptionVisibility::RELEASE,
-        OptionVisibility::RELEASE_INTERNAL,
-        OptionVisibility::DEBUG
-#ifdef ENABLE_DEBUG_CAPS
-#endif
-    };
-
     const bool throw_on_error = false;

     if (context) {
         ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name());
         cleanup_unsupported(config_properties);
-        set_property(config_properties, allowed_visibility, throw_on_error);
+        set_property(config_properties, OptionVisibility::ANY, throw_on_error);
     }

     ov::AnyMap env_properties = read_env({"OV_"});
     cleanup_unsupported(env_properties);
-    set_property(env_properties, allowed_visibility, throw_on_error);
+    set_property(env_properties, OptionVisibility::ANY, throw_on_error);
 }

 ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const {
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index 42b7fba115a273..d2c99585ab015b 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -40,7 +40,6 @@ struct NotEmptyTestConfig : public ov::PluginConfig {
     OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "")
     OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "")
     #undef OV_CONFIG_OPTION
-
     }

     NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() {
         for (const auto& kv : other.m_options_map) {
             m_options_map.at(kv.first)->set_any(kv.second->get_any());
         }
     }

-    #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__)
+    #define OV_CONFIG_OPTION(...)
OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__)
     OV_CONFIG_RELEASE_OPTION(, bool_property, true, "")
     OV_CONFIG_RELEASE_OPTION(, int_property, -1, "")
     OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "")
@@ -97,26 +96,26 @@ TEST(plugin_config, can_create_not_empty_config) {

 TEST(plugin_config, can_set_get_property) {
     NotEmptyTestConfig cfg;
-    ASSERT_NO_THROW(cfg.get_property(bool_property));
-    ASSERT_EQ(cfg.get_property(bool_property), true);
+    ASSERT_NO_THROW(cfg.get_bool_property());
+    ASSERT_EQ(cfg.get_bool_property(), true);
     ASSERT_NO_THROW(cfg.set_property(bool_property(false)));
-    ASSERT_EQ(cfg.get_property(bool_property), false);
+    ASSERT_EQ(cfg.get_bool_property(), false);
 }

 TEST(plugin_config, throw_for_unsupported_property) {
     NotEmptyTestConfig cfg;
-    ASSERT_ANY_THROW(cfg.get_property(unsupported_property));
+    ASSERT_ANY_THROW(cfg.get_property(unsupported_property.name()));
     ASSERT_ANY_THROW(cfg.set_property(unsupported_property(10.0f)));
 }

 TEST(plugin_config, can_direct_access_to_properties) {
     NotEmptyTestConfig cfg;
-    ASSERT_EQ(cfg.m_int_property.value, cfg.get_property(int_property));
+    ASSERT_EQ(cfg.m_int_property.value, cfg.get_int_property());
     ASSERT_NO_THROW(cfg.set_property(int_property(1)));
     ASSERT_EQ(cfg.m_int_property.value, -1); // user property doesn't impact member value until finalize() is called

     cfg.m_int_property.value = 2;
-    ASSERT_EQ(cfg.get_property(int_property), 1); // still 1 as user property was set previously
+    ASSERT_EQ(cfg.get_int_property(), 1); // still 1 as user property was set previously
 }

 TEST(plugin_config, finalization_updates_member) {
@@ -132,19 +131,19 @@ TEST(plugin_config, finalization_updates_member) {

 TEST(plugin_config, get_property_before_finalization_returns_user_property_if_set) {
     NotEmptyTestConfig cfg;
-    ASSERT_EQ(cfg.get_property(bool_property), true); // default value
+    ASSERT_EQ(cfg.get_bool_property(), true); // default value
     ASSERT_EQ(cfg.m_bool_property.value, true); // default value

     cfg.m_bool_property.value = false; // update member directly
-    ASSERT_EQ(cfg.get_property(bool_property), false); // OK, return the class member value as no user property was set
+    ASSERT_EQ(cfg.get_bool_property(), false); // OK, return the class member value as no user property was set

     ASSERT_NO_THROW(cfg.set_property(bool_property(true)));
     ASSERT_TRUE(cfg.is_set_by_user(bool_property));
-    ASSERT_EQ(cfg.get_property(bool_property), true); // now user property value is returned
+    ASSERT_EQ(cfg.get_bool_property(), true); // now user property value is returned
     ASSERT_EQ(cfg.m_bool_property.value, false); // but class member is not updated

     cfg.finalize(nullptr, {});
-    ASSERT_EQ(cfg.get_property(bool_property), cfg.m_bool_property.value); // equal after finalization
+    ASSERT_EQ(cfg.get_bool_property(), cfg.m_bool_property.value); // equal after finalization
     ASSERT_FALSE(cfg.is_set_by_user(bool_property)); // and user property is cleared
 }

@@ -194,7 +193,7 @@ TEST(plugin_config, can_copy_config) {
     ASSERT_EQ(cfg2.m_high_level_property.value, "value1");
     ASSERT_EQ(cfg2.m_low_level_property.value, "value2");
     ASSERT_EQ(cfg2.m_int_property.value, 1);
-    ASSERT_EQ(cfg2.get_property(bool_property), false); // ensure user properties are copied too
+    ASSERT_EQ(cfg2.get_bool_property(), false); // ensure user properties are copied too

     // check that cfg1 modification doesn't impact a copy
     cfg1.set_property(high_level_property("value3"));
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp
b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 2370d60dda808d..937077886b10ad 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -39,6 +39,7 @@ #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/performance_heuristics.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/util/common_util.hpp" #include "openvino/util/weights_path.hpp" @@ -219,7 +220,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(orig_config); + config.set_property(orig_config, OptionVisibility::RELEASE); config.finalize(context, get_rt_info(*model)); auto transformed_model = clone_and_transform_model(model, config, context); @@ -268,7 +269,7 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params) void Plugin::set_property(const ov::AnyMap &config) { auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) { - config.set_property(user_config); + config.set_property(user_config, OptionVisibility::RELEASE); // Check that custom layers config can be loaded if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { CustomLayerMap custom_layers; @@ -303,7 +304,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& auto ctx = get_default_context(device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(orig_config); + config.set_property(orig_config, OptionVisibility::RELEASE); config.finalize(ctx, get_rt_info(*model)); ProgramBuilder prog(ctx->get_engine(), config); @@ -358,7 +359,7 @@ std::shared_ptr Plugin::import_model(std::istream& model, } ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(_orig_config); + config.set_property(_orig_config, OptionVisibility::RELEASE); config.finalize(context_impl, {}); ov::CacheMode cache_mode = config.get_cache_mode(); @@ -466,7 +467,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] get_property: Couldn't find config for GPU with id ", device_id); const auto& c = m_configs_map.at(device_id); - return c.get_property(name); + return c.get_property(name, OptionVisibility::RELEASE); } auto StringRightTrim = [](std::string string, std::string substring, bool case_sensitive = true) { diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 529e0543f6de67..f185a2f321489d 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -52,7 +52,7 @@ void ExecutionConfig::apply_rt_info(std::shared_ptr context, con // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not // using that mechanism. 
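
Every public entry point in plugin.cpp now passes OptionVisibility::RELEASE, so RELEASE_INTERNAL and DEBUG options are rejected when they arrive through the public API, while apply_debug_options() still reaches them with OptionVisibility::ANY via the config file and OV_-prefixed environment variables (and with throw_on_error = false there). A behavioral sketch; the debug property name below is hypothetical:

    ExecutionConfig cfg;
    cfg.set_property({{ov::enable_profiling.name(), true}}, OptionVisibility::RELEASE);  // OK: RELEASE option
    cfg.set_property({{"GPU_DUMP_GRAPHS_PATH", "/tmp"}}, OptionVisibility::RELEASE);     // throws: not a RELEASE option
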
- if (get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + if (get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { apply_rt_info_property(ov::weights_path, rt_info); } } @@ -71,7 +71,7 @@ void ExecutionConfig::finalize_impl(std::shared_ptr context) { if (info.supports_immad) { m_use_onednn = true; } - if (get_property(ov::intel_gpu::use_onednn)) { + if (get_use_onednn()) { m_queue_type = QueueTypes::in_order; } @@ -98,7 +98,7 @@ void ExecutionConfig::apply_hints(const cldnn::device_info& info) { void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::execution_mode)) { - const auto mode = get_property(ov::hint::execution_mode); + const auto mode = get_execution_mode(); if (!is_set_by_user(ov::hint::inference_precision)) { if (mode == ov::hint::ExecutionMode::ACCURACY) { m_inference_precision = ov::element::undefined; @@ -114,7 +114,7 @@ void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::performance_mode)) { - const auto mode = get_property(ov::hint::performance_mode); + const auto mode = get_performance_mode(); if (!is_set_by_user(ov::num_streams)) { if (mode == ov::hint::PerformanceMode::LATENCY) { m_num_streams = 1; @@ -124,18 +124,18 @@ void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { } } - if (get_property(ov::num_streams) == ov::streams::AUTO) { + if (get_num_streams() == ov::streams::AUTO) { int32_t n_streams = std::max(info.num_ccs, 2); m_num_streams = n_streams; } - if (get_property(ov::internal::exclusive_async_requests)) { + if (get_exclusive_async_requests()) { m_num_streams = 1; } // Allow kernels reuse only for single-stream scenarios - if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { - if (get_property(ov::num_streams) != 1) { + if (get_enable_kernels_reuse()) { + if (get_num_streams() != 1) { m_enable_kernels_reuse = false; } } @@ -143,7 +143,7 @@ void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::model_priority)) { - const auto priority = get_property(ov::hint::model_priority); + const auto priority = get_model_priority(); if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { m_queue_priority = priority; } diff --git a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp index 6d8f231b93576a..6fb20181eb1936 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp @@ -318,7 +318,7 @@ class WeightsPrimitiveFusingTestOneDNN : public BaseFusingTestget_layout_optimizer().set_implementation_forcing(config.get_property(ov::intel_gpu::force_implementations)); + prog->get_layout_optimizer().set_implementation_forcing(config.get_force_implementations()); program_wrapper::apply_opt_pass(*prog); ASSERT_TRUE(!has_node(*prog, "permute")); diff --git a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp index 1fbeab7e67ac2d..b46033f15d77db 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp @@ -136,7 +136,7 @@ 
TEST(remove_redundant_reorders, skip_reorder_fusing_when_sibling_not_support_pad auto prog = program::build_program(engine, topology, config, false, true); config.set_property(ov::intel_gpu::optimize_data(true)); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -165,7 +165,7 @@ TEST(remove_redundant_reorders, not_to_fuse_reshape_with_fused_prims) { auto prog = program::build_program(engine, topology, config, false, true); program_wrapper::apply_opt_pass(*prog); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -204,7 +204,7 @@ TEST(remove_redundant_reorders, not_to_fuse_permute) { auto prog = program::build_program(engine, topology, config, false, true); ASSERT_NE(prog, nullptr); - bool opt_data = config.get_property(ov::intel_gpu::optimize_data); + bool opt_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog); program_wrapper::apply_opt_pass(*prog, opt_data); @@ -266,7 +266,7 @@ TEST(remove_redundant_reorders, remove_fused) { auto prog = program::build_program(engine, topology, config, false, true); program_wrapper::apply_opt_pass(*prog); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -293,7 +293,7 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_mvn_dyn) { config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config, false, true); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -336,7 +336,7 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_concat_dyn) { config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config, false, true); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp index b145fa603063bc..1b2d0be5907293 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp @@ -1422,7 +1422,7 @@ struct concat_gpu_4d_implicit : public concat_gpu { } auto outputs = concat_network->execute(); - bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); + bool concat_opt_enabled = config.get_optimize_data(); bool concat_opt_result = std::static_pointer_cast(concat_network->get_primitive("concat"))->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); @@ -1642,7 +1642,7 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu { } auto outputs = concat_network.execute(); - bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); + bool concat_opt_enabled = config.get_optimize_data(); bool concat_opt_result = 
std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); @@ -1805,7 +1805,7 @@ struct concat_gpu_4d_explicit : public concat_gpu { } auto outputs = concat_network.execute(); - bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); + bool concat_opt_enabled = config.get_optimize_data(); bool concat_opt_result = std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); // If sibling is using onednn impl and batch > 1, the onednn impl cannot process the implicit concat'ed buffer. diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp index 8caad2b576d734..8fd32877291d44 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp @@ -1913,7 +1913,7 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant) auto outputs = net.execute(); auto executed_primitives = net.get_executed_primitives(); - if (config.get_property(ov::intel_gpu::queue_type) != QueueTypes::out_of_order) + if (config.get_queue_type() != QueueTypes::out_of_order) GTEST_SKIP(); ASSERT_TRUE(executed_primitives.count("in") == 1); diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp index 6af8572d323907..23641dbdba7a63 100644 --- a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp @@ -54,14 +54,14 @@ void generic_test::run_single_test(bool is_caching_test) { } } std::string input_name = "input" + std::to_string(i); - if ((i == 0) && generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { + if ((i == 0) && generic_params->network_config.get_optimize_data()) { // Add reorder after the first input in case of optimize data flag since it might change the input layout. input_name = "input0_init"; } // First input is provided to the network as input_layout. // Other inputs are provided as input_layout if optimize data flag is off. Otherwise they are provided as data. - if ((i == 0) || !generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { + if ((i == 0) || !generic_params->network_config.get_optimize_data()) { topology.add(input_layout(input_name, input_mems[i]->get_layout())); input_layouts_names.push_back(input_name); } else { @@ -74,7 +74,7 @@ void generic_test::run_single_test(bool is_caching_test) { } } - if (generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { + if (generic_params->network_config.get_optimize_data()) { // Add reorder after the first input in case of optimize data flag since it might change the input layout. 
        topology.add(reorder("input0", input_info("input0_init"), input_mems[0]->get_layout()));
    }

From d84741e7d0481dd6bbe39655e1b9ff4c731697b9 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Tue, 24 Dec 2024 10:33:25 +0400
Subject: [PATCH 13/44] Fixes and visit_attributes method impl

Signed-off-by: Vladimir Paramuzov
---
 .../openvino/runtime/plugin_config.hpp        |  2 +-
 src/inference/src/dev/plugin_config.cpp       | 38 ++++++++++---------
 .../include/intel_gpu/runtime/options.inl     |  2 +-
 src/plugins/intel_gpu/src/plugin/graph.cpp    | 33 +++++++++++++++-
 4 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
index 36785d071bfc8e..2eacd157ec8b94 100644
--- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp
+++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
@@ -217,7 +217,7 @@ class OPENVINO_RUNTIME_API PluginConfig {

     void finalize(std::shared_ptr context, const ov::RTMap& rt_info);

-    bool visit_attributes(ov::AttributeVisitor& visitor) const;
+    bool visit_attributes(ov::AttributeVisitor& visitor);

 protected:
     virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {}
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index bbfe88d8737f80..bebcd891251616 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -55,7 +55,7 @@ ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility all
     }

     auto option = get_option_ptr(name);
-    if ((allowed_visibility & option->get_visibility()) == option->get_visibility()) {
+    if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
         OPENVINO_THROW("Couldn't get unknown property: ", name);
     }

@@ -70,7 +70,7 @@ void PluginConfig::set_property(const ov::AnyMap& config, OptionVisibility allow
         auto& val = kv.second;

         auto option = get_option_ptr(name);
-        if ((allowed_visibility & option->get_visibility()) == option->get_visibility()) {
+        if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
             if (throw_on_error)
                 OPENVINO_THROW("Couldn't set unknown property: ", name);
             else
@@ -111,14 +111,16 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R
     m_is_finalized = true;
 }

-bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) const {
-    // for (const auto& prop : m_user_properties) {
-    //     visitor.on_attribute(prop.first + "__user", prop.second.as<std::string>());
-    // }
-    // for (const auto& prop : m_options_map) {
-    //     visitor.on_attribute(prop.first + "__internal", prop.second->get_any().as<std::string>());
-    // }
-    // visitor.on_attribute("is_finalized", m_is_finalized);
+bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) {
+    for (const auto& prop : m_user_properties) {
+        auto val = prop.second.as<std::string>();
+        visitor.on_attribute(prop.first + "__user", val);
+    }
+    for (const auto& prop : m_options_map) {
+        auto val = prop.second->get_any().as<std::string>();
+        visitor.on_attribute(prop.first + "__internal", val);
+    }
+    visitor.on_attribute("is_finalized", m_is_finalized);
     return true;
 }

@@ -212,20 +214,20 @@ void PluginConfig::cleanup_unsupported(ov::AnyMap& config) const {
 }

 std::string PluginConfig::to_string() const {
-    std::stringstream s;
+    std::stringstream ss;

-    s << "-----------------------------------------\n";
-    s << "PROPERTIES:\n";
+    ss << "-----------------------------------------\n";
+    ss << "PROPERTIES:\n";

     for (const auto& option : m_options_map) {
-        s << "\t" << option.first << ": " << option.second->get_any().as<std::string>() << std::endl;
+        ss << "\t" << option.first << ": " << option.second->get_any().as<std::string>() << std::endl;
     }
-    s << "USER PROPERTIES:\n";
+    ss << "USER PROPERTIES:\n";
     for (const auto& user_prop : m_user_properties) {
-        s << "\t" << user_prop.first << ": " << user_prop.second.as<std::string>() << std::endl;
+        ss << "\t" << user_prop.first << ": " << user_prop.second.as<std::string>() << std::endl;
     }
-    return s.str();
+    return ss.str();
 }

 void PluginConfig::print_help() const {
@@ -276,7 +278,7 @@ void PluginConfig::print_help() const {
     const size_t max_name_width = static_cast<size_t>(std::get<0>(*max_name_length_item).size() + std::get<1>(*max_name_length_item).size());
     const size_t terminal_width = get_terminal_width();
-    ss << std::left << std::setw(max_name_width) << ("Option name") << " | " << " Description " << "\n";
+    ss << std::left << std::setw(max_name_width) << "Option name" << " | " << " Description " << "\n";
     ss << std::left << std::setw(terminal_width) << std::setfill('-') << "" << "\n";
     for (auto& kv : options_desc) {
         ss << format_text(std::get<0>(kv), std::get<1>(kv), std::get<2>(kv), max_name_width, terminal_width) << "\n";
     }
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
index 1941aaec69b2bf..1e4f7076887a3e 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
@@ -45,6 +45,7 @@ OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, partial_build_program, false, "
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "Switch between new and old shape inference flow. Shall be removed soon")
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "Threshold for preallocation feature in case when it uses ratio policy")
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disable onednn for usage for particular model/platform")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into batch for more efficient ocl compilation")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 0 - Disabled, 4 - Maximum verbosity")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file")
@@ -58,7 +59,6 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, "", "Save intermediate in/ou
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, "", "Save csv file with memory pool info to specified folder")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, "", "Space separated list of iterations where other dump options should be enabled")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measure and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into batch for more efficient ocl compilation")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asynchronously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime inplace optimizations for operations like concat and crop")
diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp
index 4f1f541b725090..a28d986ce46160 100644
--- a/src/plugins/intel_gpu/src/plugin/graph.cpp
+++ b/src/plugins/intel_gpu/src/plugin/graph.cpp
@@ -2,7 +2,9 @@
 // SPDX-License-Identifier: Apache-2.0
 //
+#include "intel_gpu/graph/serialization/helpers.hpp"
 #include "intel_gpu/runtime/layout.hpp"
+#include "openvino/runtime/plugin_config.hpp"
 #include "openvino/runtime/threading/executor_manager.hpp"
 #include "openvino/runtime/exec_model_info.hpp"
 #include "openvino/pass/serialize.hpp"
@@ -33,6 +35,34 @@
 #include

 namespace ov::intel_gpu {
+namespace {
+
+class OstreamAttributeVisitor : public ov::AttributeVisitor {
+    cldnn::BinaryOutputBuffer& os;
+
+    template <typename T>
+    void append_attribute(const std::string& name, const T& value) {
+        os << name;
+        os << value;
+    }
+public:
+    OstreamAttributeVisitor(cldnn::BinaryOutputBuffer& os) : os(os) {}
+
+    void on_adapter(const std::string& name, ov::ValueAccessor<void>& adapter) override {
+        OPENVINO_THROW("Attribute ", name, " can't be processed\n");
+    }
+
+    void on_adapter(const std::string& name, ov::ValueAccessor<bool>& adapter) override {
+        append_attribute(name, adapter.get());
+    }
+
+    void on_adapter(const std::string& name, ov::ValueAccessor<std::string>& adapter) override {
+        append_attribute(name, adapter.get());
+    }
+};
+
+} // namespace

 Graph::Graph(std::shared_ptr<ov::Model> model, const RemoteContextImpl::Ptr& context, const ExecutionConfig& config, uint16_t stream_id)
     : m_context(context)
@@ -512,7 +542,8 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) {
             ob << perf_item.second.second.parentPrimitive;
         }
     }
-    // ob << m_config;
+    OstreamAttributeVisitor visitor(ob);
+    m_config.visit_attributes(visitor);
     ob.set_stream(m_network->get_stream_ptr().get());
     m_network->get_program()->save(ob);

From 29c978ec90ed060896adc7d05939e49c20a81f5e Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Tue, 24 Dec 2024 11:28:55 +0400
Subject: [PATCH 14/44] Refactor debug knobs

Signed-off-by: Vladimir Paramuzov
---
 .../intel_gpu/runtime/internal_properties.hpp | 56 ++++++++++++++++---
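
[Editor's note, illustration only -- not part of the patch. export_model() above streams each config attribute as a name/value pair through OstreamAttributeVisitor. A matching read-side visitor for the import path could be a minimal sketch like the one below; it assumes cldnn::BinaryInputBuffer exposes operator>> for std::string and bool, that both sides enumerate attributes in the same order, and the class name IstreamAttributeVisitor is hypothetical.]

    class IstreamAttributeVisitor : public ov::AttributeVisitor {
        cldnn::BinaryInputBuffer& ib;
    public:
        explicit IstreamAttributeVisitor(cldnn::BinaryInputBuffer& ib) : ib(ib) {}

        void on_adapter(const std::string& name, ov::ValueAccessor<void>& adapter) override {
            OPENVINO_THROW("Attribute ", name, " can't be processed\n");
        }

        void on_adapter(const std::string& name, ov::ValueAccessor<bool>& adapter) override {
            std::string key;  // consume the attribute name written by the export side
            bool val;
            ib >> key >> val;
            adapter.set(val);
        }

        void on_adapter(const std::string& name, ov::ValueAccessor<std::string>& adapter) override {
            std::string key, val;
            ib >> key >> val;
            adapter.set(val);
        }
    };

Calling m_config.visit_attributes() with such a visitor on load would restore every option in place, mirroring the save path.
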
.../include/intel_gpu/runtime/options.inl | 18 +++--- .../intel_gpu/src/graph/layout_optimizer.cpp | 5 -- src/plugins/intel_gpu/src/graph/program.cpp | 3 +- .../src/graph/program_dump_graph.cpp | 2 +- 5 files changed, 60 insertions(+), 24 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index 77a00294e8076f..ec36c924c5d185 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -35,6 +35,40 @@ inline std::ostream& operator<<(std::ostream& os, const QueueTypes& val) { return os; } +enum class DumpFormat : uint8_t { + binary = 0, + text = 1, + text_raw = 2, +}; + +inline std::ostream& operator<<(std::ostream& os, const DumpFormat& val) { + switch (val) { + case DumpFormat::binary: os << "binary"; break; + case DumpFormat::text: os << "text"; break; + case DumpFormat::text_raw: os << "text_raw"; break; + default: os << "unknown"; + } + + return os; +} + +enum class DumpTensors : uint8_t { + all = 0, + in = 1, + out = 2, +}; + +inline std::ostream& operator<<(std::ostream& os, const DumpTensors& val) { + switch (val) { + case DumpTensors::all: os << "all"; break; + case DumpTensors::in: os << "in"; break; + case DumpTensors::out: os << "out"; break; + default: os << "unknown"; + } + + return os; +} + /** * @brief Defines queue type that must be used for model execution */ @@ -55,15 +89,19 @@ static constexpr Property use_onednn{"USE_ONEDNN"} static constexpr Property help{"HELP"}; static constexpr Property verbose{"VERBOSE"}; -static constexpr Property log_to_file{"LOG_TO_FILE"}; -static constexpr Property disable_usm{"DISABLE_USM"}; +static constexpr Property log_to_file{"GPU_LOG_TO_FILE"}; +static constexpr Property disable_usm{"GPU_DISABLE_USM"}; static constexpr Property disable_onednn_post_ops_opt{"DISABLE_ONEDNN_POST_OPS_OPT"}; -static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; -static constexpr Property dump_profiling_data{"DUMP_PROFILING_DATA"}; -static constexpr Property dump_sources{"DUMP_SOURCES"}; -static constexpr Property dump_tensors{"DUMP_TENSORS"}; -static constexpr Property dump_memory_pool{"DUMP_MEMORY_POOL"}; -static constexpr Property dump_iterations{"DUMP_ITERATIONS"}; +static constexpr Property dump_graphs_path{"GPU_DUMP_GRAPHS_PATH"}; +static constexpr Property dump_profiling_data_path{"GPU_DUMP_PROFILING_DATA_PATH"}; +static constexpr Property dump_sources_path{"GPU_DUMP_SOURCES_PATH"}; +static constexpr Property dump_tensors_path{"GPU_DUMP_TENSORS_PATH"}; +static constexpr Property dump_tensors{"DUMP_TENSORS"}; +static constexpr Property dump_layers{"GPU_DUMP_LAYERS"}; +static constexpr Property dump_tensors_format{"DUMP_TENSORS_FORMAT"}; +static constexpr Property dump_memory_pool_path{"GPU_DUMP_MEMORY_POOL_PATH"}; +static constexpr Property dump_batch_limit{"GPU_DUMP_BATCH_LIMIT"}; +static constexpr Property, ov::PropertyMutability::RW> dump_iterations{"GPU_DUMP_ITERATIONS"}; static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; @@ -75,7 +113,7 @@ static constexpr Property disable_fake_alignme static constexpr Property use_usm_host{"USE_USM_HOST"}; static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; static constexpr Property 
mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; -static constexpr Property load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; +static constexpr Property, ov::PropertyMutability::RW> load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 1e4f7076887a3e..93b4653034ab92 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -52,12 +52,16 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to spec OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, help, false, "Print help message for all config options") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable optimization pass for onednn post-ops") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data, "", "Save csv file with per-stage and per-primitive profiling data to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs, "", "Save intermediate graph representations during model compilation pipeline to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources, "", "Save generated sources for each kernel to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, "", "Save intermediate in/out tensors of each primitive to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, "", "Save csv file with memory pool info to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, "", "Space separated list of iterations where other dump options should be enabled") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data_path, "", "Save csv file with per-stage and per-primitive profiling data to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs_path, "", "Save intermediate graph representations during model compilation pipeline to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources_path, "", "Save generated sources for each kernel to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_path, "", "Save intermediate in/out tensors of each primitive to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, ov::intel_gpu::DumpTensors::all, "Tensor types to dump. Supported values: all, inputs, outputs") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_format, ov::intel_gpu::DumpFormat::text, "Format of the tensors dump. 
Supported values: binary, text, text_raw")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_layers, "", "Activate dump for specified layers only")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool_path, "", "Save csv file with memory pool info to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, std::set<int64_t>{}, "Space separated list of iterations where other dump options should be enabled")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits<int64_t>::max(), "Max number of batch elements to dump")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measure and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asynchronously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation")
@@ -68,4 +72,4 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fa
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usage for all allocations")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "Preallocation setting")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "List of layers to load raw binary")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, std::vector<std::string>{}, "List of layers to load raw binary")
diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
index b64a4744360071..067ca8c8f5fb0d 100644
--- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
+++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
@@ -439,11 +439,6 @@ bool should_use_winograd_2x3_s1(const convolution_node& node,
     if (disable_winograd_conv)
         return false;

-    // cases when NOT to use winograd
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(debug_config->disable_winograd_conv == 1)
-        return false;
-
     auto prim = node.get_primitive();
     if (input_layout.data_type != data_types::f16
         || (input_layout.is_static() && input_layout.feature() % 64 != 0)  // current algorithm is effective for ifm to be multiply of 64
diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
index 98c61b021477d5..82e6f7b708d8ca 100644
--- a/src/plugins/intel_gpu/src/graph/program.cpp
+++ b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -1397,8 +1397,7 @@ program::primitives_info program::get_current_stage_info() const {
 }

 void program::save_pass_info(std::string pass_name) {
-    // TODO: Directory path here can be probably changed to some bool flag
-    if (!_config.get_dump_graphs().empty())
+    if (!_config.get_dump_graphs_path().empty())
         optimizer_passes_info.emplace_back(pass_name, get_current_stage_info());
 }
diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp
index aeae56173f4fd0..61daf949e762f0 100644
--- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp
+++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp
@@ -157,7 +157,7 @@ void
dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - auto path = config.get_dump_graphs(); + auto path = config.get_dump_graphs_path(); if (path.empty()) { return {}; } From e830be276e7571cda545c059d840880cea577c5f Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 24 Dec 2024 15:12:36 +0400 Subject: [PATCH 15/44] split set_prop and set_user_prop again Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 5 +- src/inference/src/dev/plugin_config.cpp | 27 ++++- .../include/intel_gpu/graph/program.hpp | 1 - .../intel_gpu/plugin/program_builder.hpp | 10 +- .../intel_gpu/runtime/internal_properties.hpp | 1 + .../include/intel_gpu/runtime/options.inl | 3 +- .../graph_optimizer/prepare_buffer_fusing.cpp | 7 +- .../prepare_primitive_fusing.cpp | 8 +- .../prepare_primitive_fusing_through.cpp | 3 + .../graph/graph_optimizer/reorder_inputs.cpp | 2 +- .../src/graph/impls/ocl/kernels_cache.cpp | 22 +--- .../src/graph/impls/ocl/kernels_cache.hpp | 1 - .../impls/onednn/concatenation_onednn.hpp | 3 +- .../graph/impls/onednn/convolution_onednn.hpp | 3 +- .../impls/onednn/deconvolution_onednn.hpp | 3 +- .../impls/onednn/fully_connected_onednn.hpp | 3 +- .../src/graph/impls/onednn/gemm_onednn.hpp | 3 +- .../graph/impls/onednn/lstm_seq_onednn.hpp | 4 +- .../src/graph/impls/onednn/pooling_onednn.hpp | 3 +- .../impls/onednn/primitive_onednn_base.h | 6 +- .../src/graph/impls/onednn/reduce_onednn.hpp | 3 +- .../src/graph/impls/onednn/reorder_onednn.hpp | 3 +- .../src/graph/include/primitive_inst.h | 2 + .../src/graph/include/program_node.h | 1 + src/plugins/intel_gpu/src/graph/network.cpp | 5 +- .../intel_gpu/src/graph/primitive_inst.cpp | 15 +-- src/plugins/intel_gpu/src/graph/program.cpp | 24 +--- .../intel_gpu/src/graph/program_node.cpp | 3 +- .../fully_connected_kernel_bf_tiled.cpp | 20 ---- src/plugins/intel_gpu/src/plugin/graph.cpp | 6 +- .../intel_gpu/src/plugin/ops/condition.cpp | 2 +- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 3 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 112 ++++++++++++++++-- .../intel_gpu/src/plugin/program_builder.cpp | 45 +------ .../src/plugin/sync_infer_request.cpp | 7 +- .../dynamic_quantize_fully_connected.cpp | 6 +- .../dynamic_quantize_fully_connected.hpp | 2 +- .../src/plugin/transformations_pipeline.cpp | 15 +-- .../src/runtime/execution_config.cpp | 2 +- .../intel_gpu/src/runtime/ocl/ocl_device.cpp | 3 - 40 files changed, 209 insertions(+), 188 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 2eacd157ec8b94..08dd148b6d1c36 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -77,7 +77,7 @@ OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) namespace ov { - +#define ENABLE_DEBUG_CAPS enum class OptionVisibility : uint8_t { RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only @@ -205,7 +205,8 @@ class OPENVINO_RUNTIME_API PluginConfig { PluginConfig(PluginConfig&& other) = delete; PluginConfig& operator=(PluginConfig&& other) = delete; - void set_property(const ov::AnyMap& properties, OptionVisibility allowed_visibility = OptionVisibility::ANY, bool throw_on_error = true); + 
void set_property(const ov::AnyMap& properties); + void set_user_property(const ov::AnyMap& properties, OptionVisibility allowed_visibility = OptionVisibility::ANY, bool throw_on_error = true); Any get_property(const std::string& name, OptionVisibility allowed_visibility = OptionVisibility::ANY) const; template diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index bebcd891251616..b756894d5e414f 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -62,7 +62,18 @@ ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility all return option->get_any(); } -void PluginConfig::set_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) { +void PluginConfig::set_property(const ov::AnyMap& config) { + OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited"); + + for (auto& kv : config) { + auto& name = kv.first; + auto& val = kv.second; + + get_option_ptr(name)->set_any(val); + } +} + +void PluginConfig::set_user_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) { OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited"); for (auto& kv : config) { @@ -131,12 +142,22 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) if (context) { ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); cleanup_unsupported(config_properties); - set_property(config_properties, OptionVisibility::ANY, throw_on_error); +#ifdef ENABLE_DEBUG_CAPS + for (auto& prop : config_properties) { + std::cout << "Non default config value for " << prop.first << " = " << prop.second.as() << std::endl; + } +#endif + set_user_property(config_properties, OptionVisibility::ANY, throw_on_error); } ov::AnyMap env_properties = read_env({"OV_"}); cleanup_unsupported(env_properties); - set_property(env_properties, OptionVisibility::ANY, throw_on_error); +#ifdef ENABLE_DEBUG_CAPS + for (auto& prop : env_properties) { + std::cout << "Non default env value for " << prop.first << " = " << prop.second.as() << std::endl; + } +#endif + set_user_property(env_properties, OptionVisibility::ANY, throw_on_error); } ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index bec721ad3938a6..96bfff48820aaa 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -314,7 +314,6 @@ struct program { // if subgraph can be optimized if it consists of only inputs and corresponding outputs bool _can_be_optimized; std::unique_ptr _impls_cache; - const size_t _impls_cache_capacity = 300; std::shared_ptr _compilation_context; bool _loaded_from_cache = false; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 51087405f09769..6b28c02930d530 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -79,7 +79,7 @@ struct PerfCounter { class ProgramBuilder final { public: - ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, bool partialBuild = false, + 
ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, std::shared_ptr task_executor = nullptr, std::shared_ptr compilation_context = nullptr, bool innerProgram = false); @@ -137,8 +137,8 @@ class ProgramBuilder final { void add_primitive(const ov::Node& op, std::shared_ptr prim, std::vector aliases = {}); - bool use_new_shape_infer() const { return allow_new_shape_infer; } - bool requires_new_shape_infer(const std::shared_ptr& op) const; + bool use_new_shape_infer() const { return m_config.get_allow_new_shape_infer(); } + bool is_inner_program() const { return m_is_inner_program; } bool is_query_mode() { return queryMode; } @@ -156,8 +156,6 @@ class ProgramBuilder final { std::shared_ptr m_topology; CustomLayerMap m_custom_layers; - bool allow_new_shape_infer = false; - bool queryMode; std::shared_ptr m_task_executor; @@ -172,7 +170,7 @@ class ProgramBuilder final { void cleanup_build(); // TODO(eunsoo): remove createTopolpgyOnly argument and add another method to create topology from ngraph function - std::shared_ptr build(const std::vector>& ops, bool partialBuild = false, bool innerProgram = false); + std::shared_ptr build(const std::vector>& ops, bool innerProgram = false); void CreateSingleLayerPrimitive(const std::shared_ptr& op); }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index ec36c924c5d185..5924457193765d 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -109,6 +109,7 @@ static constexpr Property disable_runtime_buff static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property disable_fc_swiglu_fusion{"DISABLE_FC_SWIGLU_FUSION"}; static constexpr Property disable_fake_alignment{"DISABLE_FAKE_ALIGNMENT"}; static constexpr Property use_usm_host{"USE_USM_HOST"}; static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 93b4653034ab92..9d63b6b15e3368 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -46,6 +46,7 @@ OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_new_shape_infer, false, " OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "Threshold for preallocation feature in case when it uses ratio policy") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disable onednn for usage for particular model/platform") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into batch for more efficient ocl compilation") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 
0 - Disabled, 4 - Maximum verbosity")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file")
@@ -63,11 +64,11 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool_path, "", "Save csv file
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, std::set<int64_t>{}, "Space separated list of iterations where other dump options should be enabled")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits<int64_t>::max(), "Max number of batch elements to dump")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measure and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asynchronously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime inplace optimizations for operations like concat and crop")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "Disable fusions of operations as post-ops/fused-ops")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "Disable pass which merges QKV projections into single MatMul")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fc_swiglu_fusion, false, "Disable pass which merges FC and SwiGLU ops")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fake alignment feature which tries to keep gpu friendly memory alignment for arbitrary tensor shapes")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usage for all allocations")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations")
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
index 03e4af4d16359b..8e8cac35fa1fb0 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
@@ -80,8 +80,8 @@ bool concat_in_place_optimization::match(const program_node& concat_node,
     if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph())
         return false;
     bool do_runtime_buffer_fusing = true;
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) {
+    const auto& config = concat_node.get_config();
+    GPU_DEBUG_IF(config.get_disable_runtime_buffer_fusing()) {
         do_runtime_buffer_fusing = false;
     }

@@ -522,8 +522,7 @@ bool crop_in_place_optimization::match(const program_node& node,
         return false;

     if (node.get_users().size() > 0) {
-        GPU_DEBUG_GET_INSTANCE(debug_config);
-        GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing && node.is_dynamic()) {
+        GPU_DEBUG_IF(node.get_config().get_disable_runtime_buffer_fusing() && node.is_dynamic()) {
             return false;
         }
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
index ce5333f95a1b59..cc3b5ff2260ea6 100644
---
a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -56,6 +56,9 @@ using namespace cldnn; void prepare_primitive_fusing::run(program& p) { + if (p.get_config().get_disable_post_ops_fusions()) + return; + fuse_reorders(p); remove_redundant_reshape(p); fuse_swiglu(p); @@ -165,10 +168,7 @@ void prepare_primitive_fusing::fuse_reorders(program &p) { } void prepare_primitive_fusing::fuse_swiglu(program &p) { - GPU_DEBUG_GET_INSTANCE(debug_config); - bool disable_fc_swiglu_fusion = false; - GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) - disable_fc_swiglu_fusion = true; + bool disable_fc_swiglu_fusion = p.get_config().get_disable_fc_swiglu_fusion(); // Apply only for high performant GPU if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128) return; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index f63f1bf4efbe21..78b494c52645de 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -16,6 +16,9 @@ using namespace cldnn; void prepare_primitive_fusing_through::run(program& p) { + if (p.get_config().get_disable_post_ops_fusions()) + return; + auto try_fuse_through = [&](program_node& node) -> std::vector { // This function tries to fuse peer_node to first non reorder or reshape previous primitive. // It returns chain of primitives (reshapes and reorders) including potential fused_node (e.g. Conv, FC, etc) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index 7b3cbdabe639a0..269a88052e7bb0 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -704,7 +704,7 @@ void reorder_inputs::run(program& p, reorder_factory& rf) { GPU_DEBUG_LOG_PASS << " " << node_ptr->id() << " " << fmt_to_str(fmt) << std::endl; } - GPU_DEBUG_IF(debug_config->verbose >= 2) { + GPU_DEBUG_IF(p.get_config().get_verbose() >= 2) { reorder_cnt total_reorder_count = std::accumulate(p.get_processing_order().begin(), p.get_processing_order().end(), diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 0d90dc31e691f9..e5c1fe016b96df 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -131,14 +131,6 @@ bool kernels_cache::is_cache_enabled() const { return !_config.get_cache_dir().empty(); } -size_t kernels_cache::get_max_kernels_per_batch() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->max_kernels_per_batch >= 1) { - return static_cast(debug_config->max_kernels_per_batch); - } - return _config.get_max_kernels_per_batch(); -} - void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::vector* all_batches) const { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "KernelsCache::BuildAll::GetProgramSource"); std::map>> program_buckets; @@ -205,7 +197,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, // Create new kernels batch 
when the limit is reached // and current kernel's entry_point is duplicated in this kernels batch - if (current_bucket.back().kernels_counter >= get_max_kernels_per_batch() + if (current_bucket.back().kernels_counter >= _config.get_max_kernels_per_batch() || current_bucket.back().entry_point_to_id.find(entry_point) != current_bucket.back().entry_point_to_id.end() || need_separate_batch(entry_point)) { const auto& batch_id = static_cast(current_bucket.size()); @@ -247,9 +239,8 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, b.hash_value = std::hash()(full_code); std::string dump_sources_dir = ""; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { - dump_sources_dir = debug_config->dump_sources; + GPU_DEBUG_IF(!_config.get_dump_sources_path().empty()) { + dump_sources_dir = _config.get_dump_sources_path(); } // Add -g -s to build options to allow IGC assembly dumper to associate assembler sources with corresponding OpenCL kernel code lines @@ -307,10 +298,9 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co bool dump_sources = batch.dump_custom_program; std::string dump_sources_dir = ""; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { + GPU_DEBUG_IF(!_config.get_dump_sources_path().empty()) { dump_sources = true; - dump_sources_dir = debug_config->dump_sources; + dump_sources_dir = _config.get_dump_sources_path(); } std::string err_log; // accumulated build log from all program's parts (only contains messages from parts which @@ -385,7 +375,7 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co if (is_cache_enabled()) { // If kernels caching is enabled, then we save compiled bucket to binary file with name ${code_hash_value}.cl_cache // Note: Bin file contains full bucket, not separate kernels, so kernels reuse across different models is quite limited - // Bucket size can be changed in get_max_kernels_per_batch() method, but forcing it to 1 will lead to much longer + // Bucket size can be changed by max_kernels_per_batch config option, but forcing it to 1 will lead to much longer // compile time. 
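        // (Editor's note, illustrative arithmetic only -- not part of the patch: with the
        // default max_kernels_per_batch of 8, a program object that collects e.g. 20
        // kernels ends up with ceil(20 / 8) = 3 batches, i.e. 3 separate OpenCL program
        // builds; raising the limit means fewer builds, each compiling more kernels.)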
std::lock_guard lock(cacheAccessMutex); ov::intel_gpu::save_binary(cached_bin_name, getProgramBinaries(program)); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp index 56459b93d2481c..b45226f44bd3e9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp @@ -113,7 +113,6 @@ class kernels_cache { std::string get_cache_path() const; bool is_cache_enabled() const; - size_t get_max_kernels_per_batch() const; bool _reuse_kernels = false; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp index 9e0a3fa5cfb390..64b92a15d1f4ba 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp @@ -18,8 +18,9 @@ struct ConcatenationImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; static const std::vector supported_types = { ov::element::f16, ov::element::u8, ov::element::i8 }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp index c3f599fc5db9f6..430c42dee57f75 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp @@ -23,8 +23,9 @@ struct ConvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& conv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp index 039cf36261caa0..238214f82dc6fb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp @@ -19,8 +19,9 @@ struct DeconvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& deconv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp index c4dc5f7faa6531..731a83372a9dfc 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp @@ -21,8 +21,9 @@ struct FullyConnectedImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& fc_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp index 6c576d177043ee..3d64d2009490c0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp @@ -18,8 +18,9 @@ struct GemmImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& gemm_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp index 6fd16a4dd04acf..4b2615c62e2747 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp @@ -22,10 +22,10 @@ struct LSTMSeqImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (info.arch == gpu_arch::unknown) + if (info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; - const auto& lstm_seq_node = node.as(); const auto& in_layout = lstm_seq_node.get_input_layout(0); const auto& out_layout = lstm_seq_node.get_output_layout(0); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp index 4710b0c77b83c7..ced0316e13a08f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp @@ -19,8 +19,9 @@ struct PoolingImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& in_layout = node.get_input_layout(0); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 4aca436bdd34d8..fe5920355e29c7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -51,8 +51,7 @@ struct typed_primitive_onednn_impl : public 
typed_primitive_impl { _scratchpad_md = _pd.scratchpad_desc(); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 4) { + GPU_DEBUG_IF(config.get_verbose() >= 4) { if (_scratchpad_md.get_size() > 0) { static std::atomic_llong total{0}; int64_t size = _scratchpad_md.get_size() / 1048576; @@ -71,8 +70,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _pd(), _prim() { _enable_profiling = config.get_enable_profiling(); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!config.get_dump_profiling_data_path().empty()) { _enable_profiling = true; } } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp index 68d963fd9e369f..4a4a4c60df032d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp @@ -48,8 +48,9 @@ struct ReduceImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& reduce_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp index c4117058da88e9..824069f56b9583 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp @@ -56,8 +56,9 @@ struct ReorderImplementationManager : public ImplementationManager { if (output_fmt == format::custom) return true; + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; if (!one_of(input_fmt.value, supported_formats) || !one_of(output_fmt.value, supported_formats)) diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 01286a1c6d04bc..619693f3b1a6fc 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -199,6 +199,8 @@ class primitive_inst { program_node const& get_node() const { return *_node; } network& get_network() const { return _network; } uint32_t get_network_id() const; + const ExecutionConfig& get_config() const { return get_network().get_config(); } + virtual event::ptr set_output_memory(memory::ptr mem, bool check = true, size_t idx = 0); void check_memory_to_set(const memory& mem, const layout& layout) const; const std::list& get_users() const { return _node->get_users(); } diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 269a3c30fd293c..229dec6a80c77e 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -158,6 +158,7 @@ struct program_node { program& get_program() { return myprog; } program& get_program() const { return myprog; 
} + const ExecutionConfig& get_config() const { return myprog.get_config(); } primitive_impl* get_selected_impl() const { return selected_impl.get(); } void set_selected_impl(std::unique_ptr impl); diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 1c61b2c3fba82c..d6642ac27e56c3 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -238,9 +238,8 @@ network::~network() { if (_program != nullptr) _program->cancel_compilation_context(); _memory_pool->clear_pool_for_network(net_id); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - dump_perf_data_raw(debug_config->dump_profiling_data + "/perf_raw" + std::to_string(net_id) + ".csv", _exec_order); + GPU_DEBUG_IF(!_config.get_dump_profiling_data_path().empty()) { + dump_perf_data_raw(_config.get_dump_profiling_data_path() + "/perf_raw" + std::to_string(net_id) + ".csv", _exec_order); } } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 53932dfd91a466..22f6ef4571b3c7 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -1082,8 +1082,7 @@ void primitive_inst::realloc_if_needed(bool prev_execution_skipped) { } bool primitive_inst::use_async_compilation() { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_async_compilation) { + GPU_DEBUG_IF(get_config().get_disable_async_compilation()) { return false; } @@ -1581,8 +1580,7 @@ void primitive_inst::do_runtime_in_place_concat() { return false; }; OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_concat: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) { return; } if (update_shape_done_by_other) { @@ -1691,8 +1689,7 @@ void primitive_inst::do_runtime_skip_scatter_update() { void primitive_inst::do_runtime_in_place_crop() { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_crop: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) { return; } @@ -1985,8 +1982,7 @@ void primitive_inst::execute() { set_out_event(_impl->execute(_impl_params->dep_events, *this)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!get_config().get_dump_profiling_data_path().empty()) { auto ev = _impl_params->out_event; get_network().get_stream().wait_for_events({ev}); @@ -2323,8 +2319,7 @@ void primitive_inst::update_weights() { reorder_impl->set_arguments(*reorder_inst, args); add_dep_event(reorder_impl->execute({}, *reorder_inst)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!get_config().get_dump_profiling_data_path().empty()) { stream.wait_for_events(_impl_params->dep_events); } diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 82e6f7b708d8ca..a455c1e07b18f4 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -240,12 +240,7 @@ void program::init_program() { 
_layout_optimizer = std::make_unique(); - size_t impls_cache_capacity = _impls_cache_capacity; - GPU_DEBUG_IF(debug_config->impls_cache_capacity >= 0) { - impls_cache_capacity = debug_config->impls_cache_capacity; - } - - _impls_cache = std::make_unique(impls_cache_capacity); + _impls_cache = std::make_unique(get_config().get_impls_cache_capacity()); // Remove items of compilation context's internal queue when some impl is popped in kernels_cache // compilation context's queue check duplication of inserted task _impls_cache->set_remove_item_callback([this](ImplementationsCache::ItemType& item) { @@ -548,26 +543,13 @@ void program::pre_optimize_graph(bool is_internal) { reorder_factory rf; if (optimize_data) { - GPU_DEBUG_GET_INSTANCE(debug_config); -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { -#else - { -#endif - apply_opt_pass(); - } + apply_opt_pass(); apply_opt_pass(); apply_opt_pass(); -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { -#else - { -#endif - apply_opt_pass(); - } + apply_opt_pass(); apply_opt_pass(); diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index b7767c14f9abff..65df228d6c733f 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -1851,8 +1851,7 @@ void program_node::create_onednn_primitive_attributes( // Trying to combine multiplications and additions which are placed one after another. // We do it in the cycle because some optimization cases can be simplified again from time to time do { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_onednn_opt_post_ops) + GPU_DEBUG_IF(get_config().get_disable_onednn_post_ops_opt()) break; optimized_post_ops = try_optimize_post_ops(fused_ops, optimized_post_ops, attrs, optimization_is_finished); } while (!optimization_is_finished); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 25558be18c481a..a6b798bde97b9e 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -89,26 +89,6 @@ static bool is_per_token_dynamic_quantize(const fully_connected_params& params) static size_t get_dynamic_quantize_group_size(const fully_connected_params& params) { auto dynamic_quantization_group_size = params.dynamic_quantization_group_size; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; - - // Specify which Fully-connected layer would be dynamic-quantized - GPU_DEBUG_IF(!debug_config->dynamic_quantize_layers_without_onednn.empty()) { - auto layers = debug_config->dynamic_quantize_layers_without_onednn; - auto iter = std::find_if(layers.begin(), layers.end(), [&](const std::string& pattern){ - return debug_config->is_layer_name_matched(params.layerID, pattern); - }); - - if (iter != layers.end()) { - dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; - GPU_DEBUG_COUT << "Found specified Fully-connected layer [" << params.layerID << "]. Enable Dynamic-quantize." 
<< std::endl; - } else { - dynamic_quantization_group_size = 0; - } - } - } - size_t scale_group_size = get_scale_group_size(params); size_t zp_group_num = params.decompression_zero_point.Feature().v; size_t zp_group_size = 0; diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index a28d986ce46160..8fca999126fa1e 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -68,7 +68,7 @@ Graph::Graph(std::shared_ptr model, const RemoteContextImpl::Ptr& con : m_context(context) , m_config(config) , m_stream_id(stream_id) { - auto program_builder = std::make_shared(model, get_engine(), config, false); + auto program_builder = std::make_shared(model, get_engine(), config); m_config = program_builder->get_config(); build(program_builder->get_compiled_program()); @@ -212,12 +212,12 @@ void Graph::build(std::shared_ptr program) { exit(0); } - GPU_DEBUG_IF(!debug_config->dump_graphs.empty() && m_stream_id == 0) { + GPU_DEBUG_IF(!m_config.get_dump_graphs_path().empty() && m_stream_id == 0) { static int net_id = 0; auto steps_info = get_network()->get_optimizer_passes_info(); size_t step_idx = 0; for (auto& step : steps_info) { - auto xml_path = debug_config->dump_graphs + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; + auto xml_path = m_config.get_dump_graphs_path() + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; ov::pass::Serialize(xml_path, "").run_on_model(get_runtime_model(step.second, true)); step_idx++; } diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 20690957fea5e6..3d287eb46c465e 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -26,7 +26,7 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); config.finalize(p.get_engine()); - ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); + ProgramBuilder prog(internal_body, p.get_engine(), config, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); auto& input_map = branch.input_map; diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 9665918f88d0be..08b014d72206c6 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -299,11 +299,10 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr& op) { + if (op->is_dynamic()) { + return true; + } + + // HACK: SearchSorted has specific shape requirements. + // E.g. static input shapes: sorted:[8], values:[2,3,4] are perfectly fine, + // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid. + if (ov::is_type(op)) + return true; + + if (ov::is_type(op)) + return true; + + if (ov::is_type(op)) { + const auto body_function = std::static_pointer_cast(op)->get_function(); + if (body_function->is_dynamic()) + return true; + } + + if (ov::is_type(op) || ov::is_type(op)) { + return true; + } + // When an input node has a dynamic shape with 4 dimensions, this function may return false + // because op.is_dynamic(), which only checks input shapes, returns false.
+ // So, in the case of input data, we need to check the output shape. + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).is_dynamic()) + return true; + } + + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).size() > 6) + return true; + } + + for (size_t i = 0; i < op->get_input_size(); i++) { + if (op->get_input_partial_shape(i).size() > 6) + return true; + } + + return false; +} + +void set_model_properties(const ov::Model& model, ExecutionConfig& config) { + const auto& ops = model.get_ordered_ops(); + // In the case of an inner program, the allow_new_shape_infer flag is set from outside the program. + // So, do not check allow_new_shape_infer when building an inner program. + for (const auto& op : ops) { + if (requires_new_shape_infer(op)) { + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + break; + } + } + bool is_dynamic = false; + for (const auto& op : ops) { + if (op->is_dynamic()) { + is_dynamic = true; + break; + } + } + bool has_lstm = false; + for (const auto& op : ops) { + if (ov::is_type(op)) { + has_lstm = true; + break; + } + } + + // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, + // fewer kernels are built compared to static models. + // So having a smaller batch size is even better for dynamic models as we can do more parallel builds. + if (is_dynamic) { + config.set_property(ov::intel_gpu::max_kernels_per_batch(4)); + } else { + config.set_property(ov::intel_gpu::max_kernels_per_batch(8)); + } + + config.set_property(ov::intel_gpu::optimize_data(true)); + + if (has_lstm) + config.set_property(ov::intel_gpu::use_onednn(true)); +} + } // namespace #define FACTORY_DECLARATION(op_version, op_name) \ @@ -145,14 +231,13 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p const ExecutionConfig& config, const std::shared_ptr& context) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::clone_and_transform_model"); - GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_DEFINE_MEM_LOGGER("Plugin::clone_and_transform_model"); auto cloned_model = model->clone(); OPENVINO_ASSERT(cloned_model != nullptr, "[GPU] Failed to clone model!"); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name(); + GPU_DEBUG_IF(!config.get_dump_graphs_path().empty()) { + auto path_base = config.get_dump_graphs_path() + "/" + cloned_model->get_name(); ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } @@ -171,8 +256,8 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p new_res->set_friendly_name(old_res->get_friendly_name()); } - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name() + "_" + "transformed_func"; + GPU_DEBUG_IF(!config.get_dump_graphs_path().empty()) { + auto path_base = config.get_dump_graphs_path() + "/" + cloned_model->get_name() + "_" + "transformed_func"; ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } return cloned_model; @@ -220,7 +305,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(orig_config, OptionVisibility::RELEASE); + config.set_user_property(orig_config,
OptionVisibility::RELEASE); + set_model_properties(*model, config); config.finalize(context, get_rt_info(*model)); auto transformed_model = clone_and_transform_model(model, config, context); @@ -239,6 +325,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); + set_model_properties(*model, config); config.finalize(context_impl, get_rt_info(*model)); auto transformed_model = clone_and_transform_model(model, config, context_impl); @@ -269,7 +356,7 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params) void Plugin::set_property(const ov::AnyMap &config) { auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) { - config.set_property(user_config, OptionVisibility::RELEASE); + config.set_user_property(user_config, OptionVisibility::RELEASE); // Check that custom layers config can be loaded if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { CustomLayerMap custom_layers; @@ -304,7 +391,8 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& auto ctx = get_default_context(device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(orig_config, OptionVisibility::RELEASE); + config.set_user_property(orig_config, OptionVisibility::RELEASE); + set_model_properties(*model, config); config.finalize(ctx, get_rt_info(*model)); ProgramBuilder prog(ctx->get_engine(), config); @@ -359,7 +447,7 @@ std::shared_ptr Plugin::import_model(std::istream& model, } ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(_orig_config, OptionVisibility::RELEASE); + config.set_user_property(_orig_config, OptionVisibility::RELEASE); config.finalize(context_impl, {}); ov::CacheMode cache_mode = config.get_cache_mode(); @@ -680,7 +768,9 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { auto device_id = get_property(ov::device::id.name(), options).as(); auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); - const auto& config = m_configs_map.at(device_id); + auto config = m_configs_map.at(device_id); + config.set_property(ov::intel_gpu::partial_build_program(true)); + config.finalize(context, {}); uint32_t n_streams = static_cast(config.get_num_streams()); uint64_t occupied_device_mem = 0; auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>(); @@ -798,7 +888,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { TransformationsPipeline transformations(config, context); transformations.apply(cloned_model); - program = std::make_shared(cloned_model, engine, config, true); + program = std::make_shared(cloned_model, engine, config); std::pair device_memory_usage = program->get_compiled_program()->get_estimated_device_mem_usage(); if (device_memory_usage.first == static_cast(-1L) && device_memory_usage.second == static_cast(-1L)) { return static_cast(max_batch_size); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index cd69e741e435de..f30bd9464a4238 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -13,7 +13,7 @@ #include "openvino/op/search_sorted.hpp" #include "openvino/op/stft.hpp" #include 
"openvino/runtime/properties.hpp" -#include "ov_ops/dynamic_quantize.hpp" + #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/plugin/program_builder.hpp" @@ -63,7 +63,6 @@ std::string layer_type_name_ID(const std::shared_ptr& op) { } ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, - bool partial_build, std::shared_ptr task_executor, std::shared_ptr compilation_context, bool is_inner_program) @@ -111,7 +110,7 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); auto ops = model->get_ordered_ops(); - m_program = build(ops, partial_build, is_inner_program); + m_program = build(ops, is_inner_program); } ProgramBuilder::ProgramBuilder(cldnn::engine& engine, const ExecutionConfig& config) @@ -141,43 +140,8 @@ void ProgramBuilder::cleanup_build() { #endif } -std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool partial_build, bool is_inner_program) { +std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool is_inner_program) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::build"); - // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. - // So, do not check allow_new_shape_infer for inner program build - for (const auto& op : ops) { - if (requires_new_shape_infer(op)) { - allow_new_shape_infer = true; - break; - } - } - bool is_dynamic = false; - for (const auto& op : ops) { - if (op->is_dynamic()) { - is_dynamic = true; - break; - } - } - - if (is_inner_program) { - allow_new_shape_infer = (m_config.get_allow_new_shape_infer() || allow_new_shape_infer); - } - - // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, - // smaller # of kernels are built compared to static models. - // So having smaller batch size is even better for dynamic model as we can do more parallel build. - if (is_dynamic) { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(4));; - } else { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(8));; - } - - m_config.set_property(ov::intel_gpu::partial_build_program(partial_build)); - m_config.set_property(ov::intel_gpu::optimize_data(true)); - m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer)); - //if (has_lstm) - m_config.set_property(ov::intel_gpu::use_onednn(true)); - m_config.finalize(m_engine); prepare_build(); { @@ -223,7 +187,6 @@ bool ProgramBuilder::is_op_supported(const std::shared_ptr& op) { if (!data_types_are_supported(op.get())) return false; - allow_new_shape_infer = requires_new_shape_infer(op); CreateSingleLayerPrimitive(op); cleanup_build(); DisableQueryMode(); @@ -280,7 +243,7 @@ std::vector ProgramBuilder::GetInputInfo(const std::shared_pt // Note: Currently Split/Variadic Split are divided to multiple crops // LSTMCell contains its own body network, and each output has a unique pid // But there is no need to maintain output port index for the next node e.g. 
Result - bool is_legacy_multiple_outputs = !allow_new_shape_infer + bool is_legacy_multiple_outputs = !use_new_shape_infer() || ov::is_type(prevOp) || ov::is_type(prevOp) || ov::is_type(prevOp); diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 7c3a1b0e452fa8..22b0262c62b2d3 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -317,15 +317,16 @@ void SyncInferRequest::enqueue() { m_internal_outputs = network->execute(dependencies); auto network_enqueue_end = std::chrono::high_resolution_clock::now(); + const auto& config = network->get_config(); + // If dump layers path is set, only runs first inference. - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0 && debug_config->dump_iteration.empty()) { + GPU_DEBUG_IF(!config.get_dump_tensors_path().empty() && config.get_dump_iterations().empty()) { GPU_DEBUG_INFO << "Only run first inference to dump layers." << std::endl; exit(0); } auto enqueue_end = std::chrono::high_resolution_clock::now(); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) { + GPU_DEBUG_IF(config.get_host_time_profiling()) { network_enqueue_time = std::chrono::duration_cast(network_enqueue_end - network_enqueue_start).count(); const uint64_t total_time = std::chrono::duration_cast(enqueue_end - enqueue_start).count(); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp index 6c0d50be96e7ae..b269fbc2c9eb4d 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp @@ -16,7 +16,7 @@ namespace ov::intel_gpu { -DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size) +DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric) : ov::pass::MatcherPass() { GPU_DEBUG_GET_INSTANCE(debug_config); using namespace ov::pass::pattern; @@ -55,9 +55,7 @@ DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size config.scale_dt = element::f16; config.group_sizes = shape_group_size; - // AZP does not support grouped size dyn-quan - // XXX: This is currently wrapped as GPU_DEBUG_IF as dynamic_quantize_asym is not exposed through public API. 
- GPU_DEBUG_IF(debug_config->dynamic_quantize_asym && group_size == UINT64_MAX) { + if (asymmetric && group_size == UINT64_MAX) { config.quantization_type = QuantizationType::Asymmetric; config.quantization_dt = element::u8; config.zp_dt = element::u8; // it supports u8 only now diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp index 85d32fbfdcea84..f8b13685389f1d 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp @@ -11,7 +11,7 @@ namespace ov::intel_gpu { class DynamicQuantizeFullyConnected: public ov::pass::MatcherPass { public: OPENVINO_MATCHER_PASS_RTTI("DynamicQuantizeFullyConnected"); - DynamicQuantizeFullyConnected(uint64_t group_size); + DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric = false); }; } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 339ee80ba7a3eb..6a1b09d1b4f329 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -988,13 +988,9 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - bool disable_horizontal_fc_fusion = false; - bool disable_fc_swiglu_fusion = false; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_horizontal_fc_fusion == 1) - disable_horizontal_fc_fusion = true; - GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) - disable_fc_swiglu_fusion = true; + bool disable_horizontal_fc_fusion = config.get_disable_horizontal_fc_fusion(); + bool disable_fc_swiglu_fusion = config.get_disable_fc_swiglu_fusion(); + // mlp fusion is only supported for cldnn on high performant GPUis bool fuse_mlp_swiglu = !device_info.supports_immad && device_info.execution_units_count >= 128 && @@ -1052,6 +1048,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { + bool asymmetric_dyn_quant = config.get_asym_dynamic_quantization(); auto dynamic_quantization_group_size = config.get_dynamic_quantization_group_size(); pass_config->set_callback([=](const_node_ptr& root) -> bool { for (size_t i = 0 ; i < root->get_input_node_shared_ptr(0)->get_output_size(); ++i) { @@ -1070,7 +1067,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // AZP does not support 8bit weight // XXX: This is currently wrapped as GPU_DEBUG_IF as dynamic_quantize_asym is not exposed through public API. - GPU_DEBUG_IF(debug_config->dynamic_quantize_asym + GPU_DEBUG_IF(asymmetric_dyn_quant && (root->get_input_element_type(1) == ov::element::i8 || root->get_input_element_type(1) == ov::element::u8)) { GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: asym quantization does not support 8bit weight" << std::endl; return true; @@ -1094,7 +1091,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { return false; }); - manager.register_pass(dynamic_quantization_group_size); + manager.register_pass(dynamic_quantization_group_size, asymmetric_dyn_quant); } // Remove Pad in front of MaxPool if both the pads_begin and pads_end are zero. 
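The hunks above all repeat one pattern: a read of the process-global debug_configuration singleton is replaced by a typed getter on the ExecutionConfig that travels with the program/network. Below is a minimal self-contained sketch of that pattern; ConfigOption and the getter names mirror this series, but the scaffolding is illustrative, not the actual OpenVINO classes:

#include <iostream>
#include <string>

// Illustrative stand-in for the ConfigOption<T> declared in plugin_config.hpp.
template <typename T>
struct ConfigOption {
    T value;
};

// Illustrative stand-in for ov::intel_gpu::ExecutionConfig: each option is a
// typed member exposed through a generated get_*() accessor.
struct MiniExecutionConfig {
    ConfigOption<bool> m_disable_runtime_buffer_fusing{false};
    ConfigOption<std::string> m_dump_profiling_data_path{std::string{}};

    bool get_disable_runtime_buffer_fusing() const { return m_disable_runtime_buffer_fusing.value; }
    const std::string& get_dump_profiling_data_path() const { return m_dump_profiling_data_path.value; }
};

int main() {
    MiniExecutionConfig config;
    config.m_dump_profiling_data_path.value = "/tmp/perf";

    // Old call sites: GPU_DEBUG_GET_INSTANCE(debug_config);
    //                 GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { ... }
    // New call sites read the per-model config, so two models compiled in one
    // process can run with different debug settings:
    if (!config.get_dump_profiling_data_path().empty())
        std::cout << "dumping profiling data to " << config.get_dump_profiling_data_path() << "\n";
    if (config.get_disable_runtime_buffer_fusing())
        std::cout << "runtime buffer fusing disabled\n";
    return 0;
}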
diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index f185a2f321489d..19d841036cd6de 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -68,7 +68,7 @@ void ExecutionConfig::finalize_impl(std::shared_ptr context) { if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { m_enable_lp_transformations = info.supports_imad || info.supports_immad; } - if (info.supports_immad) { + if (!is_set_by_user(ov::intel_gpu::use_onednn) && info.supports_immad) { m_use_onednn = true; } if (get_use_onednn()) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp index 441494b41c4f8f..520cb9bd22e073 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp @@ -295,9 +295,6 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex GPU_DEBUG_INFO << "GPU version: " << static_cast(info.gfx_ver.major) << "." << static_cast(info.gfx_ver.minor) << "." << static_cast(info.gfx_ver.revision) << (info.has_separate_cache ? " with separate cache" : "") << std::endl; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_onednn) - info.supports_immad = false; } else if (nv_device_attr_supported) { info.gfx_ver = {static_cast(device.getInfo()), static_cast(device.getInfo()), From 730f70a21f5d2fe7bf8a3d69ecf365cefe237bac Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 24 Dec 2024 15:13:09 +0400 Subject: [PATCH 16/44] extended bool any parsing options Signed-off-by: Vladimir Paramuzov --- src/core/src/any.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 54c6b4f9d868f5..8de26ad7c4d173 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -6,6 +6,7 @@ #include #include +#include "openvino/util/common_util.hpp" namespace { template bool contains_type_index(Container&& types, const std::type_info& user_type) { @@ -202,9 +203,14 @@ namespace util { void Read::operator()(std::istream& is, bool& value) const { std::string str; is >> str; - if (str == "YES") { + + std::set off = {"0", "false", "off", "no"}; + std::set on = {"1", "true", "on", "yes"}; + str = util::to_lower(str); + + if (on.count(str)) { value = true; - } else if (str == "NO") { + } else if (off.count(str)) { value = false; } else { OPENVINO_THROW("Could not convert to bool from string " + str); From 797f894dc021c56a79044ee7148d9d6e10887f5a Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 14 Jan 2025 10:13:37 +0400 Subject: [PATCH 17/44] debug properties wip Signed-off-by: Vladimir Paramuzov --- .../include/intel_gpu/runtime/internal_properties.hpp | 1 + src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl | 1 + src/plugins/intel_gpu/src/graph/layout_optimizer.cpp | 1 + src/plugins/intel_gpu/src/graph/program.cpp | 5 ++--- src/plugins/intel_gpu/src/plugin/graph.cpp | 5 ++--- src/plugins/intel_gpu/src/plugin/plugin.cpp | 5 ----- 6 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index 5924457193765d..23b48ac4d9a920 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ 
b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -96,6 +96,7 @@ static constexpr Property dump_graphs_path{ static constexpr Property dump_profiling_data_path{"GPU_DUMP_PROFILING_DATA_PATH"}; static constexpr Property dump_sources_path{"GPU_DUMP_SOURCES_PATH"}; static constexpr Property dump_tensors_path{"GPU_DUMP_TENSORS_PATH"}; +static constexpr Property dry_run_path{"GPU_DRY_RUN_PATH"}; static constexpr Property dump_tensors{"DUMP_TENSORS"}; static constexpr Property dump_layers{"GPU_DUMP_LAYERS"}; static constexpr Property dump_tensors_format{"DUMP_TENSORS_FORMAT"}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 9d63b6b15e3368..126a01a75acee5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -74,3 +74,4 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usa OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "Preallocation setting") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, std::vector{}, "List of layers to load raw binary") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dry_run_path, "", "Enables mode which partially compiles a model and stores runtime model into specified directory") diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 067ca8c8f5fb0d..78e669f0e1152f 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1131,6 +1131,7 @@ bool layout_optimizer::is_primitive_implemented_for_onednn(program_node& node) { impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node) { #ifdef GPU_DEBUG_CONFIG + GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->forced_impl_types.empty()) { // Forcing impl type of one primitive diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index a455c1e07b18f4..dbadc079c0da82 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -491,9 +491,8 @@ void program::build_program(bool is_internal) { run_graph_compilation(); { post_optimize_graph(is_internal); } - GPU_DEBUG_GET_INSTANCE(debug_config); #ifdef GPU_DEBUG_CONFIG - if (debug_config->dry_run_path.empty() || is_internal) { + if (get_config().get_dry_run_path().empty() || is_internal) { #else { #endif @@ -608,7 +607,7 @@ void program::post_optimize_graph(bool is_internal) { auto partial_build = _config.get_partial_build_program(); #ifdef GPU_DEBUG_CONFIG GPU_DEBUG_GET_INSTANCE(debug_config); - if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) { + if (!is_internal && (!partial_build || !_config.get_dry_run_path().empty())) { #else if (!is_internal && !partial_build) { #endif diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 8fca999126fa1e..d04adc77ee67af 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -206,9 +206,8 @@ void Graph::build(std::shared_ptr program) { m_network = std::make_shared(program, m_stream_id); } - GPU_DEBUG_GET_INSTANCE(debug_config); - 
GPU_DEBUG_IF(!debug_config->dry_run_path.empty()) { - ov::pass::Serialize(debug_config->dry_run_path, "").run_on_model(get_runtime_model()); + GPU_DEBUG_IF(!m_config.get_dry_run_path().empty()) { + ov::pass::Serialize(m_config.get_dry_run_path(), "").run_on_model(get_runtime_model()); exit(0); } diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index bb5ff6d37542ee..fd0f2482bc4712 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -829,11 +829,6 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { std::shared_ptr program; - GPU_DEBUG_IF(debug_config->base_batch_for_memory_estimation > 0) { - size_t user_specified_base_batch_size = debug_config->base_batch_for_memory_estimation; - base_batch_size = (user_specified_base_batch_size != base_batch_size) ? user_specified_base_batch_size : base_batch_size; - } - auto cloned_model = model->clone(); try { From 7d05f99ca6588dde4fb374509f3c9e6a7a22580f Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 14 Jan 2025 10:14:09 +0400 Subject: [PATCH 18/44] fix apply rt info Signed-off-by: Vladimir Paramuzov --- src/inference/dev_api/openvino/runtime/plugin_config.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 08dd148b6d1c36..0101733b6517d5 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -243,7 +243,7 @@ class OPENVINO_RUNTIME_API PluginConfig { if (!is_set_by_user(property)) { auto rt_info_val = rt_info.find(property.name()); if (rt_info_val != rt_info.end()) { - set_property(property(rt_info_val->second.template as())); + set_user_property(property(rt_info_val->second.template as())); } } } From c652884c4859d396f9599c5b816b1be0aa2d452e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 15 Jan 2025 16:11:20 +0400 Subject: [PATCH 19/44] wip Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 51 ++++++++++++++----- src/inference/src/dev/plugin_config.cpp | 40 +++++++-------- src/inference/tests/unit/config_test.cpp | 43 ++++++++++++---- .../intel_gpu/runtime/execution_config.hpp | 13 +++-- .../include/intel_gpu/runtime/options.inl | 2 +- .../src/runtime/execution_config.cpp | 21 ++++++-- .../intel_gpu/src/runtime/ocl/ocl_event.cpp | 10 ---- 7 files changed, 117 insertions(+), 63 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 0101733b6517d5..a598d81165eea6 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -11,6 +11,10 @@ #include "openvino/runtime/properties.hpp" #include "openvino/core/except.hpp" +#ifndef EXPAND + #define EXPAND(N) N +#endif + #ifndef COUNT_N #define COUNT_N(_1, _2, _3, _4, _5, N, ...) N #endif @@ -23,14 +27,11 @@ #define CAT(a, b) a ## b #endif -#ifndef EXPAND - #define EXPAND(N) N -#endif - #define GET_EXCEPT_LAST_IMPL(N, ...) CAT(GET_EXCEPT_LAST_IMPL_, N)(__VA_ARGS__) #define GET_EXCEPT_LAST_IMPL_2(_0, _1) _0 #define GET_EXCEPT_LAST_IMPL_3(_0, _1, _2) _0, _1 #define GET_EXCEPT_LAST_IMPL_4(_0, _1, _2, _3) _0, _1, _2 +#define GET_EXCEPT_LAST_IMPL_5(_0, _1, _2, _3, _4) _0, _1, _2, _3 #define GET_EXCEPT_LAST(...) 
EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) @@ -45,10 +46,25 @@ #define GET_LAST(...) GET_LAST_IMPL(COUNT(__VA_ARGS__), _, __VA_ARGS__ ,,,,,,,,,,,) -#define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ +#define OV_CONFIG_DECLARE_LOCAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ ConfigOption m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; +#define OV_CONFIG_DECLARE_GLOBAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ + static ConfigOption m_ ## PropertyVar; + +#define OV_CONFIG_DECLARE_LOCAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ + const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const { \ + if (m_is_finalized) { \ + return m_ ## PropertyVar.value; \ + } else { \ + if (m_user_properties.find(PropertyNamespace::PropertyVar.name()) != m_user_properties.end()) { \ + return m_user_properties.at(PropertyNamespace::PropertyVar.name()).as(); \ + } else { \ + return m_ ## PropertyVar.value; \ + } \ + } \ + } -#define OV_CONFIG_DECLARE_GETTERS(PropertyNamespace, PropertyVar, Visibility, ...) \ +#define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const { \ if (m_is_finalized) { \ return m_ ## PropertyVar.value; \ @@ -68,13 +84,16 @@ { #PropertyNamespace "::" #PropertyVar, PropertyNamespace::PropertyVar.name(), GET_LAST(__VA_ARGS__)}, #define OV_CONFIG_RELEASE_OPTION(PropertyNamespace, PropertyVar, ...) \ - OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__) + OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__) #define OV_CONFIG_RELEASE_INTERNAL_OPTION(PropertyNamespace, PropertyVar, ...) \ - OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE_INTERNAL, __VA_ARGS__) + OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE_INTERNAL, __VA_ARGS__) #define OV_CONFIG_DEBUG_OPTION(PropertyNamespace, PropertyVar, ...) \ - OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) + OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) + +#define OV_CONFIG_DEBUG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG_GLOBAL, __VA_ARGS__) namespace ov { #define ENABLE_DEBUG_CAPS @@ -82,8 +101,9 @@ enum class OptionVisibility : uint8_t { RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only DEBUG = 1 << 2, // Option can be set for debug builds only via environment and config file + DEBUG_GLOBAL = 1 << 3, // Global option can be set for debug builds only via environment and config file #ifdef ENABLE_DEBUG_CAPS - ANY = 0x07, // Any visibility is valid including DEBUG + ANY = 0x0F, // Any visibility is valid including DEBUG #else ANY = 0x03, // Any visibility is valid excluding DEBUG #endif @@ -213,6 +233,10 @@ class OPENVINO_RUNTIME_API PluginConfig { util::EnableIfAllStringAny set_property(Properties&&... properties) { set_property(ov::AnyMap{std::forward(properties)...}); } + template + util::EnableIfAllStringAny set_user_property(Properties&&... 
properties) { + set_user_property(ov::AnyMap{std::forward(properties)...}); + } std::string to_string() const; @@ -243,13 +267,14 @@ class OPENVINO_RUNTIME_API PluginConfig { if (!is_set_by_user(property)) { auto rt_info_val = rt_info.find(property.name()); if (rt_info_val != rt_info.end()) { - set_user_property(property(rt_info_val->second.template as())); + set_user_property({property(rt_info_val->second.template as())}, OptionVisibility::RELEASE | OptionVisibility::RELEASE_INTERNAL); } } } ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; - ov::AnyMap read_env(const std::vector& prefixes) const; + ov::AnyMap read_env() const; + ov::Any read_env(const std::string& s) const; void cleanup_unsupported(ov::AnyMap& config) const; std::map m_options_map; @@ -265,6 +290,8 @@ class OPENVINO_RUNTIME_API PluginConfig { void print_help() const; bool m_is_finalized = false; + + const char* m_allowed_env_prefix = "OV_"; }; } // namespace ov diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index b756894d5e414f..01a6f6811c9829 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -150,7 +150,7 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) set_user_property(config_properties, OptionVisibility::ANY, throw_on_error); } - ov::AnyMap env_properties = read_env({"OV_"}); + ov::AnyMap env_properties = read_env(); cleanup_unsupported(env_properties); #ifdef ENABLE_DEBUG_CAPS for (auto& prop : env_properties) { @@ -190,32 +190,30 @@ ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std return config; } -ov::AnyMap PluginConfig::read_env(const std::vector& prefixes) const { +ov::AnyMap PluginConfig::read_env() const { ov::AnyMap config; for (auto& kv : m_options_map) { - for (auto& prefix : prefixes) { - auto var_name = prefix + kv.first; - const auto& val = ov::util::getenv_string(var_name.c_str()); - - if (!val.empty()) { - if (dynamic_cast*>(kv.second) != nullptr) { - const std::set off = {"0", "false", "off", "no"}; - const std::set on = {"1", "true", "on", "yes"}; - - const auto& val_lower = ov::util::to_lower(val); - if (off.count(val_lower)) { - config[kv.first] = false; - } else if (on.count(val_lower)) { - config[kv.first] = true; - } else { - OPENVINO_THROW("Unexpected value for boolean property: ", val); - } + auto var_name = m_allowed_env_prefix + kv.first; + const auto& val = ov::util::getenv_string(var_name.c_str()); + + if (!val.empty()) { + if (dynamic_cast*>(kv.second) != nullptr) { + const std::set off = {"0", "false", "off", "no"}; + const std::set on = {"1", "true", "on", "yes"}; + + const auto& val_lower = ov::util::to_lower(val); + if (off.count(val_lower)) { + config[kv.first] = false; + } else if (on.count(val_lower)) { + config[kv.first] = true; } else { - config[kv.first] = val; + OPENVINO_THROW("Unexpected value for boolean property: ", val); } - break; + } else { + config[kv.first] = val; } + break; } } diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index d2c99585ab015b..6f385473b1c627 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -18,6 +18,7 @@ static constexpr Property high_level_proper static constexpr Property low_level_property{"LOW_LEVEL_PROPERTY"}; static constexpr Property release_internal_property{"RELEASE_INTERNAL_PROPERTY"}; static constexpr Property 
debug_property{"DEBUG_PROPERTY"}; +static constexpr Property debug_global_property{"DEBUG_GLOBAL_PROPERTY"}; struct EmptyTestConfig : public ov::PluginConfig { @@ -32,14 +33,17 @@ struct EmptyTestConfig : public ov::PluginConfig { struct NotEmptyTestConfig : public ov::PluginConfig { NotEmptyTestConfig() { - #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "") OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "") OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "") - #undef OV_CONFIG_OPTION + OV_CONFIG_DEBUG_GLOBAL_OPTION(, debug_global_property, 4, "") + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION } NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() { @@ -49,14 +53,17 @@ struct NotEmptyTestConfig : public ov::PluginConfig { } } - #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__) + #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__) OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "") OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "") OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "") - #undef OV_CONFIG_OPTION + OV_CONFIG_DEBUG_GLOBAL_OPTION(, debug_global_property, 4, "") + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION std::vector get_supported_properties() const { std::vector supported_properties; @@ -80,6 +87,15 @@ struct NotEmptyTestConfig : public ov::PluginConfig { using ov::PluginConfig::is_set_by_user; }; +#define OV_CONFIG_LOCAL_OPTION(...) +#define OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) 
\ + ConfigOption NotEmptyTestConfig::m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; + + OV_CONFIG_DEBUG_GLOBAL_OPTION(, debug_global_property, 4, "") + +#undef OV_CONFIG_LOCAL_OPTION +#undef OV_CONFIG_GLOBAL_OPTION + TEST(plugin_config, can_create_empty_config) { ASSERT_NO_THROW( EmptyTestConfig cfg; @@ -90,7 +106,7 @@ TEST(plugin_config, can_create_empty_config) { TEST(plugin_config, can_create_not_empty_config) { ASSERT_NO_THROW( NotEmptyTestConfig cfg; - ASSERT_EQ(cfg.get_supported_properties().size(), 6); + ASSERT_EQ(cfg.get_supported_properties().size(), 7); ); } @@ -111,7 +127,7 @@ TEST(plugin_config, throw_for_unsupported_property) { TEST(plugin_config, can_direct_access_to_properties) { NotEmptyTestConfig cfg; ASSERT_EQ(cfg.m_int_property.value, cfg.get_int_property()); - ASSERT_NO_THROW(cfg.set_property(int_property(1))); + ASSERT_NO_THROW(cfg.set_user_property(int_property(1))); ASSERT_EQ(cfg.m_int_property.value, -1); // user property doesn't impact member value until finalize() is called cfg.m_int_property.value = 2; @@ -120,7 +136,7 @@ TEST(plugin_config, can_direct_access_to_properties) { TEST(plugin_config, finalization_updates_member) { NotEmptyTestConfig cfg; - ASSERT_NO_THROW(cfg.set_property(bool_property(false))); + ASSERT_NO_THROW(cfg.set_user_property(bool_property(false))); ASSERT_EQ(cfg.m_bool_property.value, true); // user property doesn't impact member value until finalize() is called cfg.finalize(nullptr, {}); @@ -137,7 +153,7 @@ TEST(plugin_config, get_property_before_finalization_returns_user_property_if_se cfg.m_bool_property.value = false; // update member directly ASSERT_EQ(cfg.get_bool_property(), false); // OK, return the class member value as no user property was set - ASSERT_NO_THROW(cfg.set_property(bool_property(true))); + ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); ASSERT_TRUE(cfg.is_set_by_user(bool_property)); ASSERT_EQ(cfg.get_bool_property(), true); // now user property value is returned ASSERT_EQ(cfg.m_bool_property.value, false); // but class member is not updated @@ -150,7 +166,7 @@ TEST(plugin_config, get_property_before_finalization_returns_user_property_if_se TEST(plugin_config, finalization_updates_dependant_properties) { NotEmptyTestConfig cfg; - cfg.set_property(high_level_property("value1")); + cfg.set_user_property(high_level_property("value1")); ASSERT_TRUE(cfg.is_set_by_user(high_level_property)); ASSERT_FALSE(cfg.is_set_by_user(low_level_property)); @@ -204,8 +220,8 @@ TEST(plugin_config, can_copy_config) { TEST(plugin_config, set_property_throw_for_non_release_options) { NotEmptyTestConfig cfg; - ASSERT_ANY_THROW(cfg.set_property(release_internal_property(10))); - ASSERT_ANY_THROW(cfg.set_property(debug_property(10))); + ASSERT_ANY_THROW(cfg.set_user_property({release_internal_property(10)}, OptionVisibility::RELEASE)); + ASSERT_ANY_THROW(cfg.set_user_property({debug_property(10)}, OptionVisibility::RELEASE)); } TEST(plugin_config, visibility_is_correct) { @@ -214,3 +230,8 @@ TEST(plugin_config, visibility_is_correct) { ASSERT_EQ(cfg.get_option_ptr(debug_property.name())->get_visibility(), OptionVisibility::DEBUG); ASSERT_EQ(cfg.get_option_ptr(int_property.name())->get_visibility(), OptionVisibility::RELEASE); } + +TEST(plugin_config, can_get_global_property) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_debug_global_property(), 4); +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 
f8f639d8f67a25..be8a60b65d023f 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -24,9 +24,12 @@ struct ExecutionConfig : public ov::PluginConfig { void finalize(cldnn::engine& engine); using ov::PluginConfig::finalize; - #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__) + #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__) #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION + + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION protected: void finalize_impl(std::shared_ptr context) override; @@ -39,9 +42,11 @@ struct ExecutionConfig : public ov::PluginConfig { void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__) + #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__) #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION }; } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 126a01a75acee5..46bfc496ea8411 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -48,7 +48,7 @@ OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disa OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into batch for more efficient ocl compilation") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 0 - Disabled, 4 - Maximum verbosity") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 0 - Disabled, 4 - Maximum verbosity") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, help, false, "Print help message for all config options") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem") diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 19d841036cd6de..7e2c82f84b7d7a 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -13,10 +13,21 @@ namespace ov::intel_gpu { +#define OV_CONFIG_LOCAL_OPTION(...) +#define OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ + ConfigOption ExecutionConfig::m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; + +#include "intel_gpu/runtime/options.inl" + +#undef OV_CONFIG_LOCAL_OPTION +#undef OV_CONFIG_GLOBAL_OPTION + ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { - #define OV_CONFIG_OPTION(...) 
OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION } ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { @@ -152,9 +163,11 @@ void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const { static ov::PluginConfig::OptionsDesc help_map { - #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) + #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION }; return help_map; } diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp index dbf2a01aa4eadf..d722b5f3bc1bfd 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp @@ -219,16 +219,6 @@ bool ocl_events::get_profiling_info_impl(std::listprint_multi_kernel_perf) { - if (period.stage == instrumentation::profiling_stage::executing) { - GPU_DEBUG_TRACE << "Multi-kernel time: "; - for (auto& duration : all_durations[period.stage]) - GPU_DEBUG_TRACE << " " << (duration.second - duration.first) / 1000; - GPU_DEBUG_TRACE << " Total " << sum / 1000 << std::endl; - } - } - info.push_back(get_profiling_interval(period.stage, 0, sum)); } From c53bb9b281345e41de294ffe620efaf636092882 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 20 Jan 2025 11:58:36 +0400 Subject: [PATCH 20/44] [GPU] Global static vars. 
Removed old debug config Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 17 +- src/inference/src/dev/plugin_config.cpp | 46 +- .../intel_gpu/plugin/program_builder.hpp | 3 +- .../intel_gpu/runtime/debug_configuration.hpp | 115 +--- .../intel_gpu/runtime/internal_properties.hpp | 14 +- .../include/intel_gpu/runtime/memory_pool.hpp | 6 +- .../include/intel_gpu/runtime/options.inl | 26 +- .../include/intel_gpu/runtime/profiling.hpp | 3 +- .../intel_gpu/runtime/shape_predictor.hpp | 35 +- .../intel_gpu/src/graph/debug_helper.cpp | 239 ++++--- .../intel_gpu/src/graph/debug_helper.hpp | 4 - .../graph_optimizer/post_optimize_weights.cpp | 4 - .../graph/graph_optimizer/reorder_inputs.cpp | 2 - .../graph/impls/onednn/convolution_onednn.cpp | 2 - .../intel_gpu/src/graph/layout_optimizer.cpp | 62 -- src/plugins/intel_gpu/src/graph/network.cpp | 33 +- .../intel_gpu/src/graph/primitive_inst.cpp | 60 +- src/plugins/intel_gpu/src/graph/program.cpp | 4 +- .../src/graph/program_dump_graph.cpp | 1 - .../dynamic_quantize_kernel_ref.cpp | 1 - src/plugins/intel_gpu/src/plugin/graph.cpp | 4 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 6 +- .../intel_gpu/src/plugin/program_builder.cpp | 45 -- .../src/plugin/sync_infer_request.cpp | 20 +- .../dynamic_quantize_fully_connected.cpp | 1 - .../transformations/fc_horizontal_fusion.cpp | 1 - .../src/plugin/transformations_pipeline.cpp | 8 +- .../src/runtime/debug_configuration.cpp | 601 +----------------- src/plugins/intel_gpu/src/runtime/device.cpp | 3 +- src/plugins/intel_gpu/src/runtime/engine.cpp | 3 +- .../intel_gpu/src/runtime/memory_pool.cpp | 36 +- .../intel_gpu/src/runtime/shape_predictor.cpp | 10 +- .../module_tests/shape_predictor_test.cpp | 8 +- .../unit/test_cases/debug_config_gpu_test.cpp | 2 +- 34 files changed, 333 insertions(+), 1092 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index a598d81165eea6..8c9252eb78e48c 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -65,16 +65,11 @@ } #define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ - const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const { \ - if (m_is_finalized) { \ + static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ + auto v = read_env(PropertyNamespace::PropertyVar.name(), m_allowed_env_prefix, &m_ ## PropertyVar); \ + if (v.empty()) \ return m_ ## PropertyVar.value; \ - } else { \ - if (m_user_properties.find(PropertyNamespace::PropertyVar.name()) != m_user_properties.end()) { \ - return m_user_properties.at(PropertyNamespace::PropertyVar.name()).as(); \ - } else { \ - return m_ ## PropertyVar.value; \ - } \ - } \ + return v.as(); \ } #define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) 
\ @@ -274,7 +269,7 @@ class OPENVINO_RUNTIME_API PluginConfig { ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; ov::AnyMap read_env() const; - ov::Any read_env(const std::string& s) const; + static ov::Any read_env(const std::string& option_name, const std::string& prefix, const ConfigOptionBase* option); void cleanup_unsupported(ov::AnyMap& config) const; std::map m_options_map; @@ -291,7 +286,7 @@ class OPENVINO_RUNTIME_API PluginConfig { bool m_is_finalized = false; - const char* m_allowed_env_prefix = "OV_"; + inline static const std::string m_allowed_env_prefix = "OV_"; }; } // namespace ov diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 01a6f6811c9829..cd4d13968e67fd 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -190,30 +190,38 @@ ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std return config; } +ov::Any PluginConfig::read_env(const std::string& option_name, const std::string& prefix, const ConfigOptionBase* option) { + auto var_name = prefix + option_name; + const auto& val = ov::util::getenv_string(var_name.c_str()); + + if (!val.empty()) { + if (dynamic_cast*>(option) != nullptr) { + const std::set off = {"0", "false", "off", "no"}; + const std::set on = {"1", "true", "on", "yes"}; + + const auto& val_lower = ov::util::to_lower(val); + if (off.count(val_lower)) { + return false; + } else if (on.count(val_lower)) { + return true; + } else { + OPENVINO_THROW("Unexpected value for boolean property: ", val); + } + } else { + return val; + } + } else { + return ov::Any(); + } +} + ov::AnyMap PluginConfig::read_env() const { ov::AnyMap config; for (auto& kv : m_options_map) { - auto var_name = m_allowed_env_prefix + kv.first; - const auto& val = ov::util::getenv_string(var_name.c_str()); - + auto val = read_env(kv.first, m_allowed_env_prefix, kv.second); if (!val.empty()) { - if (dynamic_cast*>(kv.second) != nullptr) { - const std::set off = {"0", "false", "off", "no"}; - const std::set on = {"1", "true", "on", "yes"}; - - const auto& val_lower = ov::util::to_lower(val); - if (off.count(val_lower)) { - config[kv.first] = false; - } else if (on.count(val_lower)) { - config[kv.first] = true; - } else { - OPENVINO_THROW("Unexpected value for boolean property: ", val); - } - } else { - config[kv.first] = val; - } - break; + config[kv.first] = val; } } diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 6b28c02930d530..99220ec3087221 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -138,7 +138,6 @@ class ProgramBuilder final { void add_primitive(const ov::Node& op, std::shared_ptr prim, std::vector aliases = {}); bool use_new_shape_infer() const { return m_config.get_allow_new_shape_infer(); } - bool is_inner_program() const { return m_is_inner_program; } bool is_query_mode() { return queryMode; } @@ -156,6 +155,8 @@ class ProgramBuilder final { std::shared_ptr m_topology; CustomLayerMap m_custom_layers; + bool allow_new_shape_infer = false; + bool queryMode; std::shared_ptr m_task_executor; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index 2ce1397c44bb68..489399c1255ce0 100644 
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp
index 6b28c02930d530..99220ec3087221 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp
@@ -138,7 +138,6 @@ class ProgramBuilder final {
     void add_primitive(const ov::Node& op, std::shared_ptr prim, std::vector aliases = {});

     bool use_new_shape_infer() const { return m_config.get_allow_new_shape_infer(); }
-    bool is_inner_program() const { return m_is_inner_program; }
     bool is_query_mode() { return queryMode; }
@@ -156,6 +155,8 @@
     std::shared_ptr m_topology;
     CustomLayerMap m_custom_layers;

+    bool allow_new_shape_infer = false;
+
     bool queryMode;

     std::shared_ptr m_task_executor;
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp
index 2ce1397c44bb68..489399c1255ce0 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include "intel_gpu/runtime/execution_config.hpp"

 namespace ov::intel_gpu {

 // Verbose log levels:
@@ -39,6 +40,7 @@ enum class LogLevel : int8_t {
     TRACE_DETAIL = 4
 };

+std::ostream& get_verbose_stream();
 }  // namespace ov::intel_gpu

 #ifdef GPU_DEBUG_CONFIG
@@ -47,23 +49,26 @@ enum class LogLevel : int8_t {
 #else
 #define SEPARATE '/'
 #endif
+
+#define GPU_PREFIX "GPU_Debug: "
+
 #define GPU_FILENAME (strrchr(__FILE__, SEPARATE) ? strrchr(__FILE__, SEPARATE) + 1 : __FILE__)
 #define GPU_DEBUG_IF(cond) if (cond)
 #define GPU_DEBUG_CODE(...) __VA_ARGS__
 #define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) \
-    cldnn::instrumentation::mem_usage_logger mem_logger{stage, cldnn::debug_configuration::get_instance()->verbose >= 2};
+    cldnn::instrumentation::mem_usage_logger mem_logger{stage, ov::intel_gpu::ExecutionConfig::get_verbose() >= 2};
 #define GPU_DEBUG_PROFILED_STAGE(stage) \
     auto stage_prof = cldnn::instrumentation::profiled_stage(\
-    !cldnn::debug_configuration::get_instance()->dump_profiling_data.empty(), *this, stage)
+    !get_config().get_dump_profiling_data_path().empty(), *this, stage)
 #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val) stage_prof.set_cache_hit(val)
 #define GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO(info) stage_prof.add_memalloc_info(info)
-#define GPU_DEBUG_LOG_RAW_INT(min_verbose_level) if (cldnn::debug_configuration::get_instance()->verbose >= min_verbose_level) \
-    ((cldnn::debug_configuration::get_instance()->verbose_color == 0) ? GPU_DEBUG_LOG_PREFIX : GPU_DEBUG_LOG_COLOR_PREFIX)
+#define GPU_DEBUG_LOG_RAW_INT(min_verbose_level) if (ov::intel_gpu::ExecutionConfig::get_verbose() >= min_verbose_level) \
+    (ov::intel_gpu::ExecutionConfig::get_verbose_color() ? GPU_DEBUG_LOG_COLOR_PREFIX : GPU_DEBUG_LOG_PREFIX)
 #define GPU_DEBUG_LOG_RAW(min_verbose_level) GPU_DEBUG_LOG_RAW_INT(static_cast::type>(min_verbose_level))
 #define GPU_DEBUG_LOG_PREFIX \
-    *cldnn::debug_configuration::verbose_stream << cldnn::debug_configuration::prefix << GPU_FILENAME << ":" <<__LINE__ << ":" << __func__ << ": "
-#define GPU_DEBUG_LOG_COLOR_PREFIX *cldnn::debug_configuration::verbose_stream << DARK_GRAY << cldnn::debug_configuration::prefix << \
+    ov::intel_gpu::get_verbose_stream() << GPU_PREFIX << GPU_FILENAME << ":" << __LINE__ << ":" << __func__ << ": "
+#define GPU_DEBUG_LOG_COLOR_PREFIX ov::intel_gpu::get_verbose_stream() << DARK_GRAY << GPU_PREFIX << \
     BLUE << GPU_FILENAME << ":" << PURPLE << __LINE__ << ":" << CYAN << __func__ << ": " << RESET
 #define DARK_GRAY "\033[1;30m"
 #define BLUE "\033[1;34m"
@@ -77,107 +82,11 @@ enum class LogLevel : int8_t {
 #define GPU_DEBUG_PROFILED_STAGE(stage)
 #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val)
 #define GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO(info)
-#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) *cldnn::debug_configuration::verbose_stream << cldnn::debug_configuration::prefix
+#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) ov::intel_gpu::get_verbose_stream() << GPU_PREFIX
 #endif

-// Macro below is inserted to avoid unused variable warning when GPU_DEBUG_CONFIG is OFF
-#define GPU_DEBUG_GET_INSTANCE(name) auto name = cldnn::debug_configuration::get_instance(); (void)(name);
-
 #define GPU_DEBUG_COUT GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::DISABLED)
 #define GPU_DEBUG_INFO GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::INFO)
 #define GPU_DEBUG_LOG GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::LOG)
 #define GPU_DEBUG_TRACE GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::TRACE)
 #define GPU_DEBUG_TRACE_DETAIL GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::TRACE_DETAIL)
-
-namespace cldnn {
-
-class debug_configuration {
-private:
-    debug_configuration();
-
-public:
-    static const char *prefix;
-    int help;                               // Print help messages
-    int verbose;                            // Verbose execution
-    int verbose_color;                      // Print verbose color
-    std::string verbose_file;               // Verbose log to file
-    int list_layers;                        // Print list layers
-    int print_multi_kernel_perf;            // Print execution time of each kernel in multi-kernel primitive
-    int print_input_data_shapes;            // Print the input data_shape for benchmark_app.
-    int disable_usm;                        // Disable usm usage
-    int disable_onednn;                     // Disable onednn for discrete GPU (no effect for integrated GPU)
-    int disable_onednn_opt_post_ops;        // Disable onednn optimize post operators
-    std::string dump_profiling_data;        // Enables dump of extended performance profiling to specified dir
-    int dump_profiling_data_per_iter;       // Enables dump of extended performance profiling to specified dir for each iteration
-    int host_time_profiling;                // Enables measurement of scheduling time spent on the host
-    std::string dump_graphs;                // Dump optimized graph
-    std::string dump_sources;               // Dump opencl sources
-    std::string dump_layers_path;           // Enable dumping intermediate buffers and set the dest path
-    std::vector dump_layers;                // Dump intermediate buffers of specified layers only
-    std::string dry_run_path;               // Dry run and serialize execution graph into the specified path
-    int dump_layers_dst_only;               // Dump only output of layers
-    int dump_layers_result;                 // Dump result layers
-    int dump_layers_input;                  // Dump input layers
-    int dump_layers_limit_batch;            // Limit the size of batch to dump
-    int dump_layers_raw;                    // Dump raw data.
-    int dump_layers_binary;                 // Dump binary data.
-    int dump_memory_pool;                   // Dump memory pool status at each iteration
-    std::set dump_memory_pool_iters;        // List of iteration's memory pool status
-    std::string dump_memory_pool_path;      // Enable dumping memory pool status to csv file and set the dest path
-    int base_batch_for_memory_estimation;   // Base batch size to be used in memory estimation
-    std::vector after_proc;                 // Start inference after the listed processes
-    int serialize_compile;                  // Serialize creating primitives and compiling kernels
-    std::vector forced_impl_types;          // Force implementation type either ocl or onednn
-    int max_kernels_per_batch;              // Maximum number of kernels in a batch during compiling kernels
-    int impls_cache_capacity;               // The maximum number of entries in the kernel impl cache
-    int enable_sdpa;                        // Allows to control SDPA decomposition
-    int disable_async_compilation;          // Disable async compilation
-    int disable_winograd_conv;              // Disable Winograd conv
-    int disable_dynamic_impl;               // Disable dynamic implementation
-    int disable_runtime_buffer_fusing;      // Disable runtime buffer fusing
-    int disable_memory_reuse;               // Disable memory reuse among layers
-    int disable_build_time_weight_reorder_for_dynamic_nodes;  // Disable build time weight reordering for dynamic nodes
-    int disable_runtime_skip_reorder;       // Disable runtime skip reorder
-    int disable_primitive_fusing;           // Disable primitive fusing
-    int disable_fake_alignment;             // Disable fake alignment
-    int use_usm_host;                       // Set explicit usm_host usage for network input and output
-    std::vector dynamic_quantize_layers_without_onednn;  // Specify Fully-connected layers which enable Dynamic quantization
-    int use_kv_cache_compression;           // Enable KV-cache compression
-    int dynamic_quantize_group_size;        // Enable Dynamic quantization for fully connected primitive by specified group size
-    int dynamic_quantize_asym;              // Use asymmetric dynamic quantization
-    int disable_horizontal_fc_fusion;       // Disable fc horizontal fusion
-    int disable_fc_swiglu_fusion;           // Disable swiglu fusion to fc
-    std::set dump_iteration;                // Dump n-th execution of network.
-    std::vector load_layers_raw_dump;       // List of layers to load dumped raw binary and filenames
-    static const debug_configuration *get_instance();
-    bool is_target_dump_prof_data_iteration(int64_t iteration) const;
-    std::vector get_filenames_for_matched_layer_loading_binaries(const std::string& id) const;
-    std::string get_name_for_dump(const std::string& file_name) const;
-    bool is_layer_for_dumping(const std::string& layerName, bool is_output = false, bool is_input = false) const;
-    bool is_target_iteration(int64_t iteration) const;
-    std::string get_matched_from_filelist(const std::vector& file_names, std::string pattern) const;
-    bool is_layer_name_matched(const std::string& layer_name, const std::string& pattern) const;
-
-    struct memory_preallocation_params {
-        bool is_initialized = false;
-
-        // Iterations mode preallocation
-        size_t next_iters_preallocation_count = 0;
-        size_t max_per_iter_size = 0;
-        size_t max_per_dim_diff = 0;
-
-        // Percentage mode preallocation
-        float buffers_preallocation_ratio = 0.0f;
-    } mem_preallocation_params;
-
-    struct dump_profiling_data_iter_params {
-        bool is_enabled = false;
-        int64_t start = 0;
-        int64_t end = 0;
-    } dump_prof_data_iter_params;
-
-    static std::ostream* verbose_stream;
-    static const int DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET = -2;
-};
-
-}  // namespace cldnn
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp
index 23b48ac4d9a920..f00cf86da5e50b 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp
@@ -4,6 +4,7 @@

 #pragma once

+#include "intel_gpu/runtime/shape_predictor.hpp"
 #include "openvino/runtime/properties.hpp"
 #include "openvino/runtime/intel_gpu/properties.hpp"

@@ -89,21 +90,24 @@ static constexpr Property use_onednn{"USE_ONEDNN"}

 static constexpr Property help{"HELP"};
 static constexpr Property verbose{"VERBOSE"};
+static constexpr Property verbose_color{"VERBOSE_COLOR"};
 static constexpr Property log_to_file{"GPU_LOG_TO_FILE"};
 static constexpr Property disable_usm{"GPU_DISABLE_USM"};
 static constexpr Property disable_onednn_post_ops_opt{"DISABLE_ONEDNN_POST_OPS_OPT"};
 static constexpr Property dump_graphs_path{"GPU_DUMP_GRAPHS_PATH"};
 static constexpr Property dump_profiling_data_path{"GPU_DUMP_PROFILING_DATA_PATH"};
+static constexpr Property dump_profiling_data_per_iter{"GPU_DUMP_PROFILING_DATA_PER_ITER"};
 static constexpr Property dump_sources_path{"GPU_DUMP_SOURCES_PATH"};
 static constexpr Property dump_tensors_path{"GPU_DUMP_TENSORS_PATH"};
 static constexpr Property dry_run_path{"GPU_DRY_RUN_PATH"};
 static constexpr Property dump_tensors{"DUMP_TENSORS"};
-static constexpr Property dump_layers{"GPU_DUMP_LAYERS"};
+static constexpr Property<std::vector<std::string>, ov::PropertyMutability::RW> dump_layer_names{"GPU_DUMP_LAYER_NAMES"};
 static constexpr Property dump_tensors_format{"DUMP_TENSORS_FORMAT"};
 static constexpr Property dump_memory_pool_path{"GPU_DUMP_MEMORY_POOL_PATH"};
+static constexpr Property dump_memory_pool{"GPU_DUMP_MEMORY_POOL"};
 static constexpr Property dump_batch_limit{"GPU_DUMP_BATCH_LIMIT"};
 static constexpr Property<std::set<int64_t>, ov::PropertyMutability::RW> dump_iterations{"GPU_DUMP_ITERATIONS"};
-static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"};
+static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"};
 static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"};
 static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"};
 static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"};
@@ -112,10 +116,12 @@ static constexpr Property disable_post_ops_fus
 static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"};
 static constexpr Property disable_fc_swiglu_fusion{"DISABLE_FC_SWIGLU_FUSION"};
 static constexpr Property disable_fake_alignment{"DISABLE_FAKE_ALIGNMENT"};
-static constexpr Property use_usm_host{"USE_USM_HOST"};
+static constexpr Property disable_runtime_skip_reorder{"DISABLE_RUNTIME_SKIP_REORDER"};
+static constexpr Property usm_policy{"USM_POLICY"};
 static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"};
-static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"};
+static constexpr Property shape_predictor_settings{"SHAPE_PREDICTOR_SETTINGS"};
 static constexpr Property<std::vector<std::string>, ov::PropertyMutability::RW> load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"};
+static constexpr Property<std::vector<std::string>, ov::PropertyMutability::RW> start_after_processes{"START_AFTER_PROCESSES"};

 }  // namespace ov::intel_gpu
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp
index 87e0b2990b7902..1d27eaf63efb86 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp
@@ -4,6 +4,7 @@

 #pragma once

+#include "intel_gpu/runtime/execution_config.hpp"
 #include "layout.hpp"
 #include "memory_caps.hpp"
 #include "utils.hpp"
@@ -110,8 +111,6 @@ struct padded_pool_comparer {
 // - Improve memory consumption

 class memory_pool {
-    memory_pool();
-
     memory_ptr alloc_memory(const layout& layout, allocation_type type, bool reset = true);
     static bool has_conflict(const memory_set&, const std::unordered_set&, uint32_t network_id);

@@ -119,9 +118,10 @@ class memory_pool {
     std::map, padded_pool_comparer> _padded_pool;
     std::multimap _no_reusable_pool;
     engine* _engine;
+    const ExecutionConfig& _config;

 public:
-    explicit memory_pool(engine& engine);
+    explicit memory_pool(engine& engine, const ExecutionConfig& config);
     ~memory_pool();
     memory_ptr get_memory(const layout& layout,
                           const primitive_id& id,
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
index 46bfc496ea8411..3b77804979f6f8 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
@@ -15,7 +15,6 @@ OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::
 OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy")
 OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application")
 OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not")
-
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property")
@@ -34,7 +33,7 @@ OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floati
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file")
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_lp_transformations, false, "Enable/Disable Low precision transformations set")
-
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, shape_predictor_settings, {10, 16 * 1024, 2, 1.1f}, "Preallocation settings")
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. May be in-order or out-of-order")
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, optimize_data, false, "Enable/Disable data flow optimizations for cldnn::program")
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_memory_pool, true, "Enable/Disable memory pool usage")
@@ -43,15 +42,19 @@ OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, custom_outputs, std::vector
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump")
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_profiling_data_per_iter, false, "Save profiling data w/o per-iteration aggregation")
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file")
+
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable optimization pass for onednn post-ops")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data_path, "", "Save csv file with per-stage and per-primitive profiling data to specified folder")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs_path, "", "Save intermediate graph representations during model compilation pipeline to specified folder")
@@ -59,19 +62,20 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources_path, "", "Save generated sou
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_path, "", "Save intermediate in/out tensors of each primitive to specified folder")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, ov::intel_gpu::DumpTensors::all, "Tensor types to dump. Supported values: all, inputs, outputs")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_format, ov::intel_gpu::DumpFormat::text, "Format of the tensors dump. Supported values: binary, text, text_raw")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_layers, "", "Activate dump for specified layers only")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_layer_names, std::vector<std::string>{}, "Activate dump for specified layers only")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool_path, "", "Save csv file with memory pool info to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, false, "Enable verbose output for memory pool")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, std::set<int64_t>{}, "Space separated list of iterations where other dump options should be enabled")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measure and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, 0, "Measure and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asynchronously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime inplace optimizations for operations like concat and crop")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "Disable fusions of operations as post-ops/fused-ops")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "Disable pass which merges QKV projections into single MatMul")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fc_swiglu_fusion, false, "Disable pass which merges FC and SwiGLU ops")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fake alignment feature which tries to keep gpu friendly memory alignment for arbitrary tensor shapes")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usage for all allocations")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "Disable memory reuse for activation tensors")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_skip_reorder, false, "Disable skip reorder optimization applied in runtime")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "Preallocation setting")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, std::vector<std::string>{}, "List of layers to load raw binary")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, start_after_processes, std::vector<std::string>{}, "Start inference after specified list of processes")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dry_run_path, "", "Enables mode which partially compiles a model and stores runtime model into specified directory")
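With the option table above in place, a debug knob can be driven either from the environment (the "OV_" prefix plus the property name, e.g. OV_GPU_DUMP_MEMORY_POOL=1 or OV_GPU_DUMP_ITERATIONS="0 1 2") or programmatically. A hedged sketch of the programmatic route, assuming ExecutionConfig exposes the inherited PluginConfig::set_property:

    #include <cstdint>
    #include <set>

    #include "intel_gpu/runtime/execution_config.hpp"
    #include "intel_gpu/runtime/internal_properties.hpp"

    // Illustrative only: enable memory pool dumps for the first three iterations.
    void enable_memory_pool_dumps(ov::intel_gpu::ExecutionConfig& config) {
        config.set_property(ov::intel_gpu::dump_memory_pool(true));
        config.set_property(ov::intel_gpu::dump_iterations(std::set<int64_t>{0, 1, 2}));
    }

ov::Property::operator() packs the value into an ov::AnyMap entry, so this should go through the same validation path as any user-supplied property.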
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp
index b3eca9a78fba89..6d5d91bb5b783a 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include "intel_gpu/runtime/execution_config.hpp"

 #if defined(_WIN32)
 #ifndef NOMINMAX
@@ -157,7 +158,7 @@ class profiled_stage {
     , _obj(obj)
     , _stage(stage) {
     GPU_DEBUG_IF(profiling_enabled) {
-        _per_iter_mode = cldnn::debug_configuration::get_instance()->dump_profiling_data_per_iter != 0;
+        _per_iter_mode = ov::intel_gpu::ExecutionConfig::get_dump_profiling_data_per_iter();
         _start = std::chrono::high_resolution_clock::now();
     }
 }
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp
index 469c676b4b0311..a246c6d1fdda8f 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp
@@ -14,23 +14,20 @@ class engine;

 struct ShapePredictor {
 public:
-    using Ptr = std::shared_ptr;
-    ShapePredictor(const engine* engine, float buffers_preallocation_ratio)
-        : _engine(engine)
-        , _buffers_preallocation_ratio(buffers_preallocation_ratio) {
-        static_assert(_max_deque_size >= 2, "[GPU] Deque is supposed to contain at least 2 elements for prediction");
-    }
+    struct Settings {
+        // Iterations mode preallocation
+        size_t next_iters_preallocation_count = 10;
+        size_t max_per_iter_size = 16 * 1024;
+        size_t max_per_dim_diff = 2;

-    ShapePredictor(const engine* engine,
-                   size_t next_iters_preallocation_count,
-                   size_t max_per_iter_size,
-                   size_t max_per_dim_diff,
-                   float buffers_preallocation_ratio)
+        // Percentage mode preallocation
+        float buffers_preallocation_ratio = 1.1f;
+    };
+
+    using Ptr = std::shared_ptr;
+    ShapePredictor(const engine* engine, const Settings& settings)
         : _engine(engine)
-        , _next_iters_preallocation_count(next_iters_preallocation_count)
-        , _max_per_iter_size(max_per_iter_size)
-        , _max_per_dim_diff(max_per_dim_diff)
-        , _buffers_preallocation_ratio(buffers_preallocation_ratio) {
+        , _settings(settings) {
         static_assert(_max_deque_size >= 2, "[GPU] Deque is supposed to contain at least 2 elements for prediction");
     }
@@ -73,13 +70,7 @@ struct ShapePredictor {
     std::map> _shapes_info;
     const engine* _engine;

-    // Iterations mode preallocation
-    const size_t _next_iters_preallocation_count = 10;
-    const size_t _max_per_iter_size = 16 * 1024;  // 16KB => maximum preallocation size is 16KB * 10iters = 160KB
-    const size_t _max_per_dim_diff = 2;
-
-    // Percentage mode preallocation
-    const float _buffers_preallocation_ratio = 1.0f;
+    const Settings _settings;
 };

 }  // namespace cldnn
diff --git a/src/plugins/intel_gpu/src/graph/debug_helper.cpp b/src/plugins/intel_gpu/src/graph/debug_helper.cpp
index b69d10e137010e..3cd7767d5fe21b 100644
--- a/src/plugins/intel_gpu/src/graph/debug_helper.cpp
+++ b/src/plugins/intel_gpu/src/graph/debug_helper.cpp
@@ -3,6 +3,9 @@
 //

 #include "debug_helper.hpp"
+#include
+#include "intel_gpu/runtime/execution_config.hpp"
+#include "intel_gpu/runtime/internal_properties.hpp"
 #include "openvino/util/file_util.hpp"

 #ifdef GPU_DEBUG_CONFIG
@@ -44,8 +47,7 @@ template
 void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) {
     auto&& size = mem->get_layout().get_tensor();

-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1);
+    auto batch_size = std::max(std::min(ExecutionConfig::get_dump_batch_limit(), size.batch[0]), 1);
     tensor tmp_size(size);
     tmp_size.batch[0] = batch_size;
     if (tmp_size == size) {
@@ -121,8 +123,7 @@ void unpack(cldnn::data_types type, uint8_t input, int8_t &v0, int8_t &v1) {
 void dump_i4u4(cldnn::data_types type, memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) {
     auto&& size = mem->get_layout().get_tensor();

-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1);
+    auto batch_size = std::max(std::min(ExecutionConfig::get_dump_batch_limit(), size.batch[0]), 1);
     tensor tmp_size(size);
     tmp_size.batch[0] = batch_size;
     if (tmp_size == size) {
@@ -160,11 +161,16 @@ void dump_i4u4(cldnn::data_types type, memory::ptr mem, stream& stream, std::ofs
     file_stream << buffer.str();
 }

-void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std::string layerName, bool dump_raw) {
-    std::cout << "Dump " << (dump_raw ? "raw " : "") << layerName << std::endl;
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    std::string filename = debug_config->get_name_for_dump(layerName);
-    filename = debug_config->dump_layers_path + filename + ".txt";
+std::string get_name_for_dump(const std::string& file_name) {
+    std::string filename = file_name;
+    std::replace(filename.begin(), filename.end(), '\\', '_');
+    std::replace(filename.begin(), filename.end(), '/', '_');
+    std::replace(filename.begin(), filename.end(), ' ', '_');
+    std::replace(filename.begin(), filename.end(), ':', '_');
+    return filename;
+}
+
+void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std::string filename, bool dump_raw) {
     std::ofstream file_stream(filename);
     if (!mem) {
         file_stream << "Empty" << std::endl;
@@ -195,9 +201,7 @@ void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std
         std::cout << "Dump for this data type is not supported: " << dt_to_str(mem_dt) << std::endl;
 }

-}  // namespace
-
-static std::string get_file_path_for_binary_dump(cldnn::layout layout, std::string name) {
+std::string get_file_path_for_binary_dump(cldnn::layout layout, const std::string& name, const std::string& dump_layers_path) {
     std::string filename;
     std::string data_type = ov::element::Type(layout.data_type).get_type_name();
     std::string format = layout.format.to_string();
@@ -207,29 +211,128 @@ static std::string get_file_path_for_binary_dump(cldnn::layout layout, std::stri
         tensor += ("_" + to_string(dims[r]));
     }

-#ifdef GPU_DEBUG_CONFIG
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    std::string layer_name = debug_config->get_name_for_dump(name);
-    filename = debug_config->dump_layers_path + layer_name
-                + "__" + data_type + "_" + tensor + "__" + format + ".bin";
-#endif
+    std::string layer_name = get_name_for_dump(name);
+    filename = dump_layers_path + layer_name + "__" + data_type + "_" + tensor + "__" + format + ".bin";
     return filename;
 }

+bool is_target_iteration(int64_t iteration, const std::set<int64_t> dump_iteration) {
+    if (iteration < 0)
+        return true;
+
+    if (dump_iteration.empty())
+        return true;
+
+    if (dump_iteration.find(iteration) == std::end(dump_iteration))
+        return false;
+
+    return true;
+}
+
+std::string get_matched_from_filelist(const std::vector<std::string>& file_names, std::string pattern) {
+    for (const auto& file : file_names) {
+        auto found = file.find(pattern);
+        if (found != std::string::npos) {
+            return file;
+        }
+    }
+
+    return std::string();
+}
+
+bool is_layer_name_matched(const std::string& layer_name, const std::string& pattern) {
+    auto upper_layer_name = std::string(layer_name.length(), '\0');
+    std::transform(layer_name.begin(), layer_name.end(), upper_layer_name.begin(), ::toupper);
+    auto upper_pattern = std::string(pattern.length(), '\0');
+    std::transform(pattern.begin(), pattern.end(), upper_pattern.begin(), ::toupper);
+
+    // Check pattern from exec_graph
+    size_t pos = upper_layer_name.find(':');
+    auto upper_exec_graph_name = upper_layer_name.substr(pos + 1, upper_layer_name.size());
+    if (upper_exec_graph_name.compare(upper_pattern) == 0) {
+        return true;
+    }
+
+    // Check pattern with regular expression
+    std::regex re(upper_pattern);
+    return std::regex_match(upper_layer_name, re);
+}
+
+bool is_layer_for_dumping(const ExecutionConfig& config, const std::string& layer_name, bool is_output = false, bool is_input = false) {
+    bool dump_out = config.get_dump_tensors() == ov::intel_gpu::DumpTensors::all || config.get_dump_tensors() == ov::intel_gpu::DumpTensors::out;
+    bool dump_in = config.get_dump_tensors() == ov::intel_gpu::DumpTensors::all || config.get_dump_tensors() == ov::intel_gpu::DumpTensors::in;
+
+    // Dump result layer
+    if (is_output == true && dump_out && (layer_name.find("constant:") == std::string::npos))
+        return true;
+
+    // Dump all layers
+    if (config.get_dump_layer_names().empty() && !dump_out && !dump_in)
+        return true;
+
+    // Dump input layers
+    size_t pos = layer_name.find(':');
+    auto type = layer_name.substr(0, pos);
+    if (is_input == true && type == "parameter" && dump_in)
+        return true;
+
+    auto dump_layers = config.get_dump_layer_names();
+
+    auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){
+        return is_layer_name_matched(layer_name, dl);
+    });
+    return (iter != dump_layers.end());
+}
+
+std::vector<std::string> get_filenames_for_matched_layer_loading_binaries(const ExecutionConfig& config, const std::string& id) {
+    std::vector<std::string> file_names;
+    if (config.get_load_dump_raw_binary().empty())
+        return file_names;
+
+    for (const auto& load_layer : config.get_load_dump_raw_binary()) {
+        size_t file = load_layer.rfind(":");
+        if (file != std::string::npos) {
+            if (id == load_layer.substr(0, file)) {
+                auto file_name_str = load_layer.substr(file + 1);
+                size_t head = 0;
+                size_t found = 0;
+                do {
+                    found = file_name_str.find(",", head);
+                    if (found != std::string::npos)
+                        file_names.push_back(file_name_str.substr(head, (found - head)));
+                    else
+                        file_names.push_back(file_name_str.substr(head));
+
+                    head = found+1;
+                    GPU_DEBUG_LOG << " Layer name loading raw dump : " << load_layer.substr(0, file) << " / the dump file : "
+                                  << file_names.back() << std::endl;
+                } while (found != std::string::npos);
+
+                return file_names;
+            }
+        }
+    }
+
+    return file_names;
+}
+
+
+}  // namespace
+
 NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst)
     : m_inst(inst)
     , m_stream(inst.get_network().get_stream())
     , m_network(inst.get_network())
     , m_program(inst.get_network().get_program().get())
     , m_iter(m_network.iteration) {
+    const auto& config = m_network.get_config();
     // Load binary dump for input layers
-    if (!debug_config->load_layers_raw_dump.empty()) {
+    if (!config.get_load_dump_raw_binary().empty()) {
         const std::string layer_name = m_inst.id();
-        auto files = debug_config->get_filenames_for_matched_layer_loading_binaries(layer_name);
+        auto files = get_filenames_for_matched_layer_loading_binaries(config, layer_name);
         if (!files.empty()) {
             if (m_inst.is_input()) {
                 // Loading binary dumps for output tensors of input-layers : only one output exists or index(dstN) exists
-                auto dump_file = debug_config->get_matched_from_filelist(files, "_dst0__");
+                auto dump_file = get_matched_from_filelist(files, "_dst0__");
                 OPENVINO_ASSERT((files.size() == 1 || dump_file.length() != 0), "Unexpected binary dump for input layer");

                 OPENVINO_ASSERT(files.size() == m_inst.outputs_memory_count(), "Mis-match dump file count");
@@ -238,7 +341,7 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst)
                     auto dump_file = files[0];
                     if (files.size() > 1 || m_inst.outputs_memory_count() != 1) {
                         std::string pattern = "_dst" + std::to_string(i) + "__";
-                        dump_file = debug_config->get_matched_from_filelist(files, pattern);
+                        dump_file = get_matched_from_filelist(files, pattern);
                     }
                     OPENVINO_ASSERT((dump_file.length() > 0), "Could not find expected pattern '_dst[N]__' for binary dump");
                     GPU_DEBUG_COUT << " Load binary dump : " << dump_file << " for " << layer_name << std::endl;
@@ -253,18 +356,18 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst)
                     output_mem->copy_from(m_stream, static_cast(&bin[0]), true);
                 }
             } else {
-                auto check_dst = debug_config->get_matched_from_filelist(files, "_dst0__");
+                auto check_dst = get_matched_from_filelist(files, "_dst0__");
                 OPENVINO_ASSERT(check_dst.length() == 0, "Expected to load binaries for inputs of " + layer_name);

                 // Loading input tensors for any layer
-                auto dump_file = debug_config->get_matched_from_filelist(files, "_src0__");
+                auto dump_file = get_matched_from_filelist(files, "_src0__");
                 OPENVINO_ASSERT(dump_file.length() != 0, "Could not find expected pattern '_src[N]__' for binary dump input : " + layer_name);

                 for (size_t i = 0; i < m_inst.dependencies().size(); i++) {
                     auto dump_file = files[0];
                     if (files.size() > 1 || m_inst.dependencies().size() != 1) {
                         std::string pattern = "_src" + std::to_string(i) + "__";
-                        dump_file = debug_config->get_matched_from_filelist(files, pattern);
+                        dump_file = get_matched_from_filelist(files, pattern);
                     }
                     if (dump_file.length() == 0) {
                         GPU_DEBUG_COUT << " Skip loading for input(" << i << ") of " << layer_name << std::endl;
@@ -290,11 +393,11 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst)
     }

     // Dump input buffers of 'inst'
-    if (debug_config->dump_layers_path.length() > 0) {
-        const std::string layer_name = inst.id();
+    if (config.get_dump_tensors_path().length() > 0) {
+        const std::string& layer_name = inst.id();

-        if (debug_config->is_target_iteration(m_iter) &&
-            debug_config->dump_layers_dst_only == 0 && debug_config->is_layer_for_dumping(layer_name)) {
+        if (is_target_iteration(m_iter, config.get_dump_iterations()) &&
+            config.get_dump_tensors() != ov::intel_gpu::DumpTensors::out && is_layer_for_dumping(config, layer_name)) {
             std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + layer_name + ":";
             for (size_t i = 0; i < m_inst.dependencies().size(); i++) {
                 std::string name = get_file_prefix() + "_src" + std::to_string(i);
@@ -306,24 +409,27 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst)
                 auto dep = m_inst.dependencies().at(i);
                 auto input_layout = dep.first->get_output_layout(dep.second);

-                GPU_DEBUG_IF(debug_config->dump_layers_binary) {
+                if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary) {
                     // Binary dump : raw
-                    auto filename = get_file_path_for_binary_dump(input_layout, name);
+                    auto filename = get_file_path_for_binary_dump(input_layout, name, config.get_dump_tensors_path());

                     mem_lock lock(input_mem, m_stream);
                     ov::util::save_binary(filename, lock.data(), input_mem->size());
-                    GPU_DEBUG_COUT << " Dump layer src : " << layer_name << " to " << filename << std::endl;
+                    GPU_DEBUG_COUT << " Dump layer src : " << layer_name << " to " << filename << std::endl;
                     debug_str_for_bin_load += (filename + ",");
                 } else {
+                    const bool dump_raw = config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::text_raw;
+                    GPU_DEBUG_COUT << " Dump " << (dump_raw ? "raw " : "") << name << std::endl;
+                    auto filename = config.get_dump_tensors_path() + get_name_for_dump(name) + ".txt";
                     log_memory_to_file(input_mem,
                                        input_layout,
                                        m_stream,
-                                       name,
-                                       debug_config->dump_layers_raw);
+                                       filename,
+                                       dump_raw);
                 }
             }

-            if (debug_config->dump_layers_binary && !inst.is_input()) {
+            if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary && !inst.is_input()) {
                 debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"';
                 GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl;
             }
         }
     }
 }

@@ -333,13 +439,14 @@
 NodeDebugHelper::~NodeDebugHelper() {
+    const auto& config = m_network.get_config();
     // Dump output buffers of 'inst'
-    if (debug_config->dump_layers_path.length() > 0) {
+    if (config.get_dump_tensors_path().length() > 0) {
         m_stream.finish();
         const std::string layer_name = m_inst.id();

-        GPU_DEBUG_IF(debug_config->is_target_iteration(m_iter) &&
-                     debug_config->is_layer_for_dumping(layer_name, m_inst.is_output(), m_inst.is_input())) {
+        if (is_target_iteration(m_iter, config.get_dump_iterations()) &&
+            is_layer_for_dumping(config, layer_name, m_inst.is_output(), m_inst.is_input())) {
             std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + layer_name + ":";

             for (size_t i = 0; i < m_inst.outputs_memory_count(); i++) {
@@ -350,22 +457,29 @@ NodeDebugHelper::~NodeDebugHelper() {
                     continue;
                 }

-                GPU_DEBUG_IF(debug_config->dump_layers_binary) {
+                if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary) {
                     // Binary dump : raw
                     auto output_layout = m_inst.get_output_layout(i);
-                    auto filename = get_file_path_for_binary_dump(output_layout, name);
+                    auto filename = get_file_path_for_binary_dump(output_layout, name, config.get_dump_tensors_path());

                     mem_lock lock(output_mem, m_stream);
                     ov::util::save_binary(filename, lock.data(), output_mem->size());
                     GPU_DEBUG_COUT << " Dump layer dst : " << layer_name << " to " << filename << std::endl;
                     debug_str_for_bin_load += (filename + ",");
                 } else {
+                    const bool dump_raw = config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::text_raw;
+                    GPU_DEBUG_COUT << " Dump " << (dump_raw ? "raw " : "") << name << std::endl;
+                    auto filename = config.get_dump_tensors_path() + get_name_for_dump(name) + ".txt";
                     // Text dump
-                    log_memory_to_file(output_mem, m_inst.get_output_layout(i), m_stream, name, debug_config->dump_layers_raw);
+                    log_memory_to_file(output_mem,
+                                       m_inst.get_output_layout(i),
+                                       m_stream,
+                                       filename,
+                                       dump_raw);
                 }
             }

-            GPU_DEBUG_IF(debug_config->dump_layers_binary && m_inst.is_input()) {
+            if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary && m_inst.is_input()) {
                 debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"';
                 GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl;
             }
@@ -377,13 +491,14 @@ NetworkDebugHelper::NetworkDebugHelper(const network& net)
     : m_network(net)
     , m_iter(net.iteration) {
     auto net_id = m_network.get_id();
-    GPU_DEBUG_IF(debug_config->dump_memory_pool > 0) {
-        auto& iters = debug_config->dump_memory_pool_iters;
+    const auto& config = m_network.get_config();
+    if (config.get_dump_memory_pool()) {
+        auto& iters = config.get_dump_iterations();
         if (iters.empty() || iters.find(m_iter) != iters.end()) {
             GPU_DEBUG_COUT << "============================================================================" << std::endl;
             GPU_DEBUG_COUT << "Start network execution (net_id : " << net_id << ", iter :" << m_iter << ")" << std::endl;
             if (m_iter == 0 && net_id > 0) {
-                dump_memory_pool(debug_config->dump_memory_pool_path, m_iter);
+                dump_memory_pool(config.get_dump_memory_pool_path(), m_iter);
                 GPU_DEBUG_COUT << "============================================================================" << std::endl;
             }
         }
@@ -391,38 +506,14 @@ NetworkDebugHelper::NetworkDebugHelper(const network& net)
         GPU_DEBUG_TRACE << "============================================================================" << std::endl;
         GPU_DEBUG_TRACE << "Start network execution (net_id : " << net_id << ", iter :" << m_iter << ")" << std::endl;
     }
-
-    if (debug_config->list_layers == 1) {
-        for (auto& inst : m_network._exec_order) {
-            GPU_DEBUG_COUT << inst->id() << std::endl;
-            if (inst->get_node().is_type()) {
-                auto& loop_node = inst->get_node().as();
-                for (auto& prim : loop_node.get_body_program()->get_processing_order()) {
-                    GPU_DEBUG_COUT << "\t" << prim->id() << std::endl;
-                }
-            } else if (inst->get_node().is_type()) {
-                auto& cond_node = inst->get_node().as();
-                GPU_DEBUG_COUT << "* Branch_True" << std::endl;
-                for (auto& prim : cond_node.get_branch_true().inner_program->get_processing_order()) {
-                    GPU_DEBUG_COUT << "\t" << prim->id() << std::endl;
-                }
-                GPU_DEBUG_COUT << "* Branch_False" << std::endl;
-                for (auto& prim : cond_node.get_branch_false().inner_program->get_processing_order()) {
-                    GPU_DEBUG_COUT << "\t" << prim->id() << std::endl;
-                }
-            }
-        }
-
-        if (!m_network.is_internal())
-            exit(0);
-    }
 }

 NetworkDebugHelper::~NetworkDebugHelper() {
     auto prog = m_network.get_program().get();
     auto net_id = m_network.get_id();
+    const auto& config = prog->get_config();
     // print '-data_shape' option for benchmark_app
-    if (debug_config->print_input_data_shapes == 1) {
+    if (config.get_verbose() >= 4) {
         std::stringstream data_shape_str;
         auto add_string = [&data_shape_str](std::string str) {
             data_shape_str << ((data_shape_str.rdbuf()->in_avail() == 0) ? " -data_shape " : ",") << str;
@@ -443,7 +534,7 @@ NetworkDebugHelper::~NetworkDebugHelper() {
                        << data_shape_str.str() << std::endl;
     }

-    if (!debug_config->dump_graphs.empty() && debug_config->is_target_iteration(m_iter)) {
+    if (!config.get_dump_graphs_path().empty() && is_target_iteration(m_iter, config.get_dump_iterations())) {
         auto get_fixed_str = [](int value, int length = 2) -> std::string {
             std::ostringstream ss;
             ss << std::setw(length) << std::setfill('0') << std::to_string(value);
@@ -459,10 +550,10 @@ NetworkDebugHelper::~NetworkDebugHelper() {
         }
     }

-    if (debug_config->dump_memory_pool > 0) {
-        auto& iters = debug_config->dump_memory_pool_iters;
+    if (config.get_dump_memory_pool()) {
+        auto& iters = config.get_dump_iterations();
         if (iters.empty() || iters.find(m_iter) != iters.end()) {
-            dump_memory_pool(debug_config->dump_memory_pool_path, m_iter);
+            dump_memory_pool(config.get_dump_memory_pool_path(), m_iter);
             GPU_DEBUG_COUT << "============================================================================" << std::endl;
         }
     }
diff --git a/src/plugins/intel_gpu/src/graph/debug_helper.hpp b/src/plugins/intel_gpu/src/graph/debug_helper.hpp
index 61572433cea494..e2137733cf73fc 100644
--- a/src/plugins/intel_gpu/src/graph/debug_helper.hpp
+++ b/src/plugins/intel_gpu/src/graph/debug_helper.hpp
@@ -39,8 +39,6 @@ class NodeDebugHelper {
     const network& m_network;
     const program* m_program;
     const size_t m_iter;
-
-    const debug_configuration* debug_config = cldnn ::debug_configuration ::get_instance();
 };

 class NetworkDebugHelper {
@@ -52,8 +50,6 @@ class NetworkDebugHelper {
     void dump_memory_pool(std::string dump_path, int64_t curr_iter) const;
     const network& m_network;
     const size_t m_iter;
-
-    const debug_configuration* debug_config = cldnn ::debug_configuration ::get_instance();
 };

 #define NETWORK_DEBUG(net) NetworkDebugHelper __network_debug_helper(net)
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp
index 4d21869dfa3953..2786a9e8e85b99 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp
@@ -43,10 +43,6 @@ void post_optimize_weights::optimize_weights(T& node, program& p) {
         return;

     if (impl->is_dynamic()) {
-        GPU_DEBUG_GET_INSTANCE(debug_config);
-        GPU_DEBUG_IF(debug_config->disable_build_time_weight_reorder_for_dynamic_nodes) {
-            return;
-        }
         // TODO: To relax current limitation w.r.t the future optimization of weight reorder process
         // In dynamic shape, selected weight format can change in runtime. However reordering blocked format to blocked format is not fully verified yet.
         // So we need to enable other primitives such as convolution with verifying reorder b/w the possible layouts
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
index 269a88052e7bb0..088afd84f5ff6a 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
@@ -679,8 +679,6 @@ void insert_reorders(program& p, const std::map& fm
 }  // namespace

 void reorder_inputs::run(program& p, reorder_factory& rf) {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-
     auto& lo = p.get_layout_optimizer();
     auto fmt_map = get_preferred_formats(p, lo);
diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp
index 6f8502423047b6..0f8d271bc3a6cd 100644
--- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp
@@ -157,7 +157,6 @@ struct convolution_onednn : typed_primitive_onednn_impl {
             dnnl::memory::desc desc = onednn::layout_to_memory_desc(a_zp->get_layout(), dnnl::memory::format_tag::a, true);
             args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC, a_zp->get_onednn_memory(desc)});

-            GPU_DEBUG_GET_INSTANCE(debug_config);
             GPU_DEBUG_TRACE_DETAIL << instance.id() << " activations_zero_points: "
                                    << " " << a_zp->get_layout().to_short_string() << std::endl;
         }
@@ -167,7 +166,6 @@ struct convolution_onednn : typed_primitive_onednn_impl {
             dnnl::memory::desc desc = onednn::layout_to_memory_desc(w_zp->get_layout(), dnnl::memory::format_tag::a, true);
             args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS, w_zp->get_onednn_memory(desc)});

-            GPU_DEBUG_GET_INSTANCE(debug_config);
             GPU_DEBUG_TRACE_DETAIL << instance.id() << " weights_zero_points: "
                                    << " " << w_zp->get_layout().to_short_string() << std::endl;
         }
diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
index 78e669f0e1152f..dfd65cd9b58067 100644
--- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
+++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
@@ -1129,74 +1129,12 @@ bool layout_optimizer::is_primitive_implemented_for_onednn(program_node& node) {
     return false;
 }

-impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node) {
-#ifdef GPU_DEBUG_CONFIG
-
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(!debug_config->forced_impl_types.empty()) {
-        // Forcing impl type of one primitive
-        for (const auto& forced_impl_type : debug_config->forced_impl_types) {
-            if (node.is_type()) {
-                if (forced_impl_type == "fc:ocl")
-                    return impl_types::ocl;
-                else if (forced_impl_type == "fc:onednn")
-                    return impl_types::onednn;
-            } else if (node.is_type()) {
-                if (forced_impl_type == "gemm:ocl")
-                    return impl_types::ocl;
-                else if (forced_impl_type == "gemm:onednn")
-                    return impl_types::onednn;
-            } else if (node.is_type()) {
-                if (forced_impl_type == "do:cpu")
-                    return impl_types::cpu;
-                else if (forced_impl_type == "do:ocl")
-                    return impl_types::ocl;
-            } else if (node.is_type()) {
-                if (forced_impl_type == "reduce:ocl")
-                    return impl_types::ocl;
-                else if (forced_impl_type == "reduce:onednn")
-                    return impl_types::onednn;
-            } else if (node.is_type()) {
-                if (forced_impl_type == "concat:ocl")
-                    return impl_types::ocl;
-                else if (forced_impl_type == "concat:onednn")
-                    return impl_types::onednn;
-            }
-
-            // Forcing one layer
-            size_t found_type = forced_impl_type.rfind(":");
-            if (found_type != std::string::npos) {
-                impl_types preferred_type = impl_types::any;
-                auto impl_type = forced_impl_type.substr(found_type + 1);
-                if (impl_type == "ocl")
-                    preferred_type = impl_types::ocl;
-                else if (impl_type == "onednn")
-                    preferred_type = impl_types::onednn;
-                else if (impl_type == "cpu")
-                    preferred_type = impl_types::cpu;
-
-                if (node.id() == forced_impl_type.substr(0, found_type)) {
-                    GPU_DEBUG_LOG << " Forced implementation type : " << forced_impl_type.substr(0, found_type) << " : "
-                                  << forced_impl_type.substr(found_type + 1) << std::endl;
-                    return preferred_type;
-                }
-            }
-        }
-    }
-#endif
-
-    return impl_types::any;
-}
-
 impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format preferred_format) {
     if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
         auto forced_impl = _forcing_map.at(node.id()).second;
         if (forced_impl != impl_types::any)
             return forced_impl;
     }
-    auto forced_impl = get_forced_impl_type_by_config(node);
-    if (forced_impl != impl_types::any)
-        return forced_impl;

     auto shape_type = shape_types::any;
diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp
index d6642ac27e56c3..66d890b51e4692 100644
--- a/src/plugins/intel_gpu/src/graph/network.cpp
+++ b/src/plugins/intel_gpu/src/graph/network.cpp
@@ -60,7 +60,7 @@ namespace cldnn {
 namespace {

 #ifdef GPU_DEBUG_CONFIG
-void dump_perf_data_raw(std::string dump_path, const std::list>& exec_order) {
+void dump_perf_data_raw(std::string dump_path, bool per_iter_mode, const std::list>& exec_order) {
     auto layouts_to_str = [](const std::vector& layouts) -> std::string {
         std::stringstream s;
         for (size_t i = 0; i < layouts.size(); i++) {
@@ -71,7 +71,6 @@ void dump_perf_data_raw(std::string dump_path, const std::list
         return s.str();
     };

-    const bool per_iter_mode = cldnn::debug_configuration::get_instance()->dump_profiling_data_per_iter != 0;
     const std::string perf_raw_csv_header = per_iter_mode ? "prim_id,prim_type,stage,net_in_shapes,in_shapes,out_shapes,impl,iter,time_usec\n"
                                                           : "prim_id,prim_type,stage,net_in_shapes,in_shapes,out_shapes,impl,iters,time_usec\n";

     std::ofstream of(dump_path);
@@ -139,13 +138,12 @@ void dump_perf_data_raw(std::string dump_path, const std::list
-void wait_for_the_turn() {
+void wait_for_the_turn(const std::vector& pids) {
     bool need_to_wait;
     do {
         need_to_wait = false;
         struct stat buffer;
-        for (auto pid : debug_config->after_proc) {
+        for (auto pid : pids) {
             auto path = "/proc/" + pid;
             std::cout << "check " + path << std::endl;
             if (stat(path.c_str(), &buffer) == 0) {
@@ -158,8 +156,8 @@ void wait_for_the_turn() {
 }

 #else
-void dump_perf_data_raw(std::string, const std::list>&) {}
-void wait_for_the_turn() {}
+void dump_perf_data_raw(std::string, bool per_iter_mode, const std::list>&) {}
+void wait_for_the_turn(const std::vector& pids) {}
 #endif
 }  // namespace
@@ -177,30 +175,19 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo
     , _config(program->get_config())
     , _engine(program->get_engine())
     , _stream(stream)
-    , _memory_pool(new memory_pool(program->get_engine()))
+    , _memory_pool(new memory_pool(program->get_engine(), program->get_config()))
     , _internal(is_internal)
     , _is_primary_stream(is_primary_stream)
     , _enable_profiling(program->get_config().get_enable_profiling())
     , _reset_arguments(true)
-    , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_buffers_preallocation_ratio())) {
+    , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_shape_predictor_settings())) {
     if (!_internal) {
         net_id = get_unique_net_id();
     }

-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(debug_config->after_proc.size() != 0) {
-        wait_for_the_turn();
+    GPU_DEBUG_IF(get_config().get_start_after_processes().size() != 0) {
+        wait_for_the_turn(get_config().get_start_after_processes());
     }
-
-    GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) {
-        auto& mem_preallocation_params = debug_config->mem_preallocation_params;
-        _shape_predictor.reset(new ShapePredictor(&program->get_engine(),
-                                                  mem_preallocation_params.next_iters_preallocation_count,
-                                                  mem_preallocation_params.max_per_iter_size,
-                                                  mem_preallocation_params.max_per_dim_diff,
-                                                  mem_preallocation_params.buffers_preallocation_ratio));
-    }
-
     calculate_weights_cache_capacity();
     allocate_primitives();
     configure_primitives_second_output();
@@ -239,7 +226,7 @@ network::~network() {
     _program->cancel_compilation_context();
     _memory_pool->clear_pool_for_network(net_id);
     GPU_DEBUG_IF(!_config.get_dump_profiling_data_path().empty()) {
-        dump_perf_data_raw(_config.get_dump_profiling_data_path() + "/perf_raw" + std::to_string(net_id) + ".csv", _exec_order);
+        dump_perf_data_raw(_config.get_dump_profiling_data_path() + "/perf_raw" + std::to_string(net_id) + ".csv", false, _exec_order);
     }
 }
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index 22f6ef4571b3c7..9bb8f96fd25c65 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -557,7 +557,6 @@ void primitive_inst::clear_output_memory() {
 void primitive_inst::realloc_if_needed(bool prev_execution_skipped) {
     OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("realloc_if_needed: " + id()));
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::memory_allocation);

     const auto& users = get_user_insts();
@@ -836,11 +835,6 @@ void primitive_inst::realloc_if_needed(bool prev_execution_skipped) {
     }

     int32_t tmp_prealloc_count = get_prealloc_iter_num();
-    GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) {
-        // If debug config is set, respect the config most
-        tmp_prealloc_count = -1;
-    }
-
     // If we allocated too large memory, reclaim the memory.
     for (size_t i = 0; i < updated_layouts.size(); ++i) {
         bool reclaim = 0;
@@ -1274,8 +1268,7 @@ void primitive_inst::update_paddings() {

 void primitive_inst::do_runtime_skip_reorder() {
     OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_skip_reorder: " + id()));
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(debug_config->disable_runtime_skip_reorder) {
+    GPU_DEBUG_IF(get_config().get_disable_runtime_skip_reorder()) {
         return;
     }
     if (can_be_optimized())
@@ -2748,42 +2741,31 @@ bool primitive_inst::is_valid_fusion() const {
 }

 void primitive_inst::add_profiling_data(instrumentation::pipeline_stage stage, bool cache_hit, std::string memalloc_info, int64_t time, bool per_iter_mode) {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-#ifdef GPU_DEBUG_CONFIG
-    int64_t curr_iter = -1;
-    GPU_DEBUG_IF(debug_config->dump_prof_data_iter_params.is_enabled) {
-        curr_iter = get_network().get_current_iteration_num();
-    }
-    GPU_DEBUG_IF(curr_iter < 0 || debug_config->is_target_dump_prof_data_iteration(curr_iter)) {
-#else
-    {
-#endif
-        instrumentation::perf_counter_key key {
-            _network.get_input_layouts(),
-            _impl_params->input_layouts,
-            _impl_params->output_layouts,
-            get_implementation_name(),
-            stage,
+    instrumentation::perf_counter_key key {
+        _network.get_input_layouts(),
+        _impl_params->input_layouts,
+        _impl_params->output_layouts,
+        get_implementation_name(),
+        stage,
 #ifdef GPU_DEBUG_CONFIG
-            per_iter_mode ? get_network().get_current_iteration_num() : 0,
+        per_iter_mode ? get_network().get_current_iteration_num() : 0,
 #else
-            0,
+        0,
 #endif
-            cache_hit,
-            memalloc_info
-        };
-
-        auto hash = instrumentation::perf_counter_hash()(key);
-        auto& d = _profiling_data[hash];
-        if (_profiling_info.find(hash) == _profiling_info.end()) {
-            _profiling_info.emplace(hash, key);
-        }
+        cache_hit,
+        memalloc_info
+    };

-        auto& total_time = std::get<0>(d);
-        auto& total_iter = std::get<1>(d);
-        total_time += time;
-        total_iter++;
+    auto hash = instrumentation::perf_counter_hash()(key);
+    auto& d = _profiling_data[hash];
+    if (_profiling_info.find(hash) == _profiling_info.end()) {
+        _profiling_info.emplace(hash, key);
     }
+
+    auto& total_time = std::get<0>(d);
+    auto& total_iter = std::get<1>(d);
+    total_time += time;
+    total_iter++;
 }

 std::string primitive_inst::get_implementation_name() const {
diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
index dbadc079c0da82..ad384306b9e081 100644
--- a/src/plugins/intel_gpu/src/graph/program.cpp
+++ b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -221,7 +221,6 @@ program::~program() {
 }

 void program::init_program() {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     set_options();

     pm = std::unique_ptr(new pass_manager(*this));
@@ -606,7 +605,6 @@ void program::post_optimize_graph(bool is_internal) {
     auto partial_build = _config.get_partial_build_program();

 #ifdef GPU_DEBUG_CONFIG
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     if (!is_internal && (!partial_build || !_config.get_dry_run_path().empty())) {
 #else
     if (!is_internal && !partial_build) {
@@ -1650,7 +1648,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {

 std::pair program::get_estimated_device_mem_usage() {
     auto max_alloc_size = get_engine().get_device_info().max_alloc_mem_size;
-    memory_pool pool(get_engine());
+    memory_pool pool(get_engine(), get_config());
     int64_t const_sum = 0;

 #ifdef __unix__
diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp
index 61daf949e762f0..0e735683db2533 100644
--- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp
+++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp
@@ -139,7 +139,6 @@ void close_stream(std::ofstream& graph) { graph.close(); }
 std::string get_node_id(const program_node* ptr) { return "node_" + std::to_string(reinterpret_cast(ptr)); }

 void dump_full_node(std::ofstream& out, const program_node* node) {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     try {
         out << node->type()->to_string(*node);
     } catch(const std::exception& e) {
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp
index a103a159faaf5d..72f3cc9120b9f1 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp
@@ -70,7 +70,6 @@ JitConstants DynamicQuantizeKernelRef::GetJitConstants(const dynamic_quantize_pa
 }

 CommonDispatchData DynamicQuantizeKernelRef::SetDefault(const dynamic_quantize_params& params) const {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     CommonDispatchData dispatchData;

     OPENVINO_ASSERT(params.outputs[0].GetLayout() == DataLayout::bfyx, "It supports only 4d tensor");
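The restructured add_profiling_data above relies on the counter key to separate per-iteration records: when per_iter_mode is set, the current iteration number is baked into perf_counter_key, otherwise every execution hashes to the same entry and times accumulate. A compact sketch of that keying idea with illustrative types (the real key also carries layouts, the impl name, the cache-hit flag and memalloc info):

    #include <cstdint>
    #include <map>
    #include <string>
    #include <tuple>
    #include <utility>

    using PerfKey = std::tuple<std::string /*stage*/, int64_t /*iteration, or 0*/>;
    using PerfData = std::map<PerfKey, std::pair<int64_t /*total_time*/, int64_t /*total_iters*/>>;

    void record(PerfData& data, const std::string& stage, int64_t iteration,
                bool per_iter_mode, int64_t time_usec) {
        // With per_iter_mode the key differs every iteration; without it,
        // the constant 0 collapses all iterations into one aggregated entry.
        PerfKey key{stage, per_iter_mode ? iteration : 0};
        auto& [total_time, total_iters] = data[key];
        total_time += time_usec;
        total_iters++;
    }

This also mirrors why the per-iteration CSV header uses "iter" while the aggregated one uses "iters" in dump_perf_data_raw.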
--- a/src/plugins/intel_gpu/src/plugin/graph.cpp
+++ b/src/plugins/intel_gpu/src/plugin/graph.cpp
@@ -136,8 +136,8 @@ Graph::Graph(std::shared_ptr<Graph> graph, uint16_t stream_id)
 }
 
 Graph::~Graph() {
-    GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) {
-        const auto log_level = cldnn::debug_configuration::get_instance()->host_time_profiling;
+    GPU_DEBUG_IF(m_config.get_host_time_profiling()) {
+        const auto log_level = m_config.get_host_time_profiling();
 
         auto get_time_str = [](int64_t time_mcs, int64_t iters_num = 1) {
             double time = static_cast<double>(time_mcs);
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index fd0f2482bc4712..52b47ad52be0ae 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -81,7 +81,8 @@ bool requires_new_shape_infer(const std::shared_ptr<ov::Node>& op) {
     // HACK: SearchSorted has specific shape requirements.
     // E.g. static input shapes: sorted:[8], values:[2,3,4] are perfectly fine,
     // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid.
-    if (ov::is_type<ov::op::v15::SearchSorted>(op))
+    // Similar case for STFT.
+    if (ov::is_type<ov::op::v15::SearchSorted>(op) || ov::is_type<ov::op::v15::STFT>(op))
         return true;
 
     if (ov::is_type(op))
@@ -589,8 +590,6 @@ bool Plugin::is_metric(const std::string& name) const {
 ov::Any Plugin::get_metric(const std::string& name, const ov::AnyMap& options) const {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::get_metric");
 
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-
     auto device_id = get_property(ov::device::id.name(), options).as<std::string>();
 
     auto iter = m_device_map.find(std::to_string(cldnn::device_query::device_id));
@@ -764,7 +763,6 @@ std::vector<std::string> Plugin::get_device_capabilities(const cldnn::device_inf
 }
 
 uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     auto device_id = get_property(ov::device::id.name(), options).as<std::string>();
     auto context = get_default_contexts().at(device_id);
     const auto& device_info = context->get_engine().get_device_info();
diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
index f30bd9464a4238..5d9871d1fff29a 100644
--- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp
+++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
@@ -327,51 +327,6 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::primitive> prim) {
     m_topology->add_primitive(prim);
 }
 
-bool ProgramBuilder::requires_new_shape_infer(const std::shared_ptr<ov::Node>& op) const {
-    if (op->is_dynamic()) {
-        return true;
-    }
-
-    // HACK: SearchSorted has specific shape requirements.
-    // E.g. static input shapes: sorted:[8], values:[2,3,4] are perfectly fine,
-    // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid.
-    // Similar case for STFT.
-    if (ov::is_type<ov::op::v15::SearchSorted>(op) || ov::is_type<ov::op::v15::STFT>(op))
-        return true;
-
-    if (ov::is_type(op))
-        return true;
-
-    if (ov::is_type(op)) {
-        const auto body_function = std::static_pointer_cast(op)->get_function();
-        if (body_function->is_dynamic())
-            return true;
-    }
-
-    if (ov::is_type(op) || ov::is_type(op)) {
-        return true;
-    }
-    // When an input node has a dynamic shape with 4 dimensions, this function returns false,
-    // because op.is_dynamic() only checks input shapes and therefore returns false.
-    // So, in the case of input data, we need to check output shapes as well.
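The output-shape loop that follows is the key subtlety of this helper: a node can report is_dynamic() == false while still producing a dynamic output, because is_dynamic() inspects input shapes only, as the comment above notes. A minimal illustration with the public ov API; this snippet is not part of the patch, and NonZero merely serves as a well-known op whose output shape depends on runtime data:

    #include "openvino/op/non_zero.hpp"
    #include "openvino/op/parameter.hpp"

    auto param   = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{2, 3});
    auto nonzero = std::make_shared<ov::op::v3::NonZero>(param);
    bool by_inputs = nonzero->is_dynamic();                              // false: the input shape is static
    bool by_output = nonzero->get_output_partial_shape(0).is_dynamic();  // true: output is {2, ?}, data-dependent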
- for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).is_dynamic()) - return true; - } - - for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).size() > 6) - return true; - } - - for (size_t i = 0; i < op->get_input_size(); i++) { - if (op->get_input_partial_shape(i).size() > 6) - return true; - } - - return false; -} - int64_t ProgramBuilder::get_parameter_index(const std::shared_ptr& parameter) const { return m_model->get_parameter_index(parameter); } diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 22b0262c62b2d3..9189941ea8b3a6 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -33,9 +33,8 @@ namespace { inline bool can_use_usm_host(const cldnn::engine& engine, const uint64_t total_output_bytes) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->use_usm_host == 1) { return true; } - GPU_DEBUG_IF(debug_config->use_usm_host == 2) { return false; } + GPU_DEBUG_IF(ov::intel_gpu::ExecutionConfig::get_usm_policy() == 1) { return true; } + GPU_DEBUG_IF(ov::intel_gpu::ExecutionConfig::get_usm_policy() == 2) { return false; } auto can_use_usm = engine.use_unified_shared_memory(); // When output size is large, it is better not to write to usm_host directly @@ -113,20 +112,9 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c : ov::ISyncInferRequest(compiled_model) , m_graph(compiled_model->get_graph(0)) , m_context(std::static_pointer_cast(compiled_model->get_context_impl())) - , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_buffers_preallocation_ratio())) + , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_shape_predictor_settings())) , m_enable_profiling(m_graph->get_config().get_enable_profiling()) , m_use_external_queue(m_graph->use_external_queue()) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { - auto& mem_preallocation_params = debug_config->mem_preallocation_params; - m_shape_predictor.reset( - new cldnn::ShapePredictor(&m_graph->get_engine(), - mem_preallocation_params.next_iters_preallocation_count, - mem_preallocation_params.max_per_iter_size, - mem_preallocation_params.max_per_dim_diff, - mem_preallocation_params.buffers_preallocation_ratio)); - } - init_mappings(); allocate_inputs(); allocate_outputs(); @@ -504,7 +492,7 @@ void SyncInferRequest::wait() { } auto wait_end = std::chrono::high_resolution_clock::now(); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) { + GPU_DEBUG_IF(m_graph->get_config().get_host_time_profiling()) { auto& exec_time_info = m_graph->host_exec_times.back(); const uint64_t total_time = std::chrono::duration_cast(wait_end - wait_start).count(); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp index b269fbc2c9eb4d..66fe9d9c9e0fc6 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp @@ -18,7 +18,6 @@ namespace ov::intel_gpu { DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric) : 
ov::pass::MatcherPass() { - GPU_DEBUG_GET_INSTANCE(debug_config); using namespace ov::pass::pattern; using QuantizationType = ov::op::internal::DynamicQuantize::QuantizationType; diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp index e2090a4d2b5eb8..29e82e4acac904 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp @@ -20,7 +20,6 @@ namespace ov::intel_gpu { FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion(bool fuse_mlp_swiglu) { using namespace ov::pass::pattern; - GPU_DEBUG_GET_INSTANCE(debug_config); // Three FCs connected to the same input size_t min_num_fcs_to_fuse = 3; // Note: diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 6a1b09d1b4f329..72054ec3b1fd44 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -412,10 +412,6 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); pass_config->set_callback([&](const std::shared_ptr node){ - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->enable_sdpa != -1) { - GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1); - } - if (!config.get_enable_sdpa_optimization()) return false; @@ -926,7 +922,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { pass_config->disable(); pass_config->disable(); - float activations_scale_factor = config.get_property(ov::hint::activations_scale_factor); + float activations_scale_factor = config.get_activations_scale_factor(); if (activations_scale_factor > 0.f && infer_precision == ov::element::f16 && !enableInt8) { using namespace ov::pass::low_precision; @@ -1100,7 +1096,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // This is supposed to be the last pass to ensure that we don't have name collisions until // GPU plugin stops using friendly names for program creation manager.register_pass(true); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->verbose >= 1) { + GPU_DEBUG_IF(config.get_verbose() >= 1) { manager.register_pass(); } manager.run_passes(func); diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index 550d740f772a16..ab72354626b5cb 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -3,604 +3,21 @@ // #include "intel_gpu/runtime/debug_configuration.hpp" -#include -#include +#include "intel_gpu/runtime/execution_config.hpp" #include -#include -#include -#include -#include #include -namespace cldnn { -const char *debug_configuration::prefix = "GPU_Debug: "; -std::ostream* debug_configuration::verbose_stream; +namespace ov::intel_gpu { -// Default policy is that dump_configuration will override other configuration from IE. 
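Everything removed from this point to the end of the file is the hand-rolled OV_GPU_* environment parser; its options move to typed entries on ov::intel_gpu::ExecutionConfig declared in options.inl, whose generated static getters still consult the environment in debug-caps builds. A rough sketch of the resulting call-site pattern, built from getters this patch actually uses (the surrounding control flow is illustrative only):

    // was: GPU_DEBUG_GET_INSTANCE(debug_config); if (debug_config->disable_usm) { ... }
    GPU_DEBUG_IF(ExecutionConfig::get_disable_usm()) {
        return false;  // skip USM allocations entirely
    }

    // was: *debug_configuration::verbose_stream << ...; the stream is now resolved on demand:
    get_verbose_stream() << "network compiled" << std::endl;  // file if log_to_file is set, else std::cout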
- -#ifdef GPU_DEBUG_CONFIG - -#define GPU_DEBUG_COUT_ std::cout << cldnn::debug_configuration::prefix - -template -void print_option(std::string option_name, T option_value) { - GPU_DEBUG_COUT_ << "Config " << option_name << " = " << option_value << std::endl; -} - -static std::string to_upper_case(const std::string& var) { - std::stringstream s; - - for (size_t i = 0; i < var.size(); i++) { - if (std::isupper(var[i])) { - if (i != 0) { - s << "_"; - } - s << var[i]; - } else { - s << static_cast(std::toupper(var[i])); - } - } - - return s.str(); -} - -static std::vector get_possible_option_names(const std::string& var, std::vector allowed_option_prefixes) { - std::vector result; - - for (auto& prefix : allowed_option_prefixes) { - result.push_back(prefix + var); - result.push_back(prefix + to_upper_case(var)); - } - - return result; -} - -template -T convert_to(const std::string &str) { - std::istringstream ss(str); - T res; - ss >> res; - return res; -} - -template <> -std::string convert_to(const std::string &str) { - return str; -} - -static std::set parse_int_set(std::string& str) { - std::set int_array; - // eliminate '"' from string to avoid parsing error - str.erase(std::remove_if(str.begin(), str.end(), [](char c) { - return c == '\"'; }), str.end()); - if (str.size() > 0) { - str = " " + str + " "; - std::istringstream ss(str); - std::string token; - while (ss >> token) { - try { - int_array.insert(static_cast(std::stol(token))); - } catch(const std::exception &) { - int_array.clear(); - GPU_DEBUG_COUT << "Argument was ignored. It cannot be parsed to integer array: " << str << std::endl; - break; - } - } - } - return int_array; -} - -template -void get_debug_env_var(const std::string &var, T &val, std::vector allowed_option_prefixes) { - bool found = false; - for (auto o : get_possible_option_names(var, allowed_option_prefixes)) { - if (const auto env_var = std::getenv(o.c_str())) { - val = convert_to(env_var); - found = true; - } - } - - if (found) { - print_option(var, val); - } -} - -template -void get_gpu_debug_env_var(const std::string &var, T &val) { - return get_debug_env_var(var, val, {"OV_GPU_"}); -} - -template -void get_common_debug_env_var(const std::string &var, T &val) { - // The list below should be prioritized from lowest to highest prefix priority - // If an option is set several times with different prefixes, version with the highest priority will be actually used. 
- // This may allow to enable global option with some value and override this value for GPU plugin - // For example: OV_GPU_Verbose=2 OV_Verbose=1 ./my_app => this->verbose == 2 - // In that case we enable Verbose (with level = 1) for all OV components that support this option, but for GPU plugin we increase verbose level to 2 - std::vector allowed_option_prefixes = { - "OV_", - "OV_GPU_" - }; - - return get_debug_env_var(var, val, allowed_option_prefixes); -} - -static void print_help_messages() { - std::vector> message_list; - message_list.emplace_back("OV_GPU_Help", "Print help messages"); - message_list.emplace_back("OV_GPU_Verbose", "Verbose execution"); - message_list.emplace_back("OV_GPU_VerboseColor", "Print verbose color"); - message_list.emplace_back("OV_GPU_VerboseFile", "Filename to dump verbose log"); - message_list.emplace_back("OV_GPU_ListLayers", "Print layers names"); - message_list.emplace_back("OV_GPU_PrintMultiKernelPerf", "Print execution time of each kernel in multi-kernel primitimive"); - message_list.emplace_back("OV_GPU_PrintInputDataShapes", "Print data_shapes of input layers for benchmark_app."); - message_list.emplace_back("OV_GPU_DisableUsm", "Disable usm usage"); - message_list.emplace_back("OV_GPU_DisableOnednn", "Disable onednn for discrete GPU (no effect for integrated GPU)"); - message_list.emplace_back("OV_GPU_DisableOnednnOptPostOps", "Disable onednn optimize post operators"); - message_list.emplace_back("OV_GPU_DumpProfilingData", "Enables dump of extended profiling information to specified directory." - " Please use OV_GPU_DumpProfilingDataPerIter=1 env variable to collect performance per iteration." - " Note: Performance impact may be significant as this option enforces host side sync after each primitive"); - message_list.emplace_back("OV_GPU_DumpProfilingDataIteration", "Enable collecting profiling data only at iterations with requested range. " - "For example for dump profiling data only when iteration is from 10 to 20, you can use " - "OV_GPU_DumpProfilingDataIteration='10..20'. Additionally, you can dump profiling data only " - "from one specific iteration by giving the same values for the start and end, and the open " - "ended range is also available by range from given start to the last iteration as -1. e.g. " - "OV_GPU_DumpProfilingDataIteration='10..-1'"); - message_list.emplace_back("OV_GPU_HostTimeProfiling", "Enable collecting of model enqueue time spent on the host"); - message_list.emplace_back("OV_GPU_DumpGraphs", "1) dump ngraph before and after transformation. 2) dump graph in model compiling." - "3) dump graph in execution."); - message_list.emplace_back("OV_GPU_DumpSources", "Dump opencl sources"); - message_list.emplace_back("OV_GPU_DumpLayersPath", "Enable dumping intermediate buffers and set the dest path"); - message_list.emplace_back("OV_GPU_DumpLayers", "Dump intermediate buffers of specified layers only, separated by space." - " Support case-insensitive and regular expression. 
For example .*conv.*"); - message_list.emplace_back("OV_GPU_DumpLayersResult", "Dump output buffers of result layers only"); - message_list.emplace_back("OV_GPU_DumpLayersInput", "Dump intermediate buffers of input layers only"); - message_list.emplace_back("OV_GPU_DumpLayersDstOnly", "Dump only output of layers"); - message_list.emplace_back("OV_GPU_DumpLayersLimitBatch", "Limit the size of batch to dump"); - message_list.emplace_back("OV_GPU_DumpLayersRaw", "If true, dump data is stored in raw memory format."); - message_list.emplace_back("OV_GPU_DumpLayersRawBinary", "If true, dump data is stored in binary format."); - message_list.emplace_back("OV_GPU_DryRunPath", "Dry run and serialize execution graph into the specified path"); - message_list.emplace_back("OV_GPU_BaseBatchForMemEstimation", "Base batch size to be used in memory estimation"); - message_list.emplace_back("OV_GPU_AfterProc", "Run inference after the specified process PIDs are finished, separated by space." - " Supported on only on linux."); - message_list.emplace_back("OV_GPU_SerialCompile", "Serialize creating primitives and compiling kernels"); - message_list.emplace_back("OV_GPU_ForceImplTypes", "Force implementation type of a target primitive or layer. [primitive or layer_name]:[impl_type]" - " For example fc:onednn gemm:onednn reduce:ocl do:cpu" - " For primitives fc, gemm, do, reduce, concat are supported. Separated by space."); - message_list.emplace_back("OV_GPU_MaxKernelsPerBatch", "Maximum number of kernels in a batch during compiling kernels"); - message_list.emplace_back("OV_GPU_ImplsCacheCapacity", "The maximum number of entries in the kernel impl cache"); - message_list.emplace_back("OV_GPU_DisableAsyncCompilation", "Disable async compilation"); - message_list.emplace_back("OV_GPU_DisableWinogradConv", "Disable Winograd convolution"); - message_list.emplace_back("OV_GPU_DisableDynamicImpl", "Disable dynamic implementation"); - message_list.emplace_back("OV_GPU_DisableRuntimeBufferFusing", "Disable runtime buffer fusing"); - message_list.emplace_back("OV_GPU_DisableMemoryReuse", "Disable memory reuse"); - message_list.emplace_back("OV_GPU_EnableSDPA", "This allows the enforcement of SDPA decomposition logic: 0 completely disables SDPA kernel usage, " - "and 1 enables it for all the cases."); - message_list.emplace_back("OV_GPU_DumpMemoryPool", "Dump memory pool contents of each iteration"); - message_list.emplace_back("OV_GPU_DumpMemoryPoolIters", "List of iterations to dump memory pool status, separated by space."); - message_list.emplace_back("OV_GPU_DumpMemoryPoolPath", "Enable dumping memory pool status to csv file and set the dest path"); - message_list.emplace_back("OV_GPU_DisableBuildTimeWeightReorderForDynamicNodes", "Disable build time weight reorder for dynmaic nodes."); - message_list.emplace_back("OV_GPU_DisableRuntimeSkipReorder", "Disable runtime skip reorder."); - message_list.emplace_back("OV_GPU_DisablePrimitiveFusing", "Disable primitive fusing"); - message_list.emplace_back("OV_GPU_DisableFakeAlignment", "Disable fake alignment"); - message_list.emplace_back("OV_GPU_UseUsmHost", "Set explicit policy for usm host usage for network input/output. " - "0: default, 1: use usm_host, 2: do not use usm_host"); - message_list.emplace_back("OV_GPU_KVCacheCompression", "Enable/Disable KV-cache compression"); - message_list.emplace_back("OV_GPU_DynamicQuantizeLayersWithoutOnednn", "Enable Dynamic quantization for specified Fully connected layers only, " - "separated by space. 
Support case-insensitive and regular expression. For example .*fully_connected.*"); - message_list.emplace_back("OV_GPU_DynamicQuantizeGroupSize", "Specify a group size of dynamic quantization to enable " - "dynamic quantization for Fully-connected primitive."); - message_list.emplace_back("OV_GPU_DynamicQuantizeAsym", "Enable asymmetric dynamic quantization when set as 1."); - message_list.emplace_back("OV_GPU_DisableHorizontalFCFusion", "Disable horizontal fc fusion"); - message_list.emplace_back("OV_GPU_DisableFCSwigluFusion", "Disable fc + swiglu fusion"); - message_list.emplace_back("OV_GPU_DumpIteration", "Dump n-th execution of network, separated by space."); - message_list.emplace_back("OV_GPU_MemPreallocationOptions", "Controls buffer pre-allocation feature. Expects 4 values separated by space in " - "the following order: number of iterations for pre-allocation(int), max size of single iteration in bytes(int), " - "max per-dim allowed diff(int), unconditional buffers preallocation ratio(float). For example for disabling memory " - "preallocation at all, you can use OV_GPU_MemPreallocationOptions='0 0 0 1.0'"); - message_list.emplace_back("OV_GPU_LoadDumpRawBinary", - "Specified layers which are loading dumped binary files generated by OV_GPU_DumpLayersRawBinary debug-config." - " Currently, other layers except input-layer('parameter' type) are loading binaries for only input." - " Different input or output tensors are seperated by ','. Different layers are separated by space. For example, " - " \"[input_layer_name1]:[binary_dumped_file1],[binary_dump_file2] [input_layer_name2]:[binary_dump_1],[binary_dump_2]\""); - - auto max_name_length_item = std::max_element(message_list.begin(), message_list.end(), - [](std::pair& a, std::pair& b){ - return a.first.size() < b.first.size(); - }); - int name_width = static_cast(max_name_length_item->first.size()) + 2; - - GPU_DEBUG_COUT_ << "Supported environment variables for debugging" << std::endl; - for (auto& p : message_list) { - GPU_DEBUG_COUT_ << " - " << std::left << std::setw(name_width) << p.first + " " << p.second << std::endl; - } -} - -#endif - -debug_configuration::debug_configuration() - : help(0) - , verbose(0) - , verbose_color(0) - , verbose_file() - , list_layers(0) - , print_multi_kernel_perf(0) - , print_input_data_shapes(0) - , disable_usm(0) - , disable_onednn(0) - , disable_onednn_opt_post_ops(0) - , dump_profiling_data(std::string("")) - , dump_profiling_data_per_iter(0) - , host_time_profiling(0) - , dump_graphs(std::string()) - , dump_sources(std::string()) - , dump_layers_path(std::string()) - , dry_run_path(std::string()) - , dump_layers_dst_only(0) - , dump_layers_result(0) - , dump_layers_input(0) - , dump_layers_limit_batch(std::numeric_limits::max()) - , dump_layers_raw(0) - , dump_layers_binary(0) - , dump_memory_pool(0) - , dump_memory_pool_path(std::string()) - , base_batch_for_memory_estimation(-1) - , serialize_compile(0) - , max_kernels_per_batch(0) - , impls_cache_capacity(-1) - , enable_sdpa(-1) - , disable_async_compilation(0) - , disable_winograd_conv(0) - , disable_dynamic_impl(0) - , disable_runtime_buffer_fusing(0) - , disable_memory_reuse(0) - , disable_build_time_weight_reorder_for_dynamic_nodes(0) - , disable_runtime_skip_reorder(0) - , disable_primitive_fusing(0) - , disable_fake_alignment(0) - , use_usm_host(0) - , use_kv_cache_compression(-1) - , dynamic_quantize_group_size(DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) - , dynamic_quantize_asym(0) - , disable_horizontal_fc_fusion(0) - , 
disable_fc_swiglu_fusion(0) { -#ifdef GPU_DEBUG_CONFIG - get_gpu_debug_env_var("Help", help); - get_common_debug_env_var("Verbose", verbose); - get_gpu_debug_env_var("VerboseColor", verbose_color); - get_gpu_debug_env_var("VerboseFile", verbose_file); - get_gpu_debug_env_var("ListLayers", list_layers); - get_gpu_debug_env_var("PrintMultiKernelPerf", print_multi_kernel_perf); - get_gpu_debug_env_var("PrintInputDataShapes", print_input_data_shapes); - get_gpu_debug_env_var("DisableUsm", disable_usm); - get_gpu_debug_env_var("DumpGraphs", dump_graphs); - get_gpu_debug_env_var("DumpSources", dump_sources); - get_gpu_debug_env_var("DumpLayersPath", dump_layers_path); - get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch); - get_gpu_debug_env_var("DumpLayersRaw", dump_layers_raw); - get_gpu_debug_env_var("DumpLayersRawBinary", dump_layers_binary); - get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only); - get_gpu_debug_env_var("DumpLayersResult", dump_layers_result); - get_gpu_debug_env_var("DumpLayersInput", dump_layers_input); - get_gpu_debug_env_var("DisableOnednn", disable_onednn); - get_gpu_debug_env_var("DisableOnednnOptPostOps", disable_onednn_opt_post_ops); - get_gpu_debug_env_var("DumpProfilingData", dump_profiling_data); - get_gpu_debug_env_var("DumpProfilingDataPerIter", dump_profiling_data_per_iter); - get_gpu_debug_env_var("HostTimeProfiling", host_time_profiling); - std::string dump_prof_data_iter_str; - get_gpu_debug_env_var("DumpProfilingDataIteration", dump_prof_data_iter_str); - get_gpu_debug_env_var("DryRunPath", dry_run_path); - get_gpu_debug_env_var("DumpMemoryPool", dump_memory_pool); - std::string dump_runtime_memory_pool_iters_str; - get_gpu_debug_env_var("DumpMemoryPoolIters", dump_runtime_memory_pool_iters_str); - get_gpu_debug_env_var("DumpMemoryPoolPath", dump_memory_pool_path); - get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation); - std::string dump_layers_str; - get_gpu_debug_env_var("DumpLayers", dump_layers_str); - std::string after_proc_str; - get_gpu_debug_env_var("AfterProc", after_proc_str); - get_gpu_debug_env_var("SerialCompile", serialize_compile); - std::string forced_impl_types_str; - get_gpu_debug_env_var("ForceImplTypes", forced_impl_types_str); - get_gpu_debug_env_var("MaxKernelsPerBatch", max_kernels_per_batch); - get_gpu_debug_env_var("ImplsCacheCapacity", impls_cache_capacity); - get_gpu_debug_env_var("EnableSDPA", enable_sdpa); - get_gpu_debug_env_var("DisableAsyncCompilation", disable_async_compilation); - get_gpu_debug_env_var("DisableWinogradConv", disable_winograd_conv); - get_gpu_debug_env_var("DisableDynamicImpl", disable_dynamic_impl); - get_gpu_debug_env_var("DisableRuntimeBufferFusing", disable_runtime_buffer_fusing); - get_gpu_debug_env_var("DisableMemoryReuse", disable_memory_reuse); - get_gpu_debug_env_var("DisableBuildTimeWeightReorderForDynamicNodes", disable_build_time_weight_reorder_for_dynamic_nodes); - get_gpu_debug_env_var("DisableRuntimeSkipReorder", disable_runtime_skip_reorder); - get_gpu_debug_env_var("DisablePrimitiveFusing", disable_primitive_fusing); - get_gpu_debug_env_var("DisableFakeAlignment", disable_fake_alignment); - get_gpu_debug_env_var("UseUsmHost", use_usm_host); - get_gpu_debug_env_var("KVCacheCompression", use_kv_cache_compression); - get_gpu_debug_env_var("DynamicQuantizeGroupSize", dynamic_quantize_group_size); - get_gpu_debug_env_var("DynamicQuantizeAsym", dynamic_quantize_asym); - get_gpu_debug_env_var("DisableHorizontalFCFusion", 
disable_horizontal_fc_fusion); - get_gpu_debug_env_var("DisableFCSwigluFusion", disable_fc_swiglu_fusion); - std::string dump_iteration_str; - get_gpu_debug_env_var("DumpIteration", dump_iteration_str); - std::string mem_preallocation_params_str; - get_gpu_debug_env_var("MemPreallocationOptions", mem_preallocation_params_str); - std::string load_dump_raw_bin_str; - get_gpu_debug_env_var("LoadDumpRawBinary", load_dump_raw_bin_str); - std::string dynamic_quantize_layers_without_onednn_str; - get_gpu_debug_env_var("DynamicQuantizeLayersWithoutOnednn", dynamic_quantize_layers_without_onednn_str); - - if (help > 0) { - print_help_messages(); - exit(0); - } - - if (verbose_file.length() > 0) { +std::ostream& get_verbose_stream() { + if (ExecutionConfig::get_log_to_file().length() > 0) { static std::ofstream fout; - fout.open(verbose_file); - verbose_stream = &fout; + if (!fout.is_open()) + fout.open(ExecutionConfig::get_log_to_file()); + return fout; } else { - verbose_stream = &std::cout; - } - - if (dump_prof_data_iter_str.length() > 0) { - dump_prof_data_iter_str = " " + dump_prof_data_iter_str + " "; - std::istringstream iss(dump_prof_data_iter_str); - char dot; - int64_t start, end; - bool is_valid_range = false; - if (iss >> start >> dot >> dot >> end) { - if (start <= end || end == -1) { - try { - is_valid_range = true; - dump_prof_data_iter_params.start = start; - dump_prof_data_iter_params.end = end; - } catch(const std::exception &) { - is_valid_range = false; - } - } - } - if (!is_valid_range) - std::cout << "OV_GPU_DumpProfilingDataIteration was ignored. It cannot be parsed to valid iteration range." << std::endl; - dump_prof_data_iter_params.is_enabled = is_valid_range; - } - - if (dump_layers_str.length() > 0) { - // Insert delimiter for easier parsing when used - dump_layers_str = " " + dump_layers_str + " "; - std::stringstream ss(dump_layers_str); - std::string layer; - while (ss >> layer) { - dump_layers.push_back(layer); - } - } - - if (dynamic_quantize_layers_without_onednn_str.length() > 0) { - // Insert delimiter for easier parsing when used - dynamic_quantize_layers_without_onednn_str = " " + dynamic_quantize_layers_without_onednn_str + " "; - std::stringstream ss(dynamic_quantize_layers_without_onednn_str); - std::string layer; - while (ss >> layer) { - dynamic_quantize_layers_without_onednn.push_back(layer); - } + return std::cout; } - - if (forced_impl_types_str.length() > 0) { - forced_impl_types_str = " " + forced_impl_types_str + " "; - std::stringstream ss(forced_impl_types_str); - std::string type; - while (ss >> type) { - forced_impl_types.push_back(type); - } - } - - // Parsing for loading binary files - if (load_dump_raw_bin_str.length() > 0) { - load_dump_raw_bin_str = " " + load_dump_raw_bin_str + " "; - std::stringstream ss(load_dump_raw_bin_str); - std::string type; - while (ss >> type) { - load_layers_raw_dump.push_back(type); - } - } - - if (dump_iteration_str.size() > 0) { - dump_iteration = parse_int_set(dump_iteration_str); - } - - if (dump_runtime_memory_pool_iters_str.size() > 0) { - dump_memory_pool_iters = parse_int_set(dump_runtime_memory_pool_iters_str); - } - - if (mem_preallocation_params_str.size() > 0) { - mem_preallocation_params_str = " " + mem_preallocation_params_str + " "; - std::istringstream ss(mem_preallocation_params_str); - std::vector params; - std::string param; - while (ss >> param) - params.push_back(param); - - bool correct_params = params.size() == 4; - if (correct_params) { - try { - 
mem_preallocation_params.next_iters_preallocation_count = std::stol(params[0]); - mem_preallocation_params.max_per_iter_size = std::stol(params[1]); - mem_preallocation_params.max_per_dim_diff = std::stol(params[2]); - mem_preallocation_params.buffers_preallocation_ratio = std::stof(params[3]); - } catch(const std::exception &) { - correct_params = false; - } - } - - if (!correct_params) - GPU_DEBUG_COUT_ << "OV_GPU_MemPreallocationOptions were ignored, because they cannot be parsed.\n"; - - mem_preallocation_params.is_initialized = correct_params; - } - - if (after_proc_str.length() > 0) { -#ifdef _WIN32 - GPU_DEBUG_COUT_ << "Warning: OV_GPU_AfterProc is supported only on linux" << std::endl; -#else - after_proc_str = " " + after_proc_str + " "; // Insert delimiter for easier parsing when used - std::stringstream ss(after_proc_str); - std::string pid; - while (ss >> pid) { - after_proc.push_back(pid); - } -#endif - } -#endif -} - -const debug_configuration *debug_configuration::get_instance() { - static std::unique_ptr instance(nullptr); -#ifdef GPU_DEBUG_CONFIG - static std::mutex _m; - std::lock_guard lock(_m); - if (nullptr == instance) - instance.reset(new debug_configuration()); - return instance.get(); -#else - return nullptr; -#endif -} - -bool debug_configuration::is_target_dump_prof_data_iteration(int64_t iteration) const { -#ifdef GPU_DEBUG_CONFIG - if (iteration < 0) - return true; - - if (dump_prof_data_iter_params.start > iteration) - return false; - - if (dump_prof_data_iter_params.start <= dump_prof_data_iter_params.end && - dump_prof_data_iter_params.end < iteration) - return false; - - return true; -#else - return false; -#endif } -std::vector debug_configuration::get_filenames_for_matched_layer_loading_binaries(const std::string& id) const { - std::vector file_names; -#ifdef GPU_DEBUG_CONFIG - if (load_layers_raw_dump.empty()) - return file_names; - - for (const auto& load_layer : load_layers_raw_dump) { - size_t file = load_layer.rfind(":"); - if (file != std::string::npos) { - if (id == load_layer.substr(0, file)) { - auto file_name_str = load_layer.substr(file + 1); - size_t head = 0; - size_t found = 0; - do { - found = file_name_str.find(",", head); - if (found != std::string::npos) - file_names.push_back(file_name_str.substr(head, (found - head))); - else - file_names.push_back(file_name_str.substr(head)); - - head = found+1; - GPU_DEBUG_LOG << " Layer name loading raw dump : " << load_layer.substr(0, file) << " / the dump file : " - << file_names.back() << std::endl; - } while (found != std::string::npos); - - return file_names; - } - } - } -#endif - - return file_names; -} - -std::string debug_configuration::get_matched_from_filelist(const std::vector& file_names, std::string pattern) const { -#ifdef GPU_DEBUG_CONFIG - for (const auto& file : file_names) { - auto found = file.find(pattern); - if (found != std::string::npos) { - return file; - } - } -#endif - return std::string(); -} - -std::string debug_configuration::get_name_for_dump(const std::string& file_name) const { - std::string filename = file_name; -#ifdef GPU_DEBUG_CONFIG - std::replace(filename.begin(), filename.end(), '\\', '_'); - std::replace(filename.begin(), filename.end(), '/', '_'); - std::replace(filename.begin(), filename.end(), ' ', '_'); - std::replace(filename.begin(), filename.end(), ':', '_'); -#endif - return filename; -} - -bool debug_configuration::is_layer_name_matched(const std::string& layer_name, const std::string& pattern) const { -#ifdef GPU_DEBUG_CONFIG - auto upper_layer_name 
= std::string(layer_name.length(), '\0'); - std::transform(layer_name.begin(), layer_name.end(), upper_layer_name.begin(), ::toupper); - auto upper_pattern = std::string(pattern.length(), '\0'); - std::transform(pattern.begin(), pattern.end(), upper_pattern.begin(), ::toupper); - - // Check pattern from exec_graph - size_t pos = upper_layer_name.find(':'); - auto upper_exec_graph_name = upper_layer_name.substr(pos + 1, upper_layer_name.size()); - if (upper_exec_graph_name.compare(upper_pattern) == 0) { - return true; - } - - // Check pattern with regular expression - std::regex re(upper_pattern); - return std::regex_match(upper_layer_name, re); -#else - return false; -#endif -} - -bool debug_configuration::is_layer_for_dumping(const std::string& layer_name, bool is_output, bool is_input) const { -#ifdef GPU_DEBUG_CONFIG - // Dump result layer - if (is_output == true && dump_layers_result == 1 && - (layer_name.find("constant:") == std::string::npos)) - return true; - // Dump all layers - if (dump_layers.empty() && dump_layers_result == 0 && dump_layers_input == 0) - return true; - - // Dump input layers - size_t pos = layer_name.find(':'); - auto type = layer_name.substr(0, pos); - if (is_input == true && type == "parameter" && dump_layers_input == 1) - return true; - - auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){ - return is_layer_name_matched(layer_name, dl); - }); - return (iter != dump_layers.end()); -#else - return false; -#endif -} - -bool debug_configuration::is_target_iteration(int64_t iteration) const { -#ifdef GPU_DEBUG_CONFIG - if (iteration < 0) - return true; - - if (dump_iteration.empty()) - return true; - - if (dump_iteration.find(iteration) == std::end(dump_iteration)) - return false; - - return true; -#else - return false; -#endif -} -} // namespace cldnn +} // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/device.cpp b/src/plugins/intel_gpu/src/runtime/device.cpp index fa027ebe9e2e33..428d18f6c51775 100644 --- a/src/plugins/intel_gpu/src/runtime/device.cpp +++ b/src/plugins/intel_gpu/src/runtime/device.cpp @@ -65,8 +65,7 @@ float device::get_gops(cldnn::data_types dt) const { } bool device::use_unified_shared_memory() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_usm) { + GPU_DEBUG_IF(ExecutionConfig::get_disable_usm()) { return false; } if (get_mem_caps().supports_usm()) { diff --git a/src/plugins/intel_gpu/src/runtime/engine.cpp b/src/plugins/intel_gpu/src/runtime/engine.cpp index b5ec7da3fab705..73bceb8bea8659 100644 --- a/src/plugins/intel_gpu/src/runtime/engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/engine.cpp @@ -71,8 +71,7 @@ const device::ptr engine::get_device() const { } bool engine::use_unified_shared_memory() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_usm) { + GPU_DEBUG_IF(ExecutionConfig::get_disable_usm()) { return false; } if (_device->get_mem_caps().supports_usm()) { diff --git a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp index 1417680c985632..5ddd70f8c6bd3b 100644 --- a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp +++ b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp @@ -52,7 +52,6 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive auto type = mem->get_allocation_type(); const auto _layout_bytes_count = _layout.bytes_count(); - GPU_DEBUG_GET_INSTANCE(debug_config); { auto it = 
_non_padded_pool.lower_bound(_layout_bytes_count); @@ -67,7 +66,7 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive } if (it->second._users.empty()) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = it->first; total_mem_size_non_padded_pool -= released_mem_size; if (type == allocation_type::usm_host) @@ -104,7 +103,7 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive } if (list_itr->_users.empty()) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = mem->size(); total_mem_size_padded_pool -= released_mem_size; if (type == allocation_type::usm_host) @@ -128,14 +127,14 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive } } #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto iter = std::find_if(_no_reusable_mems.begin(), _no_reusable_mems.end(), [&](const cldnn::memory_record& r) { return (network_id == r._network_id && type == r._type && mem->get_internal_params().mem == r._memory->get_internal_params().mem); }); if (iter != _no_reusable_mems.end()) { - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = iter->_users.begin()->_mem_size; total_mem_size_no_reusable -= released_mem_size; if (type == allocation_type::usm_host) @@ -183,8 +182,7 @@ memory::ptr memory_pool::get_from_non_padded_pool(const layout& layout, memory_record({{MEM_USER(unique_id, network_id, prim_id, layout_bytes_count)}}, mem, network_id, type)); #ifdef GPU_DEBUG_CONFIG { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { total_mem_size_non_padded_pool += layout_bytes_count; if (type == allocation_type::usm_host) mem_size_non_padded_pool_host += layout_bytes_count; @@ -225,8 +223,7 @@ memory::ptr memory_pool::get_from_padded_pool(const layout& layout, memory_record({{MEM_USER(unique_id, network_id, prim_id, mem->size())}}, mem, network_id, type)); #ifdef GPU_DEBUG_CONFIG { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { const auto allocated_mem_size = mem->size(); total_mem_size_padded_pool += allocated_mem_size; if (type == allocation_type::usm_host) @@ -242,8 +239,7 @@ memory::ptr memory_pool::get_from_padded_pool(const layout& layout, _padded_pool.emplace(layout, std::move(list)); #ifdef GPU_DEBUG_CONFIG { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { const auto allocated_mem_size = mem->size(); total_mem_size_padded_pool += allocated_mem_size; if (type == allocation_type::usm_host) @@ -300,8 +296,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, bool reset, bool is_dynamic) { bool do_reuse = reusable_across_network; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_memory_reuse) { + GPU_DEBUG_IF(_config.get_disable_memory_reuse()) { do_reuse = false; } if (do_reuse) { @@ -316,7 +311,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, // images (reuse not yet implemented) auto mem = alloc_memory(layout, type, reset); #ifdef GPU_DEBUG_CONFIG - 
GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto allocated_mem_size = mem->size(); _no_reusable_mems.push_back( memory_record({{MEM_USER(unique_id, network_id, prim_id, allocated_mem_size)}}, mem, network_id, type)); @@ -330,7 +325,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, } else { auto mem = alloc_memory(layout, type, reset); #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto allocated_mem_size = mem->size(); _no_reusable_mems.push_back( memory_record({{MEM_USER(unique_id, network_id, prim_id, allocated_mem_size)}}, mem, network_id, type)); @@ -344,7 +339,6 @@ memory::ptr memory_pool::get_memory(const layout& layout, } void memory_pool::clear_pool_for_network(uint32_t network_id) { - GPU_DEBUG_GET_INSTANCE(debug_config); // free up _non_padded_pool for this network { auto itr = _non_padded_pool.begin(); @@ -354,7 +348,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { if (record._network_id == network_id) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = itr->first; total_mem_size_non_padded_pool -= released_mem_size; if (record._type == allocation_type::usm_host) @@ -388,7 +382,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { if (list.empty()) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = itr->first.bytes_count(); total_mem_size_padded_pool -= released_mem_size; if (type == allocation_type::usm_host) @@ -404,12 +398,12 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { #ifdef GPU_DEBUG_CONFIG // free up _no_reusable_mems for this network - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto itr = _no_reusable_mems.begin(); while (itr != _no_reusable_mems.end()) { auto& record = *itr; if (itr->_network_id == network_id) { - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = itr->_users.begin()->_mem_size; total_mem_size_no_reusable -= released_mem_size; if (record._type == allocation_type::usm_host) @@ -439,7 +433,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { } } -memory_pool::memory_pool(engine& engine) : _engine(&engine) { } +memory_pool::memory_pool(engine& engine, const ExecutionConfig& config) : _engine(&engine), _config(config) { } #ifdef GPU_DEBUG_CONFIG inline std::string get_mb_size(size_t size) { diff --git a/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp b/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp index 3b925f07361fff..3a028218a3f62f 100644 --- a/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp +++ b/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp @@ -67,7 +67,7 @@ std::pair ShapePredictor::predict_preallocation_shape(const std int32_t custom_prealloc_dim) { size_t next_iters_prealloc_count = custom_next_iters_prealloc_count > 0 ? 
static_cast(custom_next_iters_prealloc_count) - : _next_iters_preallocation_count; + : _settings.next_iters_preallocation_count; const auto& current_shape = layout.get_shape(); auto dt_bitwidth = ov::element::Type(layout.data_type).bitwidth(); @@ -122,7 +122,7 @@ std::pair ShapePredictor::predict_preallocation_shape(const std // to avoid huge unexpected memory preallocations if (can_use_iterations_preallocation) { for (size_t i = 0; i < diffs[0].size(); ++i) { - if (diffs[0][i] > _max_per_dim_diff) { + if (diffs[0][i] > _settings.max_per_dim_diff) { can_use_iterations_preallocation = false; break; } @@ -132,7 +132,7 @@ std::pair ShapePredictor::predict_preallocation_shape(const std for (size_t i = 0; i < current_shape.size(); ++i) single_iter_shape.push_back(diffs[0][i] == 0 ? current_shape[i] : 1); - if (ceil_div(ov::shape_size(single_iter_shape) * dt_bitwidth, 8) > _max_per_iter_size) + if (ceil_div(ov::shape_size(single_iter_shape) * dt_bitwidth, 8) > _settings.max_per_iter_size) can_use_iterations_preallocation = false; } @@ -142,13 +142,13 @@ std::pair ShapePredictor::predict_preallocation_shape(const std auto preallocation_shape = diffs[0] * mul_shape; auto new_shape = current_shape + preallocation_shape; return {true, new_shape}; - } else if (_buffers_preallocation_ratio > 1.0f) { + } else if (_settings.buffers_preallocation_ratio > 1.0f) { if (format::is_blocked(layout.format)) return {false, {}}; // Apply percentage buffer preallocation auto current_shape_size = ov::shape_size(current_shape); ov::Shape new_shape_size(current_shape.size(), 1); - new_shape_size[0] = static_cast(current_shape_size * _buffers_preallocation_ratio); + new_shape_size[0] = static_cast(current_shape_size * _settings.buffers_preallocation_ratio); return {true, new_shape_size}; } } diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp index 637add35fd4fd8..c89cd5d93b709f 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp @@ -23,7 +23,7 @@ TEST_P(shape_predictor_tests, prediction) { auto& expected_predicted_shape = p.expected_predicted_shape; auto& engine = get_test_engine(); - ShapePredictor sp(&engine, p.buffers_preallocation_ratio); + ShapePredictor sp(&engine, ShapePredictor::Settings{ .buffers_preallocation_ratio = p.buffers_preallocation_ratio }); std::pair result; for (auto& shape : in_shapes) @@ -74,7 +74,7 @@ TEST_P(shape_predictor_tests_b_fs_yx_fsv16, prediction) { auto& expected_predicted_shape = p.expected_predicted_shape; auto& engine = get_test_engine(); - ShapePredictor sp(&engine, p.buffers_preallocation_ratio); + ShapePredictor sp(&engine, ShapePredictor::Settings{ .buffers_preallocation_ratio = p.buffers_preallocation_ratio }); std::pair result; for (auto& shape : in_shapes) @@ -121,8 +121,8 @@ INSTANTIATE_TEST_SUITE_P(smoke, shape_predictor_tests_b_fs_yx_fsv16, TEST(shape_predictor_tests, check_max_buffer_size) { auto& engine = get_test_engine(); - const auto& buffers_preallocation_ratio = 1.1; - ShapePredictor sp(&engine, buffers_preallocation_ratio); + const auto& buffers_preallocation_ratio = 1.1f; + ShapePredictor sp(&engine, ShapePredictor::Settings{ .buffers_preallocation_ratio = buffers_preallocation_ratio }); const auto max_alloc_mem_size = engine.get_device_info().max_alloc_mem_size; auto layout = cldnn::layout({static_cast(max_alloc_mem_size)}, ov::element::u8, 
format::bfyx); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp index 6a96b694eea1c5..7c08fe85c07eeb 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp @@ -10,7 +10,7 @@ using namespace ::tests; TEST(debug_config_test, check_debug_config_off_on_release) { #ifdef NDEBUG - GPU_DEBUG_GET_INSTANCE(debug_config); + auto config = get_test_default_config(get_test_engine()); GPU_DEBUG_IF(1) { GTEST_FAIL(); /* This should be disabled in case of release build */ } From db3e55ba7e68e2c32986d23f7e54ccbc6cafae50 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 21 Jan 2025 11:30:05 +0400 Subject: [PATCH 21/44] fix visitors Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 92 +++++++++++++++++++ src/inference/src/dev/plugin_config.cpp | 10 +- .../intel_gpu/plugin/program_builder.hpp | 2 - src/plugins/intel_gpu/src/plugin/graph.cpp | 35 +------ 4 files changed, 100 insertions(+), 39 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 8c9252eb78e48c..b7e759fb3cd6a6 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -289,4 +289,96 @@ class OPENVINO_RUNTIME_API PluginConfig { inline static const std::string m_allowed_env_prefix = "OV_"; }; +template <> +class OPENVINO_API AttributeAdapter + : public DirectValueAccessor { +public: + AttributeAdapter(ConfigOptionBase*& value) : DirectValueAccessor(value) {} + + OPENVINO_RTTI("AttributeAdapter"); +}; + +template <> +class OPENVINO_API AttributeAdapter + : public DirectValueAccessor { +public: + AttributeAdapter(ov::AnyMap& value) : DirectValueAccessor(value) {} + + OPENVINO_RTTI("AttributeAdapter"); +}; + +template +class OstreamAttributeVisitor : public ov::AttributeVisitor { + OStreamType& os; + +public: + OstreamAttributeVisitor(OStreamType& os) : os(os) {} + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + os << adapter.get(); + } + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + if (auto a = ov::as_type>(&adapter)) { + return handle_option(a->get()); + } else if (auto a = ov::as_type>(&adapter)) { + const auto& props = a->get(); + os << props.size(); + for (auto& kv : props) { + os << kv.first << kv.second.as(); + } + } else { + OPENVINO_THROW("Attribute ", name, " can't be processed\n"); + } + } + + void handle_option(ConfigOptionBase* option) { + if (option->get_visibility() == OptionVisibility::RELEASE || option->get_visibility() == OptionVisibility::RELEASE_INTERNAL) + os << option->get_any().as(); + } +}; + +template +class IstreamAttributeVisitor : public ov::AttributeVisitor { + IStreamType& is; + +public: + IstreamAttributeVisitor(IStreamType& is) : is(is) {} + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + if (auto a = ov::as_type>(&adapter)) { + return handle_option(a->get()); + } else if (auto a = ov::as_type>(&adapter)) { + size_t size; + is >> size; + ov::AnyMap props; + for (size_t i = 0; i < size; i++) { + std::string name, val; + is >> name; + is >> val; + props[name] = val; + + } + a->set(props); + } else { + OPENVINO_THROW("Attribute ", name, " can't be processed\n"); + } + } + + void on_adapter(const 
std::string& name, ov::ValueAccessor& adapter) override { + bool val; + is >> val; + adapter.set(val); + } + + void handle_option(ConfigOptionBase* option) { + if (option->get_visibility() == OptionVisibility::RELEASE || option->get_visibility() == OptionVisibility::RELEASE_INTERNAL) { + std::string s; + is >> s; + if (option->is_valid_value(s)) + option->set_any(s); + } + } +}; + } // namespace ov diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index cd4d13968e67fd..e950a164a41002 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -123,13 +123,9 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R } bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) { - for (const auto& prop : m_user_properties) { - auto val = prop.second.as(); - visitor.on_attribute(prop.first + "__user", val); - } - for (const auto& prop : m_options_map) { - auto val = prop.second->get_any().as(); - visitor.on_attribute(prop.first + "__internal", val); + visitor.on_attribute("m_user_properties", m_user_properties); + for (auto& prop : m_options_map) { + visitor.on_attribute(prop.first + "__internal", prop.second); } visitor.on_attribute("is_finalized", m_is_finalized); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 99220ec3087221..553f16152cb2bb 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -155,8 +155,6 @@ class ProgramBuilder final { std::shared_ptr m_topology; CustomLayerMap m_custom_layers; - bool allow_new_shape_infer = false; - bool queryMode; std::shared_ptr m_task_executor; diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 0be6f52aa500f3..318e36cc5c8a09 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -4,6 +4,7 @@ #include "intel_gpu/graph/serialization/helpers.hpp" #include "intel_gpu/runtime/layout.hpp" +#include "openvino/core/any.hpp" #include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/threading/executor_manager.hpp" #include "openvino/runtime/exec_model_info.hpp" @@ -35,34 +36,6 @@ #include namespace ov::intel_gpu { -namespace { - - -class OstreamAttributeVisitor : public ov::AttributeVisitor { - cldnn::BinaryOutputBuffer& os; - - template - void append_attribute(const std::string& name, const T& value) { - os << name; - os << value; - } -public: - OstreamAttributeVisitor(cldnn::BinaryOutputBuffer& os) : os(os) {} - - void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { - OPENVINO_THROW("Attribute ", name, " can't be processed\n"); - } - - void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { - append_attribute(name, adapter.get()); - } - - void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { - append_attribute(name, adapter.get()); - } -}; - -} // namespace Graph::Graph(std::shared_ptr model, const RemoteContextImpl::Ptr& context, const ExecutionConfig& config, uint16_t stream_id) : m_context(context) @@ -115,7 +88,9 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context ib >> perfEntry.parentPrimitive; } } - // ib >> m_config; + + IstreamAttributeVisitor visitor(ib); + m_config.visit_attributes(visitor); auto imported_prog 
= std::make_shared<cldnn::program>(get_engine(), m_config);
     imported_prog->load(ib);
@@ -541,7 +516,7 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) {
                 ob << perf_item.second.second.parentPrimitive;
             }
         }
-    OstreamAttributeVisitor visitor(ob);
+    OstreamAttributeVisitor visitor(ob);
     m_config.visit_attributes(visitor);
 
     ob.set_stream(m_network->get_stream_ptr().get());

From abdec311eb4c986650953462c003d757bf4d4429 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Tue, 21 Jan 2025 13:00:56 +0400
Subject: [PATCH 22/44] build fixes

Signed-off-by: Vladimir Paramuzov
---
 src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl   | 1 +
 src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
index 3b77804979f6f8..7e7651ede111e7 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
@@ -15,6 +15,7 @@ OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::
 OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy")
 OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application")
 OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not")
+OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_reservation, false, "CPU reservation reserves CPUs so that they will not be used by other plugins or compiled models")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property")
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 72054ec3b1fd44..841551c73b671f 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -1070,7 +1070,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
             }
 
             // AZP does not support grouped size dyn-quan
-            GPU_DEBUG_IF(debug_config->dynamic_quantize_asym && (dynamic_quantization_group_size != UINT64_MAX)) {
+            GPU_DEBUG_IF(asymmetric_dyn_quant && (dynamic_quantization_group_size != UINT64_MAX)) {
                 GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: asym quantization does not support grouped quantization"
                                 << " ('DynamicQuantizeAsym' is enabled with grouped size dyn-quan)" << std::endl;
                 return true;

From 81ca005744984d7ee352f07899b82926cbad9bad Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Thu, 23 Jan 2025 14:33:00 +0400
Subject: [PATCH 23/44] minor fixes

Signed-off-by: Vladimir Paramuzov
---
 src/inference/CMakeLists.txt                     |   4 +
 .../openvino/runtime/internal_properties.hpp    |   6 +
 .../openvino/runtime/plugin_config.hpp          |  23 ++-
 src/inference/src/dev/plugin_config.cpp         |  21 ++-
src/inference/tests/unit/config_test.cpp | 38 +++- src/plugins/intel_cpu/src/config.cpp | 2 +- .../intel_cpu/src/internal_properties.hpp | 7 +- .../intel_gpu/plugin/program_builder.hpp | 1 - .../intel_gpu/plugin/remote_context.hpp | 1 + .../intel_gpu/runtime/debug_configuration.hpp | 57 +++--- .../intel_gpu/runtime/execution_config.hpp | 8 +- .../intel_gpu/runtime/internal_properties.hpp | 1 - .../include/intel_gpu/runtime/options.inl | 4 +- .../graph_optimizer/prepare_buffer_fusing.cpp | 3 +- .../select_preferred_formats.cpp | 2 +- src/plugins/intel_gpu/src/plugin/graph.cpp | 2 + src/plugins/intel_gpu/src/plugin/plugin.cpp | 174 +++--------------- .../src/plugin/sync_infer_request.cpp | 2 +- .../src/runtime/execution_config.cpp | 156 +++++++++++++++- .../concurrency/gpu_reservation_test.cpp | 6 +- 20 files changed, 295 insertions(+), 223 deletions(-) diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt index 7e6cae62b85b67..9b2967e9f80fae 100644 --- a/src/inference/CMakeLists.txt +++ b/src/inference/CMakeLists.txt @@ -76,6 +76,10 @@ target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_OPENVINO_RUNTIME_API $<$:PROXY_PLUGIN_ENABLED>) +if(ENABLE_DEBUG_CAPS) + target_compile_definitions(${TARGET_NAME}_obj PUBLIC ENABLE_DEBUG_CAPS) +endif() + target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE $ $<$:$>) diff --git a/src/inference/dev_api/openvino/runtime/internal_properties.hpp b/src/inference/dev_api/openvino/runtime/internal_properties.hpp index 9b2f08c17a7fe0..e26024622580e7 100644 --- a/src/inference/dev_api/openvino/runtime/internal_properties.hpp +++ b/src/inference/dev_api/openvino/runtime/internal_properties.hpp @@ -90,5 +90,11 @@ static constexpr Property compiled_model_runtime_p */ static constexpr Property query_model_ratio{"QUERY_MODEL_RATIO"}; +/** + * @brief Allow execution of low precision transformations in plugin's pipelines + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; + } // namespace internal } // namespace ov diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index b7e759fb3cd6a6..531bbda8d346f8 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -1,10 +1,9 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once -#include #include #include "openvino/core/attribute_visitor.hpp" #include "openvino/runtime/iremote_context.hpp" @@ -64,6 +63,7 @@ } \ } +#ifdef ENABLE_DEBUG_CAPS #define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ auto v = read_env(PropertyNamespace::PropertyVar.name(), m_allowed_env_prefix, &m_ ## PropertyVar); \ @@ -71,6 +71,12 @@ return m_ ## PropertyVar.value; \ return v.as(); \ } +#else +#define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ + static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ + return m_ ## PropertyVar.value; \ + } +#endif #define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) 
\ m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; @@ -91,16 +97,15 @@ OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG_GLOBAL, __VA_ARGS__) namespace ov { -#define ENABLE_DEBUG_CAPS enum class OptionVisibility : uint8_t { RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only DEBUG = 1 << 2, // Option can be set for debug builds only via environment and config file DEBUG_GLOBAL = 1 << 3, // Global option can be set for debug builds only via environment and config file #ifdef ENABLE_DEBUG_CAPS - ANY = 0x0F, // Any visibility is valid including DEBUG + ANY = 0x0F, // Any visibility is valid including DEBUG & DEBUG_GLOBAL #else - ANY = 0x03, // Any visibility is valid excluding DEBUG + ANY = 0x03, // Any visibility is valid excluding DEBUG & DEBUG_GLOBAL #endif }; @@ -235,14 +240,14 @@ class OPENVINO_RUNTIME_API PluginConfig { std::string to_string() const; - void finalize(std::shared_ptr context, const ov::RTMap& rt_info); + void finalize(const IRemoteContext* context, const ov::Model* model); bool visit_attributes(ov::AttributeVisitor& visitor); protected: - virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {} - virtual void apply_debug_options(std::shared_ptr context); - virtual void finalize_impl(std::shared_ptr context) {} + virtual void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) {} + virtual void apply_debug_options(const IRemoteContext* context); + virtual void finalize_impl(const IRemoteContext* context) {} template bool is_set_by_user(const ov::Property& property) const { diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index e950a164a41002..4b0a3266b52f71 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -35,29 +35,28 @@ size_t get_terminal_width() { } else { return default_width; } -#else +#elif __linux__ struct winsize w; if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { return w.ws_col; } else { return default_width; } -#endif // _WIN32 +#else + return default_width; +#endif } } namespace ov { - ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility allowed_visibility) const { if (m_user_properties.find(name) != m_user_properties.end()) { return m_user_properties.at(name); } auto option = get_option_ptr(name); - if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) { - OPENVINO_THROW("Couldn't get unknown property: ", name); - } + OPENVINO_ASSERT((allowed_visibility & option->get_visibility()) == option->get_visibility(), "Couldn't get unknown property: ", name); return option->get_any(); } @@ -98,11 +97,13 @@ void PluginConfig::set_user_property(const ov::AnyMap& config, OptionVisibility } } -void PluginConfig::finalize(std::shared_ptr context, const ov::RTMap& rt_info) { +void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* model) { if (m_is_finalized) return; - apply_rt_info(context, rt_info); + if (model) + apply_model_specific_options(context, *model); + apply_debug_options(context); // Copy internal properties before applying hints to ensure that // a property set by hint won't be 
overriden by a value in user config. @@ -132,7 +133,7 @@ bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) { return true; } -void PluginConfig::apply_debug_options(std::shared_ptr context) { +void PluginConfig::apply_debug_options(const IRemoteContext* context) { const bool throw_on_error = false; if (context) { diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index 6f385473b1c627..68b8cb3b617fd3 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -1,8 +1,11 @@ -// Copyright (C) 2018-2024 Intel Corporation +// Copyright (C) 2018-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "openvino/core/any.hpp" +#include "openvino/core/model.hpp" +#include "openvino/core/node_vector.hpp" +#include "openvino/op/parameter.hpp" #include "openvino/runtime/plugin_config.hpp" #include @@ -18,7 +21,7 @@ static constexpr Property high_level_proper static constexpr Property low_level_property{"LOW_LEVEL_PROPERTY"}; static constexpr Property release_internal_property{"RELEASE_INTERNAL_PROPERTY"}; static constexpr Property debug_property{"DEBUG_PROPERTY"}; -static constexpr Property debug_global_property{"DEBUG_GLOBAL_PROPERTY"}; +static constexpr Property debug_global_property{"DEBUG_GLOBAL_PROPERTY"}; struct EmptyTestConfig : public ov::PluginConfig { @@ -73,14 +76,14 @@ struct NotEmptyTestConfig : public ov::PluginConfig { return supported_properties; } - void finalize_impl(std::shared_ptr context) override { + void finalize_impl(const IRemoteContext* context) override { if (!is_set_by_user(low_level_property)) { m_low_level_property.value = m_high_level_property.value; } } - void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override { - apply_rt_info_property(high_level_property, rt_info); + void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) override { + apply_rt_info_property(high_level_property, model.get_rt_info("runtime_options")); } using ov::PluginConfig::get_option_ptr; @@ -185,12 +188,17 @@ TEST(plugin_config, can_set_property_from_rt_info) { {int_property.name(), 10} // int_property is not applied from rt info }; + auto p1 = std::make_shared(); + auto r1 = std::make_shared(p1); + ov::Model m(ov::OutputVector{r1}, ov::ParameterVector{p1}); + m.set_rt_info(rt_info, {"runtime_options"}); + // default values ASSERT_EQ(cfg.m_high_level_property.value, ""); ASSERT_EQ(cfg.m_low_level_property.value, ""); ASSERT_EQ(cfg.m_int_property.value, -1); - cfg.finalize(nullptr, rt_info); + cfg.finalize(nullptr, &m); ASSERT_EQ(cfg.m_high_level_property.value, "value1"); ASSERT_EQ(cfg.m_low_level_property.value, "value1"); // dependant is updated too @@ -235,3 +243,21 @@ TEST(plugin_config, can_get_global_property) { NotEmptyTestConfig cfg; ASSERT_EQ(cfg.get_debug_global_property(), 4); } + +TEST(plugin_config, global_property_read_env_on_each_call) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_debug_global_property(), 4); +#ifdef ENABLE_DEBUG_CAPS + std::string env_var1 = "OV_DEBUG_GLOBAL_PROPERTY=10"; + ::putenv(env_var1.data()); + ASSERT_EQ(cfg.get_debug_global_property(), 10); + + std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; + ::putenv(env_var2.data()); + ASSERT_EQ(cfg.get_debug_global_property(), 20); +#else + std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; + ::putenv(env_var2.data()); + ASSERT_EQ(cfg.get_debug_global_property(), 4); // no effect for build w/o debug caps +#endif +} diff --git 
a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index db53bb0c531b1a..8627f82b28574d 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -210,7 +210,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { ov::internal::exclusive_async_requests.name(), ". Expected only true/false"); } - } else if (key == ov::intel_cpu::lp_transforms_mode.name()) { + } else if (key == ov::internal::enable_lp_transformations.name()) { try { lpTransformsMode = val.as() ? LPTransformsMode::On : LPTransformsMode::Off; } catch (ov::Exception&) { diff --git a/src/plugins/intel_cpu/src/internal_properties.hpp b/src/plugins/intel_cpu/src/internal_properties.hpp index 320539721ca09a..3a806e13a30ea1 100644 --- a/src/plugins/intel_cpu/src/internal_properties.hpp +++ b/src/plugins/intel_cpu/src/internal_properties.hpp @@ -16,11 +16,6 @@ namespace intel_cpu { */ static constexpr Property cpu_runtime_cache_capacity{"CPU_RUNTIME_CACHE_CAPACITY"}; -/** - * @brief Allow low precision transform. - */ -static constexpr Property lp_transforms_mode{"LP_TRANSFORMS_MODE"}; - /** * @brief Enum to define possible snippets mode hints. */ @@ -69,4 +64,4 @@ inline std::istream& operator>>(std::istream& is, SnippetsMode& mode) { static constexpr Property snippets_mode{"SNIPPETS_MODE"}; } // namespace intel_cpu -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 553f16152cb2bb..389c7f7e2b54a1 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -168,7 +168,6 @@ class ProgramBuilder final { void prepare_build(); void cleanup_build(); - // TODO(eunsoo): remove createTopolpgyOnly argument and add another method to create topology from ngraph function std::shared_ptr build(const std::vector>& ops, bool innerProgram = false); void CreateSingleLayerPrimitive(const std::shared_ptr& op); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp index 8440bd6824ef9e..98e74b76a8b502 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp @@ -37,6 +37,7 @@ class RemoteContextImpl : public ov::IRemoteContext { ov::SoPtr create_tensor(const ov::element::Type& type, const ov::Shape& shape, const ov::AnyMap& params) override; cldnn::engine& get_engine() { return *m_engine; } + const cldnn::engine& get_engine() const { return *m_engine; } ov::intel_gpu::gpu_handle_param get_external_queue() const { return m_external_queue; } cldnn::memory::ptr try_get_cached_memory(size_t hash); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index 489399c1255ce0..08134ce420a4f6 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -4,11 +4,8 @@ #pragma once #include -#include -#include -#include -#include #include +#include #include "intel_gpu/runtime/execution_config.hpp" namespace ov::intel_gpu { @@ -44,37 +41,47 @@ std::ostream& get_verbose_stream(); } // namespace ov::intel_gpu #ifdef 
GPU_DEBUG_CONFIG -#if defined(_WIN32) -#define SEPARATE '\\' -#else -#define SEPARATE '/' -#endif -#define GPU_PREFIX "GPU_Debug: " +namespace color { +static constexpr const char dark_gray[] = "\033[1;30m"; +static constexpr const char blue[] = "\033[1;34m"; +static constexpr const char purple[] = "\033[1;35m"; +static constexpr const char cyan[] = "\033[1;36m"; +static constexpr const char reset[] = "\033[0m"; +} // namespace color + +static constexpr const char prefix[] = "GPU_Debug: "; -#define GPU_FILENAME (strrchr(__FILE__, SEPARATE) ? strrchr(__FILE__, SEPARATE) + 1 : __FILE__) #define GPU_DEBUG_IF(cond) if (cond) #define GPU_DEBUG_CODE(...) __VA_ARGS__ + #define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) \ cldnn::instrumentation::mem_usage_logger mem_logger{stage, ov::intel_gpu::ExecutionConfig::get_verbose() >= 2}; -#define GPU_DEBUG_PROFILED_STAGE(stage) \ - auto stage_prof = cldnn::instrumentation::profiled_stage(\ + +#define GPU_DEBUG_PROFILED_STAGE(stage) \ + auto stage_prof = cldnn::instrumentation::profiled_stage( \ !get_config().get_dump_profiling_data_path().empty(), *this, stage) + #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val) stage_prof.set_cache_hit(val) #define GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO(info) stage_prof.add_memalloc_info(info) +#define GPU_DEBUG_LOG_PREFIX ov::intel_gpu::get_verbose_stream() \ + << prefix \ + << std::filesystem::path(__FILE__).filename().generic_string() << ":" \ + << std::to_string(__LINE__) << ":" \ + << __func__ << ": " + +#define GPU_DEBUG_LOG_COLOR_PREFIX ov::intel_gpu::get_verbose_stream() \ + << color::dark_gray << std::string(prefix) \ + << color::blue << std::filesystem::path(__FILE__).filename().generic_string() << ":" \ + << color::purple << std::to_string(__LINE__) << ":" \ + << color::cyan << __func__ << ": " << color::reset + #define GPU_DEBUG_LOG_RAW_INT(min_verbose_level) if (ov::intel_gpu::ExecutionConfig::get_verbose() >= min_verbose_level) \ - (ov::intel_gpu::ExecutionConfig::get_verbose_color() ? GPU_DEBUG_LOG_PREFIX : GPU_DEBUG_LOG_COLOR_PREFIX) -#define GPU_DEBUG_LOG_RAW(min_verbose_level) GPU_DEBUG_LOG_RAW_INT(static_cast::type>(min_verbose_level)) -#define GPU_DEBUG_LOG_PREFIX \ - ov::intel_gpu::get_verbose_stream() << GPU_PREFIX << GPU_FILENAME << ":" << __LINE__ << ":" << __func__ << ": " -#define GPU_DEBUG_LOG_COLOR_PREFIX ov::intel_gpu::get_verbose_stream() << DARK_GRAY << GPU_PREFIX << \ - BLUE << GPU_FILENAME << ":" << PURPLE << __LINE__ << ":" << CYAN << __func__ << ": " << RESET -#define DARK_GRAY "\033[1;30m" -#define BLUE "\033[1;34m" -#define PURPLE "\033[1;35m" -#define CYAN "\033[1;36m" -#define RESET "\033[0m" + (ov::intel_gpu::ExecutionConfig::get_verbose_color() ? GPU_DEBUG_LOG_COLOR_PREFIX : GPU_DEBUG_LOG_PREFIX) + +#define GPU_DEBUG_LOG_RAW(min_verbose_level) \ + GPU_DEBUG_LOG_RAW_INT(static_cast::type>(min_verbose_level)) #else #define GPU_DEBUG_IF(cond) if (0) #define GPU_DEBUG_CODE(...) 
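The release branch above compiles every debug construct away: GPU_DEBUG_IF(cond) becomes "if (0)" and the condition is discarded before it ever reaches the compiler, while the guarded block still has to parse, which keeps debug-only code from bit-rotting. A minimal standalone sketch of this pattern, using hypothetical names rather than the exact OpenVINO macros:

    #include <cstdlib>
    #include <iostream>

    #ifdef MY_DEBUG_CAPS
    #define MY_DEBUG_IF(cond) if (cond)
    #else
    // Release build: the condition tokens are dropped entirely, so the guarded
    // block becomes dead code and is eliminated by the compiler.
    #define MY_DEBUG_IF(cond) if (0)
    #endif

    int main() {
        MY_DEBUG_IF(std::getenv("MY_VERBOSE") != nullptr) {
            std::cout << "printed only in debug-caps builds\n";
        }
        return 0;
    }

Because the macro argument disappears in the release expansion, anything referenced only inside the condition never has to exist in a release build.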
@@ -82,7 +89,7 @@ std::ostream& get_verbose_stream(); #define GPU_DEBUG_PROFILED_STAGE(stage) #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val) #define GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO(info) -#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) ov::intel_gpu::get_verbose_stream() << GPU_PREFIX +#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) ov::intel_gpu::get_verbose_stream() #endif #define GPU_DEBUG_COUT GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::DISABLED) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index be8a60b65d023f..bce3e3101e2160 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -4,6 +4,7 @@ #pragma once +#include "openvino/core/any.hpp" #include "openvino/runtime/plugin_config.hpp" #include "intel_gpu/runtime/device_info.hpp" #include "intel_gpu/runtime/internal_properties.hpp" @@ -31,9 +32,12 @@ struct ExecutionConfig : public ov::PluginConfig { #undef OV_CONFIG_LOCAL_OPTION #undef OV_CONFIG_GLOBAL_OPTION + const ov::AnyMap& get_user_properties() const { return m_user_properties; } + protected: - void finalize_impl(std::shared_ptr context) override; - void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; + void finalize_impl(const IRemoteContext* context) override; + void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) override; + void apply_rt_info(const IRemoteContext* context, const ov::RTMap& rt_info, bool is_llm); const ov::PluginConfig::OptionsDesc& get_options_desc() const override; void apply_user_properties(const cldnn::device_info& info); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index f00cf86da5e50b..afb32d3393d6b4 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -83,7 +83,6 @@ static constexpr Property allow_new_shape_infer{"G static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; static constexpr Property config_file{"CONFIG_FILE"}; -static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; static constexpr Property buffers_preallocation_ratio{"GPU_BUFFERS_PREALLOCATION_RATIO"}; static constexpr Property max_kernels_per_batch{"GPU_MAX_KERNELS_PER_BATCH"}; static constexpr Property use_onednn{"USE_ONEDNN"}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 7e7651ede111e7..61e4315807449c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -31,9 +31,9 @@ OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, " OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") +OV_CONFIG_RELEASE_OPTION(ov::internal, 
enable_lp_transformations, false, "Enable/Disable Low precision transformations set") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file") -OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_lp_transformations, false, "Enable/Disable Low precision transformations set") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, shape_predictor_settings, {10, 16 * 1024, 2, 1.1f}, "Preallocation settings") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. May be in-order or out-of-order") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, optimize_data, false, "Enable/Disable data flow optimizations for cldnn::program") @@ -49,7 +49,7 @@ OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Con OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, help, false, "Print help message for all config options") OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 0 - Disabled, 4 - Maximum verbosity") -OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, verbose_color, false, "Enable coloring for verbose logs") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, verbose_color, true, "Enable coloring for verbose logs") OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem") OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, usm_policy, 0, "0: default, 1: use usm_host, 2: do not use usm_host") OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump") diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 8e8cac35fa1fb0..1c79ab27101808 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -80,8 +80,7 @@ bool concat_in_place_optimization::match(const program_node& concat_node, if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph()) return false; bool do_runtime_buffer_fusing = true; - const auto& config = concat_node.get_config(); - GPU_DEBUG_IF(config.get_disable_runtime_buffer_fusing()) { + GPU_DEBUG_IF(concat_node.get_config().get_disable_runtime_buffer_fusing()) { do_runtime_buffer_fusing = false; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index a5d98be69771a9..8b3a73c74f3aa6 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -121,7 +121,7 @@ void select_preferred_formats::run(program& p) { optimize_conv_permute(*n); } } catch (std::exception& exception) { - GPU_DEBUG_INFO << "WARNING(select_preferred_formats): " << exception.what() << std::endl; + GPU_DEBUG_LOG << "WARNING(select_preferred_formats): " << exception.what() << std::endl; } print_selected_formats(*n); } diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 318e36cc5c8a09..bcf6a26420dc57 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -91,6 +91,8 @@ 
Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context IstreamAttributeVisitor visitor(ib); m_config.visit_attributes(visitor); + m_config.set_user_property(config.get_user_properties()); // Copy user properties if those were modified on import call + m_config.finalize(context.get(), nullptr); auto imported_prog = std::make_shared(get_engine(), m_config); imported_prog->load(ib); diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 52b47ad52be0ae..3f6b88ff238030 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -28,12 +27,7 @@ #include "intel_gpu/runtime/itt.hpp" #include "openvino/core/any.hpp" #include "openvino/core/deprecated.hpp" -#include "openvino/op/gather.hpp" -#include "openvino/op/concat.hpp" -#include "openvino/op/paged_attention.hpp" #include "openvino/pass/manager.hpp" -#include "openvino/pass/pattern/op/wrap_type.hpp" -#include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/visualize_tree.hpp" #include "openvino/runtime/device_id_parser.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" @@ -42,8 +36,6 @@ #include "openvino/runtime/performance_heuristics.hpp" #include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/properties.hpp" -#include "openvino/util/common_util.hpp" -#include "ov_ops/dynamic_quantize.hpp" #include "openvino/util/weights_path.hpp" #include "transformations/common_optimizations/dimension_tracking.hpp" #include "transformations/init_node_info.hpp" @@ -60,106 +52,6 @@ using Time = std::chrono::high_resolution_clock; namespace ov::intel_gpu { -namespace { - -ov::RTMap get_rt_info(const ov::Model& model) { - ov::RTMap rt_info; - if (model.has_rt_info("runtime_options")) - rt_info = model.get_rt_info("runtime_options"); - - if (model.has_rt_info("__weights_path")) { - rt_info[ov::weights_path.name()] = model.get_rt_info("__weights_path"); - } - return rt_info; -} - -bool requires_new_shape_infer(const std::shared_ptr& op) { - if (op->is_dynamic()) { - return true; - } - - // HACK: SearchSorted has specific shape requirements. - // E.g. static input shapes: sorted:[8], values:[2,3,4] are prefectly fine, - // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid. - // Similar case for STFT. - if (ov::is_type(op) || ov::is_type(op)) - return true; - - if (ov::is_type(op)) - return true; - - if (ov::is_type(op)) { - const auto body_function = std::static_pointer_cast(op)->get_function(); - if (body_function->is_dynamic()) - return true; - } - - if (ov::is_type(op) || ov::is_type(op)) { - return true; - } - // When input node has dynamic shape with 4 dimension, this function return false - // because op.is_dynamic() which only checks input shapes return false. - // So, in the case of input data, we need to check output shape. 
- for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).is_dynamic()) - return true; - } - - for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).size() > 6) - return true; - } - - for (size_t i = 0; i < op->get_input_size(); i++) { - if (op->get_input_partial_shape(i).size() > 6) - return true; - } - - return false; -} - -void set_model_properties(const ov::Model& model, ExecutionConfig& config) { - const auto& ops = model.get_ordered_ops(); - // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. - // So, do not check allow_new_shape_infer for inner program build - for (const auto& op : ops) { - if (requires_new_shape_infer(op)) { - config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); - break; - } - } - bool is_dynamic = false; - for (const auto& op : ops) { - if (op->is_dynamic()) { - is_dynamic = true; - break; - } - } - bool has_lstm = false; - for (const auto& op : ops) { - if (ov::is_type(op)) { - has_lstm = true; - break; - } - } - - // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, - // smaller # of kernels are built compared to static models. - // So having smaller batch size is even better for dynamic model as we can do more parallel build. - if (is_dynamic) { - config.set_property(ov::intel_gpu::max_kernels_per_batch(4)); - } else { - config.set_property(ov::intel_gpu::max_kernels_per_batch(8)); - } - - config.set_property(ov::intel_gpu::optimize_data(true)); - - if (has_lstm) - config.set_property(ov::intel_gpu::use_onednn(true)); -} - -} // namespace - #define FACTORY_DECLARATION(op_version, op_name) \ void __register ## _ ## op_name ## _ ## op_version(); @@ -170,33 +62,6 @@ void set_model_properties(const ov::Model& model, ExecutionConfig& config) { #include "intel_gpu/plugin/primitives_list.hpp" #undef REGISTER_FACTORY -const auto is_llm = [](const std::shared_ptr& model) -> bool { - using namespace ov::pass::pattern; - - auto past = wrap_type(); - auto convert_past = wrap_type({past}); - auto gather_input = std::make_shared(OutputVector{past, convert_past}); - auto beam_idx = wrap_type(); - auto gather_past = wrap_type({gather_input, beam_idx, wrap_type()}); - auto gather_convert = wrap_type({gather_past}); - auto concat_past_input = std::make_shared(OutputVector{past, convert_past, gather_past, gather_convert}); - auto concat = wrap_type({concat_past_input, any_input()}); - auto convert_present = wrap_type({concat}); - auto present_input = std::make_shared(OutputVector{concat, convert_present}); - auto present = wrap_type({present_input}); - - auto kvcache_matcher = std::make_shared(present, "KVCacheMatcher"); - - for (auto& op : model->get_ordered_ops()) { - if (kvcache_matcher->match(op) || - ov::is_type(op)) { - return true; - } - } - - return false; -}; - void Plugin::register_primitives() const { #define REGISTER_FACTORY(op_version, op_name) FACTORY_CALL(op_version, op_name) #include "intel_gpu/plugin/primitives_list.hpp" @@ -237,12 +102,24 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p auto cloned_model = model->clone(); OPENVINO_ASSERT(cloned_model != nullptr, "[GPU] Failed to clone model!"); - GPU_DEBUG_IF(!config.get_dump_graphs_path().empty()) { - auto path_base = config.get_dump_graphs_path() + "/" + cloned_model->get_name(); + // Here we create a copy of the config to finalize it and ensure that transformation pipe can use correct options values + // This is 
mainly needed to correctly update lower-level properties when a higher-level option is set by the user.
+    // For example, transformations use the inference_precision hint, which may be updated by the execution_mode property.
+    // The update itself happens at the finalization stage, so we must call finalize() here to get the correct pass flow.
+    // The reason why we can't do finalization once and then just run all graph transformations is that
+    // some of the transformations may actually impact some properties. For example, the presence of an LSTMSequence op
+    // impacts the value of the use_onednn property. But in order to understand if there's an op of this type, we have to run
+    // common optimizations, which may fuse a subgraph into an LSTMSequence op. So basically, the final value of the use_onednn
+    // property can be computed for the transformed model only.
+    auto config_copy = config;
+    config_copy.finalize(context.get(), model.get());
+
+    GPU_DEBUG_IF(!config_copy.get_dump_graphs_path().empty()) {
+        auto path_base = config_copy.get_dump_graphs_path() + "/" + cloned_model->get_name();
         ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model);
     }

-    transform_model(cloned_model, config, context);
+    transform_model(cloned_model, config_copy, context);

     // Transformations for some reason may drop output tensor names, so here we copy those from the original model
     auto new_results = cloned_model->get_results();
@@ -257,8 +134,8 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p
         new_res->set_friendly_name(old_res->get_friendly_name());
     }

-    GPU_DEBUG_IF(!config.get_dump_graphs_path().empty()) {
-        auto path_base = config.get_dump_graphs_path() + "/" + cloned_model->get_name() + "_" + "transformed_func";
+    GPU_DEBUG_IF(!config_copy.get_dump_graphs_path().empty()) {
+        auto path_base = config_copy.get_dump_graphs_path() + "/" + cloned_model->get_name() + "_" + "transformed_func";
         ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model);
     }
     return cloned_model;
@@ -307,10 +184,10 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     ExecutionConfig config = m_configs_map.at(device_id);
     config.set_user_property(orig_config, OptionVisibility::RELEASE);
-    set_model_properties(*model, config);
-    config.finalize(context, get_rt_info(*model));

     auto transformed_model = clone_and_transform_model(model, config, context);
+
+    config.finalize(context.get(), transformed_model.get());
     {
         OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model::CreateCompiledModel");
         return std::make_shared(transformed_model, shared_from_this(), context, config);
@@ -326,10 +203,12 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id);

     ExecutionConfig config = m_configs_map.at(device_id);
-    set_model_properties(*model, config);
-    config.finalize(context_impl, get_rt_info(*model));
+    config.set_user_property(orig_config, OptionVisibility::RELEASE);

     auto transformed_model = clone_and_transform_model(model, config, context_impl);
+
+    config.finalize(context_impl.get(), transformed_model.get());
+
     return std::make_shared(transformed_model, shared_from_this(), context_impl, config);
 }

@@ -393,8 +272,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr&
     ExecutionConfig config = m_configs_map.at(device_id);
     config.set_user_property(orig_config, OptionVisibility::RELEASE);
-    set_model_properties(*model, config);
-    config.finalize(ctx, get_rt_info(*model));
+    config.finalize(ctx.get(), model.get());

     ProgramBuilder prog(ctx->get_engine(), config);
@@ -449,7 +327,6 @@ std::shared_ptr Plugin::import_model(std::istream& model,
     ExecutionConfig config = m_configs_map.at(device_id);
     config.set_user_property(_orig_config, OptionVisibility::RELEASE);
-    config.finalize(context_impl, {});

     ov::CacheMode cache_mode = config.get_cache_mode();
     ov::EncryptionCallbacks encryption_callbacks = config.get_cache_encryption_callbacks();
@@ -768,7 +645,6 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const {
     const auto& device_info = context->get_engine().get_device_info();
     auto config = m_configs_map.at(device_id);
     config.set_property(ov::intel_gpu::partial_build_program(true));
-    config.finalize(context, {});
     uint32_t n_streams = static_cast(config.get_num_streams());
     uint64_t occupied_device_mem = 0;
     auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>();
@@ -821,6 +697,8 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const {
         OPENVINO_THROW("[GPU_MAX_BATCH_SIZE] ov::hint::model should be std::shared_ptr type");
     }

+    config.finalize(context.get(), model.get());
+
     size_t base_batch_size = 16; // empirically decided for DG1
     auto& engine = get_default_context(device_id)->get_engine();

diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
index 9189941ea8b3a6..1633bb022480a5 100644
--- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
+++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
@@ -305,7 +305,7 @@ void SyncInferRequest::enqueue() {
     m_internal_outputs = network->execute(dependencies);
     auto network_enqueue_end = std::chrono::high_resolution_clock::now();

-    const auto& config = network->get_config();
+    [[maybe_unused]] const auto& config = network->get_config();

     // If dump layers path is set, only runs first inference.
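    // NOTE: the [[maybe_unused]] above is needed because, in builds without debug caps,
    // GPU_DEBUG_IF(cond) expands to "if (0)" and discards its argument, so 'config'
    // would otherwise trigger an unused-variable warning.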
GPU_DEBUG_IF(!config.get_dump_tensors_path().empty() && config.get_dump_iterations().empty()) {

diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
index 7e2c82f84b7d7a..5b90ef246846a5 100644
--- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp
+++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
@@ -5,6 +5,20 @@
 #include "intel_gpu/runtime/execution_config.hpp"
 #include "intel_gpu/plugin/remote_context.hpp"
 #include "openvino/core/any.hpp"
+#include "openvino/core/model.hpp"
+#include "openvino/op/concat.hpp"
+#include "openvino/op/convert.hpp"
+#include "openvino/op/gather.hpp"
+#include "openvino/op/loop.hpp"
+#include "openvino/op/lstm_sequence.hpp"
+#include "openvino/op/paged_attention.hpp"
+#include "openvino/op/search_sorted.hpp"
+#include "openvino/op/stft.hpp"
+#include "openvino/pass/pattern/matcher.hpp"
+#include "openvino/pass/pattern/op/label.hpp"
+#include "openvino/pass/pattern/op/or.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "ov_ops/dynamic_quantize.hpp"
 #include "openvino/runtime/internal_properties.hpp"
 #include "intel_gpu/runtime/internal_properties.hpp"
 #include "openvino/runtime/plugin_config.hpp"
@@ -13,6 +27,93 @@

 namespace ov::intel_gpu {

+namespace {
+
+ov::RTMap get_rt_info(const ov::Model& model) {
+    ov::RTMap rt_info;
+    if (model.has_rt_info("runtime_options"))
+        rt_info = model.get_rt_info("runtime_options");
+
+    if (model.has_rt_info("__weights_path")) {
+        rt_info[ov::weights_path.name()] = model.get_rt_info("__weights_path");
+    }
+    return rt_info;
+}
+
+
+bool requires_new_shape_infer(const std::shared_ptr& op) {
+    if (op->is_dynamic()) {
+        return true;
+    }
+
+    // HACK: SearchSorted has specific shape requirements.
+    // E.g. static input shapes: sorted:[8], values:[2,3,4] are perfectly fine,
+    // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid.
+    // Similar case for STFT.
+    if (ov::is_type(op) || ov::is_type(op))
+        return true;
+
+    if (ov::is_type(op))
+        return true;
+
+    if (ov::is_type(op)) {
+        const auto body_function = std::static_pointer_cast(op)->get_function();
+        if (body_function->is_dynamic())
+            return true;
+    }
+
+    if (ov::is_type(op) || ov::is_type(op)) {
+        return true;
+    }
+    // When an input node has a dynamic shape with 4 dimensions, this function would return false,
+    // because op->is_dynamic(), which only checks the input shapes, returns false.
+    // So, in the case of input data, we need to check the output shapes as well.
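+    // (The checks below return true for any dynamic output shape and for any input/output rank above 6.)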
+ for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).is_dynamic()) + return true; + } + + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).size() > 6) + return true; + } + + for (size_t i = 0; i < op->get_input_size(); i++) { + if (op->get_input_partial_shape(i).size() > 6) + return true; + } + + return false; +} + +bool is_llm(const ov::Model& model) { + using namespace ov::pass::pattern; + + auto past = wrap_type(); + auto convert_past = wrap_type({past}); + auto gather_input = std::make_shared(OutputVector{past, convert_past}); + auto beam_idx = wrap_type(); + auto gather_past = wrap_type({gather_input, beam_idx, wrap_type()}); + auto gather_convert = wrap_type({gather_past}); + auto concat_past_input = std::make_shared(OutputVector{past, convert_past, gather_past, gather_convert}); + auto concat = wrap_type({concat_past_input, any_input()}); + auto convert_present = wrap_type({concat}); + auto present_input = std::make_shared(OutputVector{concat, convert_present}); + auto present = wrap_type({present_input}); + + auto kvcache_matcher = std::make_shared(present, "KVCacheMatcher"); + + for (auto& op : model.get_ordered_ops()) { + if (kvcache_matcher->match(op) || ov::is_type(op)) { + return true; + } + } + + return false; +}; + +} // namespace + #define OV_CONFIG_LOCAL_OPTION(...) #define OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ ConfigOption ExecutionConfig::m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; @@ -49,15 +150,17 @@ ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { void ExecutionConfig::finalize(cldnn::engine& engine) { auto ctx = std::make_shared("GPU", std::vector{engine.get_device()}); - PluginConfig::finalize(ctx, {}); + PluginConfig::finalize(ctx.get(), nullptr); } -void ExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { - const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); +void ExecutionConfig::apply_rt_info(const IRemoteContext* context, const ov::RTMap& rt_info, bool is_llm) { + const auto& info = dynamic_cast(context)->get_engine().get_device_info(); if (!info.supports_immad) { apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); } + if (!info.supports_immad || !is_llm) + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with @@ -68,15 +171,54 @@ void ExecutionConfig::apply_rt_info(std::shared_ptr context, con } } -void ExecutionConfig::finalize_impl(std::shared_ptr context) { +void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) { + apply_rt_info(context, get_rt_info(model), is_llm(model)); + + const auto& ops = model.get_ops(); + + auto process_op = [this](std::shared_ptr op) { + if (requires_new_shape_infer(op)) { + m_allow_new_shape_infer = true; + } + // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, + // smaller # of kernels are built compared to static models. + // So having smaller batch size is even better for dynamic model as we can do more parallel build. 
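+        // (Static models simply keep the option's default value.)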
+ if (op->is_dynamic()) { + m_max_kernels_per_batch = 4; + } + + // Allow using onednn for models with LSTMSequence op as it's much more performant than existing ocl impl + if (ov::is_type(op)) { + m_use_onednn = true; + } + }; + + // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. + // So, do not check allow_new_shape_infer for inner program build + for (const auto& op : ops) { + if (auto multi_subgraph_op = ov::as_type_ptr(op)) { + for (const auto& sub_graph : multi_subgraph_op->get_functions()) { + for (auto& sub_op : sub_graph->get_ops()) { + process_op(sub_op); + } + } + } else { + process_op(op); + } + } + + m_optimize_data = true; +} + +void ExecutionConfig::finalize_impl(const IRemoteContext* context) { if (m_help) { print_help(); exit(-1); } - const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + const auto& info = dynamic_cast(context)->get_engine().get_device_info(); apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { + if (!is_set_by_user(ov::internal::enable_lp_transformations)) { m_enable_lp_transformations = info.supports_imad || info.supports_immad; } if (!is_set_by_user(ov::intel_gpu::use_onednn) && info.supports_immad) { diff --git a/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp b/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp index fb9711e7605859..c78b472b9c54cb 100644 --- a/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp +++ b/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp @@ -28,6 +28,10 @@ TEST_F(GpuReservationTest, Mutiple_CompiledModel_Reservation) { models.emplace_back(ov::test::utils::make_multi_single_conv()); auto core = ov::test::utils::PluginCache::get().core(); + + auto available_devices = core->get_available_devices(); + if (std::find(available_devices.begin(), available_devices.end(), ov::test::utils::DEVICE_CPU) == available_devices.end()) + GTEST_SKIP(); core->set_property(target_devices[1], config); ov::AnyMap property_config = {{ov::num_streams.name(), 1}, @@ -54,4 +58,4 @@ TEST_F(GpuReservationTest, Mutiple_CompiledModel_Reservation) { if (thread.joinable()) thread.join(); } -} \ No newline at end of file +} From ea6f10fbec21da6a17cd5923c8fb4c431006d13e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 23 Jan 2025 15:27:28 +0400 Subject: [PATCH 24/44] cut off debug properties for release build Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 29 +++++++++---------- src/inference/tests/unit/config_test.cpp | 8 ++--- .../intel_gpu/runtime/debug_configuration.hpp | 2 ++ .../include/intel_gpu/runtime/profiling.hpp | 2 +- .../prepare_primitive_fusing.cpp | 4 +-- .../prepare_primitive_fusing_through.cpp | 2 +- .../src/graph/impls/ocl/kernels_cache.cpp | 10 ++----- src/plugins/intel_gpu/src/graph/network.cpp | 13 +++++---- src/plugins/intel_gpu/src/graph/program.cpp | 2 +- .../src/graph/program_dump_graph.cpp | 3 +- src/plugins/intel_gpu/src/plugin/graph.cpp | 15 +++++----- src/plugins/intel_gpu/src/plugin/plugin.cpp | 9 +++--- .../src/plugin/transformations_pipeline.cpp | 6 ++-- .../src/runtime/debug_configuration.cpp | 5 ++-- .../src/runtime/execution_config.cpp | 2 +- 15 files changed, 54 insertions(+), 58 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 531bbda8d346f8..51f5e8571daa38 100644 --- 
a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -63,21 +63,6 @@ } \ } -#ifdef ENABLE_DEBUG_CAPS -#define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ - static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ - auto v = read_env(PropertyNamespace::PropertyVar.name(), m_allowed_env_prefix, &m_ ## PropertyVar); \ - if (v.empty()) \ - return m_ ## PropertyVar.value; \ - return v.as(); \ - } -#else -#define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ - static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ - return m_ ## PropertyVar.value; \ - } -#endif - #define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) \ m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; @@ -90,12 +75,24 @@ #define OV_CONFIG_RELEASE_INTERNAL_OPTION(PropertyNamespace, PropertyVar, ...) \ OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE_INTERNAL, __VA_ARGS__) +#ifdef ENABLE_DEBUG_CAPS +#define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ + static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ + auto v = read_env(PropertyNamespace::PropertyVar.name(), m_allowed_env_prefix, &m_ ## PropertyVar); \ + if (v.empty()) \ + return m_ ## PropertyVar.value; \ + return v.as(); \ + } #define OV_CONFIG_DEBUG_OPTION(PropertyNamespace, PropertyVar, ...) \ OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) #define OV_CONFIG_DEBUG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, ...) \ OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG_GLOBAL, __VA_ARGS__) - +#else +#define OV_CONFIG_DEBUG_OPTION(...) +#define OV_CONFIG_DEBUG_GLOBAL_OPTION(...) +#define OV_CONFIG_DECLARE_GLOBAL_GETTER(...) 
+#endif namespace ov { enum class OptionVisibility : uint8_t { RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index 68b8cb3b617fd3..75336542cb7828 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -239,6 +239,7 @@ TEST(plugin_config, visibility_is_correct) { ASSERT_EQ(cfg.get_option_ptr(int_property.name())->get_visibility(), OptionVisibility::RELEASE); } +#ifdef ENABLE_DEBUG_CAPS TEST(plugin_config, can_get_global_property) { NotEmptyTestConfig cfg; ASSERT_EQ(cfg.get_debug_global_property(), 4); @@ -247,7 +248,6 @@ TEST(plugin_config, can_get_global_property) { TEST(plugin_config, global_property_read_env_on_each_call) { NotEmptyTestConfig cfg; ASSERT_EQ(cfg.get_debug_global_property(), 4); -#ifdef ENABLE_DEBUG_CAPS std::string env_var1 = "OV_DEBUG_GLOBAL_PROPERTY=10"; ::putenv(env_var1.data()); ASSERT_EQ(cfg.get_debug_global_property(), 10); @@ -255,9 +255,5 @@ TEST(plugin_config, global_property_read_env_on_each_call) { std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; ::putenv(env_var2.data()); ASSERT_EQ(cfg.get_debug_global_property(), 20); -#else - std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; - ::putenv(env_var2.data()); - ASSERT_EQ(cfg.get_debug_global_property(), 4); // no effect for build w/o debug caps -#endif } +#endif diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index 08134ce420a4f6..4367d2062d7325 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -53,6 +53,7 @@ static constexpr const char reset[] = "\033[0m"; static constexpr const char prefix[] = "GPU_Debug: "; #define GPU_DEBUG_IF(cond) if (cond) +#define GPU_DEBUG_VALUE_OR(debug_value, release_value) debug_value #define GPU_DEBUG_CODE(...) __VA_ARGS__ #define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) \ @@ -84,6 +85,7 @@ static constexpr const char prefix[] = "GPU_Debug: "; GPU_DEBUG_LOG_RAW_INT(static_cast::type>(min_verbose_level)) #else #define GPU_DEBUG_IF(cond) if (0) +#define GPU_DEBUG_VALUE_OR(debug_value, release_value) release_value #define GPU_DEBUG_CODE(...) 
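GPU_DEBUG_VALUE_OR is the companion trick that lets call sites read debug-only options and still compile when those options are cut out of release builds: a macro parameter that is never used in the replacement list is never substituted into the program, so the tokens of the debug expression never have to name a real function. A small standalone sketch of the mechanism, with hypothetical names rather than the exact OpenVINO macros:

    #include <iostream>
    #include <string>

    #ifdef MY_DEBUG_CAPS
    #define MY_VALUE_OR(debug_value, release_value) debug_value
    std::string get_dump_path() { return "/tmp/dumps"; }  // getter exists only in debug builds
    #else
    // Release build: the debug_value tokens are discarded by the preprocessor,
    // so the missing getter causes no compile error.
    #define MY_VALUE_OR(debug_value, release_value) release_value
    #endif

    int main() {
        std::string path = MY_VALUE_OR(get_dump_path(), std::string{});
        std::cout << (path.empty() ? "dumping disabled" : path) << "\n";
        return 0;
    }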
#define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) #define GPU_DEBUG_PROFILED_STAGE(stage) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp index 6d5d91bb5b783a..7d0dba80e7c017 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp @@ -158,7 +158,7 @@ class profiled_stage { , _obj(obj) , _stage(stage) { GPU_DEBUG_IF(profiling_enabled) { - _per_iter_mode = ov::intel_gpu::ExecutionConfig::get_dump_profiling_data_per_iter(); + _per_iter_mode = GPU_DEBUG_VALUE_OR(ov::intel_gpu::ExecutionConfig::get_dump_profiling_data_per_iter(), false); _start = std::chrono::high_resolution_clock::now(); } } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index cc3b5ff2260ea6..622b7cff4101ad 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -56,7 +56,7 @@ using namespace cldnn; void prepare_primitive_fusing::run(program& p) { - if (p.get_config().get_disable_post_ops_fusions()) + GPU_DEBUG_IF(p.get_config().get_disable_post_ops_fusions()) return; fuse_reorders(p); @@ -168,7 +168,7 @@ void prepare_primitive_fusing::fuse_reorders(program &p) { } void prepare_primitive_fusing::fuse_swiglu(program &p) { - bool disable_fc_swiglu_fusion = p.get_config().get_disable_fc_swiglu_fusion(); + bool disable_fc_swiglu_fusion = GPU_DEBUG_VALUE_OR(p.get_config().get_disable_fc_swiglu_fusion(), false); // Apply only for high performant GPU if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128) return; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index 78b494c52645de..33320126a9d910 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -16,7 +16,7 @@ using namespace cldnn; void prepare_primitive_fusing_through::run(program& p) { - if (p.get_config().get_disable_post_ops_fusions()) + GPU_DEBUG_IF(p.get_config().get_disable_post_ops_fusions()) return; auto try_fuse_through = [&](program_node& node) -> std::vector { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index e5c1fe016b96df..c02ad09044ae32 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -238,10 +238,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, b.hash_value = std::hash()(full_code); - std::string dump_sources_dir = ""; - GPU_DEBUG_IF(!_config.get_dump_sources_path().empty()) { - dump_sources_dir = _config.get_dump_sources_path(); - } + std::string dump_sources_dir = GPU_DEBUG_VALUE_OR(_config.get_dump_sources_path(), ""); // Add -g -s to build options to allow IGC assembly dumper to associate assembler sources with corresponding OpenCL kernel code lines // Should be used with the IGC_ShaderDump option @@ -297,10 +294,9 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co auto& cl_build_device = 
dynamic_cast(*_device); bool dump_sources = batch.dump_custom_program; - std::string dump_sources_dir = ""; - GPU_DEBUG_IF(!_config.get_dump_sources_path().empty()) { + std::string dump_sources_dir = GPU_DEBUG_VALUE_OR(_config.get_dump_sources_path(), ""); + GPU_DEBUG_IF(!dump_sources_dir.empty()) { dump_sources = true; - dump_sources_dir = _config.get_dump_sources_path(); } std::string err_log; // accumulated build log from all program's parts (only contains messages from parts which diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 66d890b51e4692..214778c397dbbc 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -157,7 +157,6 @@ void wait_for_the_turn(const std::vector& pids) { #else void dump_perf_data_raw(std::string, bool per_iter_mode, const std::list>&) {} -void wait_for_the_turn(const std::vector& pids) {} #endif } // namespace @@ -185,9 +184,10 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo net_id = get_unique_net_id(); } - GPU_DEBUG_IF(get_config().get_start_after_processes().size() != 0) { - wait_for_the_turn(get_config().get_start_after_processes()); - } + GPU_DEBUG_CODE( + if (get_config().get_start_after_processes().size() != 0) { + wait_for_the_turn(get_config().get_start_after_processes()); + }); calculate_weights_cache_capacity(); allocate_primitives(); configure_primitives_second_output(); @@ -225,8 +225,9 @@ network::~network() { if (_program != nullptr) _program->cancel_compilation_context(); _memory_pool->clear_pool_for_network(net_id); - GPU_DEBUG_IF(!_config.get_dump_profiling_data_path().empty()) { - dump_perf_data_raw(_config.get_dump_profiling_data_path() + "/perf_raw" + std::to_string(net_id) + ".csv", false, _exec_order); + std::string dump_path = GPU_DEBUG_VALUE_OR(_config.get_dump_profiling_data_path(), ""); + GPU_DEBUG_IF(!dump_path.empty()) { + dump_perf_data_raw(dump_path + "/perf_raw" + std::to_string(net_id) + ".csv", false, _exec_order); } } diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index ad384306b9e081..6bbb0f2512905d 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -1376,7 +1376,7 @@ program::primitives_info program::get_current_stage_info() const { } void program::save_pass_info(std::string pass_name) { - if (!_config.get_dump_graphs_path().empty()) + GPU_DEBUG_IF(!_config.get_dump_graphs_path().empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index 0e735683db2533..eb5c152a361a16 100644 --- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -3,6 +3,7 @@ // #include "program_dump_graph.h" +#include "intel_gpu/runtime/debug_configuration.hpp" #include "to_string_utils.h" #include "data_inst.h" #include "condition_inst.h" @@ -156,7 +157,7 @@ void dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - auto path = config.get_dump_graphs_path(); + std::string path = GPU_DEBUG_VALUE_OR(config.get_dump_graphs_path(), ""); if (path.empty()) { return {}; } diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 
bcf6a26420dc57..cba0d6aab9276d 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -113,9 +113,8 @@ Graph::Graph(std::shared_ptr graph, uint16_t stream_id) } Graph::~Graph() { - GPU_DEBUG_IF(m_config.get_host_time_profiling()) { - const auto log_level = m_config.get_host_time_profiling(); - + auto log_level = GPU_DEBUG_VALUE_OR(m_config.get_host_time_profiling(), 0); + GPU_DEBUG_IF(log_level) { auto get_time_str = [](int64_t time_mcs, int64_t iters_num = 1) { double time = static_cast(time_mcs); time /= iters_num; @@ -183,17 +182,19 @@ void Graph::build(std::shared_ptr program) { m_network = std::make_shared(program, m_stream_id); } - GPU_DEBUG_IF(!m_config.get_dry_run_path().empty()) { - ov::pass::Serialize(m_config.get_dry_run_path(), "").run_on_model(get_runtime_model()); + std::string dry_run_path = GPU_DEBUG_VALUE_OR(m_config.get_dry_run_path(), ""); + std::string dump_graphs_path = GPU_DEBUG_VALUE_OR(m_config.get_dump_graphs_path(), ""); + GPU_DEBUG_IF(!dry_run_path.empty()) { + ov::pass::Serialize(dry_run_path, "").run_on_model(get_runtime_model()); exit(0); } - GPU_DEBUG_IF(!m_config.get_dump_graphs_path().empty() && m_stream_id == 0) { + GPU_DEBUG_IF(!dump_graphs_path.empty() && m_stream_id == 0) { static int net_id = 0; auto steps_info = get_network()->get_optimizer_passes_info(); size_t step_idx = 0; for (auto& step : steps_info) { - auto xml_path = m_config.get_dump_graphs_path() + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; + auto xml_path = dump_graphs_path + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; ov::pass::Serialize(xml_path, "").run_on_model(get_runtime_model(step.second, true)); step_idx++; } diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 3f6b88ff238030..a6c53bd539bc49 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -114,8 +114,9 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p auto config_copy = config; config_copy.finalize(context.get(), model.get()); - GPU_DEBUG_IF(!config_copy.get_dump_graphs_path().empty()) { - auto path_base = config_copy.get_dump_graphs_path() + "/" + cloned_model->get_name(); + std::string dump_path = GPU_DEBUG_VALUE_OR(config_copy.get_dump_graphs_path(), ""); + GPU_DEBUG_IF(!dump_path.empty()) { + auto path_base = dump_path + "/" + cloned_model->get_name(); ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } @@ -134,8 +135,8 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p new_res->set_friendly_name(old_res->get_friendly_name()); } - GPU_DEBUG_IF(!config_copy.get_dump_graphs_path().empty()) { - auto path_base = config_copy.get_dump_graphs_path() + "/" + cloned_model->get_name() + "_" + "transformed_func"; + GPU_DEBUG_IF(!dump_path.empty()) { + auto path_base = dump_path + "/" + cloned_model->get_name() + "_" + "transformed_func"; ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } return cloned_model; diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 841551c73b671f..0d6d83f2f2982e 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -984,8 +984,8 @@ void 
TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - bool disable_horizontal_fc_fusion = config.get_disable_horizontal_fc_fusion(); - bool disable_fc_swiglu_fusion = config.get_disable_fc_swiglu_fusion(); + bool disable_horizontal_fc_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_horizontal_fc_fusion(), false); + bool disable_fc_swiglu_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_fc_swiglu_fusion(), false); // mlp fusion is only supported for cldnn on high performance GPUs bool fuse_mlp_swiglu = !device_info.supports_immad && @@ -1044,7 +1044,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { - bool asymmetric_dyn_quant = config.get_asym_dynamic_quantization(); + bool asymmetric_dyn_quant = GPU_DEBUG_VALUE_OR(config.get_asym_dynamic_quantization(), false); auto dynamic_quantization_group_size = config.get_dynamic_quantization_group_size(); pass_config->set_callback([=](const_node_ptr& root) -> bool { for (size_t i = 0 ; i < root->get_input_node_shared_ptr(0)->get_output_size(); ++i) { diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index ab72354626b5cb..7dcb5020f56cc1 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -2,13 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // +#ifdef GPU_DEBUG_CONFIG #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/execution_config.hpp" #include #include namespace ov::intel_gpu { - std::ostream& get_verbose_stream() { if (ExecutionConfig::get_log_to_file().length() > 0) { static std::ofstream fout; if (!fout.is_open()) @@ -19,5 +19,6 @@ std::ostream& get_verbose_stream() { return std::cout; } } - } // namespace ov::intel_gpu + +#endif diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 5b90ef246846a5..ffbd977dad42da 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -211,7 +211,7 @@ void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context } void ExecutionConfig::finalize_impl(const IRemoteContext* context) { - if (m_help) { + GPU_DEBUG_IF(m_help) { print_help(); exit(-1); } From b7dc614c45a41e99a7bad158ea944011d90f28c1 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 24 Jan 2025 15:12:06 +0400 Subject: [PATCH 25/44] config clone. 
Visibility fixes Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 16 ++++++------ src/inference/src/dev/plugin_config.cpp | 9 +++++-- src/plugins/intel_gpu/CMakeLists.txt | 1 + .../intel_gpu/plugin/remote_context.hpp | 1 - .../intel_gpu/runtime/execution_config.hpp | 1 + .../include/intel_gpu/runtime/options.inl | 2 +- .../graph_optimizer/propagate_constants.cpp | 2 +- .../intel_gpu/src/plugin/compiled_model.cpp | 3 ++- .../intel_gpu/src/plugin/ops/condition.cpp | 2 +- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 2 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 2 +- .../src/runtime/debug_configuration.cpp | 7 ++--- .../src/runtime/execution_config.cpp | 26 +++++++++++++------ 13 files changed, 46 insertions(+), 28 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 51f5e8571daa38..ef4d810e0a4bce 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -99,11 +99,7 @@ enum class OptionVisibility : uint8_t { RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only DEBUG = 1 << 2, // Option can be set for debug builds only via environment and config file DEBUG_GLOBAL = 1 << 3, // Global option can be set for debug builds only via environment and config file -#ifdef ENABLE_DEBUG_CAPS - ANY = 0x0F, // Any visibility is valid including DEBUG & DEBUG_GLOBAL -#else - ANY = 0x03, // Any visibility is valid excluding DEBUG & DEBUG_GLOBAL -#endif + ANY = 0xFF, // Any visibility is valid }; inline OptionVisibility operator&(OptionVisibility a, OptionVisibility b) { @@ -126,6 +122,8 @@ inline std::ostream& operator<<(std::ostream& os, const OptionVisibility& visibi case OptionVisibility::RELEASE: os << "RELEASE"; break; case OptionVisibility::RELEASE_INTERNAL: os << "RELEASE_INTERNAL"; break; case OptionVisibility::DEBUG: os << "DEBUG"; break; + case OptionVisibility::DEBUG_GLOBAL: os << "DEBUG_GLOBAL"; break; + case OptionVisibility::ANY: os << "ANY"; break; default: os << "UNKNOWN"; break; } @@ -181,11 +179,13 @@ struct ConfigOption : public ConfigOptionBase { return *this; } - bool operator==(const T& val) const { - return value == val; + template>> + bool operator==(const U& val) const { + return value == static_cast(val); } - bool operator!=(const T& val) const { + template>> + bool operator!=(const U& val) const { return !(*this == val); } diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 4b0a3266b52f71..2b568ab5bbe310 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -135,6 +135,11 @@ bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) { void PluginConfig::apply_debug_options(const IRemoteContext* context) { const bool throw_on_error = false; +#ifdef ENABLE_DEBUG_CAPS + constexpr const auto allowed_visibility = OptionVisibility::ANY; +#else + constexpr const auto allowed_visibility = OptionVisibility::RELEASE; +#endif if (context) { ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); @@ -144,7 +149,7 @@ void PluginConfig::apply_debug_options(const IRemoteContext* context) { std::cout << "Non default config value for " << prop.first << " = " << prop.second.as() << std::endl; } #endif - set_user_property(config_properties, OptionVisibility::ANY, throw_on_error); + 
set_user_property(config_properties, allowed_visibility, throw_on_error); } ov::AnyMap env_properties = read_env(); @@ -154,7 +159,7 @@ void PluginConfig::apply_debug_options(const IRemoteContext* context) { std::cout << "Non default env value for " << prop.first << " = " << prop.second.as() << std::endl; } #endif - set_user_property(env_properties, OptionVisibility::ANY, throw_on_error); + set_user_property(env_properties, allowed_visibility, throw_on_error); } ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { diff --git a/src/plugins/intel_gpu/CMakeLists.txt b/src/plugins/intel_gpu/CMakeLists.txt index 3a847c292c30ba..3322f2f3f1a875 100644 --- a/src/plugins/intel_gpu/CMakeLists.txt +++ b/src/plugins/intel_gpu/CMakeLists.txt @@ -51,6 +51,7 @@ endif() if(ENABLE_GPU_DEBUG_CAPS) add_definitions(-DGPU_DEBUG_CONFIG=1) + add_definitions(-DENABLE_DEBUG_CAPS=1) endif() set(INTEL_GPU_TARGET_OCL_VERSION "200" CACHE STRING "Target version of OpenCL which should be used by GPU plugin") diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp index 98e74b76a8b502..8f7e7ac45bca2e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp @@ -19,7 +19,6 @@ #include #include #include -#include namespace ov::intel_gpu { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index bce3e3101e2160..68e4ea69642a00 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -21,6 +21,7 @@ struct ExecutionConfig : public ov::PluginConfig { ExecutionConfig(const ExecutionConfig& other); ExecutionConfig& operator=(const ExecutionConfig& other); + ExecutionConfig clone() const; void finalize(cldnn::engine& engine); using ov::PluginConfig::finalize; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 61e4315807449c..6d513ef1cb2eda 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -32,8 +32,8 @@ OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") OV_CONFIG_RELEASE_OPTION(ov::internal, enable_lp_transformations, false, "Enable/Disable Low precision transformations set") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file") -OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, shape_predictor_settings, {10, 16 * 1024, 2, 1.1f}, "Preallocation settings") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. 
May be in-order or out-of-order") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, optimize_data, false, "Enable/Disable data flow optimizations for cldnn::program") diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index c632019cf91b01..95c746d5d59791 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -143,7 +143,7 @@ propagate_constants::calculate(engine& engine, if (!has_non_trivial_constants) return {}; - ExecutionConfig cf_config = config; + ExecutionConfig cf_config = config.clone(); cf_config.set_property(ov::intel_gpu::optimize_data(false)); cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); cf_config.finalize(engine); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 3fc1439d56a727..5b8edb9e7d08c9 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -5,6 +5,7 @@ #include "openvino/runtime/iplugin.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/util/weights_path.hpp" #include "intel_gpu/graph/serialization/binary_buffer.hpp" @@ -298,7 +299,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const { return decltype(ov::execution_devices)::value_type{m_context->get_device_name()}; } - return m_config.get_property(name); + return m_config.get_property(name, OptionVisibility::RELEASE); } std::shared_ptr CompiledModel::create_sync_infer_request() const { diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 3d287eb46c465e..da080544363d00 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -21,7 +21,7 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ << internal_body->get_friendly_name() << ", num inputs: " << op->get_input_size() << std::endl; - auto config = p.get_config(); + auto config = p.get_config().clone(); config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); config.finalize(p.get_engine()); diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 08b014d72206c6..556738c5df52ea 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -297,7 +297,7 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p // impacts the value of the use_onednn property. But in order to understand if there's an op of this type, we have to run // common optimizations which may do subgraph fusion to LSTMSequence op. So basically, the final value of the use_onednn // property can be computed for the transformed model only. 
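// [Editor's note, sketch under assumptions] Per the ExecutionConfig::clone() definition added
// later in this patch, a plain copy/assignment now preserves m_is_finalized, while clone() resets
// it; code paths that still need to adjust and re-finalize a config (as here and in
// propagate_constants above) are therefore switched to clone(), e.g.:
//
//   auto cfg = config.clone();                              // copy with m_is_finalized reset
//   cfg.set_property(ov::intel_gpu::optimize_data(false));  // permitted again on the clone
//   cfg.finalize(engine);                                   // re-finalize for the new use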
- auto config_copy = config; + auto config_copy = config.clone(); config_copy.finalize(context.get(), model.get()); std::string dump_path = GPU_DEBUG_VALUE_OR(config_copy.get_dump_graphs_path(), ""); diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index 7dcb5020f56cc1..cb36a8e0349457 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#ifdef GPU_DEBUG_CONFIG #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/execution_config.hpp" #include @@ -10,6 +9,7 @@ namespace ov::intel_gpu { std::ostream& get_verbose_stream() { +#ifdef GPU_DEBUG_CONFIG if (ExecutionConfig::get_log_to_file().length() > 0) { static std::ofstream fout; if (!fout.is_open()) @@ -18,7 +18,8 @@ std::ostream& get_verbose_stream() { } else { return std::cout; } +#else + return std::cout; +#endif } } // namespace ov::intel_gpu - -#endif diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index ffbd977dad42da..bef767da04186c 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -133,7 +133,7 @@ ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { m_user_properties = other.m_user_properties; - m_is_finalized = false; // copy is not automatically finalized + m_is_finalized = other.m_is_finalized; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } @@ -141,13 +141,19 @@ ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { m_user_properties = other.m_user_properties; - m_is_finalized = false; // copy is not automatically finalized + m_is_finalized = other.m_is_finalized; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } return *this; } +ExecutionConfig ExecutionConfig::clone() const { + ExecutionConfig new_config = *this; + new_config.m_is_finalized = false; + return new_config; +} + void ExecutionConfig::finalize(cldnn::engine& engine) { auto ctx = std::make_shared("GPU", std::vector{engine.get_device()}); PluginConfig::finalize(ctx.get(), nullptr); @@ -166,7 +172,7 @@ void ExecutionConfig::apply_rt_info(const IRemoteContext* context, const ov::RTM // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not // using that mechanism. 
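// [Editor's note, assumed helper shape] apply_rt_info_property() itself is not shown in this
// patch; it presumably copies a value from the model's rt_info into the config only when the key
// is present, along the lines of:
//
//   template <typename T, ov::PropertyMutability M>
//   void apply_rt_info_property(const ov::Property<T, M>& property, const ov::RTMap& rt_info) {
//       auto it = rt_info.find(property.name());
//       if (it != rt_info.end())
//           set_property(ov::AnyMap{{property.name(), it->second}});  // skip silently if absent
//   }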
- if (get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { + if (m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE) { apply_rt_info_property(ov::weights_path, rt_info); } } @@ -224,17 +230,21 @@ void ExecutionConfig::finalize_impl(const IRemoteContext* context) { if (!is_set_by_user(ov::intel_gpu::use_onednn) && info.supports_immad) { m_use_onednn = true; } - if (get_use_onednn()) { + if (m_use_onednn) { m_queue_type = QueueTypes::in_order; } - // Enable KV-cache compression by default for non-systolic platforms - if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - m_kv_cache_precision = ov::element::i8; + if (!is_set_by_user(ov::hint::kv_cache_precision) || m_kv_cache_precision == ov::element::undefined) { + if (info.supports_immad) { // MFDNN-11755 + m_kv_cache_precision = m_inference_precision; + } else { + // Enable KV-cache compression by default for non-systolic platforms only + m_kv_cache_precision = ov::element::i8; + } } // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && m_dynamic_quantization_group_size == 0 && !info.supports_immad) { m_dynamic_quantization_group_size = 32; } From a00e1e83dbc59b1b86a24f479a8c99e57941abce Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 24 Jan 2025 17:02:14 +0400 Subject: [PATCH 26/44] added comment about config copy Signed-off-by: Vladimir Paramuzov --- src/inference/tests/unit/config_test.cpp | 4 +++- .../intel_gpu/include/intel_gpu/runtime/execution_config.hpp | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index 75336542cb7828..e112e00e1e4241 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -21,8 +21,10 @@ static constexpr Property high_level_proper static constexpr Property low_level_property{"LOW_LEVEL_PROPERTY"}; static constexpr Property release_internal_property{"RELEASE_INTERNAL_PROPERTY"}; static constexpr Property debug_property{"DEBUG_PROPERTY"}; -static constexpr Property debug_global_property{"DEBUG_GLOBAL_PROPERTY"}; +#ifdef ENABLE_DEBUG_CAPS +static constexpr Property debug_global_property{"DEBUG_GLOBAL_PROPERTY"}; +#endif struct EmptyTestConfig : public ov::PluginConfig { std::vector get_supported_properties() const { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 68e4ea69642a00..a56d63e7119caf 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -19,6 +19,11 @@ struct ExecutionConfig : public ov::PluginConfig { explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } + // Default copy operators copy the config as-is, including the finalized flag state. + // If the config needs updates after finalization, the clone() method shall be used, as it resets the finalized flag. + // That's needed to avoid unexpected option updates, as we call finalization twice: in the transformation pipeline + // and in the cldnn::program c-tor (which is needed mainly to handle unit tests). 
The second call may cause unwanted side effects + // if the config is not marked as finalized, which could easily have happened if the copy operator reset the finalization flag. ExecutionConfig(const ExecutionConfig& other); ExecutionConfig& operator=(const ExecutionConfig& other); ExecutionConfig clone() const; From 7fe88bec30f270298f1a4c81ce97d3f0a25ec2ba Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 27 Jan 2025 09:45:36 +0400 Subject: [PATCH 27/44] fix tests Signed-off-by: Vladimir Paramuzov --- src/inference/src/dev/plugin_config.cpp | 2 +- src/inference/tests/unit/config_test.cpp | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 2b568ab5bbe310..3917b7155dbd1d 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -173,7 +173,7 @@ ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std nlohmann::json json_config; try { ifs >> json_config; - } catch (const std::exception& e) { + } catch (const std::exception&) { return config; } diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index e112e00e1e4241..8ee6c8c6aefe82 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -111,7 +111,11 @@ TEST(plugin_config, can_create_empty_config) { TEST(plugin_config, can_create_not_empty_config) { ASSERT_NO_THROW( NotEmptyTestConfig cfg; +#ifdef ENABLE_DEBUG_CAPS ASSERT_EQ(cfg.get_supported_properties().size(), 7); +#else + ASSERT_EQ(cfg.get_supported_properties().size(), 5); +#endif ); } @@ -237,8 +241,11 @@ TEST(plugin_config, set_property_throw_for_non_release_options) { TEST(plugin_config, visibility_is_correct) { NotEmptyTestConfig cfg; ASSERT_EQ(cfg.get_option_ptr(release_internal_property.name())->get_visibility(), OptionVisibility::RELEASE_INTERNAL); - ASSERT_EQ(cfg.get_option_ptr(debug_property.name())->get_visibility(), OptionVisibility::DEBUG); ASSERT_EQ(cfg.get_option_ptr(int_property.name())->get_visibility(), OptionVisibility::RELEASE); + +#ifdef ENABLE_DEBUG_CAPS + ASSERT_EQ(cfg.get_option_ptr(debug_property.name())->get_visibility(), OptionVisibility::DEBUG); +#endif } #ifdef ENABLE_DEBUG_CAPS From b2f9000b6eff651f354c1d0f0227bb9dfc4c0451 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 27 Jan 2025 15:51:25 +0400 Subject: [PATCH 28/44] build fixes Signed-off-by: Vladimir Paramuzov --- .../dev_api/openvino/runtime/plugin_config.hpp | 4 ++-- src/inference/tests/unit/config_test.cpp | 17 +++++++++-------- .../intel_gpu/runtime/execution_config.hpp | 8 ++++---- .../include/intel_gpu/runtime/memory_pool.hpp | 2 +- .../intel_gpu/src/runtime/execution_config.cpp | 8 ++++---- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index ef4d810e0a4bce..e55e7ab86f7c8e 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -292,7 +292,7 @@ class OPENVINO_RUNTIME_API PluginConfig { }; template <> -class OPENVINO_API AttributeAdapter +class OPENVINO_RUNTIME_API AttributeAdapter : public DirectValueAccessor { public: AttributeAdapter(ConfigOptionBase*& value) : DirectValueAccessor(value) {} @@ -301,7 +301,7 @@ class OPENVINO_API AttributeAdapter }; template <> -class OPENVINO_API AttributeAdapter +class 
OPENVINO_RUNTIME_API AttributeAdapter : public DirectValueAccessor { public: AttributeAdapter(ov::AnyMap& value) : DirectValueAccessor(value) {} diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index 8ee6c8c6aefe82..fd0a8eec04261a 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -38,8 +38,8 @@ struct EmptyTestConfig : public ov::PluginConfig { struct NotEmptyTestConfig : public ov::PluginConfig { NotEmptyTestConfig() { - #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) - #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)) OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") @@ -58,8 +58,8 @@ struct NotEmptyTestConfig : public ov::PluginConfig { } } - #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__) - #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__)) EXPAND(OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__)) EXPAND(OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__)) OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") @@ -109,13 +109,14 @@ TEST(plugin_config, can_create_empty_config) { } TEST(plugin_config, can_create_not_empty_config) { - ASSERT_NO_THROW( - NotEmptyTestConfig cfg; #ifdef ENABLE_DEBUG_CAPS - ASSERT_EQ(cfg.get_supported_properties().size(), 7); + size_t expected_options_num = 7; #else - ASSERT_EQ(cfg.get_supported_properties().size(), 5); + size_t expected_options_num = 5; #endif + ASSERT_NO_THROW( + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_supported_properties().size(), expected_options_num); ); } diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index a56d63e7119caf..ef62419dffafdd 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -31,8 +31,8 @@ struct ExecutionConfig : public ov::PluginConfig { void finalize(cldnn::engine& engine); using ov::PluginConfig::finalize; - #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__) - #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__)) #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_LOCAL_OPTION @@ -52,8 +52,8 @@ struct ExecutionConfig : public ov::PluginConfig { void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__) - #define OV_CONFIG_GLOBAL_OPTION(...) 
OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__)) #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_LOCAL_OPTION #undef OV_CONFIG_GLOBAL_OPTION diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp index 1d27eaf63efb86..ed1da835de0f29 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp @@ -118,7 +118,7 @@ class memory_pool { std::map, padded_pool_comparer> _padded_pool; std::multimap _no_reusable_pool; engine* _engine; - const ExecutionConfig& _config; + [[maybe_unused]] const ExecutionConfig& _config; public: explicit memory_pool(engine& engine, const ExecutionConfig& config); diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index bef767da04186c..0b26374aef4fa9 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -124,8 +124,8 @@ bool is_llm(const ov::Model& model) { #undef OV_CONFIG_GLOBAL_OPTION ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { - #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) - #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)) #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_LOCAL_OPTION #undef OV_CONFIG_GLOBAL_OPTION @@ -315,8 +315,8 @@ void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const { static ov::PluginConfig::OptionsDesc help_map { - #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) - #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_HELP(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) 
(OV_CONFIG_OPTION_HELP(__VA_ARGS__)) #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_LOCAL_OPTION #undef OV_CONFIG_GLOBAL_OPTION From e2e8682d5404310da4708af94db28916f47c4fd8 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 27 Jan 2025 16:15:19 +0400 Subject: [PATCH 29/44] cpu test fix Signed-off-by: Vladimir Paramuzov --- .../behavior/ov_plugin/properties_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp index 6d288d9f5ede8b..fc98a2659f9127 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp @@ -52,7 +52,7 @@ const std::vector cpu_inproperties = { {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), {ov::hint::num_requests.name(), "should be int"}}, {{ov::num_streams.name(), "OFF"}}, - {{ov::hint::enable_cpu_pinning.name(), "OFF"}}, + {{ov::hint::enable_cpu_pinning.name(), "NOT_BOOL"}}, }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, From 3402f1e700aebba95d16f7b6e037d36822c65278 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 27 Jan 2025 16:53:26 +0400 Subject: [PATCH 30/44] improve bool any parsing Signed-off-by: Vladimir Paramuzov --- src/core/src/any.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 8de26ad7c4d173..6d6652278e370b 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -6,6 +6,7 @@ #include #include +#include #include "openvino/util/common_util.hpp" namespace { template @@ -204,13 +205,14 @@ void Read::operator()(std::istream& is, bool& value) const { std::string str; is >> str; - std::set off = {"0", "false", "off", "no"}; - std::set on = {"1", "true", "on", "yes"}; + using namespace std::literals; + constexpr std::array off = {"0"sv, "false"sv, "off"sv, "no"sv}; + constexpr std::array on = {"1"sv, "true"sv, "on"sv, "yes"sv}; str = util::to_lower(str); - if (on.count(str)) { + if (std::find(on.begin(), on.end(), str) != on.end()) { value = true; - } else if (off.count(str)) { + } else if (std::find(off.begin(), off.end(), str) != off.end()) { value = false; } else { OPENVINO_THROW("Could not convert to bool from string " + str); From 0ea319bd6dcaa9fca2683f90145fb89bc77836f7 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 27 Jan 2025 17:01:40 +0400 Subject: [PATCH 31/44] fix cpplint Signed-off-by: Vladimir Paramuzov --- src/plugins/intel_gpu/src/runtime/execution_config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 0b26374aef4fa9..4442a72c8c2694 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -110,7 +110,7 @@ bool is_llm(const ov::Model& model) { } return false; -}; +} } // namespace From e4d85e0ff48be23032c86c284fabac23f800be1a Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 28 Jan 2025 09:25:31 +0400 Subject: [PATCH 32/44] fix build errors Signed-off-by: Vladimir Paramuzov --- src/core/src/any.cpp | 5 ++--- .../intel_gpu/include/intel_gpu/runtime/memory_pool.hpp | 2 +- 
src/plugins/intel_gpu/src/runtime/execution_config.cpp | 2 +- src/plugins/intel_gpu/src/runtime/memory_pool.cpp | 4 +++- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 6d6652278e370b..3117f69114926c 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -205,9 +205,8 @@ void Read::operator()(std::istream& is, bool& value) const { std::string str; is >> str; - using namespace std::literals; - constexpr std::array off = {"0"sv, "false"sv, "off"sv, "no"sv}; - constexpr std::array on = {"1"sv, "true"sv, "on"sv, "yes"sv}; + constexpr std::array off = {"0", "false", "off", "no"}; + constexpr std::array on = {"1", "true", "on", "yes"}; str = util::to_lower(str); if (std::find(on.begin(), on.end(), str) != on.end()) { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp index ed1da835de0f29..1d27eaf63efb86 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp @@ -118,7 +118,7 @@ class memory_pool { std::map, padded_pool_comparer> _padded_pool; std::multimap _no_reusable_pool; engine* _engine; - [[maybe_unused]] const ExecutionConfig& _config; + const ExecutionConfig& _config; public: explicit memory_pool(engine& engine, const ExecutionConfig& config); diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 4442a72c8c2694..1ea3abc9eae4a3 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -316,7 +316,7 @@ void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const { static ov::PluginConfig::OptionsDesc help_map { #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_HELP(__VA_ARGS__)) - #define OV_CONFIG_GLOBAL_OPTION(...) (OV_CONFIG_OPTION_HELP(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) 
EXPAND(OV_CONFIG_OPTION_HELP(__VA_ARGS__)) #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_LOCAL_OPTION #undef OV_CONFIG_GLOBAL_OPTION diff --git a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp index 5ddd70f8c6bd3b..a899f78eb565d3 100644 --- a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp +++ b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp @@ -433,7 +433,9 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { } } -memory_pool::memory_pool(engine& engine, const ExecutionConfig& config) : _engine(&engine), _config(config) { } +memory_pool::memory_pool(engine& engine, const ExecutionConfig& config) : _engine(&engine), _config(config) { + (void)(_config); // Silence unused warning +} #ifdef GPU_DEBUG_CONFIG inline std::string get_mb_size(size_t size) { From a694b8f0d8c351dddf3c678ec8224ea36edc5305 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 28 Jan 2025 10:00:37 +0400 Subject: [PATCH 33/44] build fixes Signed-off-by: Vladimir Paramuzov --- src/core/src/any.cpp | 1 + src/inference/dev_api/openvino/runtime/plugin_config.hpp | 4 ++-- src/inference/src/dev/plugin_config.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 3117f69114926c..0ed7d3e213aa21 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "openvino/util/common_util.hpp" namespace { template diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index e55e7ab86f7c8e..132e243cc42e79 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -26,7 +26,7 @@ #define CAT(a, b) a ## b #endif -#define GET_EXCEPT_LAST_IMPL(N, ...) CAT(GET_EXCEPT_LAST_IMPL_, N)(__VA_ARGS__) +#define GET_EXCEPT_LAST_IMPL(N, ...) EXPAND(CAT(GET_EXCEPT_LAST_IMPL_, N)(__VA_ARGS__)) #define GET_EXCEPT_LAST_IMPL_2(_0, _1) _0 #define GET_EXCEPT_LAST_IMPL_3(_0, _1, _2) _0, _1 #define GET_EXCEPT_LAST_IMPL_4(_0, _1, _2, _3) _0, _1, _2 @@ -34,7 +34,7 @@ #define GET_EXCEPT_LAST(...) EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) -#define GET_LAST_IMPL(N, ...) CAT(GET_LAST_IMPL_, N)(__VA_ARGS__) +#define GET_LAST_IMPL(N, ...) EXPAND(CAT(GET_LAST_IMPL_, N)(__VA_ARGS__)) #define GET_LAST_IMPL_0(_0, ...) _0 #define GET_LAST_IMPL_1(_0, _1, ...) _1 #define GET_LAST_IMPL_2(_0, _1, _2, ...) 
_2 diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 3917b7155dbd1d..1e1bbf3fece6a0 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -35,7 +35,7 @@ size_t get_terminal_width() { } else { return default_width; } -#elif __linux__ +#elif defined(__linux__) struct winsize w; if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { return w.ws_col; From d1cd693c521d599ceb7a9483630263c87822d85d Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 28 Jan 2025 10:29:35 +0400 Subject: [PATCH 34/44] build fix Signed-off-by: Vladimir Paramuzov --- .../tests/unit/module_tests/shape_predictor_test.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp index c89cd5d93b709f..6d4b8fd7388e76 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp @@ -23,7 +23,9 @@ TEST_P(shape_predictor_tests, prediction) { auto& expected_predicted_shape = p.expected_predicted_shape; auto& engine = get_test_engine(); - ShapePredictor sp(&engine, ShapePredictor::Settings{ .buffers_preallocation_ratio = p.buffers_preallocation_ratio }); + ShapePredictor::Settings settings; + settings.buffers_preallocation_ratio = p.buffers_preallocation_ratio; + ShapePredictor sp(&engine, settings); std::pair result; for (auto& shape : in_shapes) @@ -74,7 +76,9 @@ TEST_P(shape_predictor_tests_b_fs_yx_fsv16, prediction) { auto& expected_predicted_shape = p.expected_predicted_shape; auto& engine = get_test_engine(); - ShapePredictor sp(&engine, ShapePredictor::Settings{ .buffers_preallocation_ratio = p.buffers_preallocation_ratio }); + ShapePredictor::Settings settings; + settings.buffers_preallocation_ratio = p.buffers_preallocation_ratio; + ShapePredictor sp(&engine, settings); std::pair result; for (auto& shape : in_shapes) @@ -122,7 +126,9 @@ TEST(shape_predictor_tests, check_max_buffer_size) { auto& engine = get_test_engine(); const auto& buffers_preallocation_ratio = 1.1f; - ShapePredictor sp(&engine, ShapePredictor::Settings{ .buffers_preallocation_ratio = buffers_preallocation_ratio }); + ShapePredictor::Settings settings; + settings.buffers_preallocation_ratio = buffers_preallocation_ratio; + ShapePredictor sp(&engine, settings); const auto max_alloc_mem_size = engine.get_device_info().max_alloc_mem_size; auto layout = cldnn::layout({static_cast(max_alloc_mem_size)}, ov::element::u8, format::bfyx); From 6794136886996e9e9c9125c79dd67a6fa33cb618 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 28 Jan 2025 13:22:33 +0400 Subject: [PATCH 35/44] unit test fixes Signed-off-by: Vladimir Paramuzov --- .../include/intel_gpu/graph/network.hpp | 3 +-- src/plugins/intel_gpu/src/graph/network.cpp | 5 ++-- .../test_cases/fully_connected_gpu_test.cpp | 26 +++++++++---------- .../tests/unit/test_cases/loop_gpu_test.cpp | 1 - .../tests/unit/test_utils/program_wrapper.h | 4 ++- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index 2a99b9dfef4b76..dabfe8ce09acd6 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -197,7 +197,7 @@ struct network { void 
set_reuse_variable_mem(bool reuse = false); bool is_reuse_variable_mem() { return _reuse_variable_mem; } - const ExecutionConfig& get_config() const { return _config; } + const ExecutionConfig& get_config() const { return _program->get_config(); } std::shared_ptr get_shape_predictor() { return _shape_predictor; } void set_shape_predictor(std::shared_ptr shape_predictor) { _shape_predictor = shape_predictor; } @@ -210,7 +210,6 @@ struct network { using output_chains_map = std::map>; uint32_t net_id = 0; program::ptr _program; - ExecutionConfig _config; engine& _engine; stream::ptr _stream; std::unique_ptr _memory_pool; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 214778c397dbbc..1b310fd4542f86 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -171,7 +171,6 @@ opt pass). */ network::network(program::ptr program, stream::ptr stream, bool is_internal, bool is_primary_stream) : _program(program) - , _config(program->get_config()) , _engine(program->get_engine()) , _stream(stream) , _memory_pool(new memory_pool(program->get_engine(), program->get_config())) @@ -225,7 +224,7 @@ network::~network() { if (_program != nullptr) _program->cancel_compilation_context(); _memory_pool->clear_pool_for_network(net_id); - std::string dump_path = GPU_DEBUG_VALUE_OR(_config.get_dump_profiling_data_path(), ""); + std::string dump_path = GPU_DEBUG_VALUE_OR(get_config().get_dump_profiling_data_path(), ""); GPU_DEBUG_IF(!dump_path.empty()) { dump_perf_data_raw(dump_path + "/perf_raw" + std::to_string(net_id) + ".csv", false, _exec_order); } @@ -385,7 +384,7 @@ void network::calculate_weights_cache_capacity() { } // Sum all weights constants for each stream - required_mem_size += weights_const_size * _config.get_num_streams(); + required_mem_size += weights_const_size * get_config().get_num_streams(); // Add all other constants (shared between streams) required_mem_size += total_const_size - weights_const_size; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index 00fd36f16aaf9c..52209883247f99 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1555,7 +1555,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1643,7 +1643,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1669,7 +1669,7 @@ class 
fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1753,7 +1753,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1780,9 +1780,9 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); if (is_dyn_quan) { - config.set_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); } else { - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); } network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1923,7 +1923,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl = { in_layout.format, "", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim1", fc_impl }, { "fc_prim2", fc_impl } })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1952,7 +1952,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -2905,7 +2905,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -2931,7 +2931,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); 
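// [Editor's note, sketch] The switches from set_property() to set_user_property() in these tests
// line up with the finalize_impl() logic shown earlier in this series: platform defaults (e.g.
// dynamic quantization group size 32 on non-systolic GPUs) are applied only when is_set_by_user()
// is false, and only the user-property path marks an option as user-set. Assumed flow:
//
//   config.set_user_property(ov::hint::dynamic_quantization_group_size(0));
//   config.finalize(engine);  // is_set_by_user(...) == true, so the 0 is kept instead of 32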
config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); @@ -3031,7 +3031,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bf_tiled", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -3057,7 +3057,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); @@ -4210,7 +4210,7 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_sta this->test_compressed_int4_scale_dyn_quan_weight_i4(false, 320, 1024, 1024, 32, 32, true); } -// Test weight zp for INT8 ASYM +// Test weight zp for INT8 ASYM TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_large) { this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 320, 4096, 4096, 128, 128, true); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp index 16c35e04aa3f17..fc20272b2dec6b 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp @@ -1105,7 +1105,6 @@ static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape bod auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx}); - std::vector body_input_layouts; for (size_t i = 0; i < body_input_layout.size(); i++) { if (body_input_layout[i].is_dynamic()) diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h b/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h index 77b519d8e3cf5f..09c081abbb6fe6 100644 --- a/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h +++ b/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h @@ -38,7 +38,9 @@ namespace cldnn p.prepare_memory_dependencies(); } static void update_configs_properties(program& p, const ov::AnyMap& properties) { - p._config.set_property(properties); + auto config_copy = p._config.clone(); + config_copy.set_property(properties); + p._config = config_copy; } }; From 6747b23b79d368c4a298d1def42886e7f6fe0f2a Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 28 Jan 2025 13:46:56 +0400 Subject: [PATCH 36/44] fix func tests Signed-off-by: Vladimir Paramuzov --- .../intel_gpu/src/runtime/execution_config.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 
deletions(-) diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 1ea3abc9eae4a3..d69fabcb346dd8 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -172,7 +172,7 @@ void ExecutionConfig::apply_rt_info(const IRemoteContext* context, const ov::RTM // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not // using that mechanism. - if (m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE) { + if (get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { apply_rt_info_property(ov::weights_path, rt_info); } } @@ -217,7 +217,7 @@ void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context } void ExecutionConfig::finalize_impl(const IRemoteContext* context) { - GPU_DEBUG_IF(m_help) { + GPU_DEBUG_IF(get_help()) { print_help(); exit(-1); } @@ -230,13 +230,13 @@ void ExecutionConfig::finalize_impl(const IRemoteContext* context) { if (!is_set_by_user(ov::intel_gpu::use_onednn) && info.supports_immad) { m_use_onednn = true; } - if (m_use_onednn) { + if (get_use_onednn()) { m_queue_type = QueueTypes::in_order; } - if (!is_set_by_user(ov::hint::kv_cache_precision) || m_kv_cache_precision == ov::element::undefined) { + if (!is_set_by_user(ov::hint::kv_cache_precision) || get_kv_cache_precision() == ov::element::undefined) { if (info.supports_immad) { // MFDNN-11755 - m_kv_cache_precision = m_inference_precision; + m_kv_cache_precision = get_inference_precision(); } else { // Enable KV-cache compression by default for non-systolic platforms only m_kv_cache_precision = ov::element::i8; @@ -244,7 +244,7 @@ void ExecutionConfig::finalize_impl(const IRemoteContext* context) { } // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && m_dynamic_quantization_group_size == 0 && !info.supports_immad) { + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && get_dynamic_quantization_group_size() == 0 && !info.supports_immad) { m_dynamic_quantization_group_size = 32; } From 5bef3d2f1cc0e4a38512e584e81c10199654cf7d Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 28 Jan 2025 15:51:21 +0400 Subject: [PATCH 37/44] don't visit finalize flag Signed-off-by: Vladimir Paramuzov --- src/inference/src/dev/plugin_config.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 1e1bbf3fece6a0..1fe42a147d2188 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -128,7 +128,6 @@ bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) { for (auto& prop : m_options_map) { visitor.on_attribute(prop.first + "__internal", prop.second); } - visitor.on_attribute("is_finalized", m_is_finalized); return true; } From 15051c2bb8408a0c975793810587b611577d88b9 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 29 Jan 2025 09:34:36 +0400 Subject: [PATCH 38/44] few fixes and refactor Signed-off-by: Vladimir Paramuzov --- src/inference/CMakeLists.txt | 4 +++ src/inference/src/dev/plugin_config.cpp | 32 ++++++++++------------- src/inference/tests/unit/config_test.cpp | 33 +++++++++++++++++++----- 3 files changed, 44 insertions(+), 25 deletions(-) diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt 
index 9b2967e9f80fae..40528735f80071 100644
--- a/src/inference/CMakeLists.txt
+++ b/src/inference/CMakeLists.txt
@@ -137,6 +137,10 @@ target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS}
 
 target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE)
 
+if(ENABLE_DEBUG_CAPS)
+    target_compile_definitions(${TARGET_NAME}_s PUBLIC ENABLE_DEBUG_CAPS)
+endif()
+
 set_target_properties(${TARGET_NAME}_s PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) # LTO
 
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index 1fe42a147d2188..31160a8e30f83c 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -64,10 +64,7 @@ ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility all
 void PluginConfig::set_property(const ov::AnyMap& config) {
     OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited");
 
-    for (auto& kv : config) {
-        auto& name = kv.first;
-        auto& val = kv.second;
-
+    for (auto& [name, val] : config) {
         get_option_ptr(name)->set_any(val);
     }
 }
@@ -75,10 +72,7 @@ void PluginConfig::set_property(const ov::AnyMap& config) {
 void PluginConfig::set_user_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) {
     OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited");
 
-    for (auto& kv : config) {
-        auto& name = kv.first;
-        auto& val = kv.second;
-
+    for (auto& [name, val] : config) {
         auto option = get_option_ptr(name);
         if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
             if (throw_on_error)
@@ -144,8 +138,8 @@ void PluginConfig::apply_debug_options(const IRemoteContext* context) {
         ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name());
         cleanup_unsupported(config_properties);
 #ifdef ENABLE_DEBUG_CAPS
-        for (auto& prop : config_properties) {
-            std::cout << "Non default config value for " << prop.first << " = " << prop.second.as<std::string>() << std::endl;
+        for (auto& [name, val] : config_properties) {
+            std::cout << "Non default config value for " << name << " = " << val.as<std::string>() << std::endl;
         }
 #endif
         set_user_property(config_properties, allowed_visibility, throw_on_error);
@@ -154,8 +148,8 @@ void PluginConfig::apply_debug_options(const IRemoteContext* context) {
     ov::AnyMap env_properties = read_env();
     cleanup_unsupported(env_properties);
 #ifdef ENABLE_DEBUG_CAPS
-    for (auto& prop : env_properties) {
-        std::cout << "Non default env value for " << prop.first << " = " << prop.second.as<std::string>() << std::endl;
+    for (auto& [name, val] : env_properties) {
+        std::cout << "Non default env value for " << name << " = " << val.as<std::string>() << std::endl;
     }
 #endif
     set_user_property(env_properties, allowed_visibility, throw_on_error);
@@ -219,10 +213,10 @@ ov::Any PluginConfig::read_env(const std::string& option_name, const std::string
 
 ov::AnyMap PluginConfig::read_env() const {
     ov::AnyMap config;
-    for (auto& kv : m_options_map) {
-        auto val = read_env(kv.first, m_allowed_env_prefix, kv.second);
+    for (auto& [name, option] : m_options_map) {
+        auto val = read_env(name, m_allowed_env_prefix, option);
         if (!val.empty()) {
-            config[kv.first] = val;
+            config[name] = val;
         }
     }
 
@@ -247,12 +241,12 @@ std::string PluginConfig::to_string() const {
     ss << "-----------------------------------------\n";
     ss << "PROPERTIES:\n";
 
-    for (const auto& option : m_options_map) {
-        ss << "\t" << option.first << ":" << option.second->get_any().as<std::string>() << std::endl;
+    for (const auto& [name, option] : m_options_map) {
+        ss << "\t" << name << ": " << option->get_any().as<std::string>() << std::endl;
     }
     ss << "USER PROPERTIES:\n";
-    for (const auto& user_prop : m_user_properties) {
-        ss << "\t" << user_prop.first << ": " << user_prop.second.as<std::string>() << std::endl;
+    for (const auto& [name, val] : m_user_properties) {
+        ss << "\t" << name << ": " << val.as<std::string>() << std::endl;
     }
 
     return ss.str();
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index fd0a8eec04261a..9a82c1f5b6fefb 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -19,10 +19,10 @@ static constexpr Property<bool> bool_property{"BOOL_PROP
 static constexpr Property<int> int_property{"INT_PROPERTY"};
 static constexpr Property<std::string> high_level_property{"HIGH_LEVEL_PROPERTY"};
 static constexpr Property<std::string> low_level_property{"LOW_LEVEL_PROPERTY"};
-static constexpr Property<int> release_internal_property{"RELEASE_INTERNAL_PROPERTY"};
-static constexpr Property<int> debug_property{"DEBUG_PROPERTY"};
+static constexpr Property<int> release_internal_property{"RELEASE_INTERNAL_PROPERTY"};
 
 #ifdef ENABLE_DEBUG_CAPS
+static constexpr Property<int> debug_property{"DEBUG_PROPERTY"};
 static constexpr Property<int> debug_global_property{"DEBUG_GLOBAL_PROPERTY"};
 #endif
 
@@ -236,7 +236,9 @@ TEST(plugin_config, can_copy_config) {
 TEST(plugin_config, set_property_throw_for_non_release_options) {
     NotEmptyTestConfig cfg;
     ASSERT_ANY_THROW(cfg.set_user_property({release_internal_property(10)}, OptionVisibility::RELEASE));
+#ifdef ENABLE_DEBUG_CAPS
     ASSERT_ANY_THROW(cfg.set_user_property({debug_property(10)}, OptionVisibility::RELEASE));
+#endif
 }
 
 TEST(plugin_config, visibility_is_correct) {
@@ -249,6 +251,26 @@ TEST(plugin_config, visibility_is_correct) {
 #endif
 }
 
+TEST(plugin_config, can_read_from_env) {
+    NotEmptyTestConfig cfg;
+    ASSERT_EQ(cfg.get_int_property(), -1);
+    std::string env_var1 = "OV_INT_PROPERTY=10";
+    ::putenv(env_var1.data());
+    ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only
+
+#ifdef ENABLE_DEBUG_CAPS
+    std::string env_var2 = "OV_DEBUG_PROPERTY=20";
+    ::putenv(env_var2.data());
+    ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option
+#endif
+
+    cfg.finalize(nullptr, nullptr);
+    ASSERT_EQ(cfg.get_int_property(), 10);
+#ifdef ENABLE_DEBUG_CAPS
+    ASSERT_EQ(cfg.get_debug_property(), 20);
+#endif
+}
+
 #ifdef ENABLE_DEBUG_CAPS
 TEST(plugin_config, can_get_global_property) {
     NotEmptyTestConfig cfg;
@@ -256,14 +278,13 @@ TEST(plugin_config, can_get_global_property) {
 }
 
 TEST(plugin_config, global_property_read_env_on_each_call) {
-    NotEmptyTestConfig cfg;
-    ASSERT_EQ(cfg.get_debug_global_property(), 4);
+    ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 4);
     std::string env_var1 = "OV_DEBUG_GLOBAL_PROPERTY=10";
     ::putenv(env_var1.data());
-    ASSERT_EQ(cfg.get_debug_global_property(), 10);
+    ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 10);
 
     std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20";
     ::putenv(env_var2.data());
-    ASSERT_EQ(cfg.get_debug_global_property(), 20);
+    ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 20);
 }
 #endif
 
From 69d6dc426fe4bf86ff06a70db201eaa433291e6c Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Wed, 29 Jan 2025 10:23:57 +0400
Subject: [PATCH 39/44] debug caps fixes and tests

Signed-off-by: Vladimir Paramuzov
---
 src/inference/src/dev/plugin_config.cpp  |  5 ++
 src/inference/tests/unit/config_test.cpp | 76 +++++++++++++++++++++++-
 2 files changed, 78 insertions(+), 3 deletions(-)
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index 31160a8e30f83c..7eafba4b8bbb28 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -98,7 +98,12 @@ void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* mode
     if (model)
         apply_model_specific_options(context, *model);
 
+#ifdef ENABLE_DEBUG_CAPS
+    // For now we apply env/config only for build with debug caps, but it can be updated in the future to allow
+    // reading release options for any build type
     apply_debug_options(context);
+#endif // ENABLE_DEBUG_CAPS
+
     // Copy internal properties before applying hints to ensure that
     // a property set by hint won't be overriden by a value in user config.
     // E.g num_streams=AUTO && hint=THROUGHPUT
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index 9a82c1f5b6fefb..a5c7a092a5f2a7 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -3,13 +3,17 @@
 //
 
 #include "openvino/core/any.hpp"
+#include "openvino/core/except.hpp"
 #include "openvino/core/model.hpp"
 #include "openvino/core/node_vector.hpp"
 #include "openvino/op/parameter.hpp"
+#include "openvino/runtime/iremote_context.hpp"
 #include "openvino/runtime/plugin_config.hpp"
 
 #include <gtest/gtest.h>
+#include <nlohmann/json.hpp>
 #include <fstream>
+#include <filesystem>
 
 using namespace ::testing;
 using namespace ov;
@@ -26,6 +30,38 @@ static constexpr Property<int> debug_property{"DEBUG
 static constexpr Property<int> debug_global_property{"DEBUG_GLOBAL_PROPERTY"};
 #endif
 
+namespace {
+void dump_config(const std::string& filename, const std::map<std::string, ov::AnyMap>& config) {
+    nlohmann::json jsonConfig;
+    for (const auto& item : config) {
+        std::string deviceName = item.first;
+        for (const auto& option : item.second) {
+            // primary property
+            std::stringstream strm;
+            option.second.print(strm);
+            auto property_string = strm.str();
+            jsonConfig[deviceName][option.first] = property_string;
+        }
+    }
+
+    std::ofstream ofs(filename);
+    if (!ofs.is_open()) {
+        throw std::runtime_error("Can't load config file \"" + filename + "\".");
+    }
+
+    ofs << jsonConfig;
+}
+
+class DummyRemoteContext : public ov::IRemoteContext {
+public:
+    const std::string& get_device_name() const override { static const std::string device_name = "SOME_DEVICE"; return device_name; }
+    const ov::AnyMap& get_property() const override { OPENVINO_NOT_IMPLEMENTED; };
+    ov::SoPtr<ov::IRemoteTensor> create_tensor(const ov::element::Type& type,
+                                               const ov::Shape& shape,
+                                               const ov::AnyMap& params = {}) override { OPENVINO_NOT_IMPLEMENTED; }
+    ov::SoPtr<ov::ITensor> create_host_tensor(const ov::element::Type type, const ov::Shape& shape) override { OPENVINO_NOT_IMPLEMENTED; }
+};
+} // namespace
+
 struct EmptyTestConfig : public ov::PluginConfig {
     std::vector<std::string> get_supported_properties() const {
         std::vector<std::string> supported_properties;
@@ -288,12 +287,12 @@ TEST(plugin_config, visibility_is_correct) {
 #endif
 }
 
-TEST(plugin_config, can_read_from_env) {
+TEST(plugin_config, can_read_from_env_with_debug_caps) {
     NotEmptyTestConfig cfg;
     ASSERT_EQ(cfg.get_int_property(), -1);
     std::string env_var1 = "OV_INT_PROPERTY=10";
     ::putenv(env_var1.data());
-    ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only
+    ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only for build with debug caps
 
 #ifdef ENABLE_DEBUG_CAPS
     std::string env_var2 = "OV_DEBUG_PROPERTY=20";
     ::putenv(env_var2.data());
@@ -265,12 +301,46 @@ TEST(plugin_config, can_read_from_env) {
 #endif
     cfg.finalize(nullptr, nullptr);
-    ASSERT_EQ(cfg.get_int_property(), 10);
+
 #ifdef ENABLE_DEBUG_CAPS
+    ASSERT_EQ(cfg.get_int_property(), 10);
     ASSERT_EQ(cfg.get_debug_property(), 20);
+#else
+    ASSERT_EQ(cfg.get_int_property(), -1); // no effect
 #endif
 }
 
+TEST(plugin_config, can_read_from_config) {
+    const std::filesystem::path filepath = "config.json";
+    try {
+        NotEmptyTestConfig cfg;
+        ov::AnyMap config {
+            int_property(10),
+    #ifdef ENABLE_DEBUG_CAPS
+            debug_property(20),
+    #endif
+        };
+
+        DummyRemoteContext ctx;
+        dump_config(filepath.generic_string(), {{ctx.get_device_name(), config }});
+
+        ASSERT_EQ(cfg.get_int_property(), -1); // config is applied after finalization only for build with debug caps
+    #ifdef ENABLE_DEBUG_CAPS
+        ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option
+    #endif
+
+        cfg.finalize(&ctx, nullptr);
+    #ifdef ENABLE_DEBUG_CAPS
+        ASSERT_EQ(cfg.get_int_property(), 10);
+        ASSERT_EQ(cfg.get_debug_property(), 20);
+    #else
+        ASSERT_EQ(cfg.get_int_property(), -1); // no effect
+    #endif
+    } catch (std::exception&) { }
+
+    std::filesystem::remove(filepath);
+}
+
 #ifdef ENABLE_DEBUG_CAPS
 TEST(plugin_config, can_get_global_property) {
     NotEmptyTestConfig cfg;
 
From e50d8bbb7ff3ce5b2e9e3a850a036473171eced9 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Wed, 29 Jan 2025 11:58:19 +0400
Subject: [PATCH 40/44] Update env/config usage

Signed-off-by: Vladimir Paramuzov
---
 .../openvino/runtime/plugin_config.hpp        |  5 +-
 src/inference/src/dev/plugin_config.cpp       | 50 ++++++-------
 src/inference/tests/unit/config_test.cpp      | 75 +++++++++++--------
 .../intel_gpu/runtime/internal_properties.hpp |  1 +
 .../include/intel_gpu/runtime/options.inl     |  1 +
 .../src/runtime/execution_config.cpp          |  6 ++
 6 files changed, 75 insertions(+), 63 deletions(-)

diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
index 132e243cc42e79..eaa9a75ae52afe 100644
--- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp
+++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
@@ -243,7 +243,8 @@ class OPENVINO_RUNTIME_API PluginConfig {
 protected:
     virtual void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) {}
-    virtual void apply_debug_options(const IRemoteContext* context);
+    void apply_env_options();
+    void apply_config_options(std::string_view device_name, std::string_view config_path = "");
     virtual void finalize_impl(const IRemoteContext* context) {}
 
     template
@@ -269,7 +270,7 @@ class OPENVINO_RUNTIME_API PluginConfig {
         }
     }
 
-    ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const;
+    ov::AnyMap read_config_file(std::string_view filename, std::string_view target_device_name) const;
     ov::AnyMap read_env() const;
     static ov::Any read_env(const std::string& option_name, const std::string& prefix, const ConfigOptionBase* option);
     void cleanup_unsupported(ov::AnyMap& config) const;
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index 7eafba4b8bbb28..b608a2a3cfc969 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -98,12 +98,6 @@ void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* mode
     if (model)
         apply_model_specific_options(context, *model);
 
-#ifdef ENABLE_DEBUG_CAPS
-    // For now we apply env/config only for build with debug caps, but it can be updated in the future to allow
-    // reading release options for any build type
-    apply_debug_options(context);
-#endif // ENABLE_DEBUG_CAPS
-
     // Copy internal properties before applying hints to ensure that
     // a property set by hint won't be overriden by a value in user config.
     // E.g num_streams=AUTO && hint=THROUGHPUT
@@ -116,6 +110,10 @@ void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* mode
 
     finalize_impl(context);
 
+#ifdef ENABLE_DEBUG_CAPS
+    apply_env_options();
+#endif
+
     // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization
     m_user_properties.clear();
 
@@ -131,39 +129,35 @@ bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) {
     return true;
 }
 
-void PluginConfig::apply_debug_options(const IRemoteContext* context) {
-    const bool throw_on_error = false;
-#ifdef ENABLE_DEBUG_CAPS
-    constexpr const auto allowed_visibility = OptionVisibility::ANY;
-#else
-    constexpr const auto allowed_visibility = OptionVisibility::RELEASE;
-#endif
+void PluginConfig::apply_env_options() {
+    ov::AnyMap env_properties = read_env();
+    cleanup_unsupported(env_properties);
+    for (auto& [name, val] : env_properties) {
+        std::cout << "Non default env value for " << name << " = " << val.as<std::string>() << std::endl;
+    }
+    set_property(env_properties);
+}
 
-    if (context) {
-        ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name());
+void PluginConfig::apply_config_options(std::string_view device_name, std::string_view config_path) {
+    if (!config_path.empty()) {
+        ov::AnyMap config_properties = read_config_file(std::string(config_path), device_name);
         cleanup_unsupported(config_properties);
 #ifdef ENABLE_DEBUG_CAPS
         for (auto& [name, val] : config_properties) {
             std::cout << "Non default config value for " << name << " = " << val.as<std::string>() << std::endl;
         }
 #endif
-        set_user_property(config_properties, allowed_visibility, throw_on_error);
+        set_property(config_properties);
     }
-
-    ov::AnyMap env_properties = read_env();
-    cleanup_unsupported(env_properties);
-#ifdef ENABLE_DEBUG_CAPS
-    for (auto& [name, val] : env_properties) {
-        std::cout << "Non default env value for " << name << " = " << val.as<std::string>() << std::endl;
-    }
-#endif
-    set_user_property(env_properties, allowed_visibility, throw_on_error);
 }
 
-ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const {
+ov::AnyMap PluginConfig::read_config_file(std::string_view filename, std::string_view target_device_name) const {
+    if (filename.empty())
+        return {};
+
     ov::AnyMap config;
-    std::ifstream ifs(filename);
+    std::ifstream ifs(std::string{filename});
     if (!ifs.is_open()) {
         return config;
     }
@@ -175,7 +169,7 @@ ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std
         return config;
     }
 
-    DeviceIDParser parser(target_device_name);
+    DeviceIDParser parser(std::string{target_device_name});
     for (auto item = json_config.cbegin(), end = json_config.cend(); item != end; ++item) {
         const std::string& device_name = item.key();
         if (DeviceIDParser(device_name).get_device_name() != parser.get_device_name())
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index a5c7a092a5f2a7..adde70c139b96d 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -11,6 +11,7 @@
 #include "openvino/runtime/plugin_config.hpp"
 
 #include <gtest/gtest.h>
+#include <cstdlib>
 #include <nlohmann/json.hpp>
 #include <fstream>
 #include <filesystem>
 
 using namespace ::testing;
 using namespace ov;
@@ -31,6 +32,9 @@ static constexpr Property<int> debug_global_property{"DEBUG_GLOBAL_PROPERTY"};
 #endif
 
 namespace {
+const std::string test_config_path = "test_debug_config_path.json";
+const std::string device_name = "SOME_DEVICE";
+
 void dump_config(const std::string& filename, const std::map<std::string, ov::AnyMap>& config) {
     nlohmann::json jsonConfig;
     for (const auto& item : config) {
@@ -51,15 +55,7 @@ void dump_config(const std::string& filename, const std::map<std::string, ov::An
 
     ofs << jsonConfig;
 }
-class DummyRemoteContext : public ov::IRemoteContext {
-public:
-    const std::string& get_device_name() const override { static const std::string device_name = "SOME_DEVICE"; return device_name; }
-    const ov::AnyMap& get_property() const override { OPENVINO_NOT_IMPLEMENTED; };
-    ov::SoPtr<ov::IRemoteTensor> create_tensor(const ov::element::Type& type,
-                                               const ov::Shape& shape,
-                                               const ov::AnyMap& params = {}) override { OPENVINO_NOT_IMPLEMENTED; }
-    ov::SoPtr<ov::ITensor> create_host_tensor(const ov::element::Type type, const ov::Shape& shape) override { OPENVINO_NOT_IMPLEMENTED; }
-};
+
 } // namespace
 
@@ -118,6 +114,9 @@ struct NotEmptyTestConfig : public ov::PluginConfig {
         if (!is_set_by_user(low_level_property)) {
             m_low_level_property.value = m_high_level_property.value;
         }
+#ifdef ENABLE_DEBUG_CAPS
+        apply_config_options(device_name, test_config_path);
+#endif
     }
 
     void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) override {
@@ -287,37 +290,44 @@ TEST(plugin_config, visibility_is_correct) {
 }
 
 TEST(plugin_config, can_read_from_env_with_debug_caps) {
-    NotEmptyTestConfig cfg;
-    ASSERT_EQ(cfg.get_int_property(), -1);
-    std::string env_var1 = "OV_INT_PROPERTY=10";
-    ::putenv(env_var1.data());
-    ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only for build with debug caps
+    try {
+        NotEmptyTestConfig cfg;
+        ASSERT_EQ(cfg.get_int_property(), -1);
+        std::string env_var1 = "OV_INT_PROPERTY=10";
+        ::putenv(env_var1.data());
+        ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only for build with debug caps
 
 #ifdef ENABLE_DEBUG_CAPS
-    std::string env_var2 = "OV_DEBUG_PROPERTY=20";
-    ::putenv(env_var2.data());
-    ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option
+        std::string env_var2 = "OV_DEBUG_PROPERTY=20";
+        ::putenv(env_var2.data());
+        ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option
 #endif
 
-    cfg.finalize(nullptr, nullptr);
+        cfg.finalize(nullptr, nullptr);
 #ifdef ENABLE_DEBUG_CAPS
-    ASSERT_EQ(cfg.get_int_property(), 10);
-    ASSERT_EQ(cfg.get_debug_property(), 20);
+        ASSERT_EQ(cfg.get_int_property(), 10);
+        ASSERT_EQ(cfg.get_debug_property(), 20);
 #else
-    ASSERT_EQ(cfg.get_int_property(), -1); // no effect
+        ASSERT_EQ(cfg.get_int_property(), -1); // no effect
+#endif
+    } catch (std::exception&) {}
+
+    ::unsetenv("OV_INT_PROPERTY");
+#ifdef ENABLE_DEBUG_CAPS
+    ::unsetenv("OV_DEBUG_PROPERTY");
 #endif
 }
 
@@ -327,24 +337,23 @@ TEST(plugin_config, can_read_from_config) {
-    const std::filesystem::path filepath = "config.json";
+    const std::filesystem::path filepath = test_config_path;
     try {
         NotEmptyTestConfig cfg;
         ov::AnyMap config {
             int_property(10),
     #ifdef ENABLE_DEBUG_CAPS
             debug_property(20),
     #endif
         };
 
-        DummyRemoteContext ctx;
-        dump_config(filepath.generic_string(), {{ctx.get_device_name(), config }});
+        dump_config(filepath.generic_string(), {{device_name, config }});
 
         ASSERT_EQ(cfg.get_int_property(), -1); // config is applied after finalization only for build with debug caps
     #ifdef ENABLE_DEBUG_CAPS
         ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option
     #endif
 
-        cfg.finalize(&ctx, nullptr);
+        cfg.finalize(nullptr, nullptr);
     #ifdef ENABLE_DEBUG_CAPS
         ASSERT_EQ(cfg.get_int_property(), 10);
         ASSERT_EQ(cfg.get_debug_property(), 20);
     #else
         ASSERT_EQ(cfg.get_int_property(), -1); // no effect
     #endif
     } catch (std::exception&) { }
 
     std::filesystem::remove(filepath);
 }
 
@@ -364,8 +372,10 @@ TEST(plugin_config, can_get_global_property) {
 }
 
 TEST(plugin_config, global_property_read_env_on_each_call) {
-    ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 4);
-    std::string env_var1 = "OV_DEBUG_GLOBAL_PROPERTY=10";
"OV_DEBUG_GLOBAL_PROPERTY=10"; - ::putenv(env_var1.data()); - ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 10); - - std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; - ::putenv(env_var2.data()); - ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 20); + try { + ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 4); + std::string env_var1 = "OV_DEBUG_GLOBAL_PROPERTY=10"; + ::putenv(env_var1.data()); + ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 10); + + std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; + ::putenv(env_var2.data()); + ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 20); + } catch (std::exception&) {} + + ::unsetenv("OV_DEBUG_GLOBAL_PROPERTY"); } #endif diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index afb32d3393d6b4..f037c8b02d36be 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -90,6 +90,7 @@ static constexpr Property use_onednn{"USE_ONEDNN"} static constexpr Property help{"HELP"}; static constexpr Property verbose{"VERBOSE"}; static constexpr Property verbose_color{"VERBOSE_COLOR"}; +static constexpr Property debug_config{"DEBUG_CONFIG"}; static constexpr Property log_to_file{"GPU_LOG_TO_FILE"}; static constexpr Property disable_usm{"GPU_DISABLE_USM"}; static constexpr Property disable_onednn_post_ops_opt{"DISABLE_ONEDNN_POST_OPS_OPT"}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 6d513ef1cb2eda..14bd79a9c60e99 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -55,6 +55,7 @@ OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, usm_policy, 0, "0: default, 1: use OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump") OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_profiling_data_per_iter, false, "Save profiling data w/o per-iteration aggregation") OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, debug_config, "", "Path to debug config in json format") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable optimization pass for onednn post-ops") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data_path, "", "Save csv file with per-stage and per-primitive profiling data to specified folder") diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index d69fabcb346dd8..24a66c8f5ef214 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -251,6 +251,12 @@ void ExecutionConfig::finalize_impl(const IRemoteContext* context) { if (!get_force_implementations().empty()) { m_optimize_data = true; } + +#ifdef ENABLE_DEBUG_CAPS + // For now we apply env/config only for build with debug caps, but it can be updated in the future to allow + // reading release options for any build type + apply_config_options(context->get_device_name(), get_debug_config()); +#endif // ENABLE_DEBUG_CAPS } void ExecutionConfig::apply_hints(const cldnn::device_info& info) { From 
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -12,9 +12,9 @@
 #include <gtest/gtest.h>
 #include <cstdlib>
-#include <nlohmann/json.hpp>
 #include <fstream>
 #include <filesystem>
+#include <string>
 
 using namespace ::testing;
 using namespace ov;
@@ -35,25 +35,13 @@ namespace {
 const std::string test_config_path = "test_debug_config_path.json";
 const std::string device_name = "SOME_DEVICE";
 
-void dump_config(const std::string& filename, const std::map<std::string, ov::AnyMap>& config) {
-    nlohmann::json jsonConfig;
-    for (const auto& item : config) {
-        std::string deviceName = item.first;
-        for (const auto& option : item.second) {
-            // primary property
-            std::stringstream strm;
-            option.second.print(strm);
-            auto property_string = strm.str();
-            jsonConfig[deviceName][option.first] = property_string;
-        }
-    }
-
+void dump_config(const std::string& filename, const std::string& config_content) {
     std::ofstream ofs(filename);
     if (!ofs.is_open()) {
-        throw std::runtime_error("Can't load config file \"" + filename + "\".");
+        throw std::runtime_error("Can't save config file \"" + filename + "\".");
     }
 
-    ofs << jsonConfig;
+    ofs << config_content;
 }
 
 } // namespace
@@ -308,13 +296,8 @@ TEST(plugin_config, can_read_from_config) {
     const std::filesystem::path filepath = test_config_path;
     try {
         NotEmptyTestConfig cfg;
-        ov::AnyMap config {
-            int_property(10),
-    #ifdef ENABLE_DEBUG_CAPS
-            debug_property(20),
-    #endif
-        };
+        std::string config = "{\"SOME_DEVICE\":{\"DEBUG_PROPERTY\":\"20\",\"INT_PROPERTY\":\"10\"}}";
 
-        dump_config(filepath.generic_string(), {{device_name, config }});
+        dump_config(filepath.generic_string(), config);
 
         ASSERT_EQ(cfg.get_int_property(), -1); // config is applied after finalization only for build with debug caps
 
From 1c2e0475c48daf4e98d0db35c925aa67facf9cd0 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Wed, 29 Jan 2025 13:49:18 +0400
Subject: [PATCH 42/44] fix env setting

Signed-off-by: Vladimir Paramuzov
---
 src/inference/tests/unit/config_test.cpp | 34 ++++++++++++++++--------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index 682ee50eb81c79..cde86d96f6b1ee 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -44,6 +44,22 @@ void dump_config(const std::string& filename, const std::string& config_content)
     ofs << config_content;
 }
 
+void set_env(const std::string& name, const std::string& value) {
+#ifdef _WIN32
+    _putenv_s(name.c_str(), value.c_str());
+#else
+    ::setenv(name.c_str(), value.c_str(), 1);
+#endif
+}
+
+void unset_env(const std::string& name) {
+#ifdef _WIN32
+    _putenv_s(name.c_str(), env_value.c_str());
+#else
+    ::unsetenv(name.c_str());
+#endif
+}
+
 } // namespace
 
@@ -294,11 +310,9 @@ TEST(plugin_config, can_read_from_env_with_debug_caps) {
     try {
         NotEmptyTestConfig cfg;
         ASSERT_EQ(cfg.get_int_property(), -1);
-        std::string env_var1 = "OV_INT_PROPERTY=10";
-        ::putenv(env_var1.data());
+        set_env("OV_INT_PROPERTY", "10");
"10"); ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only for build with debug caps #ifdef ENABLE_DEBUG_CAPS - std::string env_var2 = "OV_DEBUG_PROPERTY=20"; - ::putenv(env_var2.data()); + set_env("OV_DEBUG_PROPERTY", "20"); ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option #endif @@ -298,9 +312,9 @@ TEST(plugin_config, can_read_from_env_with_debug_caps) { #endif } catch (std::exception&) {} - ::unsetenv("OV_INT_PROPERTY"); + unset_env("OV_INT_PROPERTY"); #ifdef ENABLE_DEBUG_CAPS - ::unsetenv("OV_DEBUG_PROPERTY"); + unset_env("OV_DEBUG_PROPERTY"); #endif } @@ -338,15 +352,13 @@ TEST(plugin_config, can_get_global_property) { TEST(plugin_config, global_property_read_env_on_each_call) { try { ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 4); - std::string env_var1 = "OV_DEBUG_GLOBAL_PROPERTY=10"; - ::putenv(env_var1.data()); + set_env("OV_DEBUG_GLOBAL_PROPERTY", "10"); ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 10); - std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; - ::putenv(env_var2.data()); + set_env("OV_DEBUG_GLOBAL_PROPERTY", "20"); ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 20); } catch (std::exception&) {} - ::unsetenv("OV_DEBUG_GLOBAL_PROPERTY"); + unset_env("OV_DEBUG_GLOBAL_PROPERTY"); } #endif From 0a113340bac8bee7a89482c9ee805f3612a37a0e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 29 Jan 2025 14:36:08 +0400 Subject: [PATCH 43/44] fix env unset Signed-off-by: Vladimir Paramuzov --- src/inference/tests/unit/config_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index cde86d96f6b1ee..9cfb9b98c34c3f 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -54,7 +54,7 @@ void set_env(const std::string& name, const std::string& value) { void unset_env(const std::string& name) { #ifdef _WIN32 - _putenv_s(name.c_str(), env_value.c_str()); + _putenv_s(name.c_str(), ""); #else ::unsetenv(name.c_str()); #endif From 6797fcfad66f74be79ecc67ab561ea576657edfe Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 31 Jan 2025 10:43:05 +0400 Subject: [PATCH 44/44] apply review comments Signed-off-by: Vladimir Paramuzov --- .../dev_api/openvino/runtime/plugin_config.hpp | 8 ++++---- src/inference/src/dev/plugin_config.cpp | 16 +++++++++------- .../intel_gpu/src/runtime/execution_config.cpp | 14 ++++++-------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index eaa9a75ae52afe..0890bd5192f98d 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -134,9 +134,9 @@ struct ConfigOptionBase { explicit ConfigOptionBase() {} virtual ~ConfigOptionBase() = default; - virtual void set_any(const ov::Any any) = 0; + virtual void set_any(const ov::Any& any) = 0; virtual ov::Any get_any() const = 0; - virtual bool is_valid_value(ov::Any val) = 0; + virtual bool is_valid_value(const ov::Any& val) const = 0; virtual OptionVisibility get_visibility() const = 0; }; @@ -147,7 +147,7 @@ struct ConfigOption : public ConfigOptionBase { T value; constexpr static const auto visibility = visibility_; - void set_any(const ov::Any any) override { + void set_any(const ov::Any& any) override { if (validator) OPENVINO_ASSERT(validator(any.as()), 
"Invalid value: ", any.as()); value = any.as(); @@ -157,7 +157,7 @@ struct ConfigOption : public ConfigOptionBase { return ov::Any(value); } - bool is_valid_value(ov::Any val) override { + bool is_valid_value(const ov::Any& val) const override { try { auto v = val.as(); return validator ? validator(v) : true; diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index b608a2a3cfc969..eda2d85670d845 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -11,6 +11,8 @@ #include #include #include +#include +#include #ifdef JSON_HEADER # include @@ -190,16 +192,16 @@ ov::Any PluginConfig::read_env(const std::string& option_name, const std::string if (!val.empty()) { if (dynamic_cast*>(option) != nullptr) { - const std::set off = {"0", "false", "off", "no"}; - const std::set on = {"1", "true", "on", "yes"}; + constexpr std::array off = {"0", "false", "off", "no"}; + constexpr std::array on = {"1", "true", "on", "yes"}; + const auto& val_lower = util::to_lower(val); - const auto& val_lower = ov::util::to_lower(val); - if (off.count(val_lower)) { - return false; - } else if (on.count(val_lower)) { + if (std::find(on.begin(), on.end(), val_lower) != on.end()) { return true; + } else if (std::find(off.begin(), off.end(), val_lower) != off.end()) { + return false; } else { - OPENVINO_THROW("Unexpected value for boolean property: ", val); + OPENVINO_THROW("Unexpected value for boolean property: " + val); } } else { return val; diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 24a66c8f5ef214..5641aeb96abd84 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -164,7 +164,7 @@ void ExecutionConfig::apply_rt_info(const IRemoteContext* context, const ov::RTM if (!info.supports_immad) { apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); } - if (!info.supports_immad || !is_llm) + if (!is_llm) apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); @@ -182,7 +182,7 @@ void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context const auto& ops = model.get_ops(); - auto process_op = [this](std::shared_ptr op) { + std::function)> process_op = [&, this](std::shared_ptr op) { if (requires_new_shape_infer(op)) { m_allow_new_shape_infer = true; } @@ -197,20 +197,18 @@ void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context if (ov::is_type(op)) { m_use_onednn = true; } - }; - // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. - // So, do not check allow_new_shape_infer for inner program build - for (const auto& op : ops) { if (auto multi_subgraph_op = ov::as_type_ptr(op)) { for (const auto& sub_graph : multi_subgraph_op->get_functions()) { for (auto& sub_op : sub_graph->get_ops()) { process_op(sub_op); } } - } else { - process_op(op); } + }; + + for (const auto& op : ops) { + process_op(op); } m_optimize_data = true;