From 6ad934bd3fd4e0780e71150dbff157b873077e5e Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov 
Date: Tue, 17 Dec 2024 13:25:10 +0400
Subject: [PATCH 01/44] Base impl

Signed-off-by: Vladimir Paramuzov 
---
 .../tests/unit/module_tests/config_common.cpp |  77 +++++++++
 .../tests/unit/module_tests/config_common.hpp | 127 ++++++++++++++
 .../tests/unit/module_tests/config_gpu.cpp    | 160 ++++++++++++++++++
 .../tests/unit/module_tests/config_gpu.hpp    |  48 ++++++
 .../module_tests/config_gpu_debug_options.inl |  31 ++++
 .../config_gpu_debug_properties.hpp           |  42 +++++
 .../unit/module_tests/config_gpu_options.inl  |  50 ++++++
 .../tests/unit/module_tests/device_test.cpp   |  41 +++++
 8 files changed, 576 insertions(+)
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp
 create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl

diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp
new file mode 100644
index 00000000000000..06625d33c40307
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp
@@ -0,0 +1,77 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "config_common.hpp"
+#include "openvino/core/except.hpp"
+
+
+namespace ov {
+
+void PluginConfig::set_property(const AnyMap& config) {
+    for (auto& kv : config) {
+        auto& name = kv.first;
+        auto& val = kv.second;
+
+        const auto& known_options = m_options_map;
+        auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
+        OPENVINO_ASSERT(it != known_options.end());
+
+        it->second->set_any(val);
+    }
+}
+
+ov::Any PluginConfig::get_property(const std::string& name) const {
+    const auto& known_options = m_options_map;
+    auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
+    OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
+
+    return it->second->get_any();
+}
+
+void PluginConfig::set_user_property(const AnyMap& config) {
+    for (auto& kv : config) {
+        auto& name = kv.first;
+        auto& val = kv.second;
+
+        const auto& known_options = m_options_map;
+        auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
+        OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
+        OPENVINO_ASSERT(it->second->is_valid_value(val), "Invalid value: ", val.as<std::string>(), " for property: ", name);
+
+        user_properties[name] = val;
+    }
+}
+
+void PluginConfig::finalize(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info) {
+    // Copy internal properties before applying hints to ensure that
+    // a property set by hint won't be overridden by a value in user config.
+    // E.g. num_streams=AUTO && hint=THROUGHPUT
+    // If we apply hints first and then copy all values from user config to internal one,
+    // then we'll get num_streams=AUTO in the final config while some integer number is expected.
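+    //
+    // Illustrative flow (a sketch only; `cfg` stands for any PluginConfig-derived object):
+    //   cfg.set_user_property({ov::num_streams(ov::streams::AUTO),
+    //                          ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
+    //   cfg.finalize(context, {});  // user values are copied into m_options_map first,
+    //                               // then finalize_impl() resolves AUTO to a concrete count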
+ for (const auto& prop : user_properties) { + auto& option = m_options_map.at(prop.first); + option->set_any(prop.second); + } + + finalize_impl(context, rt_info); +} + +std::string PluginConfig::to_string() const { + std::stringstream s; + + s << "-----------------------------------------\n"; + s << "PROPERTIES:\n"; + + for (const auto& option : m_options_map) { + s << "\t" << option.first << ":" << option.second->get_any().as() << std::endl; + } + s << "USER PROPERTIES:\n"; + for (const auto& user_prop : user_properties) { + s << "\t" << user_prop.first << ": " << user_prop.second.as() << std::endl; + } + + return s.str(); +} + +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp new file mode 100644 index 00000000000000..3a6a2bc0d8de51 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp @@ -0,0 +1,127 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "openvino/runtime/iremote_context.hpp" +#include "openvino/runtime/properties.hpp" +#include "openvino/core/except.hpp" + +#ifndef COUNT_N + #define COUNT_N(_1, _2, _3, _4, _5, N, ...) N +#endif + +#ifndef COUNT + #define COUNT(...) EXPAND(COUNT_N(__VA_ARGS__, 5, 4, 3, 2, 1)) +#endif + +#ifndef CAT + #define CAT(a, b) a ## b +#endif + +#ifndef EXPAND + #define EXPAND(N) N +#endif + +#define GET_EXCEPT_LAST_IMPL(N, ...) CAT(GET_EXCEPT_LAST_IMPL_, N)(__VA_ARGS__) +#define GET_EXCEPT_LAST_IMPL_2(_0, _1) _0 +#define GET_EXCEPT_LAST_IMPL_3(_0, _1, _2) _0, _1 +#define GET_EXCEPT_LAST_IMPL_4(_0, _1, _2, _3) _0, _1, _2 + +#define GET_EXCEPT_LAST(...) EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) + +namespace ov { + + +struct ConfigOptionBase { + explicit ConfigOptionBase() {} + virtual ~ConfigOptionBase() = default; + + virtual void set_any(const ov::Any any) = 0; + virtual ov::Any get_any() const = 0; + virtual bool is_valid_value(ov::Any val) = 0; +}; + +template +struct ConfigOption : public ConfigOptionBase { + ConfigOption(const T& default_val, std::function validator = nullptr) + : ConfigOptionBase(), value(default_val), validator(validator) {} + T value; + std::function validator; + + void set_any(const ov::Any any) override { + if (validator) + OPENVINO_ASSERT(validator(any.as()), "Invalid value: ", any.as()); + value = any.as(); + } + + ov::Any get_any() const override { + return ov::Any(value); + } + + bool is_valid_value(ov::Any val) override { + try { + return validator ? validator(val.as()) : true; + } catch (std::exception&) { + return false; + } + + } +}; + +class PluginConfig { +public: + PluginConfig() {} + PluginConfig(std::initializer_list values) : PluginConfig() { set_property(ov::AnyMap(values)); } + explicit PluginConfig(const ov::AnyMap& properties) : PluginConfig() { set_property(properties); } + explicit PluginConfig(const ov::AnyMap::value_type& property) : PluginConfig() { set_property(property); } + + void set_property(const ov::AnyMap& properties); + Any get_property(const std::string& name) const; + void set_user_property(const ov::AnyMap& properties); + + template + util::EnableIfAllStringAny set_property(Properties&&... properties) { + set_property(ov::AnyMap{std::forward(properties)...}); + } + + template + util::EnableIfAllStringAny set_user_property(Properties&&... 
properties) { + set_user_property(ov::AnyMap{std::forward(properties)...}); + } + + template + T get_property(const ov::Property& property) const { + OPENVINO_ASSERT(m_options_map.find(property.name()) != m_options_map.end(), "Property not found: ", property.name()); + return static_cast*>(m_options_map.at(property.name()))->value; + } + + std::string to_string() const; + + void finalize(std::shared_ptr context, const ov::RTMap& rt_info); + virtual void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) = 0; + +protected: + template + bool is_set_by_user(const ov::Property& property) const { + return user_properties.find(property.name()) != user_properties.end(); + } + + template + void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { + if (!is_set_by_user(property)) { + auto rt_info_val = rt_info.find(property.name()); + if (rt_info_val != rt_info.end()) { + set_user_property(property(rt_info_val->second.template as())); + } + } + } + std::unordered_map m_options_map; + ov::AnyMap user_properties; + using OptionMapEntry = decltype(m_options_map)::value_type; +}; + +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp new file mode 100644 index 00000000000000..b3c81da368ae68 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp @@ -0,0 +1,160 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "config_gpu.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" +#include "config_gpu_debug_properties.hpp" + + +namespace ov { +namespace intel_gpu { + +NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { + #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) 
\ + m_options_map[PropertyNamespace::PropertyVar.name()] = &PropertyVar; + + OV_CONFIG_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") + #include "config_gpu_options.inl" + #include "config_gpu_debug_options.inl" + + #undef OV_CONFIG_OPTION +} + +void NewExecutionConfig::finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) { + const auto& device_info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + apply_user_properties(device_info); + apply_rt_info(device_info, rt_info); +} + +void NewExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { + if (is_set_by_user(ov::hint::execution_mode)) { + const auto mode = get_property(ov::hint::execution_mode); + if (!is_set_by_user(ov::hint::inference_precision)) { + if (mode == ov::hint::ExecutionMode::ACCURACY) { + set_property(ov::hint::inference_precision(ov::element::undefined)); + } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) { + if (info.supports_fp16) + set_property(ov::hint::inference_precision(ov::element::f16)); + else + set_property(ov::hint::inference_precision(ov::element::f32)); + } + } + } +} + +void NewExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { + if (is_set_by_user(ov::hint::performance_mode)) { + const auto mode = get_property(ov::hint::performance_mode); + if (!is_set_by_user(ov::num_streams)) { + if (mode == ov::hint::PerformanceMode::LATENCY) { + set_property(ov::num_streams(1)); + } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { + set_property(ov::num_streams(ov::streams::AUTO)); + } + } + } + + if (get_property(ov::num_streams) == ov::streams::AUTO) { + int32_t n_streams = std::max(info.num_ccs, 2); + set_property(ov::num_streams(n_streams)); + } + + if (get_property(ov::internal::exclusive_async_requests)) { + set_property(ov::num_streams(1)); + } + + // Allow kernels reuse only for single-stream scenarios + if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { + if (get_property(ov::num_streams) != 1) { + set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); + } + } +} + +void NewExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { + if (is_set_by_user(ov::hint::model_priority)) { + const auto priority = get_property(ov::hint::model_priority); + if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { + set_property(ov::intel_gpu::hint::queue_priority(priority)); + } + } +} + +void NewExecutionConfig::apply_debug_options(const cldnn::device_info& info) { + // GPU_DEBUG_GET_INSTANCE(debug_config); + // GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { + // set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); + // } + + // GPU_DEBUG_IF(debug_config->serialize_compile == 1) { + // set_property(ov::compilation_num_threads(1)); + // } + + // GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + // GPU_DEBUG_COUT << "[WARNING] ov::enable_profiling property was forced because of enabled OV_GPU_DumpProfilingData debug option\n"; + // set_property(ov::enable_profiling(true)); + // } + + // GPU_DEBUG_IF(debug_config->disable_dynamic_impl == 1) { + // set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); + // } + + // GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { + // if (debug_config->dynamic_quantize_group_size == -1) + // set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); + // else + // 
set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size)); + // } + + // GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) { + // GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) { + // set_property(ov::hint::kv_cache_precision(ov::element::i8)); + // } else { + // set_property(ov::hint::kv_cache_precision(ov::element::undefined)); + // } + // } +} + +void NewExecutionConfig::apply_hints(const cldnn::device_info& info) { + apply_execution_hints(info); + apply_performance_hints(info); + apply_priority_hints(info); + apply_debug_options(info); +} + +void NewExecutionConfig::apply_user_properties(const cldnn::device_info& info) { + apply_hints(info); + if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { + set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); + } + if (info.supports_immad) { + set_property(ov::intel_gpu::use_onednn(true)); + } + if (get_property(ov::intel_gpu::use_onednn)) { + set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + } + + // Enable KV-cache compression by default for non-systolic platforms + if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { + set_property(ov::hint::kv_cache_precision(ov::element::i8)); + } + + // Enable dynamic quantization by default for non-systolic platforms + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + set_property(ov::hint::dynamic_quantization_group_size(32)); + } +} + +void NewExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) { + if (!info.supports_immad) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); + } + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp new file mode 100644 index 00000000000000..69b9c321863c03 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "config_common.hpp" +#include "intel_gpu/runtime/device_info.hpp" +#include "intel_gpu/runtime/utils.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include "config_gpu_debug_properties.hpp" +#include + +namespace ov { +namespace intel_gpu { + +struct NewExecutionConfig : public ov::PluginConfig { + NewExecutionConfig(); + + #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ + ConfigOption PropertyVar = \ + ConfigOption(GET_EXCEPT_LAST(__VA_ARGS__)); + + + #include "config_gpu_options.inl" + #include "config_gpu_debug_options.inl" + + #undef OV_CONFIG_OPTION + + void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) override; + +protected: + // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call + // So this method should be called after setting all user properties, but before apply_user_properties() call. 
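+    // Illustrative call order implied by the note above (sketch only; names from this patch):
+    //   config.set_user_property(user_props);          // 1. all user properties first
+    //   config.apply_rt_info(device_info, rt_info);    // 2. then model runtime info
+    //   config.apply_user_properties(device_info);     // 3. finally resolve hints and defaults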
+ void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info); + + void apply_user_properties(const cldnn::device_info& info); + void apply_hints(const cldnn::device_info& info); + void apply_execution_hints(const cldnn::device_info& info); + void apply_performance_hints(const cldnn::device_info& info); + void apply_priority_hints(const cldnn::device_info& info); + void apply_debug_options(const cldnn::device_info& info); +}; + + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl new file mode 100644 index 00000000000000..687475f67f287c --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl @@ -0,0 +1,31 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef GPU_DEBUG_CONFIG +OV_CONFIG_OPTION(ov::intel_gpu, verbose, false, "Enable") +OV_CONFIG_OPTION(ov::intel_gpu, help, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_usm, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_onednn_post_ops, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, dump_profiling_data, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, dump_graphs, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, dump_sources, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, dump_tensors, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, dump_memory_pool, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, dump_iterations, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, host_time_profiling, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "") +OV_CONFIG_OPTION(ov::intel_gpu, impls_cache_capacity, 0, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_async_compilation, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_shape_agnostic_impls, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, use_usm_host, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, enable_kv_cache_compression, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "") + +#endif diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp new file mode 100644 index 00000000000000..f3ff878ce07740 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/runtime/properties.hpp" + +#ifdef GPU_DEBUG_CONFIG + +namespace ov { +namespace intel_gpu { + +static constexpr Property verbose{"VERBOSE"}; +static constexpr Property help{"HELP"}; +static constexpr Property disable_usm{"DISABLE_USM"}; +static constexpr Property disable_onednn_post_ops{"DISABLE_ONEDNN_POST_OPS"}; +static constexpr Property dump_profiling_data{"DUMP_PROFILING_DATA"}; +// static constexpr Property dump_graphs{"DUMP_GRAPHS"}; +static constexpr Property dump_sources{"DUMP_SOURCES"}; +static constexpr Property dump_tensors{"DUMP_TENSORS"}; +static constexpr Property 
dump_memory_pool{"DUMP_MEMORY_POOL"}; +static constexpr Property dump_iterations{"DUMP_ITERATIONS"}; +static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; +// static constexpr Property max_kernels_per_batch{"MAX_KERNELS_PER_BATCH"}; +static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; +static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; +static constexpr Property disable_shape_agnostic_impls{"DISABLE_SHAPE_AGNOSTIC_IMPLS"}; +static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; +static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; +static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; +static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property use_usm_host{"USE_USM_HOST"}; +static constexpr Property enable_kv_cache_compression{"ENABLE_KV_CACHE_COMPRESSION"}; +static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; +static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; +static constexpr Property load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; + +} // namespace intel_gpu +} // namespace ov + +#endif // GPU_DEBUG_CONFIG diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl new file mode 100644 index 00000000000000..b3aa12dc75c49b --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl @@ -0,0 +1,50 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Namespace, property name, default value, [validator], description +OV_CONFIG_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") +OV_CONFIG_OPTION(ov::device, id, "0", "ID of the current device") +OV_CONFIG_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty") +OV_CONFIG_OPTION(ov, num_streams, 1, "Number of streams to be used for inference") +OV_CONFIG_OPTION(ov, compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that supports parallelism") +OV_CONFIG_OPTION(ov::hint, inference_precision, ov::element::f16, + [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision") +OV_CONFIG_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact number of threads used for model compilton and inference as well as device queue settings") +OV_CONFIG_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc") +OV_CONFIG_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. 
Performance mode allows unsafe optimizations that may reduce the model accuracy") +OV_CONFIG_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application") +OV_CONFIG_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not") + +OV_CONFIG_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks") +OV_CONFIG_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level") +OV_CONFIG_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property") +OV_CONFIG_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution") +OV_CONFIG_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling") +OV_CONFIG_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable winograd convolution implementation if available") +OV_CONFIG_OPTION(ov::internal, exclusive_async_requests, false, "") +OV_CONFIG_OPTION(ov::internal, query_model_ratio, 1.0f, "") +OV_CONFIG_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache") +OV_CONFIG_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model") +OV_CONFIG_OPTION(ov::hint, dynamic_quantization_group_size, 0, "") +OV_CONFIG_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "") +OV_CONFIG_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") +OV_CONFIG_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") +OV_CONFIG_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") + +OV_CONFIG_OPTION(ov::intel_gpu, nv12_two_inputs, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, config_file, "", "") +OV_CONFIG_OPTION(ov::intel_gpu, enable_lp_transformations, false, "") + +OV_CONFIG_OPTION(ov::intel_gpu, max_dynamic_batch, 1, "") +OV_CONFIG_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "") +OV_CONFIG_OPTION(ov::intel_gpu, optimize_data, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, enable_memory_pool, true, "") +OV_CONFIG_OPTION(ov::intel_gpu, allow_static_input_reorder, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, custom_outputs, std::vector{}, "") +OV_CONFIG_OPTION(ov::intel_gpu, force_implementations, ImplForcingMap{}, "") +OV_CONFIG_OPTION(ov::intel_gpu, partial_build_program, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, use_only_static_kernels_for_dynamic_shape, false, "") +OV_CONFIG_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "") +OV_CONFIG_OPTION(ov::intel_gpu, use_onednn, false, "") diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp index b27275d0f03d99..b296242905b958 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp @@ -2,6 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/execution_config.hpp" +#include "module_tests/config_gpu.hpp" +#include 
"openvino/runtime/properties.hpp" #include "test_utils.h" #include "intel_gpu/runtime/device.hpp" #include "runtime/ocl/ocl_device_detector.hpp" @@ -101,3 +104,41 @@ TEST(devices_test, sort_order_three_vendors) { ASSERT_EQ(expected_devices_order, actual_devices_order); } + +// class Test { +// public: +// int i; +// constexpr Test(int i) : i(i) {} +// }; + +// constexpr const Test test1(1); +// constexpr const Test test2(2); + +// template +// int get_prop() { +// static_assert(false, "FAIL"); +// } + +// template class prop, typename T, ov::PropertyMutability mutability> +// T get_prop() { +// static_assert(false, "FAIL"); +// } + + +TEST(config_test, basic) { + ov::intel_gpu::NewExecutionConfig cfg; + std::cerr << cfg.to_string(); + + cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); + cfg.set_property(ov::hint::inference_precision(ov::element::f32)); + + std::cerr << "PROF: " << cfg.enable_profiling.value << std::endl; + + std::cerr << cfg.to_string(); + + std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; + std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; + +// std::cerr << get_prop() << std::endl; +// std::cerr << get_prop() << std::endl; +} From e5a286cd59828f0fbbd8d5c00799ab13477dc20f Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 17 Dec 2024 14:19:13 +0400 Subject: [PATCH 02/44] make it common Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp} | 30 +++++++++--- .../src/dev/plugin_config.cpp} | 5 +- .../intel_gpu/runtime/internal_properties.hpp | 24 ++++++++- .../intel_gpu/runtime/options_debug.inl} | 4 +- .../intel_gpu/runtime/options_release.inl} | 0 .../intel_gpu/runtime/plugin_config.hpp} | 13 ++--- .../runtime/plugin_config.cpp} | 49 ++++--------------- .../config_gpu_debug_properties.hpp | 42 ---------------- .../tests/unit/module_tests/config_test.cpp | 28 +++++++++++ .../tests/unit/module_tests/device_test.cpp | 41 ---------------- 10 files changed, 96 insertions(+), 140 deletions(-) rename src/{plugins/intel_gpu/tests/unit/module_tests/config_common.hpp => inference/dev_api/openvino/runtime/plugin_config.hpp} (74%) rename src/{plugins/intel_gpu/tests/unit/module_tests/config_common.cpp => inference/src/dev/plugin_config.cpp} (93%) rename src/plugins/intel_gpu/{tests/unit/module_tests/config_gpu_debug_options.inl => include/intel_gpu/runtime/options_debug.inl} (91%) rename src/plugins/intel_gpu/{tests/unit/module_tests/config_gpu_options.inl => include/intel_gpu/runtime/options_release.inl} (100%) rename src/plugins/intel_gpu/{tests/unit/module_tests/config_gpu.hpp => include/intel_gpu/runtime/plugin_config.hpp} (84%) rename src/plugins/intel_gpu/{tests/unit/module_tests/config_gpu.cpp => src/runtime/plugin_config.cpp} (71%) delete mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp create mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp similarity index 74% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp rename to src/inference/dev_api/openvino/runtime/plugin_config.hpp index 3a6a2bc0d8de51..acccd0bf343604 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -5,7 +5,7 @@ #pragma once #include -#include +#include #include 
"openvino/runtime/iremote_context.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/core/except.hpp" @@ -35,7 +35,6 @@ namespace ov { - struct ConfigOptionBase { explicit ConfigOptionBase() {} virtual ~ConfigOptionBase() = default; @@ -50,7 +49,6 @@ struct ConfigOption : public ConfigOptionBase { ConfigOption(const T& default_val, std::function validator = nullptr) : ConfigOptionBase(), value(default_val), validator(validator) {} T value; - std::function validator; void set_any(const ov::Any any) override { if (validator) @@ -68,11 +66,29 @@ struct ConfigOption : public ConfigOptionBase { } catch (std::exception&) { return false; } - } + +private: + std::function validator; }; -class PluginConfig { +// Base class for configuration of plugins +// Implementation should provide a list of properties with default values and validators (optional) +// For the sake of efficiency, we expect that plugin properties are defined as class members of the derived class +// and accessed directly in the plugin's code (i.e. w/o get_property()/set_property() calls) +// get/set property members are provided to handle external property access +// The class provides a helpers to read the properties from configuration file and from environment variables +// +// Expected order of properties resolution: +// 1. Assign default value for each property per device +// 2. Save user properties passed via Core::set_property() call to user_properties +// 3. Save user properties passed via Core::compile_model() call to user_properties +// 4. Apply RT info properties to user_properties if they were not set by user +// 5. Read and apply properties from the config file as user_properties +// 6. Read and apply properties from the the environment variables as user_properties +// 7. Apply user_properties to actual plugin properties +// 8. 
Update dependant properties if they were not set by user either way +class OPENVINO_RUNTIME_API PluginConfig { public: PluginConfig() {} PluginConfig(std::initializer_list values) : PluginConfig() { set_property(ov::AnyMap(values)); } @@ -119,7 +135,9 @@ class PluginConfig { } } } - std::unordered_map m_options_map; + std::map m_options_map; + + // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info ov::AnyMap user_properties; using OptionMapEntry = decltype(m_options_map)::value_type; }; diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp b/src/inference/src/dev/plugin_config.cpp similarity index 93% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp rename to src/inference/src/dev/plugin_config.cpp index 06625d33c40307..9f169c07663a40 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_common.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "config_common.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/core/except.hpp" @@ -55,6 +55,9 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R } finalize_impl(context, rt_info); + + // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization + user_properties.clear(); } std::string PluginConfig::to_string() const { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index 765333e971842e..ddbb260647b287 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -46,7 +46,6 @@ static constexpr Property allow_static_input_reord static constexpr Property partial_build_program{"GPU_PARTIAL_BUILD"}; static constexpr Property allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"}; static constexpr Property use_only_static_kernels_for_dynamic_shape{"GPU_USE_ONLY_STATIC_KERNELS_FOR_DYNAMIC_SHAPE"}; -static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; static constexpr Property config_file{"CONFIG_FILE"}; @@ -57,6 +56,29 @@ static constexpr Property buffers_preallocation_r static constexpr Property max_kernels_per_batch{"GPU_MAX_KERNELS_PER_BATCH"}; static constexpr Property use_onednn{"USE_ONEDNN"}; +static constexpr Property help{"HELP"}; +static constexpr Property verbose{"VERBOSE"}; +static constexpr Property disable_usm{"DISABLE_USM"}; +static constexpr Property disable_onednn_post_ops{"DISABLE_ONEDNN_POST_OPS"}; +static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; +static constexpr Property dump_profiling_data{"DUMP_PROFILING_DATA"}; +static constexpr Property dump_sources{"DUMP_SOURCES"}; +static constexpr Property dump_tensors{"DUMP_TENSORS"}; +static constexpr Property dump_memory_pool{"DUMP_MEMORY_POOL"}; +static constexpr Property dump_iterations{"DUMP_ITERATIONS"}; +static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; +static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; +static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; +static constexpr Property 
disable_shape_agnostic_impls{"DISABLE_SHAPE_AGNOSTIC_IMPLS"}; +static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; +static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; +static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; +static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property use_usm_host{"USE_USM_HOST"}; +static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; +static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; +static constexpr Property load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; + } // namespace ov::intel_gpu namespace cldnn { diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl similarity index 91% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl rename to src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl index 687475f67f287c..62548a7abb17fd 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl @@ -3,7 +3,8 @@ // #ifdef GPU_DEBUG_CONFIG -OV_CONFIG_OPTION(ov::intel_gpu, verbose, false, "Enable") + +OV_CONFIG_OPTION(ov::intel_gpu, verbose, 0, "Enable") OV_CONFIG_OPTION(ov::intel_gpu, help, false, "") OV_CONFIG_OPTION(ov::intel_gpu, disable_usm, false, "") OV_CONFIG_OPTION(ov::intel_gpu, disable_onednn_post_ops, false, "") @@ -23,7 +24,6 @@ OV_CONFIG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "") OV_CONFIG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "") OV_CONFIG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "") OV_CONFIG_OPTION(ov::intel_gpu, use_usm_host, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, enable_kv_cache_compression, false, "") OV_CONFIG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "") OV_CONFIG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "") OV_CONFIG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "") diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl similarity index 100% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_options.inl rename to src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp similarity index 84% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp rename to src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index 69b9c321863c03..51e72da8be5923 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -4,12 +4,10 @@ #pragma once -#include "config_common.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "intel_gpu/runtime/device_info.hpp" -#include "intel_gpu/runtime/utils.hpp" #include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/runtime/internal_properties.hpp" -#include "config_gpu_debug_properties.hpp" #include namespace ov { @@ -22,15 +20,14 @@ struct NewExecutionConfig : public ov::PluginConfig { ConfigOption PropertyVar = \ ConfigOption(GET_EXCEPT_LAST(__VA_ARGS__)); - - #include "config_gpu_options.inl" - #include 
"config_gpu_debug_options.inl" + #include "options_release.inl" + #include "options_debug.inl" #undef OV_CONFIG_OPTION void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) override; -protected: +private: // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call // So this method should be called after setting all user properties, but before apply_user_properties() call. void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info); @@ -40,7 +37,7 @@ struct NewExecutionConfig : public ov::PluginConfig { void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - void apply_debug_options(const cldnn::device_info& info); + void read_debug_options(const cldnn::device_info& info); }; diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp similarity index 71% rename from src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp rename to src/plugins/intel_gpu/src/runtime/plugin_config.cpp index b3c81da368ae68..0a3c49e6387104 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -2,11 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "config_gpu.hpp" +#include "intel_gpu/runtime/plugin_config.hpp" #include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/core/any.hpp" #include "openvino/runtime/internal_properties.hpp" #include "intel_gpu/runtime/internal_properties.hpp" -#include "config_gpu_debug_properties.hpp" namespace ov { @@ -16,15 +16,15 @@ NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) 
\ m_options_map[PropertyNamespace::PropertyVar.name()] = &PropertyVar; - OV_CONFIG_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") - #include "config_gpu_options.inl" - #include "config_gpu_debug_options.inl" + #include "intel_gpu/runtime/options_release.inl" + #include "intel_gpu/runtime/options_debug.inl" #undef OV_CONFIG_OPTION } void NewExecutionConfig::finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) { const auto& device_info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + read_debug_options(device_info); apply_user_properties(device_info); apply_rt_info(device_info, rt_info); } @@ -83,46 +83,17 @@ void NewExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { } } -void NewExecutionConfig::apply_debug_options(const cldnn::device_info& info) { - // GPU_DEBUG_GET_INSTANCE(debug_config); - // GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - // set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); - // } - - // GPU_DEBUG_IF(debug_config->serialize_compile == 1) { - // set_property(ov::compilation_num_threads(1)); - // } - - // GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - // GPU_DEBUG_COUT << "[WARNING] ov::enable_profiling property was forced because of enabled OV_GPU_DumpProfilingData debug option\n"; - // set_property(ov::enable_profiling(true)); - // } - - // GPU_DEBUG_IF(debug_config->disable_dynamic_impl == 1) { - // set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); - // } - - // GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - // if (debug_config->dynamic_quantize_group_size == -1) - // set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); - // else - // set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size)); - // } - - // GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) { - // GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) { - // set_property(ov::hint::kv_cache_precision(ov::element::i8)); - // } else { - // set_property(ov::hint::kv_cache_precision(ov::element::undefined)); - // } - // } +void NewExecutionConfig::read_debug_options(const cldnn::device_info& info) { + ov::AnyMap config_properties; + set_user_property(config_properties); + ov::AnyMap env_properties; + set_user_property(env_properties); } void NewExecutionConfig::apply_hints(const cldnn::device_info& info) { apply_execution_hints(info); apply_performance_hints(info); apply_priority_hints(info); - apply_debug_options(info); } void NewExecutionConfig::apply_user_properties(const cldnn::device_info& info) { diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp deleted file mode 100644 index f3ff878ce07740..00000000000000 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_gpu_debug_properties.hpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/runtime/properties.hpp" - -#ifdef GPU_DEBUG_CONFIG - -namespace ov { -namespace intel_gpu { - -static constexpr Property verbose{"VERBOSE"}; -static constexpr Property help{"HELP"}; -static constexpr Property disable_usm{"DISABLE_USM"}; -static constexpr Property disable_onednn_post_ops{"DISABLE_ONEDNN_POST_OPS"}; -static constexpr Property 
dump_profiling_data{"DUMP_PROFILING_DATA"}; -// static constexpr Property dump_graphs{"DUMP_GRAPHS"}; -static constexpr Property dump_sources{"DUMP_SOURCES"}; -static constexpr Property dump_tensors{"DUMP_TENSORS"}; -static constexpr Property dump_memory_pool{"DUMP_MEMORY_POOL"}; -static constexpr Property dump_iterations{"DUMP_ITERATIONS"}; -static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; -// static constexpr Property max_kernels_per_batch{"MAX_KERNELS_PER_BATCH"}; -static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; -static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; -static constexpr Property disable_shape_agnostic_impls{"DISABLE_SHAPE_AGNOSTIC_IMPLS"}; -static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; -static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; -static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; -static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; -static constexpr Property use_usm_host{"USE_USM_HOST"}; -static constexpr Property enable_kv_cache_compression{"ENABLE_KV_CACHE_COMPRESSION"}; -static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; -static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; -static constexpr Property load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; - -} // namespace intel_gpu -} // namespace ov - -#endif // GPU_DEBUG_CONFIG diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp new file mode 100644 index 00000000000000..a1bb0ac8b6e6a0 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp @@ -0,0 +1,28 @@ +// Copyright (C) 2022-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/runtime/plugin_config.hpp" +#include "openvino/runtime/properties.hpp" +#include "test_utils.h" + +using namespace cldnn; +using namespace ::tests; + +TEST(config_test, basic) { + ov::intel_gpu::NewExecutionConfig cfg; + std::cerr << cfg.to_string(); + + cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); + cfg.set_property(ov::hint::inference_precision(ov::element::f32)); + + std::cerr << "PROF: " << cfg.enable_profiling.value << std::endl; + + std::cerr << cfg.to_string(); + + std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; + std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; + +// std::cerr << get_prop() << std::endl; +// std::cerr << get_prop() << std::endl; +} diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp index b296242905b958..b27275d0f03d99 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp @@ -2,9 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "intel_gpu/runtime/execution_config.hpp" -#include "module_tests/config_gpu.hpp" -#include "openvino/runtime/properties.hpp" #include "test_utils.h" #include "intel_gpu/runtime/device.hpp" #include "runtime/ocl/ocl_device_detector.hpp" @@ -104,41 +101,3 @@ TEST(devices_test, sort_order_three_vendors) { ASSERT_EQ(expected_devices_order, actual_devices_order); } - -// class Test { -// public: -// int i; -// constexpr Test(int i) : i(i) {} -// }; - -// constexpr const Test test1(1); 
-// constexpr const Test test2(2); - -// template -// int get_prop() { -// static_assert(false, "FAIL"); -// } - -// template class prop, typename T, ov::PropertyMutability mutability> -// T get_prop() { -// static_assert(false, "FAIL"); -// } - - -TEST(config_test, basic) { - ov::intel_gpu::NewExecutionConfig cfg; - std::cerr << cfg.to_string(); - - cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); - cfg.set_property(ov::hint::inference_precision(ov::element::f32)); - - std::cerr << "PROF: " << cfg.enable_profiling.value << std::endl; - - std::cerr << cfg.to_string(); - - std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; - std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; - -// std::cerr << get_prop() << std::endl; -// std::cerr << get_prop() << std::endl; -} From 67fb59ce1face7c4156177d013625766c76cfc7b Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 17 Dec 2024 17:14:52 +0400 Subject: [PATCH 03/44] env and config Signed-off-by: Vladimir Paramuzov --- .../intel_gpu/runtime/execution_config.hpp | 10 +-- .../intel_gpu/runtime/plugin_config.hpp | 9 +- .../src/runtime/execution_config.cpp | 51 +++++++---- .../intel_gpu/src/runtime/plugin_config.cpp | 86 ++++++++----------- .../tests/unit/module_tests/config_test.cpp | 4 + 5 files changed, 82 insertions(+), 78 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 96e09605eaa998..924f6cf5d42a40 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -56,12 +56,12 @@ class PropertyTypeValidator : public BaseValidator { } }; -class ExecutionConfig { +class OldExecutionConfig { public: - ExecutionConfig(); - ExecutionConfig(std::initializer_list values) : ExecutionConfig() { set_property(ov::AnyMap(values)); } - explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } - explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } + OldExecutionConfig(); + OldExecutionConfig(std::initializer_list values) : OldExecutionConfig() { set_property(ov::AnyMap(values)); } + explicit OldExecutionConfig(const ov::AnyMap& properties) : OldExecutionConfig() { set_property(properties); } + explicit OldExecutionConfig(const ov::AnyMap::value_type& property) : OldExecutionConfig() { set_property(property); } void set_default(); void set_property(const ov::AnyMap& properties); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index 51e72da8be5923..6ea8f4e107bfc8 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -25,21 +25,16 @@ struct NewExecutionConfig : public ov::PluginConfig { #undef OV_CONFIG_OPTION - void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) override; + void finalize_impl(std::shared_ptr context) override; + void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; private: - // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call - // So this method should be called after setting all user properties, but before 
apply_user_properties() call. - void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info); - void apply_user_properties(const cldnn::device_info& info); void apply_hints(const cldnn::device_info& info); void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - void read_debug_options(const cldnn::device_info& info); }; - } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 7d2a9d5f90fc8b..5362f8bccb531c 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -11,7 +11,7 @@ namespace ov::intel_gpu { -ExecutionConfig::ExecutionConfig() { +OldExecutionConfig::OldExecutionConfig() { set_default(); } @@ -33,7 +33,7 @@ class PerformanceModeValidator : public BaseValidator { } }; -void ExecutionConfig::set_default() { +void OldExecutionConfig::set_default() { register_property( std::make_tuple(ov::device::id, "0"), std::make_tuple(ov::enable_profiling, false), @@ -86,13 +86,13 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::intel_gpu::use_onednn, false)); } -void ExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { +void OldExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { property_validators[property.first] = validator; supported_properties[property.first] = visibility; internal_properties[property.first] = property.second; } -void ExecutionConfig::set_property(const AnyMap& config) { +void OldExecutionConfig::set_property(const AnyMap& config) { for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; @@ -102,18 +102,18 @@ void ExecutionConfig::set_property(const AnyMap& config) { } } -bool ExecutionConfig::is_supported(const std::string& name) const { +bool OldExecutionConfig::is_supported(const std::string& name) const { bool supported = supported_properties.find(name) != supported_properties.end(); bool has_validator = property_validators.find(name) != property_validators.end(); return supported && has_validator; } -bool ExecutionConfig::is_set_by_user(const std::string& name) const { +bool OldExecutionConfig::is_set_by_user(const std::string& name) const { return user_properties.find(name) != user_properties.end(); } -void ExecutionConfig::set_user_property(const AnyMap& config) { +void OldExecutionConfig::set_user_property(const AnyMap& config) { for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; @@ -125,7 +125,7 @@ void ExecutionConfig::set_user_property(const AnyMap& config) { } } -Any ExecutionConfig::get_property(const std::string& name) const { +Any OldExecutionConfig::get_property(const std::string& name) const { if (user_properties.find(name) != user_properties.end()) { return user_properties.at(name); } @@ -134,7 +134,7 @@ Any ExecutionConfig::get_property(const std::string& name) const { return internal_properties.at(name); } -void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { +void OldExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::execution_mode)) { const auto mode = get_property(ov::hint::execution_mode); if (!is_set_by_user(ov::hint::inference_precision)) { @@ 
-150,7 +150,7 @@ void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) {
     }
 }
 
-void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) {
+void OldExecutionConfig::apply_performance_hints(const cldnn::device_info& info) {
     if (is_set_by_user(ov::hint::performance_mode)) {
         const auto mode = get_property(ov::hint::performance_mode);
         if (!is_set_by_user(ov::num_streams)) {
@@ -179,7 +179,7 @@ void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) {
     }
 }
 
-void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) {
+void OldExecutionConfig::apply_priority_hints(const cldnn::device_info& info) {
     if (is_set_by_user(ov::hint::model_priority)) {
         const auto priority = get_property(ov::hint::model_priority);
         if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) {
@@ -188,7 +188,7 @@ void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) {
     }
 }
 
-void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) {
+void OldExecutionConfig::apply_debug_options(const cldnn::device_info& info) {
     GPU_DEBUG_GET_INSTANCE(debug_config);
     GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) {
         set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs));
@@ -223,16 +223,33 @@ void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) {
     }
 }
 
-void ExecutionConfig::apply_hints(const cldnn::device_info& info) {
+void OldExecutionConfig::apply_hints(const cldnn::device_info& info) {
     apply_execution_hints(info);
     apply_performance_hints(info);
     apply_priority_hints(info);
     apply_debug_options(info);
 }
 
-void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
-    if (finalized)
+void OldExecutionConfig::update_specific_default_properties(const cldnn::device_info& info) {
+    // These default properties should be set once.
+    if (specific_default_properties_is_set)
         return;
+    specific_default_properties_is_set = true;
+
+    // Enable KV-cache compression by default for non-systolic platforms MFDNN-11755
+    if (get_property(ov::hint::kv_cache_precision) == ov::element::undefined && !info.supports_immad) {
+        set_property(ov::hint::kv_cache_precision(ov::element::i8));
+    }
+
+    // Enable dynamic quantization by default for non-systolic platforms
+    if (get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) {
+        set_property(ov::hint::dynamic_quantization_group_size(32));
+    }
+}
+
+void OldExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
+    // Update device-specific default properties; called once, before internal_properties is updated.
+    update_specific_default_properties(info);
     // Copy internal properties before applying hints to ensure that
     // a property set by hint won't be overriden by a value in user config. 
@@ -283,7 +300,7 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { user_properties.clear(); } -void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info, const bool is_llm) { +void OldExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info, const bool is_llm) { if (!info.supports_immad) { apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); } @@ -292,7 +309,7 @@ void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RT apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); } -std::string ExecutionConfig::to_string() const { +std::string OldExecutionConfig::to_string() const { std::stringstream s; s << "internal properties:\n"; for (auto& kv : internal_properties) { diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp index 0a3c49e6387104..9aa975d83923a3 100644 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -22,11 +22,43 @@ NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { #undef OV_CONFIG_OPTION } -void NewExecutionConfig::finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) { - const auto& device_info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); - read_debug_options(device_info); - apply_user_properties(device_info); - apply_rt_info(device_info, rt_info); +void NewExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + if (!info.supports_immad) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); + } + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); +} + +void NewExecutionConfig::finalize_impl(std::shared_ptr context) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + apply_hints(info); + if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { + set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); + } + if (info.supports_immad) { + set_property(ov::intel_gpu::use_onednn(true)); + } + if (get_property(ov::intel_gpu::use_onednn)) { + set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + } + + // Enable KV-cache compression by default for non-systolic platforms + if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { + set_property(ov::hint::kv_cache_precision(ov::element::i8)); + } + + // Enable dynamic quantization by default for non-systolic platforms + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + set_property(ov::hint::dynamic_quantization_group_size(32)); + } +} + +void NewExecutionConfig::apply_hints(const cldnn::device_info& info) { + apply_execution_hints(info); + apply_performance_hints(info); + apply_priority_hints(info); } void NewExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { @@ -83,49 +115,5 @@ void NewExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { } } -void NewExecutionConfig::read_debug_options(const cldnn::device_info& info) { - ov::AnyMap config_properties; - set_user_property(config_properties); - ov::AnyMap env_properties; - set_user_property(env_properties); -} - -void 
NewExecutionConfig::apply_hints(const cldnn::device_info& info) { - apply_execution_hints(info); - apply_performance_hints(info); - apply_priority_hints(info); -} - -void NewExecutionConfig::apply_user_properties(const cldnn::device_info& info) { - apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); - } - if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); - } - if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } - - // Enable KV-cache compression by default for non-systolic platforms - if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - - // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } -} - -void NewExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) { - if (!info.supports_immad) { - apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - } - apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); -} - } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp index a1bb0ac8b6e6a0..930128ef53bff6 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/plugin/remote_context.hpp" #include "intel_gpu/runtime/plugin_config.hpp" #include "openvino/runtime/properties.hpp" #include "test_utils.h" @@ -23,6 +24,9 @@ TEST(config_test, basic) { std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; + auto ctx = std::make_shared("GPU", std::vector{ get_test_engine().get_device() }); + cfg.finalize(ctx, {}); + std::cerr << cfg.to_string(); // std::cerr << get_prop() << std::endl; // std::cerr << get_prop() << std::endl; } From e5f7cc350403c1b935de3e553b963d7a5a58e955 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 18 Dec 2024 16:28:20 +0400 Subject: [PATCH 04/44] Replace old config & fixes Signed-off-by: Vladimir Paramuzov --- src/inference/CMakeLists.txt | 2 +- .../openvino/runtime/plugin_config.hpp | 25 ++++- src/inference/src/dev/plugin_config.cpp | 106 +++++++++++++++++- .../intel_gpu/runtime/execution_config.hpp | 1 + .../intel_gpu/runtime/plugin_config.hpp | 6 + src/plugins/intel_gpu/src/graph/program.cpp | 13 ++- src/plugins/intel_gpu/src/plugin/plugin.cpp | 27 +++-- .../intel_gpu/src/runtime/plugin_config.cpp | 17 ++- 8 files changed, 170 insertions(+), 27 deletions(-) diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt index 22c28c2acde6e6..7e6cae62b85b67 100644 --- a/src/inference/CMakeLists.txt +++ b/src/inference/CMakeLists.txt @@ -87,7 +87,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE # for ov_plugins.hpp $,$>,${CMAKE_CURRENT_BINARY_DIR}/$,${CMAKE_CURRENT_BINARY_DIR}>) -target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util 
openvino::core::dev) +target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util openvino::core::dev nlohmann_json::nlohmann_json) ov_mark_target_as_cc(${TARGET_NAME}_obj) # OpenVINO Runtime is public API => need to mark this library as important for ABI free diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index acccd0bf343604..16985d70b2841a 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -62,7 +62,8 @@ struct ConfigOption : public ConfigOptionBase { bool is_valid_value(ov::Any val) override { try { - return validator ? validator(val.as()) : true; + auto v = val.as(); + return validator ? validator(v) : true; } catch (std::exception&) { return false; } @@ -74,6 +75,7 @@ struct ConfigOption : public ConfigOptionBase { // Base class for configuration of plugins // Implementation should provide a list of properties with default values and validators (optional) +// and prepare a map string property name -> ConfigOptionBase pointer // For the sake of efficiency, we expect that plugin properties are defined as class members of the derived class // and accessed directly in the plugin's code (i.e. w/o get_property()/set_property() calls) // get/set property members are provided to handle external property access @@ -91,9 +93,14 @@ struct ConfigOption : public ConfigOptionBase { class OPENVINO_RUNTIME_API PluginConfig { public: PluginConfig() {} - PluginConfig(std::initializer_list values) : PluginConfig() { set_property(ov::AnyMap(values)); } - explicit PluginConfig(const ov::AnyMap& properties) : PluginConfig() { set_property(properties); } - explicit PluginConfig(const ov::AnyMap::value_type& property) : PluginConfig() { set_property(property); } + virtual ~PluginConfig() = default; + + // Disable copy and move as we need to setup m_options_map properly and ensure that + // values are a part of current config object + PluginConfig(const PluginConfig& other) = delete; + PluginConfig& operator=(const PluginConfig& other) = delete; + PluginConfig(PluginConfig&& other) = delete; + PluginConfig& operator=(PluginConfig&& other) = delete; void set_property(const ov::AnyMap& properties); Any get_property(const std::string& name) const; @@ -118,9 +125,12 @@ class OPENVINO_RUNTIME_API PluginConfig { std::string to_string() const; void finalize(std::shared_ptr context, const ov::RTMap& rt_info); - virtual void finalize_impl(std::shared_ptr context, const ov::RTMap& rt_info) = 0; protected: + virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {} + virtual void apply_debug_options(std::shared_ptr context); + virtual void finalize_impl(std::shared_ptr context) {} + template bool is_set_by_user(const ov::Property& property) const { return user_properties.find(property.name()) != user_properties.end(); @@ -135,6 +145,11 @@ class OPENVINO_RUNTIME_API PluginConfig { } } } + + ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; + ov::AnyMap read_env(const std::vector& prefixes) const; + void cleanup_unsupported(ov::AnyMap& config) const; + std::map m_options_map; // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 9f169c07663a40..c3ac86e05ba04b 100644 --- 
a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -1,10 +1,20 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "openvino/runtime/plugin_config.hpp" +#include "openvino/core/any.hpp" #include "openvino/core/except.hpp" +#include "openvino/runtime/device_id_parser.hpp" +#include "openvino/util/common_util.hpp" +#include "openvino/util/env_util.hpp" +#include +#ifdef JSON_HEADER +# include +#else +# include +#endif namespace ov { @@ -15,7 +25,8 @@ void PluginConfig::set_property(const AnyMap& config) { const auto& known_options = m_options_map; auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); - OPENVINO_ASSERT(it != known_options.end()); + OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); it->second->set_any(val); } @@ -25,6 +36,7 @@ ov::Any PluginConfig::get_property(const std::string& name) const { const auto& known_options = m_options_map; auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); return it->second->get_any(); } @@ -37,6 +49,7 @@ void PluginConfig::set_user_property(const AnyMap& config) { const auto& known_options = m_options_map; auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); OPENVINO_ASSERT(it->second->is_valid_value(val), "Invalid value: ", val.as(), " for property: ", name); user_properties[name] = val; @@ -44,6 +57,8 @@ void PluginConfig::set_user_property(const AnyMap& config) { } void PluginConfig::finalize(std::shared_ptr context, const ov::RTMap& rt_info) { + apply_rt_info(context, rt_info); + apply_debug_options(context); // Copy internal properties before applying hints to ensure that // a property set by hint won't be overriden by a value in user config. 
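
Besides the hint ordering this comment explains, apply_debug_options() above also pulls properties from config.json and from OV_*-prefixed environment variables; the boolean-parsing rule that read_env() applies in the hunks below can be sketched as follows ("0/false/off/no" and "1/true/on/yes", lowercased, are the only accepted spellings; an unset variable is skipped). parse_bool_env and the variable name in main() are illustrative helpers, not plugin API:

#include <cctype>
#include <cstdlib>
#include <iostream>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>

static std::optional<bool> parse_bool_env(const char* var_name) {
    const char* raw = std::getenv(var_name);
    if (raw == nullptr || *raw == '\0')
        return std::nullopt;  // unset -> option is left untouched

    std::string val;
    for (const char* p = raw; *p; ++p)
        val += static_cast<char>(std::tolower(static_cast<unsigned char>(*p)));

    static const std::set<std::string> off = {"0", "false", "off", "no"};
    static const std::set<std::string> on = {"1", "true", "on", "yes"};
    if (off.count(val)) return false;
    if (on.count(val)) return true;
    throw std::runtime_error("Unexpected value for boolean property: " + val);
}

int main() {
    if (auto v = parse_bool_env("OV_ENABLE_PROFILING"))  // illustrative name
        std::cout << "parsed: " << *v << "\n";
    else
        std::cout << "not set\n";
}
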
// E.g num_streams=AUTO && hint=THROUGHPUT @@ -54,12 +69,95 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R option->set_any(prop.second); } - finalize_impl(context, rt_info); + finalize_impl(context); // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization user_properties.clear(); } +void PluginConfig::apply_debug_options(std::shared_ptr context) { + ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); + cleanup_unsupported(config_properties); + set_user_property(config_properties); + ov::AnyMap env_properties = read_env({"OV_"}); + set_user_property(env_properties); +} + +ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { + ov::AnyMap config; + + std::ifstream ifs(filename); + if (!ifs.is_open()) { + return config; + } + + nlohmann::json json_config; + try { + ifs >> json_config; + } catch (const std::exception& e) { + return config; + } + + DeviceIDParser parser(target_device_name); + for (auto item = json_config.cbegin(), end = json_config.cend(); item != end; ++item) { + const std::string& device_name = item.key(); + if (DeviceIDParser(device_name).get_device_name() != parser.get_device_name()) + continue; + + const auto& item_value = item.value(); + for (auto option = item_value.cbegin(), item_value_end = item_value.cend(); option != item_value_end; ++option) { + config[option.key()] = option.value().get(); + } + } + + return config; +} + +ov::AnyMap PluginConfig::read_env(const std::vector& prefixes) const { + ov::AnyMap config; + + for (auto& kv : m_options_map) { + for (auto& prefix : prefixes) { + auto var_name = prefix + kv.first; + const auto& val = ov::util::getenv_string(var_name.c_str()); + + if (!val.empty()) { + if (dynamic_cast*>(kv.second) != nullptr) { + const std::set off = {"0", "false", "off", "no"}; + const std::set on = {"1", "true", "on", "yes"}; + + const auto& val_lower = ov::util::to_lower(val); + if (off.count(val_lower)) { + config[kv.first] = false; + } else if (on.count(val_lower)) { + config[kv.first] = true; + } else { + OPENVINO_THROW("Unexpected value for boolean property: ", val); + } + } else { + config[kv.first] = val; + } + break; + } + } + } + + return config; +} + +void PluginConfig::cleanup_unsupported(ov::AnyMap& config) const { + for (auto it = config.begin(); it != config.end();) { + const auto& known_options = m_options_map; + auto& name = it->first; + auto opt_it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); + if (opt_it == known_options.end()) { + it = config.erase(it); + } else { + ++it; + } + } +} + std::string PluginConfig::to_string() const { std::stringstream s; @@ -67,7 +165,7 @@ std::string PluginConfig::to_string() const { s << "PROPERTIES:\n"; for (const auto& option : m_options_map) { - s << "\t" << option.first << ":" << option.second->get_any().as() << std::endl; + s << "\t" << option.first << ": " << option.second->get_any().as() << std::endl; } s << "USER PROPERTIES:\n"; for (const auto& user_prop : user_properties) { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 924f6cf5d42a40..6e39e0b0b6a822 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -6,6 
+6,7 @@ #include "intel_gpu/runtime/internal_properties.hpp" #include "intel_gpu/runtime/device.hpp" +#include "intel_gpu/runtime/plugin_config.hpp" namespace ov::intel_gpu { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index 6ea8f4e107bfc8..5931a60ffae37a 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -15,6 +15,12 @@ namespace intel_gpu { struct NewExecutionConfig : public ov::PluginConfig { NewExecutionConfig(); + NewExecutionConfig(std::initializer_list values) : NewExecutionConfig() { set_property(ov::AnyMap(values)); } + explicit NewExecutionConfig(const ov::AnyMap& properties) : NewExecutionConfig() { set_property(properties); } + explicit NewExecutionConfig(const ov::AnyMap::value_type& property) : NewExecutionConfig() { set_property(property); } + + NewExecutionConfig(const NewExecutionConfig& other); + NewExecutionConfig& operator=(const NewExecutionConfig& other); #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ ConfigOption PropertyVar = \ diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index c3de17f8a196d3..800ac5ce997d6c 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -3,6 +3,7 @@ // #include "impls/registry/implementation_manager.hpp" +#include "intel_gpu/plugin/remote_context.hpp" #include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/core/type.hpp" #include "openvino/runtime/system_conf.hpp" @@ -162,7 +163,8 @@ program::program(engine& engine_ref, program_node::reset_unique_id(); if (no_optimizations) { init_graph(); - _config.apply_user_properties(_engine.get_device_info()); + auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); + _config.finalize(ctx, {}); } else { build_program(is_internal); if (_is_body_program) { @@ -198,7 +200,8 @@ program::program(engine& engine_ref, _task_executor(std::move(task_executor)), processing_order(), is_internal(is_internal) { - _config.apply_user_properties(_engine.get_device_info()); + auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); + _config.finalize(ctx, {}); init_primitives(); init_program(); prepare_nodes(nodes); @@ -211,7 +214,8 @@ program::program(engine& engine, const ExecutionConfig& config) _config(config), processing_order() { init_primitives(); - _config.apply_user_properties(_engine.get_device_info()); + auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); + _config.finalize(ctx, {}); new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); _layout_optimizer = std::make_unique(); } @@ -498,7 +502,8 @@ void program::set_options() { void program::build_program(bool is_internal) { init_graph(); - _config.apply_user_properties(_engine.get_device_info()); + auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); + _config.finalize(ctx, {}); { pre_optimize_graph(is_internal); } run_graph_compilation(); { post_optimize_graph(is_internal); } diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index a99ac1e534312b..63c98b8c756845 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -25,6 +25,7 @@ #include "intel_gpu/runtime/device_query.hpp" #include 
"intel_gpu/runtime/execution_config.hpp" #include "intel_gpu/runtime/itt.hpp" +#include "openvino/core/any.hpp" #include "openvino/core/deprecated.hpp" #include "openvino/op/gather.hpp" #include "openvino/op/concat.hpp" @@ -56,6 +57,16 @@ using Time = std::chrono::high_resolution_clock; namespace ov::intel_gpu { +namespace { + +ov::RTMap get_rt_info(const ov::Model& model) { + if (model.has_rt_info("runtime_options")) + return model.get_rt_info("runtime_options"); + return {}; +} + +} // namespace + #define FACTORY_DECLARATION(op_version, op_name) \ void __register ## _ ## op_name ## _ ## op_version(); @@ -220,9 +231,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(context->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); - config.apply_user_properties(context->get_engine().get_device_info()); + config.finalize(context, get_rt_info(*model)); set_cache_info(model, config); @@ -242,11 +251,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(context_impl->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); - config.apply_user_properties(context_impl->get_engine().get_device_info()); - + config.finalize(context_impl, get_rt_info(*model)); set_cache_info(model, config); auto transformed_model = clone_and_transform_model(model, config, context_impl); @@ -313,9 +318,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(ctx->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); - config.apply_user_properties(ctx->get_engine().get_device_info()); + config.finalize(ctx, get_rt_info(*model)); ProgramBuilder prog(ctx->get_engine(), config); @@ -370,7 +373,7 @@ std::shared_ptr Plugin::import_model(std::istream& model, ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(_orig_config); - config.apply_user_properties(context_impl->get_engine().get_device_info()); + config.finalize(context_impl, {}); ov::CacheMode cache_mode = config.get_property(ov::cache_mode); ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks); diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp index 9aa975d83923a3..330d3ed40c2175 100644 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -22,6 +22,21 @@ NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { #undef OV_CONFIG_OPTION } +NewExecutionConfig::NewExecutionConfig(const NewExecutionConfig& other) : NewExecutionConfig() { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + 
m_options_map.at(kv.first)->set_any(kv.second->get_any()); + } +} + +NewExecutionConfig& NewExecutionConfig::operator=(const NewExecutionConfig& other) { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); + } + return *this; +} + void NewExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); if (!info.supports_immad) { From 90572bd4e0a1de851cbd5a06a6242ef3b861c90d Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 18 Dec 2024 17:34:47 +0400 Subject: [PATCH 05/44] prefix for config members and unit tests Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 3 + src/inference/src/dev/plugin_config.cpp | 9 +- src/inference/tests/unit/config_test.cpp | 202 ++++++++++++++++++ .../intel_gpu/runtime/plugin_config.hpp | 2 +- .../intel_gpu/src/runtime/plugin_config.cpp | 2 +- .../tests/unit/module_tests/config_test.cpp | 3 +- 6 files changed, 215 insertions(+), 6 deletions(-) create mode 100644 src/inference/tests/unit/config_test.cpp diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 16985d70b2841a..78f8da4fe61ca2 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -118,6 +118,9 @@ class OPENVINO_RUNTIME_API PluginConfig { template T get_property(const ov::Property& property) const { + if (is_set_by_user(property)) { + return user_properties.at(property.name()).template as(); + } OPENVINO_ASSERT(m_options_map.find(property.name()) != m_options_map.end(), "Property not found: ", property.name()); return static_cast*>(m_options_map.at(property.name()))->value; } diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index c3ac86e05ba04b..c4489cdc1bc69f 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -76,9 +76,12 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R } void PluginConfig::apply_debug_options(std::shared_ptr context) { - ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); - cleanup_unsupported(config_properties); - set_user_property(config_properties); + if (context) { + ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); + cleanup_unsupported(config_properties); + set_user_property(config_properties); + } + ov::AnyMap env_properties = read_env({"OV_"}); set_user_property(env_properties); } diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp new file mode 100644 index 00000000000000..db832247dd2bd6 --- /dev/null +++ b/src/inference/tests/unit/config_test.cpp @@ -0,0 +1,202 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/core/any.hpp" +#include "openvino/runtime/plugin_config.hpp" + +#include +#include + +#include "common_test_utils/common_utils.hpp" + +using namespace ::testing; +using namespace ov; + +static constexpr Property unsupported_property{"UNSUPPORTED_PROPERTY"}; +static constexpr Property bool_property{"BOOL_PROPERTY"}; +static constexpr Property int_property{"INT_PROPERTY"}; +static constexpr Property high_level_property{"HIGH_LEVEL_PROPERTY"}; +static constexpr 
Property low_level_property{"LOW_LEVEL_PROPERTY"}; + + +struct EmptyTestConfig : public ov::PluginConfig { + std::vector get_supported_properties() const { + std::vector supported_properties; + for (const auto& kv : m_options_map) { + supported_properties.push_back(kv.first); + } + return supported_properties; + } +}; + +struct NotEmptyTestConfig : public ov::PluginConfig { + NotEmptyTestConfig() { + m_options_map[bool_property.name()] = &m_bool_property; + m_options_map[int_property.name()] = &m_int_property; + m_options_map[high_level_property.name()] = &m_high_level_property; + m_options_map[low_level_property.name()] = &m_low_level_property; + } + + NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); + } + } + + ConfigOption m_bool_property = ConfigOption(true); + ConfigOption m_int_property = ConfigOption(-1); + ConfigOption m_high_level_property = ConfigOption(""); + ConfigOption m_low_level_property = ConfigOption(""); + + std::vector get_supported_properties() const { + std::vector supported_properties; + for (const auto& kv : m_options_map) { + supported_properties.push_back(kv.first); + } + return supported_properties; + } + + void finalize_impl(std::shared_ptr context) override { + if (!is_set_by_user(low_level_property)) { + m_low_level_property.value = m_high_level_property.value; + } + } + + void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override { + apply_rt_info_property(high_level_property, rt_info); + } + + using ov::PluginConfig::is_set_by_user; +}; + +TEST(plugin_config, can_create_empty_config) { + ASSERT_NO_THROW( + EmptyTestConfig cfg; + ASSERT_EQ(cfg.get_supported_properties().size(), 0); + ); +} + +TEST(plugin_config, can_create_not_empty_config) { + ASSERT_NO_THROW( + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_supported_properties().size(), 4); + ); +} + +TEST(plugin_config, can_set_get_property) { + NotEmptyTestConfig cfg; + ASSERT_NO_THROW(cfg.get_property(bool_property)); + ASSERT_EQ(cfg.get_property(bool_property), true); + ASSERT_NO_THROW(cfg.set_property(bool_property(false))); + ASSERT_EQ(cfg.get_property(bool_property), false); + + ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); + ASSERT_EQ(cfg.get_property(bool_property), true); +} + +TEST(plugin_config, throw_for_unsupported_property) { + NotEmptyTestConfig cfg; + ASSERT_ANY_THROW(cfg.get_property(unsupported_property)); + ASSERT_ANY_THROW(cfg.set_property(unsupported_property(10.0f))); + ASSERT_ANY_THROW(cfg.set_user_property(unsupported_property(10.0f))); +} + +TEST(plugin_config, can_direct_access_to_properties) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.m_bool_property.value, cfg.get_property(bool_property)); + ASSERT_NO_THROW(cfg.set_property(bool_property(false))); + ASSERT_EQ(cfg.m_bool_property.value, cfg.get_property(bool_property)); + ASSERT_EQ(cfg.m_bool_property.value, false); + + ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); + ASSERT_EQ(cfg.m_bool_property.value, false); // user property doesn't impact member value until finalize() is called + + cfg.m_bool_property.value = true; + ASSERT_EQ(cfg.get_property(bool_property), true); +} + +TEST(plugin_config, finalization_updates_member) { + NotEmptyTestConfig cfg; + ASSERT_NO_THROW(cfg.set_user_property(bool_property(false))); + ASSERT_EQ(cfg.m_bool_property.value, true); // user property doesn't impact member value 
until finalize() is called
+
+    cfg.finalize(nullptr, {});
+
+    ASSERT_EQ(cfg.m_bool_property.value, false); // now the value has changed
+}
+
+TEST(plugin_config, get_property_before_finalization_returns_user_property_if_set) {
+    NotEmptyTestConfig cfg;
+
+    ASSERT_EQ(cfg.get_property(bool_property), true); // default value
+    ASSERT_EQ(cfg.m_bool_property.value, true); // default value
+
+    cfg.m_bool_property.value = false; // update member directly
+    ASSERT_EQ(cfg.get_property(bool_property), false); // OK, return the class member value as no user property was set
+
+    ASSERT_NO_THROW(cfg.set_user_property(bool_property(true)));
+    ASSERT_TRUE(cfg.is_set_by_user(bool_property));
+    ASSERT_EQ(cfg.get_property(bool_property), true); // now user property value is returned
+    ASSERT_EQ(cfg.m_bool_property.value, false); // but class member is not updated
+
+    cfg.finalize(nullptr, {});
+    ASSERT_EQ(cfg.get_property(bool_property), cfg.m_bool_property.value); // equal after finalization
+    ASSERT_FALSE(cfg.is_set_by_user(bool_property)); // and user property is cleared
+}
+
+TEST(plugin_config, finalization_updates_dependent_properties) {
+    NotEmptyTestConfig cfg;
+
+    cfg.set_user_property(high_level_property("value1"));
+    ASSERT_TRUE(cfg.is_set_by_user(high_level_property));
+    ASSERT_FALSE(cfg.is_set_by_user(low_level_property));
+
+    cfg.finalize(nullptr, {});
+    ASSERT_EQ(cfg.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg.m_low_level_property.value, "value1");
+    ASSERT_FALSE(cfg.is_set_by_user(high_level_property));
+    ASSERT_FALSE(cfg.is_set_by_user(low_level_property));
+}
+
+TEST(plugin_config, can_set_property_from_rt_info) {
+    NotEmptyTestConfig cfg;
+
+    RTMap rt_info = {
+        {high_level_property.name(), "value1"},
+        {int_property.name(), 10} // int_property is not applied from rt info
+    };
+
+    // default values
+    ASSERT_EQ(cfg.m_high_level_property.value, "");
+    ASSERT_EQ(cfg.m_low_level_property.value, "");
+    ASSERT_EQ(cfg.m_int_property.value, -1);
+
+    cfg.finalize(nullptr, rt_info);
+
+    ASSERT_EQ(cfg.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg.m_low_level_property.value, "value1"); // dependent property is updated too
+    ASSERT_EQ(cfg.m_int_property.value, -1); // still default
+}
+
+TEST(plugin_config, can_copy_config) {
+    NotEmptyTestConfig cfg1;
+
+    cfg1.m_high_level_property.value = "value1";
+    cfg1.m_low_level_property.value = "value2";
+    cfg1.m_int_property.value = 1;
+    cfg1.set_user_property(bool_property(false));
+
+    NotEmptyTestConfig cfg2 = cfg1;
+    ASSERT_EQ(cfg2.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg2.m_low_level_property.value, "value2");
+    ASSERT_EQ(cfg2.m_int_property.value, 1);
+    ASSERT_EQ(cfg2.get_property(bool_property), false); // ensure user properties are copied too
+
+    // check that cfg1 modification doesn't impact a copy
+    cfg1.set_property(high_level_property("value3"));
+    cfg1.m_int_property.value = 3;
+    ASSERT_EQ(cfg2.m_high_level_property.value, "value1");
+    ASSERT_EQ(cfg2.m_int_property.value, 1);
+}
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp
index 5931a60ffae37a..f18b32cd8b7cbb 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp
@@ -23,7 +23,7 @@ struct NewExecutionConfig : public ov::PluginConfig {
     NewExecutionConfig& operator=(const NewExecutionConfig& other);
 
 #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...)
\ - ConfigOption PropertyVar = \ + ConfigOption m_ ## PropertyVar = \ ConfigOption(GET_EXCEPT_LAST(__VA_ARGS__)); #include "options_release.inl" diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp index 330d3ed40c2175..8f4319734d3e9f 100644 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -14,7 +14,7 @@ namespace intel_gpu { NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ - m_options_map[PropertyNamespace::PropertyVar.name()] = &PropertyVar; + m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; #include "intel_gpu/runtime/options_release.inl" #include "intel_gpu/runtime/options_debug.inl" diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp index 930128ef53bff6..b14c5b0bf4623d 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp @@ -14,10 +14,11 @@ TEST(config_test, basic) { ov::intel_gpu::NewExecutionConfig cfg; std::cerr << cfg.to_string(); + std::cerr << cfg.get_property("PERFORMANCE_HINT").as(); cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); cfg.set_property(ov::hint::inference_precision(ov::element::f32)); - std::cerr << "PROF: " << cfg.enable_profiling.value << std::endl; + std::cerr << "PROF: " << cfg.m_enable_profiling.value << std::endl; std::cerr << cfg.to_string(); From f87d45b004bbdc8e706bfccc5713435a2e491bca Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 19 Dec 2024 14:25:26 +0400 Subject: [PATCH 06/44] added visibility for options Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 50 ++++++++++++- src/inference/src/dev/plugin_config.cpp | 43 ++++++----- src/inference/tests/unit/config_test.cpp | 45 +++++++++--- .../include/intel_gpu/graph/program.hpp | 4 +- .../intel_gpu/runtime/internal_properties.hpp | 8 +-- .../include/intel_gpu/runtime/options.inl | 71 +++++++++++++++++++ .../intel_gpu/runtime/options_debug.inl | 31 -------- .../intel_gpu/runtime/options_release.inl | 50 ------------- .../intel_gpu/runtime/plugin_config.hpp | 9 +-- .../intel_gpu/src/plugin/ops/condition.cpp | 1 - src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 1 - .../src/runtime/execution_config.cpp | 7 -- .../intel_gpu/src/runtime/plugin_config.cpp | 8 +-- .../tests/unit/fusions/gemm_fusion_test.cpp | 1 - .../tests/unit/fusions/loop_fusion_test.cpp | 1 - .../tests/unit/test_cases/crop_gpu_test.cpp | 1 - .../tests/unit/test_cases/loop_gpu_test.cpp | 5 +- 17 files changed, 189 insertions(+), 147 deletions(-) create mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 78f8da4fe61ca2..36b6765849ee8e 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -33,8 +33,40 @@ #define GET_EXCEPT_LAST(...) 
EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) +#define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ + ConfigOption m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; + +#define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) \ + m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; + +#define OV_CONFIG_RELEASE_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__) + +#define OV_CONFIG_RELEASE_INTERNAL_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE_INTERNAL, __VA_ARGS__) + +#define OV_CONFIG_DEBUG_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) + namespace ov { +enum class OptionVisibility { + RELEASE = 0, // Option can be set for any build type via public interface, environment and config file + RELEASE_INTERNAL = 1, // Option can be set for any build type via environment and config file only + DEBUG = 2, // Option can be set for debug builds only via environment and config file +}; + +inline std::ostream& operator<<(std::ostream& os, const OptionVisibility& visibility) { + switch (visibility) { + case OptionVisibility::RELEASE: os << "RELEASE"; break; + case OptionVisibility::RELEASE_INTERNAL: os << "RELEASE_INTERNAL"; break; + case OptionVisibility::DEBUG: os << "DEBUG"; break; + default: os << "UNKNOWN"; break; + } + + return os; +} + struct ConfigOptionBase { explicit ConfigOptionBase() {} virtual ~ConfigOptionBase() = default; @@ -42,13 +74,15 @@ struct ConfigOptionBase { virtual void set_any(const ov::Any any) = 0; virtual ov::Any get_any() const = 0; virtual bool is_valid_value(ov::Any val) = 0; + virtual OptionVisibility get_visibility() const = 0; }; -template +template struct ConfigOption : public ConfigOptionBase { ConfigOption(const T& default_val, std::function validator = nullptr) : ConfigOptionBase(), value(default_val), validator(validator) {} T value; + constexpr static const auto visibility = visibility_; void set_any(const ov::Any any) override { if (validator) @@ -69,6 +103,10 @@ struct ConfigOption : public ConfigOptionBase { } } + OptionVisibility get_visibility() const override { + return visibility; + } + private: std::function validator; }; @@ -139,6 +177,14 @@ class OPENVINO_RUNTIME_API PluginConfig { return user_properties.find(property.name()) != user_properties.end(); } + ConfigOptionBase* get_option_ptr(const std::string& name) const { + auto it = m_options_map.find(name); + OPENVINO_ASSERT(it != m_options_map.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); + + return it->second; + } + template void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { if (!is_set_by_user(property)) { @@ -149,6 +195,8 @@ class OPENVINO_RUNTIME_API PluginConfig { } } + void set_user_property(const ov::AnyMap& properties, const std::vector& allowed_visibility); + ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; ov::AnyMap read_env(const std::vector& prefixes) const; void cleanup_unsupported(ov::AnyMap& config) const; diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index c4489cdc1bc69f..cfc48745f677f5 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ 
b/src/inference/src/dev/plugin_config.cpp
@@ -23,34 +23,31 @@ void PluginConfig::set_property(const AnyMap& config) {
         auto& name = kv.first;
         auto& val = kv.second;
 
-        const auto& known_options = m_options_map;
-        auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
-        OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
-        OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name);
-
-        it->second->set_any(val);
+        auto option = get_option_ptr(name);
+        option->set_any(val);
     }
 }
 
 ov::Any PluginConfig::get_property(const std::string& name) const {
-    const auto& known_options = m_options_map;
-    auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
-    OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
-    OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name);
-
-    return it->second->get_any();
+    auto option = get_option_ptr(name);
+    return option->get_any();
 }
 
 void PluginConfig::set_user_property(const AnyMap& config) {
+    static std::vector<OptionVisibility> allowed_visibility = {OptionVisibility::RELEASE};
+    set_user_property(config, allowed_visibility);
+}
+
+void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector<OptionVisibility>& allowed_visibility) {
     for (auto& kv : config) {
         auto& name = kv.first;
         auto& val = kv.second;
 
-        const auto& known_options = m_options_map;
-        auto it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; });
-        OPENVINO_ASSERT(it != known_options.end(), "Option not found: ", name);
-        OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name);
-        OPENVINO_ASSERT(it->second->is_valid_value(val), "Invalid value: ", val.as<std::string>(), " for property: ", name);
+        auto option = get_option_ptr(name);
+        if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) {
+            OPENVINO_THROW("Unknown property: ", name);
+        }
+        OPENVINO_ASSERT(option->is_valid_value(val), "Invalid value: ", val.as<std::string>(), " for property: ", name);
 
         user_properties[name] = val;
@@ -76,14 +73,22 @@ void PluginConfig::finalize(std::shared_ptr<IRemoteContext> context, const ov::R
 }
 
 void PluginConfig::apply_debug_options(std::shared_ptr<IRemoteContext> context) {
+    static std::vector<OptionVisibility> allowed_visibility = {
+        OptionVisibility::RELEASE,
+        OptionVisibility::RELEASE_INTERNAL,
+#ifdef ENABLE_DEBUG_CAPS
+        OptionVisibility::DEBUG
+#endif
+    };
+
     if (context) {
         ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name());
         cleanup_unsupported(config_properties);
-        set_user_property(config_properties);
+        set_user_property(config_properties, allowed_visibility);
     }
 
     ov::AnyMap env_properties = read_env({"OV_"});
-    set_user_property(env_properties);
+    set_user_property(env_properties, allowed_visibility);
 }
 
 ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const {
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index db832247dd2bd6..0feeef707a2779 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -8,8 +8,6 @@
 #include <gtest/gtest.h>
 #include <gmock/gmock.h>
 
-#include "common_test_utils/common_utils.hpp"
-
 using namespace ::testing;
 using namespace ov;
 
@@ -18,6 +16,8 @@ static constexpr Property<bool> bool_property{"BOOL_PROP
 static constexpr Property<int> int_property{"INT_PROPERTY"};
 static constexpr
Property high_level_property{"HIGH_LEVEL_PROPERTY"}; static constexpr Property low_level_property{"LOW_LEVEL_PROPERTY"}; +static constexpr Property release_internal_property{"RELEASE_INTERNAL_PROPERTY"}; +static constexpr Property debug_property{"DEBUG_PROPERTY"}; struct EmptyTestConfig : public ov::PluginConfig { @@ -32,10 +32,15 @@ struct EmptyTestConfig : public ov::PluginConfig { struct NotEmptyTestConfig : public ov::PluginConfig { NotEmptyTestConfig() { - m_options_map[bool_property.name()] = &m_bool_property; - m_options_map[int_property.name()] = &m_int_property; - m_options_map[high_level_property.name()] = &m_high_level_property; - m_options_map[low_level_property.name()] = &m_low_level_property; + #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") + OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") + OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") + OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "") + OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "") + OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "") + #undef OV_CONFIG_OPTION + } NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() { @@ -45,10 +50,14 @@ struct NotEmptyTestConfig : public ov::PluginConfig { } } - ConfigOption m_bool_property = ConfigOption(true); - ConfigOption m_int_property = ConfigOption(-1); - ConfigOption m_high_level_property = ConfigOption(""); - ConfigOption m_low_level_property = ConfigOption(""); + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") + OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") + OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") + OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "") + OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "") + OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "") + #undef OV_CONFIG_OPTION std::vector get_supported_properties() const { std::vector supported_properties; @@ -68,6 +77,7 @@ struct NotEmptyTestConfig : public ov::PluginConfig { apply_rt_info_property(high_level_property, rt_info); } + using ov::PluginConfig::get_option_ptr; using ov::PluginConfig::is_set_by_user; }; @@ -81,7 +91,7 @@ TEST(plugin_config, can_create_empty_config) { TEST(plugin_config, can_create_not_empty_config) { ASSERT_NO_THROW( NotEmptyTestConfig cfg; - ASSERT_EQ(cfg.get_supported_properties().size(), 4); + ASSERT_EQ(cfg.get_supported_properties().size(), 6); ); } @@ -200,3 +210,16 @@ TEST(plugin_config, can_copy_config) { ASSERT_EQ(cfg2.m_high_level_property.value, "value1"); ASSERT_EQ(cfg2.m_int_property.value, 1); } + +TEST(plugin_config, set_user_property_throw_for_non_release_options) { + NotEmptyTestConfig cfg; + ASSERT_ANY_THROW(cfg.set_user_property(release_internal_property(10))); + ASSERT_ANY_THROW(cfg.set_user_property(debug_property(10))); +} + +TEST(plugin_config, visibility_is_correct) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_option_ptr(release_internal_property.name())->get_visibility(), OptionVisibility::RELEASE_INTERNAL); + ASSERT_EQ(cfg.get_option_ptr(debug_property.name())->get_visibility(), OptionVisibility::DEBUG); + ASSERT_EQ(cfg.get_option_ptr(int_property.name())->get_visibility(), OptionVisibility::RELEASE); +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index c775537a514dde..bec721ad3938a6 100644 --- 
a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -309,8 +309,8 @@ struct program { std::vector allocating_order; std::unique_ptr pm; std::unique_ptr _layout_optimizer; - bool is_internal; - bool _is_body_program; + bool is_internal = false; + bool _is_body_program = false; // if subgraph can be optimized if it consists of only inputs and corresponding outputs bool _can_be_optimized; std::unique_ptr _impls_cache; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index ddbb260647b287..77a00294e8076f 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -45,21 +45,19 @@ static constexpr Property optimize_data{"GPU_OPTIM static constexpr Property allow_static_input_reorder{"GPU_ALLOW_STATIC_INPUT_REORDER"}; static constexpr Property partial_build_program{"GPU_PARTIAL_BUILD"}; static constexpr Property allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"}; -static constexpr Property use_only_static_kernels_for_dynamic_shape{"GPU_USE_ONLY_STATIC_KERNELS_FOR_DYNAMIC_SHAPE"}; static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; static constexpr Property config_file{"CONFIG_FILE"}; static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; -static constexpr Property max_dynamic_batch{"DYN_BATCH_LIMIT"}; -static constexpr Property nv12_two_inputs{"GPU_NV12_TWO_INPUTS"}; static constexpr Property buffers_preallocation_ratio{"GPU_BUFFERS_PREALLOCATION_RATIO"}; static constexpr Property max_kernels_per_batch{"GPU_MAX_KERNELS_PER_BATCH"}; static constexpr Property use_onednn{"USE_ONEDNN"}; static constexpr Property help{"HELP"}; static constexpr Property verbose{"VERBOSE"}; +static constexpr Property log_to_file{"LOG_TO_FILE"}; static constexpr Property disable_usm{"DISABLE_USM"}; -static constexpr Property disable_onednn_post_ops{"DISABLE_ONEDNN_POST_OPS"}; +static constexpr Property disable_onednn_post_ops_opt{"DISABLE_ONEDNN_POST_OPS_OPT"}; static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; static constexpr Property dump_profiling_data{"DUMP_PROFILING_DATA"}; static constexpr Property dump_sources{"DUMP_SOURCES"}; @@ -69,11 +67,11 @@ static constexpr Property dump_iteratio static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; -static constexpr Property disable_shape_agnostic_impls{"DISABLE_SHAPE_AGNOSTIC_IMPLS"}; static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property disable_fake_alignment{"DISABLE_FAKE_ALIGNMENT"}; static constexpr Property use_usm_host{"USE_USM_HOST"}; static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl 
b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
new file mode 100644
index 00000000000000..d5da1edf81bd69
--- /dev/null
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
@@ -0,0 +1,71 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// Namespace, property name, default value, [validator], description
+OV_CONFIG_RELEASE_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin")
+OV_CONFIG_RELEASE_OPTION(ov::device, id, "0", "ID of the current device")
+OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty")
+OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Number of streams to be used for inference")
+OV_CONFIG_RELEASE_OPTION(ov, compilation_num_threads, std::max(1, static_cast<int>(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that support parallelism")
+OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16,
+        [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision")
+OV_CONFIG_RELEASE_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact the number of threads used for model compilation and inference as well as device queue settings")
+OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc.")
+OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model.
Performance mode allows unsafe optimizations that may reduce the model accuracy") +OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application") +OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not") + +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable winograd convolution implementation if available") +OV_CONFIG_RELEASE_OPTION(ov::internal, exclusive_async_requests, false, "") +OV_CONFIG_RELEASE_OPTION(ov::internal, query_model_ratio, 1.0f, "") +OV_CONFIG_RELEASE_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache") +OV_CONFIG_RELEASE_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model") +OV_CONFIG_RELEASE_OPTION(ov::hint, dynamic_quantization_group_size, 0, "") +OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") +OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") +OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") + +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_lp_transformations, false, "Enable/Disable Low precision transformations set") + +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. 
May be in-order or out-of-order")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, optimize_data, false, "Enable/Disable data flow optimizations for cldnn::program")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_memory_pool, true, "Enable/Disable memory pool usage")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_static_input_reorder, false, "Controls if weights tensors can be reordered during model compilation to a more friendly layout for a specific kernel")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, custom_outputs, std::vector<std::string>{}, "List of output primitive names")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, force_implementations, ImplForcingMap{}, "Specifies the list of forced implementations for the primitives")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, partial_build_program, false, "Early exit from model compilation process which allows faster execution graph dumping")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "Switch between new and old shape inference flow. Shall be removed soon")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "Threshold for the preallocation feature in case it uses the ratio policy")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disable onednn usage for particular model/platform")
+
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher the value, the more verbose the output. 0 - Disabled, 4 - Maximum verbosity")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, help, false, "Print help message for all config options")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable optimization pass for onednn post-ops")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data, "", "Save csv file with per-stage and per-primitive profiling data to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs, "", "Save intermediate graph representations during model compilation pipeline to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources, "", "Save generated sources for each kernel to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, "", "Save intermediate in/out tensors of each primitive to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, "", "Save csv file with memory pool info to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, "", "Space-separated list of iterations where other dump options should be enabled")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measure and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into a batch for more efficient OCL compilation")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of the LRU implementations cache created for each program object for dynamic models")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asynchronously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected
during compilation") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime inplace optimizations for operations like concat and crop") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "Disable fusions of operations as post-ops/fused-ops") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "Disable pass which merges QKV projections into single MatMul") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fake alignment feature which tries to keep gpu friendly memory alignment for arbitrary tensor shapes") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usage for all allocations") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "Preallocation setting") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "List of layers to load raw binary") diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl deleted file mode 100644 index 62548a7abb17fd..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options_debug.inl +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#ifdef GPU_DEBUG_CONFIG - -OV_CONFIG_OPTION(ov::intel_gpu, verbose, 0, "Enable") -OV_CONFIG_OPTION(ov::intel_gpu, help, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_usm, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_onednn_post_ops, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_profiling_data, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_graphs, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_sources, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_tensors, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_memory_pool, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, dump_iterations, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, host_time_profiling, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "") -OV_CONFIG_OPTION(ov::intel_gpu, impls_cache_capacity, 0, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_async_compilation, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_shape_agnostic_impls, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, use_usm_host, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "") - -#endif diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl deleted file mode 100644 index b3aa12dc75c49b..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options_release.inl +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -// Namespace, property name, default value, [validator], description -OV_CONFIG_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") -OV_CONFIG_OPTION(ov::device, id, "0", "ID of the 
current device") -OV_CONFIG_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty") -OV_CONFIG_OPTION(ov, num_streams, 1, "Number of streams to be used for inference") -OV_CONFIG_OPTION(ov, compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that supports parallelism") -OV_CONFIG_OPTION(ov::hint, inference_precision, ov::element::f16, - [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision") -OV_CONFIG_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact number of threads used for model compilton and inference as well as device queue settings") -OV_CONFIG_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc") -OV_CONFIG_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy") -OV_CONFIG_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application") -OV_CONFIG_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not") - -OV_CONFIG_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks") -OV_CONFIG_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level") -OV_CONFIG_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property") -OV_CONFIG_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution") -OV_CONFIG_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling") -OV_CONFIG_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable winograd convolution implementation if available") -OV_CONFIG_OPTION(ov::internal, exclusive_async_requests, false, "") -OV_CONFIG_OPTION(ov::internal, query_model_ratio, 1.0f, "") -OV_CONFIG_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache") -OV_CONFIG_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model") -OV_CONFIG_OPTION(ov::hint, dynamic_quantization_group_size, 0, "") -OV_CONFIG_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "") -OV_CONFIG_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") -OV_CONFIG_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") -OV_CONFIG_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") - -OV_CONFIG_OPTION(ov::intel_gpu, nv12_two_inputs, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, config_file, "", "") -OV_CONFIG_OPTION(ov::intel_gpu, enable_lp_transformations, false, "") - -OV_CONFIG_OPTION(ov::intel_gpu, max_dynamic_batch, 1, "") 
-OV_CONFIG_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "") -OV_CONFIG_OPTION(ov::intel_gpu, optimize_data, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, enable_memory_pool, true, "") -OV_CONFIG_OPTION(ov::intel_gpu, allow_static_input_reorder, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, custom_outputs, std::vector{}, "") -OV_CONFIG_OPTION(ov::intel_gpu, force_implementations, ImplForcingMap{}, "") -OV_CONFIG_OPTION(ov::intel_gpu, partial_build_program, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, use_only_static_kernels_for_dynamic_shape, false, "") -OV_CONFIG_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "") -OV_CONFIG_OPTION(ov::intel_gpu, use_onednn, false, "") diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp index f18b32cd8b7cbb..19a3c1e468e28c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp @@ -22,13 +22,8 @@ struct NewExecutionConfig : public ov::PluginConfig { NewExecutionConfig(const NewExecutionConfig& other); NewExecutionConfig& operator=(const NewExecutionConfig& other); - #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ - ConfigOption m_ ## PropertyVar = \ - ConfigOption(GET_EXCEPT_LAST(__VA_ARGS__)); - - #include "options_release.inl" - #include "options_debug.inl" - + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_OPTION void finalize_impl(std::shared_ptr context) override; diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 5c797b622aa28b..825b3ddfbc7282 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -27,7 +27,6 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); } } - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 3e052c134390ae..6e18486ee1d738 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -299,7 +299,6 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr( - std::make_tuple(ov::intel_gpu::max_dynamic_batch, 1), std::make_tuple(ov::intel_gpu::queue_type, QueueTypes::out_of_order), std::make_tuple(ov::intel_gpu::optimize_data, false), std::make_tuple(ov::intel_gpu::enable_memory_pool, true), @@ -80,7 +78,6 @@ void OldExecutionConfig::set_default() { std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}), std::make_tuple(ov::intel_gpu::partial_build_program, false), std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false), - std::make_tuple(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape, false), std::make_tuple(ov::intel_gpu::buffers_preallocation_ratio, 1.1f), std::make_tuple(ov::intel_gpu::max_kernels_per_batch, 8), std::make_tuple(ov::intel_gpu::use_onednn, false)); @@ -203,10 +200,6 
@@ void OldExecutionConfig::apply_debug_options(const cldnn::device_info& info) { set_property(ov::enable_profiling(true)); } - GPU_DEBUG_IF(debug_config->disable_dynamic_impl == 1) { - set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); - } - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { if (debug_config->dynamic_quantize_group_size == -1) set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp index 8f4319734d3e9f..5eff06155280b1 100644 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp @@ -13,12 +13,8 @@ namespace ov { namespace intel_gpu { NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { - #define OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, ...) \ - m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; - - #include "intel_gpu/runtime/options_release.inl" - #include "intel_gpu/runtime/options_debug.inl" - + #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_OPTION } diff --git a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp index 1fa303656f80a5..a32dd526f19250 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp @@ -339,7 +339,6 @@ TEST_P(gemm_2in_add, eltwise_postop_dynamic) { if (engine.get_device_info().supports_immad) { ov::intel_gpu::ImplementationDesc gemmv_impl = { cldnn::format::type::any, "", impl_types::onednn }; cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm_prim", gemmv_impl } })); - cfg_fused.set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); } auto add_data_layout = get_output_layout(p); diff --git a/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp index 1fbd1c096e7c6e..0b8c1b153c8f5d 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp @@ -55,7 +55,6 @@ program::ptr build_program(engine& engine, ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); return program::build_program(engine, body_topology, config, false, false, true); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp index 9e92f2ebbfd293..0ff21d1ed5f3dc 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp @@ -1317,7 +1317,6 @@ TEST_P(crop_gpu_dynamic, i32_in2x3x2x2_crop_offsets) { } } } - config2.set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); network network2(engine, topology, config2); // run with static kernel network2.set_input_data("input", input); auto outputs2 = network2.execute(); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp 
b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp index 4939630fab3c57..16c35e04aa3f17 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp @@ -55,7 +55,6 @@ static program::ptr build_program(engine& engine, ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer)); return program::build_program(engine, body_topology, config, false, false, true); @@ -837,7 +836,7 @@ static void test_loop_gpu_multiple_shapes(ov::PartialShape body_input_layout, permute("permute1", input_info("input_origin"), {0, 1, 2, 3}), concatenation("input1", {input_info("permute1"), input_info("input_origin")}, 0), loop("loop", - {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input1"), input_info("input2")}, + {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input1"), input_info("input2")}, body_program, trip_count_id, initial_condition_id, actual_iteration_count_id, input_primitive_maps, output_primitive_maps, back_edges, num_iterations, body_current_iteration_id, body_execution_condition_id, 2), @@ -1105,7 +1104,7 @@ static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape bod auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true); auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx}); - + std::vector body_input_layouts; for (size_t i = 0; i < body_input_layout.size(); i++) { From 4017ffb3fcf78f262849d83d81b79c8c6ca7db39 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 19 Dec 2024 14:45:22 +0400 Subject: [PATCH 07/44] remove old config Signed-off-by: Vladimir Paramuzov --- .../intel_gpu/runtime/execution_config.hpp | 171 ++--------- .../intel_gpu/runtime/plugin_config.hpp | 41 --- .../src/runtime/execution_config.cpp | 290 +++--------------- .../intel_gpu/src/runtime/plugin_config.cpp | 130 -------- .../tests/unit/module_tests/config_test.cpp | 33 -- 5 files changed, 68 insertions(+), 597 deletions(-) delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp delete mode 100644 src/plugins/intel_gpu/src/runtime/plugin_config.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 6e39e0b0b6a822..b21d0c91b1d924 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -1,178 +1,43 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once +#include "openvino/runtime/plugin_config.hpp" +#include "intel_gpu/runtime/device_info.hpp" #include "intel_gpu/runtime/internal_properties.hpp" -#include "intel_gpu/runtime/device.hpp" -#include "intel_gpu/runtime/plugin_config.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include namespace ov::intel_gpu { -enum class PropertyVisibility { - INTERNAL = 0, - PUBLIC = 1 -}; - -inline std::ostream& 
operator<<(std::ostream& os, const PropertyVisibility& visibility) { - switch (visibility) { - case PropertyVisibility::PUBLIC: os << "PUBLIC"; break; - case PropertyVisibility::INTERNAL: os << "INTERNAL"; break; - default: os << "UNKNOWN"; break; - } +struct ExecutionConfig : public ov::PluginConfig { + ExecutionConfig(); + ExecutionConfig(std::initializer_list values) : ExecutionConfig() { set_property(ov::AnyMap(values)); } + explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } + explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } - return os; -} + ExecutionConfig(const ExecutionConfig& other); + ExecutionConfig& operator=(const ExecutionConfig& other); -class BaseValidator { -public: - using Ptr = std::shared_ptr; - virtual ~BaseValidator() = default; - virtual bool is_valid(const ov::Any& v) const = 0; -}; + #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION -class FuncValidator : public BaseValidator { -public: -explicit FuncValidator(std::function func) : m_func(func) { } - bool is_valid(const ov::Any& v) const override { - return m_func(v); - } + void finalize_impl(std::shared_ptr context) override; + void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; private: - std::function m_func; -}; - -// PropertyTypeValidator ensures that value can be converted to given property type -template -class PropertyTypeValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - try { - v.as(); - return true; - } catch (ov::Exception&) { - return false; - } - } -}; - -class OldExecutionConfig { -public: - OldExecutionConfig(); - OldExecutionConfig(std::initializer_list values) : OldExecutionConfig() { set_property(ov::AnyMap(values)); } - explicit OldExecutionConfig(const ov::AnyMap& properties) : OldExecutionConfig() { set_property(properties); } - explicit OldExecutionConfig(const ov::AnyMap::value_type& property) : OldExecutionConfig() { set_property(property); } - - void set_default(); - void set_property(const ov::AnyMap& properties); - void set_user_property(const ov::AnyMap& properties); - Any get_property(const std::string& name) const; - bool is_set_by_user(const std::string& name) const; - bool is_supported(const std::string& name) const; - void register_property_impl(const std::pair& propertiy, PropertyVisibility visibility, BaseValidator::Ptr validator); - - template ::type = true> - void register_property_impl() { } - - template - void register_property_impl(const std::tuple, ValueT>& property, PropertyInitializer&&... properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared>()); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } - - template - typename std::enable_if::value, void>::type - register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... 
properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property))); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } - - template - typename std::enable_if, ValidatorT>::value, void>::type - register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property))); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } - - template - void register_property(PropertyInitializer&&... properties) { - register_property_impl(properties...); - } - - template - util::EnableIfAllStringAny set_property(Properties&&... properties) { - set_property(ov::AnyMap{std::forward(properties)...}); - } - - template - util::EnableIfAllStringAny set_user_property(Properties&&... properties) { - set_user_property(ov::AnyMap{std::forward(properties)...}); - } - - template - bool is_set_by_user(const ov::Property& property) const { - return is_set_by_user(property.name()); - } - - template - T get_property(const ov::Property& property) const { - return get_property(property.name()).template as(); - } - void apply_user_properties(const cldnn::device_info& info); - - // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call - // So this method should be called after setting all user properties, but before apply_user_properties() call. - void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info, const bool is_llm); - - std::string to_string() const; - -protected: void apply_hints(const cldnn::device_info& info); void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - void apply_debug_options(const cldnn::device_info& info); - - template - void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { - if (!is_set_by_user(property)) { - auto rt_info_val = rt_info.find(property.name()); - if (rt_info_val != rt_info.end()) { - set_user_property(property(rt_info_val->second.template as())); - } - } - } - -private: - ov::AnyMap internal_properties; - ov::AnyMap user_properties; - - std::map supported_properties; - std::map property_validators; - - bool finalized = false; }; } // namespace ov::intel_gpu namespace cldnn { using ov::intel_gpu::ExecutionConfig; -} // namespace cldnn +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp deleted file mode 100644 index 19a3c1e468e28c..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/plugin_config.hpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/runtime/plugin_config.hpp" -#include "intel_gpu/runtime/device_info.hpp" -#include "intel_gpu/runtime/internal_properties.hpp" -#include "openvino/runtime/internal_properties.hpp" -#include - -namespace ov { -namespace intel_gpu { - -struct NewExecutionConfig : public ov::PluginConfig { - NewExecutionConfig(); - NewExecutionConfig(std::initializer_list values) : NewExecutionConfig() { 
set_property(ov::AnyMap(values)); } - explicit NewExecutionConfig(const ov::AnyMap& properties) : NewExecutionConfig() { set_property(properties); } - explicit NewExecutionConfig(const ov::AnyMap::value_type& property) : NewExecutionConfig() { set_property(property); } - - NewExecutionConfig(const NewExecutionConfig& other); - NewExecutionConfig& operator=(const NewExecutionConfig& other); - - #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) - #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION - - void finalize_impl(std::shared_ptr context) override; - void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; - -private: - void apply_user_properties(const cldnn::device_info& info); - void apply_hints(const cldnn::device_info& info); - void apply_execution_hints(const cldnn::device_info& info); - void apply_performance_hints(const cldnn::device_info& info); - void apply_priority_hints(const cldnn::device_info& info); -}; - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index e700c64b2979e1..9a0d0028201b03 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -1,137 +1,77 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "intel_gpu/runtime/execution_config.hpp" -#include "intel_gpu/runtime/debug_configuration.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/core/any.hpp" #include "openvino/runtime/internal_properties.hpp" -#include "openvino/runtime/properties.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" -#include namespace ov::intel_gpu { -OldExecutionConfig::OldExecutionConfig() { - set_default(); +ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { + #define OV_CONFIG_OPTION(...) 
OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION } -class InferencePrecisionValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - auto precision = v.as(); - return precision == ov::element::f16 || precision == ov::element::f32 || precision == ov::element::undefined; +ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); } -}; - -class PerformanceModeValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - auto mode = v.as(); - return mode == ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT || - mode == ov::hint::PerformanceMode::THROUGHPUT || - mode == ov::hint::PerformanceMode::LATENCY; - } -}; - -void OldExecutionConfig::set_default() { - register_property( - std::make_tuple(ov::device::id, "0"), - std::make_tuple(ov::enable_profiling, false), - std::make_tuple(ov::cache_dir, ""), - std::make_tuple(ov::num_streams, 1), - std::make_tuple(ov::compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency()))), - std::make_tuple(ov::hint::inference_precision, ov::element::f16, InferencePrecisionValidator()), - std::make_tuple(ov::hint::model_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY, PerformanceModeValidator()), - std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::PERFORMANCE), - std::make_tuple(ov::hint::num_requests, 0), - std::make_tuple(ov::hint::enable_cpu_pinning, false), - std::make_tuple(ov::hint::enable_cpu_reservation, false), - - std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::queue_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::enable_sdpa_optimization, true), - std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true), - std::make_tuple(ov::intel_gpu::disable_winograd_convolution, false), - std::make_tuple(ov::internal::exclusive_async_requests, false), - std::make_tuple(ov::internal::query_model_ratio, 1.0f), - std::make_tuple(ov::cache_mode, ov::CacheMode::OPTIMIZE_SPEED), - std::make_tuple(ov::cache_encryption_callbacks, EncryptionCallbacks{}), - std::make_tuple(ov::hint::dynamic_quantization_group_size, 0), - std::make_tuple(ov::hint::kv_cache_precision, ov::element::f16), - std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false), - std::make_tuple(ov::weights_path, ""), - std::make_tuple(ov::hint::activations_scale_factor, -1.f), - - // Legacy API properties - std::make_tuple(ov::intel_gpu::config_file, ""), - std::make_tuple(ov::intel_gpu::enable_lp_transformations, false)); - - register_property( - std::make_tuple(ov::intel_gpu::queue_type, QueueTypes::out_of_order), - std::make_tuple(ov::intel_gpu::optimize_data, false), - std::make_tuple(ov::intel_gpu::enable_memory_pool, true), - std::make_tuple(ov::intel_gpu::allow_static_input_reorder, false), - std::make_tuple(ov::intel_gpu::custom_outputs, std::vector{}), - std::make_tuple(ov::intel_gpu::dump_graphs, ""), - std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}), - std::make_tuple(ov::intel_gpu::partial_build_program, false), - 
std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false), - std::make_tuple(ov::intel_gpu::buffers_preallocation_ratio, 1.1f), - std::make_tuple(ov::intel_gpu::max_kernels_per_batch, 8), - std::make_tuple(ov::intel_gpu::use_onednn, false)); } -void OldExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { - property_validators[property.first] = validator; - supported_properties[property.first] = visibility; - internal_properties[property.first] = property.second; -} - -void OldExecutionConfig::set_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; - OPENVINO_ASSERT(is_supported(kv.first), "[GPU] Attempt to set property ", name, " (", val.as(), ") which was not registered!\n"); - OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": ", val.as()); - internal_properties[name] = val; +ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { + user_properties = other.user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); } + return *this; } -bool OldExecutionConfig::is_supported(const std::string& name) const { - bool supported = supported_properties.find(name) != supported_properties.end(); - bool has_validator = property_validators.find(name) != property_validators.end(); - - return supported && has_validator; -} - -bool OldExecutionConfig::is_set_by_user(const std::string& name) const { - return user_properties.find(name) != user_properties.end(); +void ExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + if (!info.supports_immad) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); + } + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); } -void OldExecutionConfig::set_user_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; - bool supported = is_supported(name) && supported_properties.at(name) == PropertyVisibility::PUBLIC; - OPENVINO_ASSERT(supported, "[GPU] Attempt to set user property ", name, " (", val.as(), ") which was not registered or internal!\n"); - OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": `", val.as(), "`"); +void ExecutionConfig::finalize_impl(std::shared_ptr context) { + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + apply_hints(info); + if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { + set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); + } + if (info.supports_immad) { + set_property(ov::intel_gpu::use_onednn(true)); + } + if (get_property(ov::intel_gpu::use_onednn)) { + set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + } - user_properties[kv.first] = kv.second; + // Enable KV-cache compression by default for non-systolic platforms + if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { + set_property(ov::hint::kv_cache_precision(ov::element::i8)); } -} -Any OldExecutionConfig::get_property(const std::string& name) const { - if (user_properties.find(name) != user_properties.end()) { - return 
user_properties.at(name); + // Enable dynamic quantization by default for non-systolic platforms + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + set_property(ov::hint::dynamic_quantization_group_size(32)); } +} - OPENVINO_ASSERT(internal_properties.find(name) != internal_properties.end(), "[GPU] Can't get internal property with name ", name); - return internal_properties.at(name); +void ExecutionConfig::apply_hints(const cldnn::device_info& info) { + apply_execution_hints(info); + apply_performance_hints(info); + apply_priority_hints(info); } -void OldExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { +void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::execution_mode)) { const auto mode = get_property(ov::hint::execution_mode); if (!is_set_by_user(ov::hint::inference_precision)) { @@ -147,7 +87,7 @@ void OldExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { } } -void OldExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { +void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::performance_mode)) { const auto mode = get_property(ov::hint::performance_mode); if (!is_set_by_user(ov::num_streams)) { @@ -176,7 +116,7 @@ void OldExecutionConfig::apply_performance_hints(const cldnn::device_info& info) } } -void OldExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { +void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::model_priority)) { const auto priority = get_property(ov::hint::model_priority); if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { @@ -185,134 +125,4 @@ void OldExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { } } -void OldExecutionConfig::apply_debug_options(const cldnn::device_info& info) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); - } - - GPU_DEBUG_IF(debug_config->serialize_compile == 1) { - set_property(ov::compilation_num_threads(1)); - } - - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - GPU_DEBUG_COUT << "[WARNING] ov::enable_profiling property was forced because of enabled OV_GPU_DumpProfilingData debug option\n"; - set_property(ov::enable_profiling(true)); - } - - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - if (debug_config->dynamic_quantize_group_size == -1) - set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); - else - set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size)); - } - - GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) { - GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } else { - set_property(ov::hint::kv_cache_precision(ov::element::undefined)); - } - } -} - -void OldExecutionConfig::apply_hints(const cldnn::device_info& info) { - apply_execution_hints(info); - apply_performance_hints(info); - apply_priority_hints(info); - apply_debug_options(info); -} - -void ExecutionConfig::update_specific_default_properties(const cldnn::device_info& info) { - // These default properties should be set once. 
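// ----------------------------------------------------------------------------
// [Editor's note: illustration only, not part of the patch] The deleted flow here
// (update_specific_default_properties + apply_user_properties) is subsumed by
// finalize()/finalize_impl() in the new config. A minimal sketch of the ordering
// concern it handles, written against the new API from this series (context is
// assumed to be a GPU remote context, as in the deleted config_test.cpp):
//
//   ExecutionConfig config;
//   config.set_user_property(ov::num_streams(ov::streams::AUTO),
//                            ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
//   config.finalize(context, {});
//
// finalize() first copies the user-set num_streams=AUTO into the internal
// options and only then runs the hints, so apply_performance_hints() can resolve
// AUTO into a concrete stream count:
//
//   if (get_property(ov::num_streams) == ov::streams::AUTO) {
//       int32_t n_streams = std::max(info.num_ccs, 2);
//       set_property(ov::num_streams(n_streams));
//   }
//
// Applying the hints before copying user values would leave num_streams=AUTO in
// the finalized config while an integer is expected, which is exactly the case
// the "Copy internal properties before applying hints" comment in the deleted
// apply_user_properties() below describes.
// ----------------------------------------------------------------------------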
- if (specific_default_properties_is_set) - return; - specific_default_properties_is_set = true; - - // Enable KV-cache compression by default for non-systolic platforms MFDNN-11755 - if (get_property(ov::hint::kv_cache_precision) == ov::element::undefined && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - - // Enable dynamic quantization by default for non-systolic platforms - if (get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } -} - -void OldExecutionConfig::apply_user_properties(const cldnn::device_info& info) { - // Update specific default properties, call once before internal_properties updated. - update_specific_default_properties(info); - - // Copy internal properties before applying hints to ensure that - // a property set by hint won't be overriden by a value in user config. - // E.g num_streams=AUTO && hint=THROUGHPUT - // If we apply hints first and then copy all values from user config to internal one, - // then we'll get num_streams=AUTO in final config while some integer number is expected. - for (auto& kv : user_properties) { - internal_properties[kv.first] = kv.second; - } - apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); - } - if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); - } - if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } - if (!is_set_by_user(ov::hint::enable_cpu_reservation)) { - if (get_property(ov::hint::enable_cpu_pinning)) { - set_property(ov::hint::enable_cpu_reservation(true)); - } - } - if (get_property(ov::hint::enable_cpu_reservation)) { - if (!is_set_by_user(ov::hint::enable_cpu_pinning)) { - set_property(ov::hint::enable_cpu_pinning(true)); - } - } - - if (!is_set_by_user(ov::hint::kv_cache_precision) || get_property(ov::hint::kv_cache_precision) == ov::element::undefined) { - if (info.supports_immad) { // MFDNN-11755 - set_property(ov::hint::kv_cache_precision(get_property(ov::hint::inference_precision))); - } else { - // Enable KV-cache compression by default for non-systolic platforms only - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - } - - // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && - get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } - - finalized = true; - - user_properties.clear(); -} - -void OldExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info, const bool is_llm) { - if (!info.supports_immad) { - apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); - } - if (!is_llm) - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); -} - -std::string OldExecutionConfig::to_string() const { - std::stringstream s; - s << "internal properties:\n"; - for (auto& kv : internal_properties) { - s << "\t" << kv.first << ": " << kv.second.as() << std::endl; - } - s << "user properties:\n"; - for (auto& kv : user_properties) { - s << "\t" << kv.first << ": " << kv.second.as() << std::endl; - } - return s.str(); -} - } // namespace 
ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp b/src/plugins/intel_gpu/src/runtime/plugin_config.cpp deleted file mode 100644 index 5eff06155280b1..00000000000000 --- a/src/plugins/intel_gpu/src/runtime/plugin_config.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "intel_gpu/runtime/plugin_config.hpp" -#include "intel_gpu/plugin/remote_context.hpp" -#include "openvino/core/any.hpp" -#include "openvino/runtime/internal_properties.hpp" -#include "intel_gpu/runtime/internal_properties.hpp" - - -namespace ov { -namespace intel_gpu { - -NewExecutionConfig::NewExecutionConfig() : ov::PluginConfig() { - #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) - #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION -} - -NewExecutionConfig::NewExecutionConfig(const NewExecutionConfig& other) : NewExecutionConfig() { - user_properties = other.user_properties; - for (const auto& kv : other.m_options_map) { - m_options_map.at(kv.first)->set_any(kv.second->get_any()); - } -} - -NewExecutionConfig& NewExecutionConfig::operator=(const NewExecutionConfig& other) { - user_properties = other.user_properties; - for (const auto& kv : other.m_options_map) { - m_options_map.at(kv.first)->set_any(kv.second->get_any()); - } - return *this; -} - -void NewExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { - const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); - if (!info.supports_immad) { - apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - } - apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); -} - -void NewExecutionConfig::finalize_impl(std::shared_ptr context) { - const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); - apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); - } - if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); - } - if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } - - // Enable KV-cache compression by default for non-systolic platforms - if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - - // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } -} - -void NewExecutionConfig::apply_hints(const cldnn::device_info& info) { - apply_execution_hints(info); - apply_performance_hints(info); - apply_priority_hints(info); -} - -void NewExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::execution_mode)) { - const auto mode = get_property(ov::hint::execution_mode); - if (!is_set_by_user(ov::hint::inference_precision)) { - if (mode == ov::hint::ExecutionMode::ACCURACY) { - set_property(ov::hint::inference_precision(ov::element::undefined)); - } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) { - if (info.supports_fp16) - set_property(ov::hint::inference_precision(ov::element::f16)); - else - 
set_property(ov::hint::inference_precision(ov::element::f32)); - } - } - } -} - -void NewExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::performance_mode)) { - const auto mode = get_property(ov::hint::performance_mode); - if (!is_set_by_user(ov::num_streams)) { - if (mode == ov::hint::PerformanceMode::LATENCY) { - set_property(ov::num_streams(1)); - } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { - set_property(ov::num_streams(ov::streams::AUTO)); - } - } - } - - if (get_property(ov::num_streams) == ov::streams::AUTO) { - int32_t n_streams = std::max(info.num_ccs, 2); - set_property(ov::num_streams(n_streams)); - } - - if (get_property(ov::internal::exclusive_async_requests)) { - set_property(ov::num_streams(1)); - } - - // Allow kernels reuse only for single-stream scenarios - if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { - if (get_property(ov::num_streams) != 1) { - set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); - } - } -} - -void NewExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::model_priority)) { - const auto priority = get_property(ov::hint::model_priority); - if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { - set_property(ov::intel_gpu::hint::queue_priority(priority)); - } - } -} - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp deleted file mode 100644 index b14c5b0bf4623d..00000000000000 --- a/src/plugins/intel_gpu/tests/unit/module_tests/config_test.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (C) 2022-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "intel_gpu/plugin/remote_context.hpp" -#include "intel_gpu/runtime/plugin_config.hpp" -#include "openvino/runtime/properties.hpp" -#include "test_utils.h" - -using namespace cldnn; -using namespace ::tests; - -TEST(config_test, basic) { - ov::intel_gpu::NewExecutionConfig cfg; - std::cerr << cfg.to_string(); - - std::cerr << cfg.get_property("PERFORMANCE_HINT").as(); - cfg.set_user_property(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); - cfg.set_property(ov::hint::inference_precision(ov::element::f32)); - - std::cerr << "PROF: " << cfg.m_enable_profiling.value << std::endl; - - std::cerr << cfg.to_string(); - - std::cerr << cfg.get_property(ov::hint::inference_precision) << std::endl; - std::cerr << cfg.get_property(ov::hint::execution_mode) << std::endl; - - auto ctx = std::make_shared("GPU", std::vector{ get_test_engine().get_device() }); - cfg.finalize(ctx, {}); - std::cerr << cfg.to_string(); -// std::cerr << get_prop() << std::endl; -// std::cerr << get_prop() << std::endl; -} From 36f0bf6407c2d41bd0698bbe7df35b1f9824df4e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 19 Dec 2024 15:46:55 +0400 Subject: [PATCH 08/44] enhancements Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 25 ++++++++++-- src/inference/src/dev/plugin_config.cpp | 39 ++++++++++++------- src/inference/tests/unit/config_test.cpp | 2 +- .../intel_gpu/src/graph/fully_connected.cpp | 3 +- .../src/runtime/execution_config.cpp | 4 +- 5 files changed, 50 insertions(+), 23 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 36b6765849ee8e..769a4619b60fe8 100644 --- 
a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -107,6 +107,23 @@ struct ConfigOption : public ConfigOptionBase { return visibility; } + operator T() const { + return value; + } + + ConfigOption& operator=(const T& val) { + value = val; + return *this; + } + + bool operator==(const T& val) const { + return value == val; + } + + bool operator!=(const T& val) const { + return !(*this == val); + } + private: std::function validator; }; @@ -157,7 +174,7 @@ class OPENVINO_RUNTIME_API PluginConfig { template T get_property(const ov::Property& property) const { if (is_set_by_user(property)) { - return user_properties.at(property.name()).template as(); + return m_user_properties.at(property.name()).template as(); } OPENVINO_ASSERT(m_options_map.find(property.name()) != m_options_map.end(), "Property not found: ", property.name()); return static_cast*>(m_options_map.at(property.name()))->value; @@ -174,7 +191,7 @@ class OPENVINO_RUNTIME_API PluginConfig { template bool is_set_by_user(const ov::Property& property) const { - return user_properties.find(property.name()) != user_properties.end(); + return m_user_properties.find(property.name()) != m_user_properties.end(); } ConfigOptionBase* get_option_ptr(const std::string& name) const { @@ -195,7 +212,7 @@ class OPENVINO_RUNTIME_API PluginConfig { } } - void set_user_property(const ov::AnyMap& properties, const std::vector& allowed_visibility); + void set_user_property(const ov::AnyMap& properties, const std::vector& allowed_visibility, bool throw_on_error); ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; ov::AnyMap read_env(const std::vector& prefixes) const; @@ -204,7 +221,7 @@ class OPENVINO_RUNTIME_API PluginConfig { std::map m_options_map; // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info - ov::AnyMap user_properties; + ov::AnyMap m_user_properties; using OptionMapEntry = decltype(m_options_map)::value_type; }; diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index cfc48745f677f5..27d113a04a88cd 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -34,22 +34,31 @@ ov::Any PluginConfig::get_property(const std::string& name) const { } void PluginConfig::set_user_property(const AnyMap& config) { - static std::vector allowed_visibility = {OptionVisibility::RELEASE}; - set_user_property(config, allowed_visibility); + const static std::vector allowed_visibility = {OptionVisibility::RELEASE}; + const bool throw_on_error = true; + set_user_property(config, allowed_visibility, throw_on_error); } -void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector& allowed_visibility) { +void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector& allowed_visibility, bool throw_on_error) { for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; auto option = get_option_ptr(name); if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) { - OPENVINO_THROW("Unkown property: ", name); + if (throw_on_error) + OPENVINO_THROW("Unknown property: ", name); + else + continue; + } + if (!option->is_valid_value(val)) { + if (throw_on_error) + OPENVINO_THROW("Invalid value: ", val.as(), " for property: ", name); + else + continue; } - 
OPENVINO_ASSERT(option->is_valid_value(val), "Invalid value: ", val.as(), " for property: ", name); - user_properties[name] = val; + m_user_properties[name] = val; } } @@ -61,7 +70,7 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R // E.g num_streams=AUTO && hint=THROUGHPUT // If we apply hints first and then copy all values from user config to internal one, // then we'll get num_streams=AUTO in final config while some integer number is expected. - for (const auto& prop : user_properties) { + for (const auto& prop : m_user_properties) { auto& option = m_options_map.at(prop.first); option->set_any(prop.second); } @@ -69,7 +78,7 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R finalize_impl(context); // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization - user_properties.clear(); + m_user_properties.clear(); } void PluginConfig::apply_debug_options(std::shared_ptr context) { @@ -81,14 +90,17 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) #endif }; + const bool throw_on_error = false; + if (context) { ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); cleanup_unsupported(config_properties); - set_user_property(config_properties, allowed_visibility); + set_user_property(config_properties, allowed_visibility, throw_on_error); } ov::AnyMap env_properties = read_env({"OV_"}); - set_user_property(env_properties, allowed_visibility); + cleanup_unsupported(env_properties); + set_user_property(env_properties, allowed_visibility, throw_on_error); } ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { @@ -155,10 +167,9 @@ ov::AnyMap PluginConfig::read_env(const std::vector& prefixes) cons void PluginConfig::cleanup_unsupported(ov::AnyMap& config) const { for (auto it = config.begin(); it != config.end();) { - const auto& known_options = m_options_map; auto& name = it->first; - auto opt_it = std::find_if(known_options.begin(), known_options.end(), [&](const OptionMapEntry& o) { return o.first == name; }); - if (opt_it == known_options.end()) { + auto opt_it = std::find_if(m_options_map.begin(), m_options_map.end(), [&](const OptionMapEntry& o) { return o.first == name; }); + if (opt_it == m_options_map.end()) { it = config.erase(it); } else { ++it; @@ -176,7 +187,7 @@ std::string PluginConfig::to_string() const { s << "\t" << option.first << ": " << option.second->get_any().as() << std::endl; } s << "USER PROPERTIES:\n"; - for (const auto& user_prop : user_properties) { + for (const auto& user_prop : m_user_properties) { s << "\t" << user_prop.first << ": " << user_prop.second.as() << std::endl; } diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index 0feeef707a2779..fa09be0616c8d4 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -44,7 +44,7 @@ struct NotEmptyTestConfig : public ov::PluginConfig { } NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() { - user_properties = other.user_properties; + m_user_properties = other.m_user_properties; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index 0c03916d45efcb..d635e87c7494d0 100644 --- 
a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -250,8 +250,7 @@ kernel_impl_params fully_connected_inst::get_fake_aligned_params(kernel_impl_par } } - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_fake_alignment) { + GPU_DEBUG_IF(orig_impl_param.get_program().get_config().m_disable_fake_alignment) { can_apply_fake_alignment = false; } diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 9a0d0028201b03..ff420a0e66c1b9 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -18,14 +18,14 @@ ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { } ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { - user_properties = other.user_properties; + m_user_properties = other.m_user_properties; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } } ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { - user_properties = other.user_properties; + m_user_properties = other.m_user_properties; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } From 4cb213e562cd33a7709f9db1e931152544c1713e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 23 Dec 2024 10:27:54 +0400 Subject: [PATCH 09/44] update behavior for set/get property. Add help message Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 36 +++-- src/inference/src/dev/plugin_config.cpp | 134 ++++++++++++++++-- src/inference/tests/unit/config_test.cpp | 32 ++--- .../include/intel_gpu/plugin/plugin.hpp | 1 - .../intel_gpu/runtime/execution_config.hpp | 3 +- .../include/intel_gpu/runtime/options.inl | 2 +- src/plugins/intel_gpu/src/graph/broadcast.cpp | 2 +- src/plugins/intel_gpu/src/graph/crop.cpp | 2 +- src/plugins/intel_gpu/src/graph/eltwise.cpp | 2 +- src/plugins/intel_gpu/src/graph/gather.cpp | 2 +- .../graph_optimizer/add_required_reorders.cpp | 2 +- .../graph_optimizer/build_implementations.cpp | 2 +- .../graph_optimizer/graph_initializations.cpp | 4 +- .../graph_optimizer/propagate_constants.cpp | 4 +- .../select_preferred_formats.cpp | 2 +- .../src/graph/impls/ocl/fully_connected.cpp | 2 +- .../impls/ocl/kernel_selector_helper.cpp | 4 +- .../src/graph/impls/ocl/kernels_cache.cpp | 10 +- .../impls/onednn/primitive_onednn_base.h | 8 +- .../impls/registry/implementation_manager.cpp | 2 +- .../registry/non_max_suppression_impls.cpp | 2 +- .../intel_gpu/src/graph/layout_optimizer.cpp | 2 +- src/plugins/intel_gpu/src/graph/network.cpp | 6 +- .../src/graph/non_max_suppression.cpp | 2 +- src/plugins/intel_gpu/src/graph/permute.cpp | 2 +- .../intel_gpu/src/graph/primitive_inst.cpp | 8 +- src/plugins/intel_gpu/src/graph/program.cpp | 45 +++--- .../src/graph/program_dump_graph.cpp | 2 +- src/plugins/intel_gpu/src/graph/reorder.cpp | 2 +- src/plugins/intel_gpu/src/graph/reshape.cpp | 2 +- .../src/graph/scatter_elements_update.cpp | 2 +- .../intel_gpu/src/graph/scatter_nd_update.cpp | 2 +- .../intel_gpu/src/graph/scatter_update.cpp | 2 +- src/plugins/intel_gpu/src/graph/select.cpp | 2 +- .../intel_gpu/src/graph/strided_slice.cpp | 2 +- .../intel_gpu/src/plugin/compiled_model.cpp | 20 +-- src/plugins/intel_gpu/src/plugin/graph.cpp | 20 ++- .../intel_gpu/src/plugin/ops/condition.cpp | 6 +- 
src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 4 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 49 +++---- .../intel_gpu/src/plugin/program_builder.cpp | 18 +-- .../src/plugin/sync_infer_request.cpp | 8 +- .../src/plugin/transformations_pipeline.cpp | 12 +- .../src/runtime/execution_config.cpp | 46 ++++-- .../intel_gpu/src/runtime/ocl/ocl_engine.cpp | 2 +- .../intel_gpu/src/runtime/ocl/ocl_stream.cpp | 8 +- src/plugins/intel_gpu/src/runtime/stream.cpp | 4 +- .../test_cases/fully_connected_gpu_test.cpp | 24 ++-- 48 files changed, 337 insertions(+), 223 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 769a4619b60fe8..a1bcab62b5d5fd 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -33,12 +33,26 @@ #define GET_EXCEPT_LAST(...) EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) +#define GET_LAST_IMPL(N, ...) CAT(GET_LAST_IMPL_, N)(__VA_ARGS__) +#define GET_LAST_IMPL_0(_0, ...) _0 +#define GET_LAST_IMPL_1(_0, _1, ...) _1 +#define GET_LAST_IMPL_2(_0, _1, _2, ...) _2 +#define GET_LAST_IMPL_3(_0, _1, _2, _3, ...) _3 +#define GET_LAST_IMPL_4(_0, _1, _2, _3, _4, ...) _4 +#define GET_LAST_IMPL_5(_0, _1, _2, _3, _4, _5, ...) _5 +#define GET_LAST_IMPL_6(_0, _1, _2, _3, _4, _5, _6, ...) _6 + +#define GET_LAST(...) GET_LAST_IMPL(COUNT(__VA_ARGS__), _, __VA_ARGS__ ,,,,,,,,,,,) + #define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ ConfigOption m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; #define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) \ m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; +#define OV_CONFIG_OPTION_HELP(PropertyNamespace, PropertyVar, Visibility, DefaultValue, ...) \ + { #PropertyNamespace "::" #PropertyVar, PropertyNamespace::PropertyVar.name(), GET_LAST(__VA_ARGS__)}, + #define OV_CONFIG_RELEASE_OPTION(PropertyNamespace, PropertyVar, ...) \ OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__) @@ -159,18 +173,12 @@ class OPENVINO_RUNTIME_API PluginConfig { void set_property(const ov::AnyMap& properties); Any get_property(const std::string& name) const; - void set_user_property(const ov::AnyMap& properties); template util::EnableIfAllStringAny set_property(Properties&&... properties) { set_property(ov::AnyMap{std::forward(properties)...}); } - template - util::EnableIfAllStringAny set_user_property(Properties&&... 
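GET_LAST is the counterpart of GET_EXCEPT_LAST: every OV_CONFIG_OPTION entry ends with a human-readable description, and OV_CONFIG_OPTION_HELP has to peel that final argument off a variadic list whose length varies (an optional validator may sit in between). A self-contained demonstration of the trick; COUNT/CAT/EXPAND are reproduced from earlier in this header so the snippet compiles on its own:

#include <iostream>

#define COUNT_N(_1, _2, _3, _4, _5, N, ...) N
#define EXPAND(N) N
#define COUNT(...) EXPAND(COUNT_N(__VA_ARGS__, 5, 4, 3, 2, 1))
#define CAT(a, b) a ## b

#define GET_LAST_IMPL(N, ...) CAT(GET_LAST_IMPL_, N)(__VA_ARGS__)
#define GET_LAST_IMPL_1(_0, _1, ...) _1
#define GET_LAST_IMPL_2(_0, _1, _2, ...) _2
#define GET_LAST_IMPL_3(_0, _1, _2, _3, ...) _3
#define GET_LAST(...) GET_LAST_IMPL(COUNT(__VA_ARGS__), _, __VA_ARGS__ ,,,,,,,,,,,)

int main() {
    // default value + description -> picks the description
    std::cout << GET_LAST(42, "description") << "\n";
    // default value + validator + description -> still the last argument
    std::cout << GET_LAST(42, "validator", "description") << "\n";
}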
properties) { - set_user_property(ov::AnyMap{std::forward(properties)...}); - } - template T get_property(const ov::Property& property) const { if (is_set_by_user(property)) { @@ -189,6 +197,7 @@ class OPENVINO_RUNTIME_API PluginConfig { virtual void apply_debug_options(std::shared_ptr context); virtual void finalize_impl(std::shared_ptr context) {} + template bool is_set_by_user(const ov::Property& property) const { return m_user_properties.find(property.name()) != m_user_properties.end(); @@ -207,12 +216,13 @@ class OPENVINO_RUNTIME_API PluginConfig { if (!is_set_by_user(property)) { auto rt_info_val = rt_info.find(property.name()); if (rt_info_val != rt_info.end()) { - set_user_property(property(rt_info_val->second.template as())); + set_property(property(rt_info_val->second.template as())); } } } - void set_user_property(const ov::AnyMap& properties, const std::vector& allowed_visibility, bool throw_on_error); + ov::Any get_property(const std::string& name, const std::vector& allowed_visibility) const; + void set_property(const ov::AnyMap& properties, const std::vector& allowed_visibility, bool throw_on_error); ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; ov::AnyMap read_env(const std::vector& prefixes) const; @@ -223,6 +233,16 @@ class OPENVINO_RUNTIME_API PluginConfig { // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info ov::AnyMap m_user_properties; using OptionMapEntry = decltype(m_options_map)::value_type; + + // property variable name, string name, default value, description + using OptionsDesc = std::vector>; + static OptionsDesc m_options_desc; + virtual const OptionsDesc& get_options_desc() const { static OptionsDesc empty; return empty; } + const std::string get_help_message(const std::string& name = "") const; + void print_help() const; + +private: + bool m_is_finalized = false; }; } // namespace ov diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 27d113a04a88cd..e1b09b76ad8235 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -8,7 +8,9 @@ #include "openvino/runtime/device_id_parser.hpp" #include "openvino/util/common_util.hpp" #include "openvino/util/env_util.hpp" +#include #include +#include #ifdef JSON_HEADER # include @@ -16,30 +18,63 @@ # include #endif -namespace ov { - -void PluginConfig::set_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; +#ifdef _WIN32 +#include +#else +#include +#include +#endif - auto option = get_option_ptr(name); - option->set_any(val); +namespace { +size_t get_terminal_width() { + const size_t default_width = 120; +#ifdef _WIN32 + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) { + return csbi.srWindow.Right - csbi.srWindow.Left + 1; + } else { + return default_width; + } +#else + struct winsize w; + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { + return w.ws_col; + } else { + return default_width; } +#endif // _WIN32 } +} + +namespace ov { ov::Any PluginConfig::get_property(const std::string& name) const { + const static std::vector allowed_visibility = {OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL}; + return get_property(name, allowed_visibility); +} + +ov::Any PluginConfig::get_property(const std::string& name, const std::vector& allowed_visibility) const { + if 
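The net effect of these declarations: set_user_property() is gone, and a single set_property() family filters by OptionVisibility, with m_is_finalized latching the config read-only. A condensed sketch of the intended semantics (simplified value types; the real code operates on ov::Any and ConfigOptionBase):

#include <algorithm>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

enum class OptionVisibility { RELEASE, RELEASE_INTERNAL, DEBUG };

struct OptionSketch {
    std::string value;
    OptionVisibility visibility;
};

struct ConfigSketch {
    std::map<std::string, OptionSketch> m_options_map;
    std::map<std::string, std::string> m_user_properties;
    bool m_is_finalized = false;

    void set_property(const std::string& name, const std::string& value,
                      const std::vector<OptionVisibility>& allowed, bool throw_on_error) {
        if (m_is_finalized)
            throw std::runtime_error("Setting property after config finalization is prohibited");
        auto it = m_options_map.find(name);
        bool ok = it != m_options_map.end() &&
                  std::find(allowed.begin(), allowed.end(), it->second.visibility) != allowed.end();
        if (!ok) {
            if (throw_on_error)
                throw std::runtime_error("Couldn't set unknown property: " + name);
            return;  // debug/env path: skip silently
        }
        m_user_properties[name] = value;
    }

    std::string get_property(const std::string& name) const {
        auto u = m_user_properties.find(name);
        if (u != m_user_properties.end())
            return u->second;  // user-set value wins until finalize() clears it
        return m_options_map.at(name).value;
    }
};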
(m_user_properties.find(name) != m_user_properties.end()) { + return m_user_properties.at(name); + } + auto option = get_option_ptr(name); + if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) { + OPENVINO_THROW("Couldn't get unknown property: ", name); + } + return option->get_any(); } -void PluginConfig::set_user_property(const AnyMap& config) { +void PluginConfig::set_property(const AnyMap& config) { const static std::vector allowed_visibility = {OptionVisibility::RELEASE}; const bool throw_on_error = true; - set_user_property(config, allowed_visibility, throw_on_error); + set_property(config, allowed_visibility, throw_on_error); } -void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector& allowed_visibility, bool throw_on_error) { +void PluginConfig::set_property(const ov::AnyMap& config, const std::vector& allowed_visibility, bool throw_on_error) { + OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited"); + for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; @@ -47,13 +82,13 @@ void PluginConfig::set_user_property(const ov::AnyMap& config, const std::vector auto option = get_option_ptr(name); if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) { if (throw_on_error) - OPENVINO_THROW("Unkown property: ", name); + OPENVINO_THROW("Couldn't set unknown property: ", name); else continue; } if (!option->is_valid_value(val)) { if (throw_on_error) - OPENVINO_THROW("Invalid value: ", val.as(), " for property: ", name); + OPENVINO_THROW("Invalid value: ", val.as(), " for property: ", name, "\nProperty description: ", get_help_message(name)); else continue; } @@ -79,6 +114,8 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization m_user_properties.clear(); + + m_is_finalized = true; } void PluginConfig::apply_debug_options(std::shared_ptr context) { @@ -95,12 +132,12 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) if (context) { ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); cleanup_unsupported(config_properties); - set_user_property(config_properties, allowed_visibility, throw_on_error); + set_property(config_properties, allowed_visibility, throw_on_error); } ov::AnyMap env_properties = read_env({"OV_"}); cleanup_unsupported(env_properties); - set_user_property(env_properties, allowed_visibility, throw_on_error); + set_property(env_properties, allowed_visibility, throw_on_error); } ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { @@ -194,4 +231,71 @@ std::string PluginConfig::to_string() const { return s.str(); } +void PluginConfig::print_help() const { + auto format_text = [](const std::string& cpp_name, const std::string& str_name, const std::string& desc, size_t max_name_width, size_t max_width) { + std::istringstream words(desc); + std::ostringstream formatted_text; + std::string word; + std::vector words_vec; + + while (words >> word) { + words_vec.push_back(word); + } + + size_t j = 0; + size_t count_of_desc_lines = (desc.length() + max_width - 1) / max_width; + for (size_t i = 0 ; i < std::max(2, count_of_desc_lines); i++) { + if (i == 0) { + formatted_text << std::left << std::setw(max_name_width) << 
cpp_name; + } else if (i == 1) { + formatted_text << std::left << std::setw(max_name_width) << str_name; + } else { + formatted_text << std::left << std::setw(max_name_width) << ""; + } + + formatted_text << " | "; + + size_t line_length = max_name_width + 3; + for (; j < words_vec.size();) { + line_length += words_vec[j].size() + 1; + if (line_length > max_width) { + break; + } else { + formatted_text << words_vec[j] << " "; + } + j++; + } + formatted_text << "\n"; + } + return formatted_text.str(); + }; + + const auto& options_desc = get_options_desc(); + std::stringstream ss; + auto max_name_length_item = std::max_element(options_desc.begin(), options_desc.end(), + [](const OptionsDesc::value_type& a, const OptionsDesc::value_type& b){ + return std::get<0>(a).size() < std::get<0>(b).size(); + }); + + const size_t max_name_width = static_cast(std::get<0>(*max_name_length_item).size() + std::get<1>(*max_name_length_item).size()); + const size_t terminal_width = get_terminal_width(); + ss << std::left << std::setw(max_name_width) << ("Option name") << " | " << " Description " << "\n"; + ss << std::left << std::setw(terminal_width) << std::setfill('-') << "" << "\n"; + for (auto& kv : options_desc) { + ss << format_text(std::get<0>(kv), std::get<1>(kv), std::get<2>(kv), max_name_width, terminal_width) << "\n"; + } + + std::cout << ss.str(); +} + +const std::string PluginConfig::get_help_message(const std::string& name) const { + const auto& options_desc = get_options_desc(); + auto it = std::find_if(options_desc.begin(), options_desc.end(), [&](const OptionsDesc::value_type& v) { return std::get<1>(v) == name; }); + if (it != options_desc.end()) { + return std::get<2>(*it); + } + + return ""; +} + } // namespace ov diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index fa09be0616c8d4..42b7fba115a273 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -101,35 +101,27 @@ TEST(plugin_config, can_set_get_property) { ASSERT_EQ(cfg.get_property(bool_property), true); ASSERT_NO_THROW(cfg.set_property(bool_property(false))); ASSERT_EQ(cfg.get_property(bool_property), false); - - ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); - ASSERT_EQ(cfg.get_property(bool_property), true); } TEST(plugin_config, throw_for_unsupported_property) { NotEmptyTestConfig cfg; ASSERT_ANY_THROW(cfg.get_property(unsupported_property)); ASSERT_ANY_THROW(cfg.set_property(unsupported_property(10.0f))); - ASSERT_ANY_THROW(cfg.set_user_property(unsupported_property(10.0f))); } TEST(plugin_config, can_direct_access_to_properties) { NotEmptyTestConfig cfg; - ASSERT_EQ(cfg.m_bool_property.value, cfg.get_property(bool_property)); - ASSERT_NO_THROW(cfg.set_property(bool_property(false))); - ASSERT_EQ(cfg.m_bool_property.value, cfg.get_property(bool_property)); - ASSERT_EQ(cfg.m_bool_property.value, false); + ASSERT_EQ(cfg.m_int_property.value, cfg.get_property(int_property)); + ASSERT_NO_THROW(cfg.set_property(int_property(1))); + ASSERT_EQ(cfg.m_int_property.value, -1); // user property doesn't impact member value until finalize() is called - ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); - ASSERT_EQ(cfg.m_bool_property.value, false); // user property doesn't impact member value until finalize() is called - - cfg.m_bool_property.value = true; - ASSERT_EQ(cfg.get_property(bool_property), true); + cfg.m_int_property.value = 2; + ASSERT_EQ(cfg.get_property(int_property), 1); // still 1 as user property was 
set previously } TEST(plugin_config, finalization_updates_member) { NotEmptyTestConfig cfg; - ASSERT_NO_THROW(cfg.set_user_property(bool_property(false))); + ASSERT_NO_THROW(cfg.set_property(bool_property(false))); ASSERT_EQ(cfg.m_bool_property.value, true); // user property doesn't impact member value until finalize() is called cfg.finalize(nullptr, {}); @@ -146,7 +138,7 @@ TEST(plugin_config, get_property_before_finalization_returns_user_property_if_se cfg.m_bool_property.value = false; // update member directly ASSERT_EQ(cfg.get_property(bool_property), false); // OK, return the class member value as no user property was set - ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); + ASSERT_NO_THROW(cfg.set_property(bool_property(true))); ASSERT_TRUE(cfg.is_set_by_user(bool_property)); ASSERT_EQ(cfg.get_property(bool_property), true); // now user property value is returned ASSERT_EQ(cfg.m_bool_property.value, false); // but class member is not updated @@ -159,7 +151,7 @@ TEST(plugin_config, get_property_before_finalization_returns_user_property_if_se TEST(plugin_config, finalization_updates_dependant_properties) { NotEmptyTestConfig cfg; - cfg.set_user_property(high_level_property("value1")); + cfg.set_property(high_level_property("value1")); ASSERT_TRUE(cfg.is_set_by_user(high_level_property)); ASSERT_FALSE(cfg.is_set_by_user(low_level_property)); @@ -196,7 +188,7 @@ TEST(plugin_config, can_copy_config) { cfg1.m_high_level_property.value = "value1"; cfg1.m_low_level_property.value = "value2"; cfg1.m_int_property.value = 1; - cfg1.set_user_property(bool_property(false)); + cfg1.set_property(bool_property(false)); NotEmptyTestConfig cfg2 = cfg1; ASSERT_EQ(cfg2.m_high_level_property.value, "value1"); @@ -211,10 +203,10 @@ TEST(plugin_config, can_copy_config) { ASSERT_EQ(cfg2.m_int_property.value, 1); } -TEST(plugin_config, set_user_property_throw_for_non_release_options) { +TEST(plugin_config, set_property_throw_for_non_release_options) { NotEmptyTestConfig cfg; - ASSERT_ANY_THROW(cfg.set_user_property(release_internal_property(10))); - ASSERT_ANY_THROW(cfg.set_user_property(debug_property(10))); + ASSERT_ANY_THROW(cfg.set_property(release_internal_property(10))); + ASSERT_ANY_THROW(cfg.set_property(debug_property(10))); } TEST(plugin_config, visibility_is_correct) { diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index 869d32825b8761..cec27660baa2b4 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -43,7 +43,6 @@ class Plugin : public ov::IPlugin { bool is_metric(const std::string& name) const; ov::Any get_metric(const std::string& name, const ov::AnyMap& arguments) const; - void set_cache_info(const std::shared_ptr& model, ExecutionConfig& properties) const; public: Plugin(); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index b21d0c91b1d924..b96a558171c203 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -25,10 +25,11 @@ struct ExecutionConfig : public ov::PluginConfig { #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_OPTION +protected: void finalize_impl(std::shared_ptr context) override; void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; + const 
ov::PluginConfig::OptionsDesc& get_options_desc() const override; -private: void apply_user_properties(const cldnn::device_info& info); void apply_hints(const cldnn::device_info& info); void apply_execution_hints(const cldnn::device_info& info); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index d5da1edf81bd69..1941aaec69b2bf 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -9,7 +9,7 @@ OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Number of streams to be used for inference") OV_CONFIG_RELEASE_OPTION(ov, compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that support parallelism") OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, - [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision") + [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision. Supported values: { f16, f32, undefined }") OV_CONFIG_RELEASE_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact number of threads used for model compilation and inference as well as device queue settings") OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc") OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy") diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp index 0f69379fa8e217..f2037c94979e0c 100644 --- a/src/plugins/intel_gpu/src/graph/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp @@ -149,7 +149,7 @@ void broadcast_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index d9f6ebd8b71872..85be51a788c842 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -278,7 +278,7 @@ void crop_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly.
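Each options.inl entry couples a default with an optional validator lambda, which both set_any() and is_valid_value() consult; the inference_precision entry above is the canonical example. A reduced sketch of the behavior (hypothetical names mirroring the ConfigOption template):

#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>

template <typename T>
struct ValidatedOption {
    T value;
    std::function<bool(const T&)> validator;

    void set(const T& v) {
        if (validator && !validator(v))
            throw std::invalid_argument("Invalid value for option");
        value = v;  // mirrors ConfigOption::set_any() after validation
    }
};

int main() {
    // Analogous to the inference_precision entry: a closed set of values.
    ValidatedOption<std::string> precision{
        "f16", [](const std::string& v) { return v == "f16" || v == "f32" || v == "undefined"; }};
    precision.set("f32");        // accepted
    try {
        precision.set("bf16");   // rejected by the validator
    } catch (const std::invalid_argument& e) {
        std::cout << e.what() << "\n";
    }
}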
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout()); diff --git a/src/plugins/intel_gpu/src/graph/eltwise.cpp b/src/plugins/intel_gpu/src/graph/eltwise.cpp index 7805cb575aed9a..491867dc206d31 100644 --- a/src/plugins/intel_gpu/src/graph/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/eltwise.cpp @@ -393,7 +393,7 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) : ""); } } else { - bool use_new_shape_infer = network.get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + bool use_new_shape_infer = network.get_config().m_allow_new_shape_infer; auto input0_pshape = node.get_input_pshape(0); for (size_t i = 1; i < inputs_count; ++i) { diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp index 2020f10015f916..e32a45ef00aab5 100644 --- a/src/plugins/intel_gpu/src/graph/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/gather.cpp @@ -150,7 +150,7 @@ void gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index 333afe18775e0b..0ef04576494665 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -161,7 +161,7 @@ bool add_required_reorders::test_format(cldnn::program_node& node, format reques } void add_required_reorders::run(program& p) { - bool optimize_data = p.get_config().get_property(ov::intel_gpu::optimize_data); + bool optimize_data = p.get_config().m_optimize_data; auto usr_itr = p.get_processing_order().begin(); while (usr_itr != p.get_processing_order().end()) { auto& usr = *usr_itr++; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp index 4c1b1008434144..999e103c3fe200 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp @@ -11,7 +11,7 @@ using namespace cldnn; void build_implementations::run(program& p) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "pass::build_implementations"); - if (p.get_config().get_property(ov::intel_gpu::partial_build_program)) { + if (p.get_config().m_partial_build_program) { return; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp index eec55260e2ea4b..daa3b5ddc41501 100644 --- 
a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp @@ -17,7 +17,7 @@ using namespace cldnn; namespace cldnn { void graph_initializations::set_outputs(program& p) { - auto custom_outputs = p.get_config().get_property(ov::intel_gpu::custom_outputs); + auto custom_outputs = p.get_config().m_custom_outputs.value; if (!custom_outputs.empty()) { for (auto const& output : custom_outputs) { OPENVINO_ASSERT(p.has_node(output), "not found custom output node in current cldnn::program: ", output); @@ -37,7 +37,7 @@ void graph_initializations::set_outputs(program& p) { void graph_initializations::run(program& p) { set_outputs(p); - auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations); + auto forcing_map = p.get_config().m_force_implementations.value; for (auto& kv : forcing_map) { if (p.has_node(kv.first)) { p.get_node(kv.first).set_forced_impl_type(kv.second.impl_type); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index 34fa9647ec99c3..cc04b3efe0913c 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -143,8 +143,8 @@ propagate_constants::calculate(engine& engine, return {}; ExecutionConfig cf_config = config; - cf_config.set_property(ov::intel_gpu::optimize_data(false)); - cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); + cf_config.m_optimize_data = false; + cf_config.m_custom_outputs = const_outputs; network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); std::map, std::shared_ptr>> weightless_cache_map; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index a4e6d989543837..9313544928b6b4 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -74,7 +74,7 @@ void select_preferred_formats::run(program& p) { } #endif // ENABLE_ONEDNN_FOR_GPU - auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations); + auto forcing_map = p.get_config().m_force_implementations.value; for (auto n : p.get_processing_order()) { n->recalc_output_layout(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index d7813c03d80f99..a22d23ca4dfed9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -203,7 +203,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { params.quantization = kernel_selector::QuantizationType::NONE; } - params.dynamic_quantization_group_size = impl_param.get_program().get_config().get_property(ov::hint::dynamic_quantization_group_size); + params.dynamic_quantization_group_size = impl_param.get_program().get_config().m_dynamic_quantization_group_size; return params; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 624db86e38342c..4fea0bbe644d63 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ 
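Note the shape of the propagate_constants change above: instead of round-tripping through the string-keyed set_property(ov::intel_gpu::optimize_data(false)), the copied config is mutated through its typed member (cf_config.m_optimize_data = false). A sketch of why the member form is attractive (illustrative types only, not the dev-API ones):

#include <iostream>

template <typename T>
struct ConfigOption {
    T value;
    ConfigOption& operator=(const T& v) { value = v; return *this; }
    operator const T&() const { return value; }
};

struct ExecConfigSketch {
    ConfigOption<bool> m_optimize_data{true};
};

int main() {
    ExecConfigSketch cfg;
    cfg.m_optimize_data = false;   // typed and checked at compile time,
                                   // no string lookup or Any conversion
    bool v = cfg.m_optimize_data;  // implicit conversion back to the raw type
    std::cout << std::boolalpha << v << "\n";
}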
b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -1204,13 +1204,13 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p params.engineInfo.ip_version = device_info.ip_version; params.engineInfo.arch = kernel_selector::gpu_arch(static_cast::type>(device_info.arch)); - auto impl_forcing = config.get_property(ov::intel_gpu::force_implementations); + auto impl_forcing = config.m_force_implementations.value; if (impl_forcing.count(param_info.desc->id) != 0) { params.forceImplementation = impl_forcing.at(param_info.desc->id).kernel_name; } - params.allowStaticInputReordering = config.get_property(ov::intel_gpu::optimize_data) || config.get_property(ov::intel_gpu::allow_static_input_reorder); + params.allowStaticInputReordering = config.m_optimize_data || config.m_allow_static_input_reorder; params.allowInputReordering = false; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 8d305a592e081b..50f1b1ed9ab879 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -111,7 +111,7 @@ namespace cldnn { std::mutex kernels_cache::_mutex; std::string kernels_cache::get_cache_path() const { - auto path = _config.get_property(ov::cache_dir); + auto path = _config.m_cache_dir.value; if (path.empty()) { return {}; } @@ -123,12 +123,12 @@ std::string kernels_cache::get_cache_path() const { } bool kernels_cache::is_cache_enabled() const { - if (!_config.get_property(ov::intel_gpu::allow_new_shape_infer) && - (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SPEED)) { + if (!_config.m_allow_new_shape_infer && + (_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SPEED)) { return false; } - return !_config.get_property(ov::cache_dir).empty(); + return !_config.m_cache_dir.value.empty(); } size_t kernels_cache::get_max_kernels_per_batch() const { @@ -136,7 +136,7 @@ size_t kernels_cache::get_max_kernels_per_batch() const { GPU_DEBUG_IF(debug_config->max_kernels_per_batch >= 1) { return static_cast(debug_config->max_kernels_per_batch); } - return _config.get_property(ov::intel_gpu::max_kernels_per_batch); + return _config.m_max_kernels_per_batch; } void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::vector* all_batches) const { diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 05a3dc5b2a9e4b..34910d5d73531d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -47,7 +47,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _attrs(attrs), _pd(pd) { - _enable_profiling = config.get_property(ov::enable_profiling); + _enable_profiling = config.m_enable_profiling; _scratchpad_md = _pd.scratchpad_desc(); @@ -70,7 +70,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _pd(), _prim() { - _enable_profiling = config.get_property(ov::enable_profiling); + _enable_profiling = config.m_enable_profiling; GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { _enable_profiling = true; @@ -318,7 +318,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { private: std::string get_cache_directory(const 
ExecutionConfig& config) const { - auto path = config.get_property(ov::cache_dir); + auto path = config.m_cache_dir.value; if (path.empty()) { return {}; } @@ -343,7 +343,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { void build_primitive(const ExecutionConfig& config) { auto cache_outpath = get_cache_directory(config); - if (!config.get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!config.m_allow_new_shape_infer) { cache_outpath = ""; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp index fdb2f151de8986..b135d9af73f31f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp @@ -58,7 +58,7 @@ std::unique_ptr ImplementationManager::create(const program_node if (auto impl = create_impl(node, params)) { update_impl(*impl, params); impl->set_node_params(node); - impl->can_share_kernels = node.get_program().get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse); + impl->can_share_kernels = node.get_program().get_config().m_enable_kernels_reuse; return impl; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp index d974b78f8e6d14..142b4b45fbf001 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp @@ -59,7 +59,7 @@ const std::vector>& Registry(scores_layout.get_partial_shape()[0].get_length()); const size_t kClassNum = static_cast(scores_layout.get_partial_shape()[1].get_length()); const size_t kNStreams = - static_cast(node.get_program().get_config().get_property(ov::streams::num)); + static_cast(node.get_program().get_config().m_num_streams.value); const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; return kKeyValue > 64; } diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 167b254a247637..84458b58279028 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -435,7 +435,7 @@ bool should_use_winograd_2x3_s1(const convolution_node& node, layout const& input_layout, layout const& weights_layout, bool output_size_handling_enabled) { - bool disable_winograd_conv = node.get_program().get_config().get_property(ov::intel_gpu::disable_winograd_convolution); + bool disable_winograd_conv = node.get_program().get_config().m_disable_winograd_convolution; if (disable_winograd_conv) return false; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index aa7c9a55775e6d..653ff2f57ee385 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -180,9 +180,9 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo , _memory_pool(new memory_pool(program->get_engine())) , _internal(is_internal) , _is_primary_stream(is_primary_stream) - , _enable_profiling(program->get_config().get_property(ov::enable_profiling)) + , _enable_profiling(program->get_config().m_enable_profiling) , _reset_arguments(true) - , _shape_predictor(new ShapePredictor(&program->get_engine(), 
program->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) { + , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().m_buffers_preallocation_ratio)) { if (!_internal) { net_id = get_unique_net_id(); } @@ -398,7 +398,7 @@ void network::calculate_weights_cache_capacity() { } // Sum all weights constants for each stream - required_mem_size += weights_const_size * _config.get_property(ov::streams::num); + required_mem_size += weights_const_size * _config.m_num_streams.value; // Add all other constants (shared between streams) required_mem_size += total_const_size - weights_const_size; diff --git a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp index 2732476a063f8f..725f3333a0f132 100644 --- a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp @@ -157,7 +157,7 @@ void non_max_suppression_gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[i]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[i].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[i] = {_network.get_engine().reinterpret_buffer(input_memory(i), _impl_params->get_output_layout(i))}; diff --git a/src/plugins/intel_gpu/src/graph/permute.cpp b/src/plugins/intel_gpu/src/graph/permute.cpp index bf87e78e4bbbc5..41252797f09eca 100644 --- a/src/plugins/intel_gpu/src/graph/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/permute.cpp @@ -146,7 +146,7 @@ void permute_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
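The weights-cache sizing above follows a simple rule: weight constants are duplicated per stream, everything else is shared once. For example, 100 MB of weight constants, 120 MB of constants in total, and m_num_streams = 2 gives 100 * 2 + (120 - 100) = 220 MB. As a sketch:

#include <cstddef>
#include <iostream>

// Sizing rule from network::calculate_weights_cache_capacity() above.
size_t required_mem_size(size_t weights_const_size, size_t total_const_size, size_t num_streams) {
    size_t required = weights_const_size * num_streams;  // weights duplicated per stream
    required += total_const_size - weights_const_size;   // other constants are shared
    return required;
}

int main() {
    std::cout << required_mem_size(100, 120, 2) << " MB\n";  // prints 220 MB
}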
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index bcf468cea97d03..2f2a755c675cf1 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -163,7 +163,7 @@ static memory::ptr get_memory_from_pool(engine& _engine, OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout without upper bound"); // Use layout with max tensor for dynamic shape with upper bound - if (_node.get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + if (_node.get_program().get_config().m_enable_memory_pool) { if (curr_memory != nullptr) pool.release_memory(curr_memory, _node.get_unique_id(), _node.id(), net_id); return pool.get_memory(layout, @@ -2042,7 +2042,7 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool : _network(network) , _node(&node) , _node_output_layout(node.get_output_layout()) - , _use_shared_kernels(node.get_program().get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)) + , _use_shared_kernels(node.get_program().get_config().m_enable_kernels_reuse) , _impl_params(node.get_kernel_impl_params()) , _impl(node.get_selected_impl() ? node.get_selected_impl()->clone() : nullptr) , _runtime_memory_dependencies(node.get_memory_dependencies()) @@ -2596,8 +2596,8 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() { ExecutionConfig subgraph_config{ ov::intel_gpu::allow_static_input_reorder(true), ov::intel_gpu::allow_new_shape_infer(true), - ov::enable_profiling(get_network().get_config().get_property(ov::enable_profiling)), - ov::intel_gpu::use_onednn(get_network().get_config().get_property(ov::intel_gpu::use_onednn)) + ov::enable_profiling(get_network().get_config().m_enable_profiling), + ov::intel_gpu::use_onednn(get_network().get_config().m_use_onednn) }; auto prog = program::build_program(get_network().get_engine(), t, diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 800ac5ce997d6c..a4068e408779c5 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -108,8 +108,8 @@ using namespace cldnn; using namespace ov::intel_gpu; static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) { - int streams = (num_streams > 0) ? num_streams : config.get_property(ov::compilation_num_threads); - auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority); + int streams = (num_streams > 0) ? 
num_streams : config.m_compilation_num_threads.value; + auto priority = config.m_host_task_priority; auto core_type = ov::hint::SchedulingCoreType::ANY_CORE; switch (priority) { case ov::hint::Priority::LOW: core_type = ov::hint::SchedulingCoreType::ECORE_ONLY; break; @@ -117,7 +117,7 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E case ov::hint::Priority::HIGH: core_type = ov::hint::SchedulingCoreType::PCORE_ONLY; break; default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority); } - bool enable_cpu_pinning = config.get_property(ov::hint::enable_cpu_pinning); + bool enable_cpu_pinning = config.m_enable_cpu_pinning; ov::threading::IStreamsExecutor::Config task_executor_config(tags, streams, @@ -216,7 +216,7 @@ program::program(engine& engine, const ExecutionConfig& config) init_primitives(); auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); _config.finalize(ctx, {}); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); + new_shape_infer = _config.m_allow_new_shape_infer; _layout_optimizer = std::make_unique(); } @@ -228,7 +228,7 @@ void program::init_program() { set_options(); pm = std::unique_ptr(new pass_manager(*this)); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); + new_shape_infer = _config.m_allow_new_shape_infer; if (_task_executor == nullptr) _task_executor = program::make_task_executor(_config); @@ -236,7 +236,7 @@ void program::init_program() { kernel_selector::KernelBase::get_db().get_batch_headers(), kernel_selector::KernelBase::get_db().get_cm_batch_headers())); - _kernels_cache->set_kernels_reuse(get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)); + _kernels_cache->set_kernels_reuse(get_config().m_enable_kernels_reuse); if (!_compilation_context) _compilation_context = program::make_compilation_context(_config); @@ -490,13 +490,13 @@ void program::set_options() { static std::atomic id_gen{0}; prog_id = ++id_gen; assert(prog_id != 0); - if (!_config.get_property(ov::intel_gpu::force_implementations).empty()) { - _config.set_property(ov::intel_gpu::optimize_data(true)); + if (!_config.m_force_implementations.value.empty()) { + _config.m_optimize_data = true; } GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - _config.set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); + _config.m_dump_graphs = debug_config->dump_graphs; } } @@ -534,8 +534,7 @@ void program::init_graph() { if (!node->is_type()) node->get_output_layouts(); if (node->is_type()) { - _config.set_property(ov::intel_gpu::use_onednn(true)); - _config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + _config.m_use_onednn = true; } } // Perform initial shape_of subgraphs markup @@ -554,7 +553,7 @@ void program::pre_optimize_graph(bool is_internal) { bool output_size_handling_enabled = analyze_output_size_handling_need(); - bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = _config.m_optimize_data; if (optimize_data) { apply_opt_pass(); } @@ -631,7 +630,7 @@ void program::post_optimize_graph(bool is_internal) { reorder_factory rf; - bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = _config.m_optimize_data; if (!is_internal) { apply_opt_pass(rf); @@ -639,7 +638,7 @@ void program::post_optimize_graph(bool is_internal) { apply_opt_pass(false, true); // TODO: do we need it at 
this place also? - auto partial_build = _config.get_property(ov::intel_gpu::partial_build_program); + auto partial_build = _config.m_partial_build_program; #ifdef GPU_DEBUG_CONFIG GPU_DEBUG_GET_INSTANCE(debug_config); if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) { @@ -658,7 +657,7 @@ void program::post_optimize_graph(bool is_internal) { // Recalculate processing order after all graph transformation to keep optimal primitives ordering // for OOO queue - if (_config.get_property(ov::intel_gpu::queue_type) == QueueTypes::out_of_order) + if (_config.m_queue_type == QueueTypes::out_of_order) get_processing_order().calculate_BFS_processing_order(); apply_opt_pass(); @@ -782,7 +781,7 @@ const std::vector& program::get_allocating_order(bool forced_updat } void program::prepare_memory_dependencies() { - if (!_config.get_property(ov::intel_gpu::enable_memory_pool)) + if (!_config.m_enable_memory_pool) return; for (auto& node : get_processing_order()) { node->add_memory_dependency(node->get_unique_id()); @@ -1414,7 +1413,7 @@ program::primitives_info program::get_current_stage_info() const { void program::save_pass_info(std::string pass_name) { // TODO: Directory path here can be probably changed to some bool flag - if (!_config.get_property(ov::intel_gpu::dump_graphs).empty()) + if (!_config.m_dump_graphs.value.empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } @@ -1442,7 +1441,7 @@ const program::primitives_info& program::get_primitives_info() const { return pr void program::apply_opt_pass(base_pass& pass) { pm->run(*this, pass); } void program::set_layout_optimizer_attributes(layout_optimizer& lo) { - lo.set_implementation_forcing(_config.get_property(ov::intel_gpu::force_implementations)); + lo.set_implementation_forcing(_config.m_force_implementations); // first pass to set layout optimization_attributes for topology @@ -1668,15 +1667,15 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1); #ifdef ENABLE_ONEDNN_FOR_GPU - bool enable_onednn_for_tests = get_config().get_property(ov::intel_gpu::optimize_data) || is_internal_program(); + bool enable_onednn_for_tests = get_config().m_optimize_data || is_internal_program(); auto& engine = get_engine(); if (engine.get_device_info().vendor_id == INTEL_VENDOR_ID && - get_config().get_property(ov::intel_gpu::queue_type) == QueueTypes::in_order && + get_config().m_queue_type == QueueTypes::in_order && enable_onednn_for_tests) { if (engine.get_device_info().supports_immad) { lo.add_all_onednn_impls_optimization_attribute(); } else { - if (get_config().get_property(ov::intel_gpu::use_onednn)) { + if (get_config().m_use_onednn) { lo.enable_onednn_for(); } } @@ -1890,8 +1889,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) { init_program(); std::shared_ptr mapped_memory = nullptr; - std::string weights_path = _config.get_property(ov::weights_path); - if (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && + std::string weights_path = _config.m_weights_path; + if (_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE && ov::util::validate_weights_path(weights_path)) { mapped_memory = ov::load_mmap_object(weights_path); } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index 6a09fcd10eb513..a14cbfd0139d29 100644 --- 
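program::load() above only memory-maps external weights when both conditions hold: the blob was produced with ov::CacheMode::OPTIMIZE_SIZE and ov::weights_path points at a usable file. A sketch of that gate (validate_weights_path() stands in for the ov::util helper; the returned string is a placeholder for the mapped object):

#include <fstream>
#include <memory>
#include <string>

enum class CacheMode { OPTIMIZE_SIZE, OPTIMIZE_SPEED };

// Assumed stand-in for ov::util::validate_weights_path()
bool validate_weights_path(const std::string& path) {
    return !path.empty() && std::ifstream(path).good();
}

std::shared_ptr<std::string> maybe_map_weights(CacheMode mode, const std::string& weights_path) {
    // Weightless caching: only OPTIMIZE_SIZE blobs reference external weights
    if (mode == CacheMode::OPTIMIZE_SIZE && validate_weights_path(weights_path)) {
        // real code: ov::load_mmap_object(weights_path)
        return std::make_shared<std::string>(weights_path);
    }
    return nullptr;  // weights are embedded in the blob (OPTIMIZE_SPEED)
}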
a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -157,7 +157,7 @@ void dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - auto path = config.get_property(ov::intel_gpu::dump_graphs); + auto path = config.m_dump_graphs.value; if (path.empty()) { return {}; } diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 76b15ca54bcff8..c3b7c472d59437 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -287,7 +287,7 @@ void reorder_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index fc874e29f70ac4..1250a73e881e9a 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -320,7 +320,7 @@ void reshape_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp index 316acef0e492e8..7ddea056745a53 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp @@ -75,7 +75,7 @@ void scatter_elements_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp index 2f109f83df428f..56c8bf6607c4a3 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp @@ -86,7 +86,7 @@ void scatter_nd_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_update.cpp index 4291ee67caa3ef..0d34b8f95e9f1a 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_update.cpp @@ -66,7 +66,7 @@ void scatter_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index 5435de5598bea0..35d1db8fc389b1 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -95,7 +95,7 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p 3, ""); - bool allow_new_shape_infer = network.get_program()->get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + bool allow_new_shape_infer = network.get_program()->get_config().m_allow_new_shape_infer; // Broadcast check is performed in ngraph shape infer of select when allow_new_shape_infer=true if (!allow_new_shape_infer) { if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) { diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index 007553b8a9d192..baff08bc8de82e 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -208,7 +208,7 @@ void strided_slice_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
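The same two-step pattern repeats across the broadcast, crop, gather, scatter_*, select, strided_slice and reorder hunks in this patch: when a can_be_optimized node owned a pool-backed output, that allocation is explicitly returned to the pool before the output is re-pointed at (a reinterpretation of) the input buffer. Schematically (simplified signatures):

#include <memory>
#include <vector>

struct Memory {};
struct MemoryPool {
    void release_memory(Memory* /*mem*/, int /*unique_id*/, int /*net_id*/) {}
};

// Sketch of the shared update_output_memory() pattern (simplified API).
void update_output_memory_sketch(std::vector<std::shared_ptr<Memory>>& outputs,
                                 MemoryPool& pool,
                                 bool enable_memory_pool,
                                 std::shared_ptr<Memory> reinterpreted_input) {
    // 1) Return the stale pool allocation, but only when pooling is on:
    //    with m_enable_memory_pool == false the old output was never pooled.
    if (!outputs.empty() && outputs[0] && enable_memory_pool)
        pool.release_memory(outputs[0].get(), /*unique_id*/ 0, /*net_id*/ 0);
    // 2) Alias the output to the (reinterpreted) input memory of the
    //    can_be_optimized node.
    outputs[0] = reinterpreted_input;
}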
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().m_enable_memory_pool) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index d273ba2b1df6a6..8bd73dbdce7029 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -20,7 +20,7 @@ namespace ov::intel_gpu { namespace { std::shared_ptr create_task_executor(const std::shared_ptr& plugin, const ExecutionConfig& config) { - if (config.get_property(ov::internal::exclusive_async_requests)) { + if (config.m_exclusive_async_requests) { // exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with // the CPU behavior return plugin->get_executor_manager()->get_executor("GPU"); @@ -30,7 +30,7 @@ std::shared_ptr create_task_executor(const std::sh bool enable_cpu_reservation = config.get_property(ov::hint::enable_cpu_reservation); return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.get_property(ov::num_streams), + config.m_num_streams.value, 1, ov::hint::SchedulingCoreType::PCORE_ONLY, enable_cpu_reservation, @@ -38,7 +38,7 @@ std::shared_ptr create_task_executor(const std::sh } else { return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.get_property(ov::num_streams), + config.m_num_streams.value, 0, ov::hint::SchedulingCoreType::ANY_CORE, false, @@ -62,7 +62,7 @@ CompiledModel::CompiledModel(std::shared_ptr model, m_outputs(ov::ICompiledModel::outputs()), m_loaded_from_cache(false) { auto graph_base = std::make_shared(model, m_context, m_config, 0); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + for (uint16_t n = 0; n < m_config.m_num_streams.value; n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -157,7 +157,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, } auto graph_base = std::make_shared(ib, context, m_config, 0); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + for (uint16_t n = 0; n < m_config.m_num_streams.value; n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -179,8 +179,8 @@ std::shared_ptr CompiledModel::create_infer_request() co void CompiledModel::export_model(std::ostream& model) const { // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching // which requires the weights_path. 
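create_task_executor() above picks one of three executor shapes from the finalized config: exclusive_async_requests falls back to the shared "GPU" executor, CPU reservation pins one thread per stream to performance cores, and the default runs plain streams on any core. A sketch, together with the optimal_number_of_infer_requests derivation that follows in this diff (doubling for non-LATENCY hints is the plugin's heuristic; the "keep streams busy" rationale is assumed):

#include <iostream>

enum class SchedulingCoreType { ANY_CORE, PCORE_ONLY };
enum class PerformanceMode { LATENCY, THROUGHPUT };

struct ExecutorConfigSketch {
    int streams;
    int threads_per_stream;
    SchedulingCoreType core_type;
    bool cpu_reservation;
};

ExecutorConfigSketch make_executor_config(int num_streams, bool cpu_reservation) {
    if (cpu_reservation)
        return {num_streams, 1, SchedulingCoreType::PCORE_ONLY, true};
    return {num_streams, 0, SchedulingCoreType::ANY_CORE, false};
}

unsigned optimal_nireq(unsigned num_streams, PerformanceMode mode) {
    unsigned nr = num_streams;
    if (mode != PerformanceMode::LATENCY)
        nr *= 2;  // two requests per stream so the device queue is never starved
    return nr;
}

int main() {
    std::cout << optimal_nireq(2, PerformanceMode::THROUGHPUT) << "\n";  // prints 4
}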
- ov::CacheMode cache_mode = m_config.get_property(ov::cache_mode); - std::string weights_path = m_config.get_property(ov::weights_path); + ov::CacheMode cache_mode = m_config.m_cache_mode; + std::string weights_path = m_config.m_weights_path; if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) return; @@ -188,7 +188,7 @@ void CompiledModel::export_model(std::ostream& model) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); - const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks); + const ov::EncryptionCallbacks encryption_callbacks = m_config.m_cache_encryption_callbacks; // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty. const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; @@ -290,8 +290,8 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } else if (name == ov::loaded_from_cache) { return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache}; } else if (name == ov::optimal_number_of_infer_requests) { - unsigned int nr = m_config.get_property(ov::num_streams); - if (m_config.get_property(ov::hint::performance_mode) != ov::hint::PerformanceMode::LATENCY) + unsigned int nr = m_config.m_num_streams.value; + if (m_config.m_performance_mode != ov::hint::PerformanceMode::LATENCY) nr *= 2; return decltype(ov::optimal_number_of_infer_requests)::value_type {nr}; } else if (name == ov::execution_devices) { diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 6859728076fb6a..ab69fd5d4e26f8 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -86,13 +86,9 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context } } { - bool bool_prop_value; - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::partial_build_program(bool_prop_value)); - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::optimize_data(bool_prop_value)); - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::allow_new_shape_infer(bool_prop_value)); + ib >> m_config.m_partial_build_program.value; + ib >> m_config.m_optimize_data.value; + ib >> m_config.m_allow_new_shape_infer.value; } auto imported_prog = std::make_shared(get_engine(), m_config); @@ -177,7 +173,7 @@ void Graph::build(std::shared_ptr program) { auto external_queue = m_context->get_external_queue(); if (external_queue) { - OPENVINO_ASSERT(m_config.get_property(ov::num_streams) == 1, "[GPU] Throughput streams can't be used with shared queue!"); + OPENVINO_ASSERT(m_config.m_num_streams == 1, "[GPU] Throughput streams can't be used with shared queue!"); const auto &engine = program->get_engine(); m_network = std::make_shared(program, engine.create_stream(m_config, external_queue), m_stream_id); } else { @@ -209,7 +205,7 @@ bool Graph::use_external_queue() const { std::shared_ptr Graph::get_runtime_model(std::vector& primitives_info, bool filter_const_primitives) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::get_runtime_model"); - if (m_config.get_property(ov::enable_profiling)) { + if (m_config.m_enable_profiling) { try { // Update may throw an exception for step-by-step runtime graph dump, // since network->get_executed_primitives() method can't be called 
before network execution @@ -521,9 +517,9 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) { } } { - ob << m_config.get_property(ov::intel_gpu::partial_build_program); - ob << m_config.get_property(ov::intel_gpu::optimize_data); - ob << m_config.get_property(ov::intel_gpu::allow_new_shape_infer); + ob << m_config.m_partial_build_program.value; + ob << m_config.m_optimize_data.value; + ob << m_config.m_allow_new_shape_infer.value; } ob.set_stream(m_network->get_stream_ptr().get()); diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 825b3ddfbc7282..889a066aa74325 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -22,12 +22,12 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ auto config = p.get_config(); { - auto custom_outputs = config.get_property(ov::intel_gpu::custom_outputs); + auto custom_outputs = config.m_custom_outputs.value; if (!custom_outputs.empty()) { - config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); + config.m_custom_outputs = std::vector({}); } } - config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); + config.m_allow_new_shape_infer = op->is_dynamic() || p.use_new_shape_infer(); ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 6e18486ee1d738..5d808f4d041275 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -298,8 +298,8 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr("runtime_options"); - return {}; + rt_info = model.get_rt_info("runtime_options"); + + if (model.has_rt_info("__weights_path")) { + rt_info[ov::weights_path.name()] = model.get_rt_info("__weights_path"); + } + return rt_info; } } // namespace @@ -205,22 +210,6 @@ Plugin::Plugin() { m_compiled_model_runtime_properties["OV_VERSION"] = ov_version.buildNumber; } -void Plugin::set_cache_info(const std::shared_ptr& model, ExecutionConfig& config) const { - // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with - // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not - // using that mechanism. 
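The helper removed here is superseded by rt_info plumbing: get_rt_info() above folds the model's "__weights_path" into the "runtime_options" map, and ExecutionConfig::apply_rt_info() (see the execution_config.cpp hunk below) picks up ov::weights_path from it, keeping the same OPTIMIZE_SIZE-only guard. The resulting compile flow, condensed from this patch:

    ExecutionConfig config = m_configs_map.at(device_id);
    config.set_property(orig_config);               // user-supplied options
    config.finalize(context, get_rt_info(*model));  // hints and rt_info resolved in one place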
- if (config.get_property(ov::cache_mode) != ov::CacheMode::OPTIMIZE_SIZE) { - return; - } - - const auto& rt_info = model->get_rt_info(); - auto weights_path = rt_info.find("__weights_path"); - if (weights_path != rt_info.end()) { - ov::AnyMap weights_path_property{{"WEIGHTS_PATH", weights_path->second}}; - config.set_property(weights_path_property); - } -} - std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model"); std::string device_id = get_device_id(orig_config); @@ -230,11 +219,9 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); + config.set_property(orig_config); config.finalize(context, get_rt_info(*model)); - set_cache_info(model, config); - auto transformed_model = clone_and_transform_model(model, config, context); { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model::CreateCompiledModel"); @@ -252,7 +239,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< ExecutionConfig config = m_configs_map.at(device_id); config.finalize(context_impl, get_rt_info(*model)); - set_cache_info(model, config); auto transformed_model = clone_and_transform_model(model, config, context_impl); return std::make_shared(transformed_model, shared_from_this(), context_impl, config); @@ -282,7 +268,7 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params) void Plugin::set_property(const ov::AnyMap &config) { auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) { - config.set_user_property(user_config); + config.set_property(user_config); // Check that custom layers config can be loaded if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { CustomLayerMap custom_layers; @@ -317,12 +303,12 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& auto ctx = get_default_context(device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); + config.set_property(orig_config); config.finalize(ctx, get_rt_info(*model)); ProgramBuilder prog(ctx->get_engine(), config); - float query_model_ratio = config.get_property(ov::internal::query_model_ratio.name()).as(); + float query_model_ratio = config.m_query_model_ratio; auto supported = ov::get_supported_nodes(model, [&config,&ctx,this](std::shared_ptr& model) { @@ -372,11 +358,11 @@ std::shared_ptr Plugin::import_model(std::istream& model, } ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(_orig_config); + config.set_property(_orig_config); config.finalize(context_impl, {}); - ov::CacheMode cache_mode = config.get_property(ov::cache_mode); - ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks); + ov::CacheMode cache_mode = config.m_cache_mode; + ov::EncryptionCallbacks encryption_callbacks = config.m_cache_encryption_callbacks; const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; std::unique_ptr ib_ptr = @@ -393,9 +379,8 @@ std::shared_ptr Plugin::import_model(std::istream& model, return nullptr; } - std::string weights_path = config.get_property(ov::weights_path); - if (config.get_property(ov::cache_mode) == 
ov::CacheMode::OPTIMIZE_SIZE && - !ov::util::validate_weights_path(weights_path)) { + std::string weights_path = config.m_weights_path; + if (config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) { return nullptr; } @@ -695,7 +680,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); const auto& config = m_configs_map.at(device_id); - uint32_t n_streams = static_cast(config.get_property(ov::num_streams)); + uint32_t n_streams = static_cast(config.m_num_streams.value); uint64_t occupied_device_mem = 0; auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>(); auto occupied_usm_dev = statistic_result.find("usm_device_current"); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 2abc8bb65df6ac..a9f4fd17f2be2f 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -105,7 +105,7 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml"; CustomLayer::LoadFromFile(config_path, m_custom_layers, true); - auto custom_layers_config = m_config.get_property(ov::intel_gpu::config_file); + auto custom_layers_config = m_config.m_config_file.value; CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); auto ops = model->get_ordered_ops(); @@ -113,9 +113,9 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& // smaller # of kernels are built compared to static models. // So having smaller batch size is even better for dynamic model as we can do more parallel build. 
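Direct writes like the ones just below assign straight to the option object, which presupposes that ConfigOption is assignable from its value type. A sketch of the minimal shape these call sites rely on (assumed, not shown in this patch):

    template <typename T>
    struct ConfigOption {
        T value;
        // Allows m_config.m_max_kernels_per_batch = 4; without going through ov::Any.
        ConfigOption& operator=(const T& v) {
            value = v;
            return *this;
        }
    };

Reads use the public value field (m_num_streams.value and similar), so neither direction pays for a string lookup or an ov::Any round-trip.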
if (model->is_dynamic()) { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(4)); + m_config.m_max_kernels_per_batch = 4; } else { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(8)); + m_config.m_max_kernels_per_batch = 8; } m_program = build(ops, partial_build, is_inner_program); @@ -160,12 +160,12 @@ std::shared_ptr ProgramBuilder::build(const std::vectororigin_op_name = op.get_friendly_name(); prim->origin_op_type_name = op.get_type_name(); - if (this->m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + if (this->m_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE) { if (auto data_prim = dynamic_cast(prim.get())) { auto rt_info = op.get_rt_info(); @@ -340,7 +340,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrorigin_op_type_name = prim->type_string(); } - if (this->m_config.get_property(ov::enable_profiling) && should_profile) { + if (this->m_config.m_enable_profiling && should_profile) { profiling_ids.push_back(prim_id); init_profile_info(*prim); } diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 41b358b6d7faad..6d74722a5b435a 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -113,8 +113,8 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c : ov::ISyncInferRequest(compiled_model) , m_graph(compiled_model->get_graph(0)) , m_context(std::static_pointer_cast(compiled_model->get_context_impl())) - , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) - , m_enable_profiling(m_graph->get_config().get_property(ov::enable_profiling)) + , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().m_buffers_preallocation_ratio)) + , m_enable_profiling(m_graph->get_config().m_enable_profiling) , m_use_external_queue(m_graph->use_external_queue()) { GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { @@ -422,7 +422,7 @@ void SyncInferRequest::wait() { auto mem_shape = output_layout.get_shape(); // In case of old shape infer we need to shrink out tensor shape to avoid redudnant dimensions that occur due to rank extension // For new shape infer this shouldn't happen, thus remove that WA once we migrate to ngraph-based shape infer for all cases - if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!m_graph->get_config().m_allow_new_shape_infer) { OPENVINO_ASSERT(port.get_partial_shape().is_static(), "[GPU] Unexpected dynamic shape for legacy shape inference"); OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor"); mem_shape = port.get_shape(); @@ -895,7 +895,7 @@ std::vector SyncInferRequest::prepare_input(const std::string auto memory = device_tensor->get_memory(); // WA to extend shape to ranks expected by legacy shape infer. 
Remove after full migration to new shape infer - if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!m_graph->get_config().m_allow_new_shape_infer) { auto new_layout = memory->get_layout(); new_layout.set_partial_shape(m_graph->get_input_layouts().at(input_idx).get_shape()); memory = engine.reinterpret_buffer(*memory, new_layout); diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index f036afc0cd59ad..c9e7c155158448 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -289,7 +289,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const ov::element::TypeVector supported_woq_types = {ov::element::u8, ov::element::i8, ov::element::u4, ov::element::i4}; bool enableInt8; ov::element::Type infer_precision = ov::element::undefined; - bool unroll_loop = config.get_property(ov::intel_gpu::enable_loop_unrolling); + bool unroll_loop = config.m_enable_loop_unrolling; { ov::pass::Manager manager("Plugin:GPU"); auto pass_config = manager.get_pass_config(); @@ -302,7 +302,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); - enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && is_model_quantized; + enableInt8 = config.m_enable_lp_transformations && is_model_quantized; manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }, @@ -335,7 +335,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { }; // Add conversion from FP data types to infer precision if it's specified - infer_precision = config.get_property(ov::hint::inference_precision); + infer_precision = config.m_inference_precision; if (infer_precision != ov::element::undefined) { if (!fp_precision_supported(infer_precision)) infer_precision = fallback_precision; @@ -416,7 +416,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1); } - if (!config.get_property(ov::intel_gpu::hint::enable_sdpa_optimization)) + if (!config.m_enable_sdpa_optimization) return false; auto sdpa = ov::as_type_ptr(node); @@ -1032,7 +1032,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - auto kv_cache_compression_dt = config.get_property(ov::hint::kv_cache_precision); + auto kv_cache_compression_dt = config.m_kv_cache_precision; manager.register_pass(kv_cache_compression_dt, device_info.supports_immad); manager.register_pass(); @@ -1052,7 +1052,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { - auto dynamic_quantization_group_size = config.get_property(ov::hint::dynamic_quantization_group_size); + auto dynamic_quantization_group_size = config.m_dynamic_quantization_group_size; pass_config->set_callback([=](const_node_ptr& root) -> bool { for (size_t i = 0 ; i < root->get_input_node_shared_ptr(0)->get_output_size(); ++i) { if (root->get_input_node_shared_ptr(0)->get_output_element_type(i) == ov::element::Type_t::f32) { diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index ff420a0e66c1b9..587bf23a23d15c 100644 --- 
a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -7,6 +7,8 @@ #include "openvino/core/any.hpp" #include "openvino/runtime/internal_properties.hpp" #include "intel_gpu/runtime/internal_properties.hpp" +#include "openvino/runtime/plugin_config.hpp" +#include "openvino/runtime/properties.hpp" namespace ov::intel_gpu { @@ -39,29 +41,36 @@ void ExecutionConfig::apply_rt_info(std::shared_ptr context, con apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); } apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); + + // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with + // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not + // using that mechanism. + if (get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + apply_rt_info_property(ov::weights_path, rt_info); + } } void ExecutionConfig::finalize_impl(std::shared_ptr context) { const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); apply_hints(info); if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); + m_enable_lp_transformations = info.supports_imad || info.supports_immad; } if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); + m_use_onednn = true; } if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + m_queue_type = QueueTypes::in_order; } // Enable KV-cache compression by default for non-systolic platforms if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); + m_kv_cache_precision = ov::element::i8; } // Enable dynamic quantization by default for non-systolic platforms if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); + m_dynamic_quantization_group_size = 32; } } @@ -76,12 +85,12 @@ void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { const auto mode = get_property(ov::hint::execution_mode); if (!is_set_by_user(ov::hint::inference_precision)) { if (mode == ov::hint::ExecutionMode::ACCURACY) { - set_property(ov::hint::inference_precision(ov::element::undefined)); + m_inference_precision = ov::element::undefined; } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) { if (info.supports_fp16) - set_property(ov::hint::inference_precision(ov::element::f16)); + m_inference_precision = ov::element::f16; else - set_property(ov::hint::inference_precision(ov::element::f32)); + m_inference_precision = ov::element::f32; } } } @@ -92,26 +101,26 @@ void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { const auto mode = get_property(ov::hint::performance_mode); if (!is_set_by_user(ov::num_streams)) { if (mode == ov::hint::PerformanceMode::LATENCY) { - set_property(ov::num_streams(1)); + m_num_streams = 1; } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { - set_property(ov::num_streams(ov::streams::AUTO)); + m_num_streams = ov::streams::AUTO; } } } if (get_property(ov::num_streams) == ov::streams::AUTO) { int32_t n_streams = std::max(info.num_ccs, 2); - set_property(ov::num_streams(n_streams)); + m_num_streams = n_streams; } if (get_property(ov::internal::exclusive_async_requests)) { - 
set_property(ov::num_streams(1)); + m_num_streams = 1; } // Allow kernels reuse only for single-stream scenarios if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { if (get_property(ov::num_streams) != 1) { - set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); + m_enable_kernels_reuse = false; } } } @@ -120,9 +129,18 @@ void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::model_priority)) { const auto priority = get_property(ov::hint::model_priority); if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { - set_property(ov::intel_gpu::hint::queue_priority(priority)); + m_queue_priority = priority; } } } +const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const { + static ov::PluginConfig::OptionsDesc help_map { + #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_OPTION + }; + return help_map; +} + } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index 11fab0106bff83..b954f494abfe5a 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -64,7 +64,7 @@ void ocl_engine::create_onednn_engine(const ExecutionConfig& config) { auto casted = std::dynamic_pointer_cast(_device); OPENVINO_ASSERT(casted, "[GPU] Invalid device type stored in ocl_engine"); - std::string cache_dir = config.get_property(ov::cache_dir); + std::string cache_dir = config.m_cache_dir; if (cache_dir.empty()) { _onednn_engine = std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get())); } else { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp index e227c94c7dc06d..61844cd640ea41 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp @@ -189,22 +189,22 @@ void set_arguments_impl(ocl_kernel_type& kernel, } // namespace ocl_stream::ocl_stream(const ocl_engine &engine, const ExecutionConfig& config) - : stream(config.get_property(ov::intel_gpu::queue_type), stream::get_expected_sync_method(config)) + : stream(config.m_queue_type, stream::get_expected_sync_method(config)) , _engine(engine) { auto context = engine.get_cl_context(); auto device = engine.get_cl_device(); ocl::command_queues_builder queue_builder; - queue_builder.set_profiling(config.get_property(ov::enable_profiling)); + queue_builder.set_profiling(config.m_enable_profiling); queue_builder.set_out_of_order(m_queue_type == QueueTypes::out_of_order); OPENVINO_ASSERT(m_sync_method != SyncMethods::none || m_queue_type == QueueTypes::in_order, "[GPU] Unexpected sync method (none) is specified for out_of_order queue"); bool priorty_extensions = engine.extension_supported("cl_khr_priority_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_priority_mode(config.get_property(ov::intel_gpu::hint::queue_priority), priorty_extensions); + queue_builder.set_priority_mode(config.m_queue_priority, priorty_extensions); bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_throttle_mode(config.get_property(ov::intel_gpu::hint::queue_throttle), throttle_extensions); + 
queue_builder.set_throttle_mode(config.m_queue_throttle, throttle_extensions); bool queue_families_extension = engine.get_device_info().supports_queue_families; queue_builder.set_supports_queue_families(queue_families_extension); diff --git a/src/plugins/intel_gpu/src/runtime/stream.cpp b/src/plugins/intel_gpu/src/runtime/stream.cpp index 0516a551f19177..2043afb9f3869c 100644 --- a/src/plugins/intel_gpu/src/runtime/stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/stream.cpp @@ -20,8 +20,8 @@ QueueTypes stream::detect_queue_type(engine_types engine_type, void* queue_handl } SyncMethods stream::get_expected_sync_method(const ExecutionConfig& config) { - auto profiling = config.get_property(ov::enable_profiling); - auto queue_type = config.get_property(ov::intel_gpu::queue_type); + auto profiling = config.m_enable_profiling; + auto queue_type = config.m_queue_type; return profiling ? SyncMethods::events : queue_type == QueueTypes::out_of_order ? SyncMethods::barriers : SyncMethods::none; } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index 87d4c4ed7f0a2d..00fd36f16aaf9c 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1555,7 +1555,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_property(ov::hint::dynamic_quantization_group_size(32)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1643,7 +1643,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1669,7 +1669,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1753,7 +1753,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); 
network.set_input_data("input", input_mem); @@ -1780,9 +1780,9 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); if (is_dyn_quan) { - config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_property(ov::hint::dynamic_quantization_group_size(32)); } else { - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); } network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1923,7 +1923,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl = { in_layout.format, "", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim1", fc_impl }, { "fc_prim2", fc_impl } })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1952,7 +1952,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -2905,7 +2905,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -2931,7 +2931,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); @@ -3031,7 +3031,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bf_tiled", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -3057,7 +3057,7 @@ class fully_connected_gpu_tests: 
public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); From a899aca9403e1db5879f02a3ef44f4b143ebe519 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 23 Dec 2024 13:23:35 +0400 Subject: [PATCH 10/44] refactor Signed-off-by: Vladimir Paramuzov --- src/inference/dev_api/openvino/runtime/plugin_config.hpp | 3 --- src/inference/src/dev/plugin_config.cpp | 5 ++++- src/plugins/intel_gpu/src/graph/program.cpp | 5 ----- src/plugins/intel_gpu/src/plugin/program_builder.cpp | 1 + src/plugins/intel_gpu/src/runtime/execution_config.cpp | 7 +++++++ 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index a1bcab62b5d5fd..04e384cc26d35e 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -197,7 +197,6 @@ class OPENVINO_RUNTIME_API PluginConfig { virtual void apply_debug_options(std::shared_ptr context); virtual void finalize_impl(std::shared_ptr context) {} - template bool is_set_by_user(const ov::Property& property) const { return m_user_properties.find(property.name()) != m_user_properties.end(); @@ -236,12 +235,10 @@ class OPENVINO_RUNTIME_API PluginConfig { // property variable name, string name, default value, description using OptionsDesc = std::vector>; - static OptionsDesc m_options_desc; virtual const OptionsDesc& get_options_desc() const { static OptionsDesc empty; return empty; } const std::string get_help_message(const std::string& name = "") const; void print_help() const; -private: bool m_is_finalized = false; }; diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index e1b09b76ad8235..ca1c87cce1b659 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -98,6 +98,9 @@ void PluginConfig::set_property(const ov::AnyMap& config, const std::vector context, const ov::RTMap& rt_info) { + if (m_is_finalized) + return; + apply_rt_info(context, rt_info); apply_debug_options(context); // Copy internal properties before applying hints to ensure that @@ -122,8 +125,8 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) static std::vector allowed_visibility = { OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL, -#ifdef ENABLE_DEBUG_CAPS OptionVisibility::DEBUG +#ifdef ENABLE_DEBUG_CAPS #endif }; diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index a4068e408779c5..271a866e3b5cf2 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -493,11 +493,6 @@ void program::set_options() { if (!_config.m_force_implementations.value.empty()) { _config.m_optimize_data = true; } - - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - _config.m_dump_graphs = debug_config->dump_graphs; - } } void program::build_program(bool is_internal) { diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp 
b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index a9f4fd17f2be2f..297b9d42266efb 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -11,6 +11,7 @@ #include "openvino/op/loop.hpp" #include "openvino/op/search_sorted.hpp" #include "openvino/op/stft.hpp" +#include "openvino/runtime/properties.hpp" #include "ov_ops/dynamic_quantize.hpp" #include "intel_gpu/plugin/common_utils.hpp" diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 587bf23a23d15c..9885b075109e1a 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -21,6 +21,7 @@ ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { m_user_properties = other.m_user_properties; + m_is_finalized = other.m_is_finalized; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } @@ -28,6 +29,7 @@ ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { m_user_properties = other.m_user_properties; + m_is_finalized = other.m_is_finalized; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } @@ -51,6 +53,11 @@ void ExecutionConfig::apply_rt_info(std::shared_ptr context, con } void ExecutionConfig::finalize_impl(std::shared_ptr context) { + if (m_help) { + print_help(); + exit(-1); + } + const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); apply_hints(info); if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { From b0b5ab271c4ae88d76f98239a97f3d5551fb48e8 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 23 Dec 2024 16:35:56 +0400 Subject: [PATCH 11/44] Hide config class members Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 16 ++++++ src/inference/src/dev/plugin_config.cpp | 14 ++++- .../intel_gpu/runtime/execution_config.hpp | 9 +++- src/plugins/intel_gpu/src/graph/broadcast.cpp | 2 +- src/plugins/intel_gpu/src/graph/crop.cpp | 2 +- src/plugins/intel_gpu/src/graph/eltwise.cpp | 2 +- .../intel_gpu/src/graph/fully_connected.cpp | 2 +- src/plugins/intel_gpu/src/graph/gather.cpp | 2 +- .../graph_optimizer/add_required_reorders.cpp | 2 +- .../graph_optimizer/build_implementations.cpp | 2 +- .../graph_optimizer/graph_initializations.cpp | 4 +- .../graph_optimizer/propagate_constants.cpp | 6 ++- .../select_preferred_formats.cpp | 2 +- .../src/graph/impls/ocl/fully_connected.cpp | 2 +- .../impls/ocl/kernel_selector_helper.cpp | 4 +- .../src/graph/impls/ocl/kernels_cache.cpp | 10 ++-- .../impls/onednn/primitive_onednn_base.h | 8 +-- .../impls/registry/implementation_manager.cpp | 2 +- .../registry/non_max_suppression_impls.cpp | 2 +- .../intel_gpu/src/graph/layout_optimizer.cpp | 2 +- src/plugins/intel_gpu/src/graph/network.cpp | 6 +-- .../src/graph/non_max_suppression.cpp | 2 +- src/plugins/intel_gpu/src/graph/permute.cpp | 2 +- .../intel_gpu/src/graph/primitive_inst.cpp | 8 +-- src/plugins/intel_gpu/src/graph/program.cpp | 54 ++++++++----------- .../src/graph/program_dump_graph.cpp | 2 +- src/plugins/intel_gpu/src/graph/reorder.cpp | 2 +- src/plugins/intel_gpu/src/graph/reshape.cpp | 2 +- .../src/graph/scatter_elements_update.cpp | 2 +- 
 .../intel_gpu/src/graph/scatter_nd_update.cpp |  2 +-
 .../intel_gpu/src/graph/scatter_update.cpp    |  2 +-
 src/plugins/intel_gpu/src/graph/select.cpp    |  2 +-
 .../intel_gpu/src/graph/strided_slice.cpp     |  2 +-
 .../intel_gpu/src/plugin/compiled_model.cpp   | 28 +++++-----
 src/plugins/intel_gpu/src/plugin/graph.cpp    | 16 ++----
 .../intel_gpu/src/plugin/ops/condition.cpp    | 11 ++--
 src/plugins/intel_gpu/src/plugin/ops/loop.cpp |  5 +-
 src/plugins/intel_gpu/src/plugin/plugin.cpp   | 12 ++---
 .../intel_gpu/src/plugin/program_builder.cpp  | 43 +++++++++------
 .../src/plugin/sync_infer_request.cpp         |  8 +--
 .../src/plugin/transformations_pipeline.cpp   | 12 ++---
 .../src/runtime/execution_config.cpp          | 13 ++++-
 .../intel_gpu/src/runtime/ocl/ocl_engine.cpp  |  2 +-
 .../intel_gpu/src/runtime/ocl/ocl_stream.cpp  |  8 +--
 src/plugins/intel_gpu/src/runtime/stream.cpp  |  4 +-
 45 files changed, 191 insertions(+), 154 deletions(-)

diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
index 04e384cc26d35e..9e566b216590cb 100644
--- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp
+++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
@@ -6,6 +6,7 @@
 #include
 #include

+#include "openvino/core/attribute_visitor.hpp"
 #include "openvino/runtime/iremote_context.hpp"
 #include "openvino/runtime/properties.hpp"
 #include "openvino/core/except.hpp"
@@ -47,6 +48,19 @@
 #define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \
     ConfigOption m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)};

+#define OV_CONFIG_DECLARE_GETTERS(PropertyNamespace, PropertyVar, Visibility, ...) \
+    const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const { \
+        if (m_is_finalized) { \
+            return m_ ## PropertyVar.value; \
+        } else { \
+            if (m_user_properties.find(PropertyNamespace::PropertyVar.name()) != m_user_properties.end()) { \
+                return m_user_properties.at(PropertyNamespace::PropertyVar.name()).as<decltype(PropertyNamespace::PropertyVar)::value_type>(); \
+            } else { \
+                return m_ ## PropertyVar.value; \
+            } \
+        } \
+    }
+
 #define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) \
     m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar;

@@ -192,6 +206,8 @@ class OPENVINO_RUNTIME_API PluginConfig {

     void finalize(std::shared_ptr context, const ov::RTMap& rt_info);

+    bool visit_attributes(ov::AttributeVisitor& visitor) const;
+
 protected:
     virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {}
     virtual void apply_debug_options(std::shared_ptr context);
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index ca1c87cce1b659..b21547f40a57df 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -67,7 +67,7 @@ ov::Any PluginConfig::get_property(const std::string& name, const std::vector
-    const static std::vector allowed_visibility = {OptionVisibility::RELEASE};
+    const static std::vector allowed_visibility = {OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL, OptionVisibility::DEBUG};
     const bool throw_on_error = true;
     set_property(config, allowed_visibility, throw_on_error);
 }
@@ -121,6 +121,18 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R
     m_is_finalized = true;
 }

+bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) const {
+    // for (const auto& prop : m_user_properties) {
+    //     visitor.on_attribute(prop.first + "__user", prop.second.as());
+    // }
+    // for (const auto& prop : m_options_map) {
+    //     visitor.on_attribute(prop.first + "__internal", prop.second->get_any().as());
+    // }
+    // visitor.on_attribute("is_finalized", m_is_finalized);
+
+    return true;
+}
+
 void PluginConfig::apply_debug_options(std::shared_ptr context) {
     static std::vector allowed_visibility = {
         OptionVisibility::RELEASE,
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp
index b96a558171c203..f8f639d8f67a25 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp
@@ -21,7 +21,10 @@ struct ExecutionConfig : public ov::PluginConfig {
     ExecutionConfig(const ExecutionConfig& other);
     ExecutionConfig& operator=(const ExecutionConfig& other);

-    #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__)
+    void finalize(cldnn::engine& engine);
+    using ov::PluginConfig::finalize;
+
+    #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__)
     #include "intel_gpu/runtime/options.inl"
     #undef OV_CONFIG_OPTION

@@ -35,6 +38,10 @@ struct ExecutionConfig : public ov::PluginConfig {
     void apply_execution_hints(const cldnn::device_info& info);
     void apply_performance_hints(const cldnn::device_info& info);
     void apply_priority_hints(const cldnn::device_info& info);
+
+    #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__)
+    #include "intel_gpu/runtime/options.inl"
+    #undef OV_CONFIG_OPTION
 };

 }  // namespace ov::intel_gpu
diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp
index f2037c94979e0c..95d060ef0b760e 100644
--- a/src/plugins/intel_gpu/src/graph/broadcast.cpp
+++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp
@@ -149,7 +149,7 @@ void broadcast_inst::update_output_memory() {
     // Can_be_optimized nodes are allocating from memory_pool too. In this case,
     // we need release the legacy output memory from memory pool explicitly.
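Before the long run of call-site updates that follows, it is worth seeing what one generated accessor actually does. Hand-expanding OV_CONFIG_DECLARE_GETTERS for a sample option (expansion written out here for illustration; ov::hint::inference_precision has value_type ov::element::Type):

    const ov::element::Type& get_inference_precision() const {
        if (m_is_finalized) {
            // After finalize(): hints are resolved, the internal value is authoritative.
            return m_inference_precision.value;
        }
        // Before finalize(): a pending user-set value takes precedence.
        if (m_user_properties.find(ov::hint::inference_precision.name()) != m_user_properties.end()) {
            return m_user_properties.at(ov::hint::inference_precision.name()).as<ov::element::Type>();
        }
        return m_inference_precision.value;
    }

This is why replacing the public members with get_xxx() calls below is not purely cosmetic: reads made before finalization now see user-set values instead of stale defaults.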
if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index 85be51a788c842..fec36fb9ff5c81 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -278,7 +278,7 @@ void crop_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout()); diff --git a/src/plugins/intel_gpu/src/graph/eltwise.cpp b/src/plugins/intel_gpu/src/graph/eltwise.cpp index 491867dc206d31..581f3f08dc120d 100644 --- a/src/plugins/intel_gpu/src/graph/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/eltwise.cpp @@ -393,7 +393,7 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) : ""); } } else { - bool use_new_shape_infer = network.get_config().m_allow_new_shape_infer; + bool use_new_shape_infer = network.get_config().get_allow_new_shape_infer(); auto input0_pshape = node.get_input_pshape(0); for (size_t i = 1; i < inputs_count; ++i) { diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index d635e87c7494d0..cc3956393c4103 100644 --- a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -250,7 +250,7 @@ kernel_impl_params fully_connected_inst::get_fake_aligned_params(kernel_impl_par } } - GPU_DEBUG_IF(orig_impl_param.get_program().get_config().m_disable_fake_alignment) { + GPU_DEBUG_IF(orig_impl_param.get_program().get_config().get_disable_fake_alignment()) { can_apply_fake_alignment = false; } diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp index e32a45ef00aab5..7d503faf8a2255 100644 --- a/src/plugins/intel_gpu/src/graph/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/gather.cpp @@ -150,7 +150,7 @@ void gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index 0ef04576494665..cffb4457000380 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -161,7 +161,7 @@ bool add_required_reorders::test_format(cldnn::program_node& node, format reques } void add_required_reorders::run(program& p) { - bool optimize_data = p.get_config().m_optimize_data; + bool optimize_data = p.get_config().get_optimize_data(); auto usr_itr = p.get_processing_order().begin(); while (usr_itr != p.get_processing_order().end()) { auto& usr = *usr_itr++; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp index 999e103c3fe200..ef4300c33bfea1 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp @@ -11,7 +11,7 @@ using namespace cldnn; void build_implementations::run(program& p) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "pass::build_implementations"); - if (p.get_config().m_partial_build_program) { + if (p.get_config().get_partial_build_program()) { return; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp index daa3b5ddc41501..6c38bce8dd9e31 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp @@ -17,7 +17,7 @@ using namespace cldnn; namespace cldnn { void graph_initializations::set_outputs(program& p) { - auto custom_outputs = p.get_config().m_custom_outputs.value; + auto custom_outputs = p.get_config().get_custom_outputs(); if (!custom_outputs.empty()) { for (auto const& output : custom_outputs) { OPENVINO_ASSERT(p.has_node(output), "not found custom output node in current cldnn::program: ", output); @@ -37,7 +37,7 @@ void graph_initializations::set_outputs(program& p) { void graph_initializations::run(program& p) { set_outputs(p); - auto forcing_map = p.get_config().m_force_implementations.value; + auto forcing_map = p.get_config().get_force_implementations(); for (auto& kv : forcing_map) { if (p.has_node(kv.first)) { p.get_node(kv.first).set_forced_impl_type(kv.second.impl_type); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index cc04b3efe0913c..c632019cf91b01 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "pass_manager.h" #include "program_node.h" #include "intel_gpu/runtime/engine.hpp" @@ -143,8 +144,9 @@ propagate_constants::calculate(engine& engine, return {}; 
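The cf_config.finalize(engine) call added just below uses the new engine-based overload declared in execution_config.hpp earlier in this patch. Its likely shape, inferred from the program.cpp hunks further down that stop building a remote context by hand before finalizing (a sketch under that assumption, not code shown verbatim in the patch):

    void ExecutionConfig::finalize(cldnn::engine& engine) {
        // Wrap the engine's device in a transient remote context and defer to the base class.
        auto ctx = std::make_shared<RemoteContextImpl>("GPU", std::vector<cldnn::device::ptr>{engine.get_device()});
        ov::PluginConfig::finalize(ctx, {});
    }

It lets passes like this one, which have an engine but no remote context, re-finalize a modified copy of the config.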
ExecutionConfig cf_config = config; - cf_config.m_optimize_data = false; - cf_config.m_custom_outputs = const_outputs; + cf_config.set_property(ov::intel_gpu::optimize_data(false)); + cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); + cf_config.finalize(engine); network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); std::map, std::shared_ptr>> weightless_cache_map; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index 9313544928b6b4..a5d98be69771a9 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -74,7 +74,7 @@ void select_preferred_formats::run(program& p) { } #endif // ENABLE_ONEDNN_FOR_GPU - auto forcing_map = p.get_config().m_force_implementations.value; + auto forcing_map = p.get_config().get_force_implementations(); for (auto n : p.get_processing_order()) { n->recalc_output_layout(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index a22d23ca4dfed9..b33a391dadea4c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -203,7 +203,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { params.quantization = kernel_selector::QuantizationType::NONE; } - params.dynamic_quantization_group_size = impl_param.get_program().get_config().m_dynamic_quantization_group_size; + params.dynamic_quantization_group_size = impl_param.get_program().get_config().get_dynamic_quantization_group_size(); return params; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 4fea0bbe644d63..9b73d9711a5a32 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -1204,13 +1204,13 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p params.engineInfo.ip_version = device_info.ip_version; params.engineInfo.arch = kernel_selector::gpu_arch(static_cast::type>(device_info.arch)); - auto impl_forcing = config.m_force_implementations.value; + auto impl_forcing = config.get_force_implementations(); if (impl_forcing.count(param_info.desc->id) != 0) { params.forceImplementation = impl_forcing.at(param_info.desc->id).kernel_name; } - params.allowStaticInputReordering = config.m_optimize_data || config.m_allow_static_input_reorder; + params.allowStaticInputReordering = config.get_optimize_data() || config.get_allow_static_input_reorder(); params.allowInputReordering = false; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 50f1b1ed9ab879..0d90dc31e691f9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -111,7 +111,7 @@ namespace cldnn { std::mutex kernels_cache::_mutex; std::string kernels_cache::get_cache_path() const { - auto path = _config.m_cache_dir.value; + auto path = _config.get_cache_dir(); if (path.empty()) { return {}; } @@ -123,12 +123,12 @@ std::string kernels_cache::get_cache_path() const { } bool 
kernels_cache::is_cache_enabled() const { - if (!_config.m_allow_new_shape_infer && - (_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SPEED)) { + if (!_config.get_allow_new_shape_infer() && + (_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SPEED)) { return false; } - return !_config.m_cache_dir.value.empty(); + return !_config.get_cache_dir().empty(); } size_t kernels_cache::get_max_kernels_per_batch() const { @@ -136,7 +136,7 @@ size_t kernels_cache::get_max_kernels_per_batch() const { GPU_DEBUG_IF(debug_config->max_kernels_per_batch >= 1) { return static_cast(debug_config->max_kernels_per_batch); } - return _config.m_max_kernels_per_batch; + return _config.get_max_kernels_per_batch(); } void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::vector* all_batches) const { diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 34910d5d73531d..4aca436bdd34d8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -47,7 +47,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _attrs(attrs), _pd(pd) { - _enable_profiling = config.m_enable_profiling; + _enable_profiling = config.get_enable_profiling(); _scratchpad_md = _pd.scratchpad_desc(); @@ -70,7 +70,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _pd(), _prim() { - _enable_profiling = config.m_enable_profiling; + _enable_profiling = config.get_enable_profiling(); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { _enable_profiling = true; @@ -318,7 +318,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { private: std::string get_cache_directory(const ExecutionConfig& config) const { - auto path = config.m_cache_dir.value; + auto path = config.get_cache_dir(); if (path.empty()) { return {}; } @@ -343,7 +343,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { void build_primitive(const ExecutionConfig& config) { auto cache_outpath = get_cache_directory(config); - if (!config.m_allow_new_shape_infer) { + if (!config.get_allow_new_shape_infer()) { cache_outpath = ""; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp index b135d9af73f31f..0ce180380f14b5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp @@ -58,7 +58,7 @@ std::unique_ptr ImplementationManager::create(const program_node if (auto impl = create_impl(node, params)) { update_impl(*impl, params); impl->set_node_params(node); - impl->can_share_kernels = node.get_program().get_config().m_enable_kernels_reuse; + impl->can_share_kernels = node.get_program().get_config().get_enable_kernels_reuse(); return impl; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp index 142b4b45fbf001..ce461632631d15 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp @@ -59,7 +59,7 @@ const std::vector>& 
Registry(scores_layout.get_partial_shape()[0].get_length()); const size_t kClassNum = static_cast(scores_layout.get_partial_shape()[1].get_length()); const size_t kNStreams = - static_cast(node.get_program().get_config().m_num_streams.value); + static_cast(node.get_program().get_config().get_num_streams()); const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; return kKeyValue > 64; } diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 84458b58279028..b64a4744360071 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -435,7 +435,7 @@ bool should_use_winograd_2x3_s1(const convolution_node& node, layout const& input_layout, layout const& weights_layout, bool output_size_handling_enabled) { - bool disable_winograd_conv = node.get_program().get_config().m_disable_winograd_convolution; + bool disable_winograd_conv = node.get_program().get_config().get_disable_winograd_convolution(); if (disable_winograd_conv) return false; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 653ff2f57ee385..1c61b2c3fba82c 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -180,9 +180,9 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo , _memory_pool(new memory_pool(program->get_engine())) , _internal(is_internal) , _is_primary_stream(is_primary_stream) - , _enable_profiling(program->get_config().m_enable_profiling) + , _enable_profiling(program->get_config().get_enable_profiling()) , _reset_arguments(true) - , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().m_buffers_preallocation_ratio)) { + , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_buffers_preallocation_ratio())) { if (!_internal) { net_id = get_unique_net_id(); } @@ -398,7 +398,7 @@ void network::calculate_weights_cache_capacity() { } // Sum all weights constants for each stream - required_mem_size += weights_const_size * _config.m_num_streams.value; + required_mem_size += weights_const_size * _config.get_num_streams(); // Add all other constants (shared between streams) required_mem_size += total_const_size - weights_const_size; diff --git a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp index 725f3333a0f132..df29ed36a7fd12 100644 --- a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp @@ -157,7 +157,7 @@ void non_max_suppression_gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
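For a concrete feel of the kKeyValue heuristic a few hunks up (values hypothetical):

    // kBatchNum = 4, kClassNum = 10, kNStreams = 2:
    // kKeyValue = 4 * std::min<size_t>(10, 8) * 2 = 64  -> 64 > 64 is false
    // raising kNStreams to 4 gives 128                  -> the predicate flips to true

The stream count scales the estimated amount of parallel work linearly, which is why it participates in the threshold at all; this commit only changes how it is read (get_num_streams() instead of the now-hidden m_num_streams member).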
if (static_cast(_outputs[i]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[i].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[i] = {_network.get_engine().reinterpret_buffer(input_memory(i), _impl_params->get_output_layout(i))}; diff --git a/src/plugins/intel_gpu/src/graph/permute.cpp b/src/plugins/intel_gpu/src/graph/permute.cpp index 41252797f09eca..c4fddfde978c2d 100644 --- a/src/plugins/intel_gpu/src/graph/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/permute.cpp @@ -146,7 +146,7 @@ void permute_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 2f2a755c675cf1..53932dfd91a466 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -163,7 +163,7 @@ static memory::ptr get_memory_from_pool(engine& _engine, OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout without upper bound"); // Use layout with max tensor for dynamic shape with upper bound - if (_node.get_program().get_config().m_enable_memory_pool) { + if (_node.get_program().get_config().get_enable_memory_pool()) { if (curr_memory != nullptr) pool.release_memory(curr_memory, _node.get_unique_id(), _node.id(), net_id); return pool.get_memory(layout, @@ -2042,7 +2042,7 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool : _network(network) , _node(&node) , _node_output_layout(node.get_output_layout()) - , _use_shared_kernels(node.get_program().get_config().m_enable_kernels_reuse) + , _use_shared_kernels(node.get_program().get_config().get_enable_kernels_reuse()) , _impl_params(node.get_kernel_impl_params()) , _impl(node.get_selected_impl() ? 
node.get_selected_impl()->clone() : nullptr) , _runtime_memory_dependencies(node.get_memory_dependencies()) @@ -2596,8 +2596,8 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() { ExecutionConfig subgraph_config{ ov::intel_gpu::allow_static_input_reorder(true), ov::intel_gpu::allow_new_shape_infer(true), - ov::enable_profiling(get_network().get_config().m_enable_profiling), - ov::intel_gpu::use_onednn(get_network().get_config().m_use_onednn) + ov::enable_profiling(get_network().get_config().get_enable_profiling()), + ov::intel_gpu::use_onednn(get_network().get_config().get_use_onednn()) }; auto prog = program::build_program(get_network().get_engine(), t, diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 271a866e3b5cf2..98c61b021477d5 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -108,8 +108,8 @@ using namespace cldnn; using namespace ov::intel_gpu; static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) { - int streams = (num_streams > 0) ? num_streams : config.m_compilation_num_threads.value; - auto priority = config.m_host_task_priority; + int streams = (num_streams > 0) ? num_streams : config.get_compilation_num_threads(); + auto priority = config.get_host_task_priority(); auto core_type = ov::hint::SchedulingCoreType::ANY_CORE; switch (priority) { case ov::hint::Priority::LOW: core_type = ov::hint::SchedulingCoreType::ECORE_ONLY; break; @@ -117,7 +117,7 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E case ov::hint::Priority::HIGH: core_type = ov::hint::SchedulingCoreType::PCORE_ONLY; break; default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority); } - bool enable_cpu_pinning = config.m_enable_cpu_pinning; + bool enable_cpu_pinning = config.get_enable_cpu_pinning(); ov::threading::IStreamsExecutor::Config task_executor_config(tags, streams, @@ -163,8 +163,7 @@ program::program(engine& engine_ref, program_node::reset_unique_id(); if (no_optimizations) { init_graph(); - auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); - _config.finalize(ctx, {}); + _config.finalize(_engine); } else { build_program(is_internal); if (_is_body_program) { @@ -200,8 +199,7 @@ program::program(engine& engine_ref, _task_executor(std::move(task_executor)), processing_order(), is_internal(is_internal) { - auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); - _config.finalize(ctx, {}); + _config.finalize(_engine); init_primitives(); init_program(); prepare_nodes(nodes); @@ -214,9 +212,8 @@ program::program(engine& engine, const ExecutionConfig& config) _config(config), processing_order() { init_primitives(); - auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); - _config.finalize(ctx, {}); - new_shape_infer = _config.m_allow_new_shape_infer; + _config.finalize(_engine); + new_shape_infer = _config.get_allow_new_shape_infer(); _layout_optimizer = std::make_unique(); } @@ -228,7 +225,7 @@ void program::init_program() { set_options(); pm = std::unique_ptr(new pass_manager(*this)); - new_shape_infer = _config.m_allow_new_shape_infer; + new_shape_infer = _config.get_allow_new_shape_infer(); if (_task_executor == nullptr) _task_executor = program::make_task_executor(_config); @@ -236,7 +233,7 @@ void program::init_program() { 
kernel_selector::KernelBase::get_db().get_batch_headers(), kernel_selector::KernelBase::get_db().get_cm_batch_headers())); - _kernels_cache->set_kernels_reuse(get_config().m_enable_kernels_reuse); + _kernels_cache->set_kernels_reuse(_config.get_enable_kernels_reuse()); if (!_compilation_context) _compilation_context = program::make_compilation_context(_config); @@ -490,15 +487,11 @@ void program::set_options() { static std::atomic id_gen{0}; prog_id = ++id_gen; assert(prog_id != 0); - if (!_config.m_force_implementations.value.empty()) { - _config.m_optimize_data = true; - } } void program::build_program(bool is_internal) { init_graph(); - auto ctx = std::make_shared("GPU", std::vector{_engine.get_device()}); - _config.finalize(ctx, {}); + _config.finalize(_engine); { pre_optimize_graph(is_internal); } run_graph_compilation(); { post_optimize_graph(is_internal); } @@ -528,9 +521,6 @@ void program::init_graph() { for (auto& node : processing_order) { if (!node->is_type()) node->get_output_layouts(); - if (node->is_type()) { - _config.m_use_onednn = true; - } } // Perform initial shape_of subgraphs markup apply_opt_pass(); @@ -548,7 +538,7 @@ void program::pre_optimize_graph(bool is_internal) { bool output_size_handling_enabled = analyze_output_size_handling_need(); - bool optimize_data = _config.m_optimize_data; + bool optimize_data = _config.get_optimize_data(); if (optimize_data) { apply_opt_pass(); } @@ -625,7 +615,7 @@ void program::post_optimize_graph(bool is_internal) { reorder_factory rf; - bool optimize_data = _config.m_optimize_data; + bool optimize_data = _config.get_optimize_data(); if (!is_internal) { apply_opt_pass(rf); @@ -633,7 +623,7 @@ void program::post_optimize_graph(bool is_internal) { apply_opt_pass(false, true); // TODO: do we need it at this place also? 
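
Note that the force_implementations/optimize_data coupling deleted from set_options() above is not dropped: it reappears in ExecutionConfig::finalize_impl() later in this patch, so dependent options are now derived in one place when finalize() runs. A minimal usage sketch, assuming the API introduced by this patch (forcing_map is a hypothetical placeholder):

    ExecutionConfig cfg;
    cfg.set_property(ov::intel_gpu::force_implementations(forcing_map));
    cfg.finalize(engine);                      // runs finalize_impl() and hint resolution
    OPENVINO_ASSERT(cfg.get_optimize_data());  // implied by a non-empty forcing map
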
- auto partial_build = _config.m_partial_build_program; + auto partial_build = _config.get_partial_build_program(); #ifdef GPU_DEBUG_CONFIG GPU_DEBUG_GET_INSTANCE(debug_config); if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) { @@ -652,7 +642,7 @@ void program::post_optimize_graph(bool is_internal) { // Recalculate processing order after all graph transformation to keep optimal primitives ordering // for OOO queue - if (_config.m_queue_type == QueueTypes::out_of_order) + if (_config.get_queue_type() == QueueTypes::out_of_order) get_processing_order().calculate_BFS_processing_order(); apply_opt_pass(); @@ -776,7 +766,7 @@ const std::vector& program::get_allocating_order(bool forced_updat } void program::prepare_memory_dependencies() { - if (!_config.m_enable_memory_pool) + if (!_config.get_enable_memory_pool()) return; for (auto& node : get_processing_order()) { node->add_memory_dependency(node->get_unique_id()); @@ -1408,7 +1398,7 @@ program::primitives_info program::get_current_stage_info() const { void program::save_pass_info(std::string pass_name) { // TODO: Directory path here can be probably changed to some bool flag - if (!_config.m_dump_graphs.value.empty()) + if (!_config.get_dump_graphs().empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } @@ -1436,7 +1426,7 @@ const program::primitives_info& program::get_primitives_info() const { return pr void program::apply_opt_pass(base_pass& pass) { pm->run(*this, pass); } void program::set_layout_optimizer_attributes(layout_optimizer& lo) { - lo.set_implementation_forcing(_config.m_force_implementations); + lo.set_implementation_forcing(_config.get_force_implementations()); // first pass to set layout optimization_attributes for topology @@ -1662,15 +1652,15 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1); #ifdef ENABLE_ONEDNN_FOR_GPU - bool enable_onednn_for_tests = get_config().m_optimize_data || is_internal_program(); + bool enable_onednn_for_tests = get_config().get_optimize_data() || is_internal_program(); auto& engine = get_engine(); if (engine.get_device_info().vendor_id == INTEL_VENDOR_ID && - get_config().m_queue_type == QueueTypes::in_order && + get_config().get_queue_type() == QueueTypes::in_order && enable_onednn_for_tests) { if (engine.get_device_info().supports_immad) { lo.add_all_onednn_impls_optimization_attribute(); } else { - if (get_config().m_use_onednn) { + if (get_config().get_use_onednn()) { lo.enable_onednn_for(); } } @@ -1884,8 +1874,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) { init_program(); std::shared_ptr mapped_memory = nullptr; - std::string weights_path = _config.m_weights_path; - if (_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE && + std::string weights_path = _config.get_weights_path(); + if (_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE && ov::util::validate_weights_path(weights_path)) { mapped_memory = ov::load_mmap_object(weights_path); } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index a14cbfd0139d29..aeae56173f4fd0 100644 --- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -157,7 +157,7 @@ void dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - 
auto path = config.m_dump_graphs.value; + auto path = config.get_dump_graphs(); if (path.empty()) { return {}; } diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index c3b7c472d59437..75a4011eeefc25 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -287,7 +287,7 @@ void reorder_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index 1250a73e881e9a..b6e5b23a0f6476 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -320,7 +320,7 @@ void reshape_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp index 7ddea056745a53..a651baa50002fa 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp @@ -75,7 +75,7 @@ void scatter_elements_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp index 56c8bf6607c4a3..3dbec05dbbe3b3 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp @@ -86,7 +86,7 @@ void scatter_nd_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
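
These reorder/reshape/scatter hunks, together with the permute, non-max-suppression and strided_slice changes elsewhere in this patch, all touch the same recurring fragment of update_output_memory(): when the node is optimized out, any output still owned by the memory pool is released first, and the output is then re-aliased to the input buffer. Condensed, the shared pattern is:

    if (_outputs[0] && _node->get_program().get_config().get_enable_memory_pool()) {
        _network.get_memory_pool().release_memory(_outputs[0].get(),
                                                  _node->get_unique_id(), _node->id(), _network.get_id());
    }
    _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())};
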
if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_update.cpp index 0d34b8f95e9f1a..947507533796e0 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_update.cpp @@ -66,7 +66,7 @@ void scatter_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index 35d1db8fc389b1..a3b6ad9166c964 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -95,7 +95,7 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p 3, ""); - bool allow_new_shape_infer = network.get_program()->get_config().m_allow_new_shape_infer; + bool allow_new_shape_infer = network.get_program()->get_config().get_allow_new_shape_infer(); // Broadcast check is performed in ngraph shape infer of select when allow_new_shape_infer=true if (!allow_new_shape_infer) { if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) { diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index baff08bc8de82e..674e7649bc9820 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -208,7 +208,7 @@ void strided_slice_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().m_enable_memory_pool) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 8bd73dbdce7029..3fc1439d56a727 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -20,17 +20,17 @@ namespace ov::intel_gpu { namespace { std::shared_ptr create_task_executor(const std::shared_ptr& plugin, const ExecutionConfig& config) { - if (config.m_exclusive_async_requests) { + if (config.get_exclusive_async_requests()) { // exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with // the CPU behavior return plugin->get_executor_manager()->get_executor("GPU"); - } else if (config.get_property(ov::hint::enable_cpu_pinning) || - config.get_property(ov::hint::enable_cpu_reservation)) { - bool enable_cpu_pinning = config.get_property(ov::hint::enable_cpu_pinning); - bool enable_cpu_reservation = config.get_property(ov::hint::enable_cpu_reservation); + } else if (config.get_enable_cpu_pinning() || + config.get_enable_cpu_reservation()) { + bool enable_cpu_pinning = config.get_enable_cpu_pinning(); + bool enable_cpu_reservation = config.get_enable_cpu_reservation(); return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.m_num_streams.value, + config.get_num_streams(), 1, ov::hint::SchedulingCoreType::PCORE_ONLY, enable_cpu_reservation, @@ -38,7 +38,7 @@ std::shared_ptr create_task_executor(const std::sh } else { return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.m_num_streams.value, + config.get_num_streams(), 0, ov::hint::SchedulingCoreType::ANY_CORE, false, @@ -62,7 +62,7 @@ CompiledModel::CompiledModel(std::shared_ptr model, m_outputs(ov::ICompiledModel::outputs()), m_loaded_from_cache(false) { auto graph_base = std::make_shared(model, m_context, m_config, 0); - for (uint16_t n = 0; n < m_config.m_num_streams.value; n++) { + for (uint16_t n = 0; n < m_config.get_num_streams(); n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -157,7 +157,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, } auto graph_base = std::make_shared(ib, context, m_config, 0); - for (uint16_t n = 0; n < m_config.m_num_streams.value; n++) { + for (uint16_t n = 0; n < m_config.get_num_streams(); n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -179,8 +179,8 @@ std::shared_ptr CompiledModel::create_infer_request() co void CompiledModel::export_model(std::ostream& model) const { // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching // which requires the weights_path. 
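
For context, create_task_executor() above selects between three executor setups; a sketch of the control flow only:

    if (config.get_exclusive_async_requests())
        return plugin->get_executor_manager()->get_executor("GPU");  // one shared executor, streams disabled
    // otherwise a CPUStreamsExecutor with get_num_streams() streams is built:
    // pinned/reserved on performance cores when cpu_pinning/cpu_reservation is
    // requested, unconstrained (ANY_CORE) otherwise.
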
- ov::CacheMode cache_mode = m_config.m_cache_mode; - std::string weights_path = m_config.m_weights_path; + ov::CacheMode cache_mode = m_config.get_cache_mode(); + std::string weights_path = m_config.get_weights_path(); if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) return; @@ -188,7 +188,7 @@ void CompiledModel::export_model(std::ostream& model) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); - const ov::EncryptionCallbacks encryption_callbacks = m_config.m_cache_encryption_callbacks; + const ov::EncryptionCallbacks encryption_callbacks = m_config.get_cache_encryption_callbacks(); // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty. const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; @@ -290,8 +290,8 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } else if (name == ov::loaded_from_cache) { return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache}; } else if (name == ov::optimal_number_of_infer_requests) { - unsigned int nr = m_config.m_num_streams.value; - if (m_config.m_performance_mode != ov::hint::PerformanceMode::LATENCY) + unsigned int nr = m_config.get_num_streams(); + if (m_config.get_performance_mode() != ov::hint::PerformanceMode::LATENCY) nr *= 2; return decltype(ov::optimal_number_of_infer_requests)::value_type {nr}; } else if (name == ov::execution_devices) { diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index ab69fd5d4e26f8..4f1f541b725090 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -85,11 +85,7 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context ib >> perfEntry.parentPrimitive; } } - { - ib >> m_config.m_partial_build_program.value; - ib >> m_config.m_optimize_data.value; - ib >> m_config.m_allow_new_shape_infer.value; - } + // ib >> m_config; auto imported_prog = std::make_shared(get_engine(), m_config); imported_prog->load(ib); @@ -173,7 +169,7 @@ void Graph::build(std::shared_ptr program) { auto external_queue = m_context->get_external_queue(); if (external_queue) { - OPENVINO_ASSERT(m_config.m_num_streams == 1, "[GPU] Throughput streams can't be used with shared queue!"); + OPENVINO_ASSERT(m_config.get_num_streams() == 1, "[GPU] Throughput streams can't be used with shared queue!"); const auto &engine = program->get_engine(); m_network = std::make_shared(program, engine.create_stream(m_config, external_queue), m_stream_id); } else { @@ -205,7 +201,7 @@ bool Graph::use_external_queue() const { std::shared_ptr Graph::get_runtime_model(std::vector& primitives_info, bool filter_const_primitives) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::get_runtime_model"); - if (m_config.m_enable_profiling) { + if (m_config.get_enable_profiling()) { try { // Update may throw an exception for step-by-step runtime graph dump, // since network->get_executed_primitives() method can't be called before network execution @@ -516,11 +512,7 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) { ob << perf_item.second.second.parentPrimitive; } } - { - ob << m_config.m_partial_build_program.value; - ob << m_config.m_optimize_data.value; - ob << m_config.m_allow_new_shape_infer.value; - } + // ob << m_config; 
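
The ov::optimal_number_of_infer_requests branch above boils down to one request per stream, doubled outside of latency mode so the next request can be prepared while the current one executes. For example, with num_streams == 2 under a THROUGHPUT hint:

    unsigned int nr = m_config.get_num_streams();  // 2
    if (m_config.get_performance_mode() != ov::hint::PerformanceMode::LATENCY)
        nr *= 2;                                   // reported value: 4
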
ob.set_stream(m_network->get_stream_ptr().get()); m_network->get_program()->save(ob); diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 889a066aa74325..20690957fea5e6 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/op/if.hpp" #include "intel_gpu/plugin/program_builder.hpp" #include "intel_gpu/primitives/condition.hpp" @@ -21,13 +22,9 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ << ", num inputs: " << op->get_input_size() << std::endl; auto config = p.get_config(); - { - auto custom_outputs = config.m_custom_outputs.value; - if (!custom_outputs.empty()) { - config.m_custom_outputs = std::vector({}); - } - } - config.m_allow_new_shape_infer = op->is_dynamic() || p.use_new_shape_infer(); + config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); + config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); + config.finalize(p.get_engine()); ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 5d808f4d041275..9665918f88d0be 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -298,8 +298,9 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr& ProgramBuilder prog(ctx->get_engine(), config); - float query_model_ratio = config.m_query_model_ratio; + float query_model_ratio = config.get_query_model_ratio(); auto supported = ov::get_supported_nodes(model, [&config,&ctx,this](std::shared_ptr& model) { @@ -361,8 +361,8 @@ std::shared_ptr Plugin::import_model(std::istream& model, config.set_property(_orig_config); config.finalize(context_impl, {}); - ov::CacheMode cache_mode = config.m_cache_mode; - ov::EncryptionCallbacks encryption_callbacks = config.m_cache_encryption_callbacks; + ov::CacheMode cache_mode = config.get_cache_mode(); + ov::EncryptionCallbacks encryption_callbacks = config.get_cache_encryption_callbacks(); const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; std::unique_ptr ib_ptr = @@ -379,8 +379,8 @@ std::shared_ptr Plugin::import_model(std::istream& model, return nullptr; } - std::string weights_path = config.m_weights_path; - if (config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) { + std::string weights_path = config.get_weights_path(); + if (config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) { return nullptr; } @@ -680,7 +680,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); const auto& config = m_configs_map.at(device_id); - uint32_t n_streams = static_cast(config.m_num_streams.value); + uint32_t n_streams = static_cast(config.get_num_streams()); uint64_t occupied_device_mem = 0; auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), 
options).as>(); auto occupied_usm_dev = statistic_result.find("usm_device_current"); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 297b9d42266efb..cd69e741e435de 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/split.hpp" @@ -106,19 +107,10 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml"; CustomLayer::LoadFromFile(config_path, m_custom_layers, true); - auto custom_layers_config = m_config.m_config_file.value; + auto custom_layers_config = m_config.get_config_file(); CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); auto ops = model->get_ordered_ops(); - // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, - // smaller # of kernels are built compared to static models. - // So having smaller batch size is even better for dynamic model as we can do more parallel build. - if (model->is_dynamic()) { - m_config.m_max_kernels_per_batch = 4; - } else { - m_config.m_max_kernels_per_batch = 8; - } - m_program = build(ops, partial_build, is_inner_program); } @@ -159,14 +151,33 @@ std::shared_ptr ProgramBuilder::build(const std::vectoris_dynamic()) { + is_dynamic = true; + break; + } + } if (is_inner_program) { - allow_new_shape_infer = (m_config.m_allow_new_shape_infer || allow_new_shape_infer); + allow_new_shape_infer = (m_config.get_allow_new_shape_infer() || allow_new_shape_infer); + } + + // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, + // smaller # of kernels are built compared to static models. + // So having smaller batch size is even better for dynamic model as we can do more parallel build. 
+    if (is_dynamic) {
+        m_config.set_property(ov::intel_gpu::max_kernels_per_batch(4));
+    } else {
+        m_config.set_property(ov::intel_gpu::max_kernels_per_batch(8));
+    }

-    m_config.m_partial_build_program = partial_build;
-    m_config.m_optimize_data = true;
-    m_config.m_allow_new_shape_infer = allow_new_shape_infer;
+    m_config.set_property(ov::intel_gpu::partial_build_program(partial_build));
+    m_config.set_property(ov::intel_gpu::optimize_data(true));
+    m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer));
+    //if (has_lstm)
+    m_config.set_property(ov::intel_gpu::use_onednn(true));
+    m_config.finalize(m_engine);

     prepare_build();
     {
@@ -310,7 +321,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr
         prim->origin_op_name = op.get_friendly_name();
         prim->origin_op_type_name = op.get_type_name();

-    if (this->m_config.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE) {
+    if (this->m_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) {
         if (auto data_prim = dynamic_cast<cldnn::data*>(prim.get())) {
             auto rt_info = op.get_rt_info();
@@ -341,7 +352,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr
         prim->origin_op_type_name = prim->type_string();
     }

-    if (this->m_config.m_enable_profiling && should_profile) {
+    if (this->m_config.get_enable_profiling() && should_profile) {
         profiling_ids.push_back(prim_id);
         init_profile_info(*prim);
     }
diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
index 6d74722a5b435a..7c3a1b0e452fa8 100644
--- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
+++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
@@ -113,8 +113,8 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c
     : ov::ISyncInferRequest(compiled_model)
     , m_graph(compiled_model->get_graph(0))
     , m_context(std::static_pointer_cast(compiled_model->get_context_impl()))
-    , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().m_buffers_preallocation_ratio))
-    , m_enable_profiling(m_graph->get_config().m_enable_profiling)
+    , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_buffers_preallocation_ratio()))
+    , m_enable_profiling(m_graph->get_config().get_enable_profiling())
     , m_use_external_queue(m_graph->use_external_queue()) {
     GPU_DEBUG_GET_INSTANCE(debug_config);
     GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) {
@@ -422,7 +422,7 @@ void SyncInferRequest::wait() {
         auto mem_shape = output_layout.get_shape();
         // In case of old shape infer we need to shrink out tensor shape to avoid redundant dimensions that occur due to rank extension
         // For new shape infer this shouldn't happen, thus remove that WA once we migrate to ngraph-based shape infer for all cases
-        if (!m_graph->get_config().m_allow_new_shape_infer) {
+        if (!m_graph->get_config().get_allow_new_shape_infer()) {
             OPENVINO_ASSERT(port.get_partial_shape().is_static(), "[GPU] Unexpected dynamic shape for legacy shape inference");
             OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor");
             mem_shape = port.get_shape();
@@ -895,7 +895,7 @@ std::vector SyncInferRequest::prepare_input(const std::string
     auto memory = device_tensor->get_memory();
     // WA to extend shape to ranks expected by legacy shape infer.
Remove after full migration to new shape infer - if (!m_graph->get_config().m_allow_new_shape_infer) { + if (!m_graph->get_config().get_allow_new_shape_infer()) { auto new_layout = memory->get_layout(); new_layout.set_partial_shape(m_graph->get_input_layouts().at(input_idx).get_shape()); memory = engine.reinterpret_buffer(*memory, new_layout); diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index c9e7c155158448..339ee80ba7a3eb 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -289,7 +289,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const ov::element::TypeVector supported_woq_types = {ov::element::u8, ov::element::i8, ov::element::u4, ov::element::i4}; bool enableInt8; ov::element::Type infer_precision = ov::element::undefined; - bool unroll_loop = config.m_enable_loop_unrolling; + bool unroll_loop = config.get_enable_loop_unrolling(); { ov::pass::Manager manager("Plugin:GPU"); auto pass_config = manager.get_pass_config(); @@ -302,7 +302,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); - enableInt8 = config.m_enable_lp_transformations && is_model_quantized; + enableInt8 = config.get_enable_lp_transformations() && is_model_quantized; manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }, @@ -335,7 +335,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { }; // Add conversion from FP data types to infer precision if it's specified - infer_precision = config.m_inference_precision; + infer_precision = config.get_inference_precision(); if (infer_precision != ov::element::undefined) { if (!fp_precision_supported(infer_precision)) infer_precision = fallback_precision; @@ -416,7 +416,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1); } - if (!config.m_enable_sdpa_optimization) + if (!config.get_enable_sdpa_optimization()) return false; auto sdpa = ov::as_type_ptr(node); @@ -1032,7 +1032,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - auto kv_cache_compression_dt = config.m_kv_cache_precision; + auto kv_cache_compression_dt = config.get_kv_cache_precision(); manager.register_pass(kv_cache_compression_dt, device_info.supports_immad); manager.register_pass(); @@ -1052,7 +1052,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { - auto dynamic_quantization_group_size = config.m_dynamic_quantization_group_size; + auto dynamic_quantization_group_size = config.get_dynamic_quantization_group_size(); pass_config->set_callback([=](const_node_ptr& root) -> bool { for (size_t i = 0 ; i < root->get_input_node_shared_ptr(0)->get_output_size(); ++i) { if (root->get_input_node_shared_ptr(0)->get_output_element_type(i) == ov::element::Type_t::f32) { diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 9885b075109e1a..529e0543f6de67 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -21,7 +21,7 @@ ExecutionConfig::ExecutionConfig() : ov::PluginConfig() 
{ ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { m_user_properties = other.m_user_properties; - m_is_finalized = other.m_is_finalized; + m_is_finalized = false; // copy is not automatically finalized for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } @@ -29,13 +29,18 @@ ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { m_user_properties = other.m_user_properties; - m_is_finalized = other.m_is_finalized; + m_is_finalized = false; // copy is not automatically finalized for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } return *this; } +void ExecutionConfig::finalize(cldnn::engine& engine) { + auto ctx = std::make_shared("GPU", std::vector{engine.get_device()}); + PluginConfig::finalize(ctx, {}); +} + void ExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); if (!info.supports_immad) { @@ -79,6 +84,10 @@ void ExecutionConfig::finalize_impl(std::shared_ptr context) { if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { m_dynamic_quantization_group_size = 32; } + + if (!get_force_implementations().empty()) { + m_optimize_data = true; + } } void ExecutionConfig::apply_hints(const cldnn::device_info& info) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index b954f494abfe5a..857283b9558647 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -64,7 +64,7 @@ void ocl_engine::create_onednn_engine(const ExecutionConfig& config) { auto casted = std::dynamic_pointer_cast(_device); OPENVINO_ASSERT(casted, "[GPU] Invalid device type stored in ocl_engine"); - std::string cache_dir = config.m_cache_dir; + std::string cache_dir = config.get_cache_dir(); if (cache_dir.empty()) { _onednn_engine = std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get())); } else { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp index 61844cd640ea41..bc01a8174292e4 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp @@ -189,22 +189,22 @@ void set_arguments_impl(ocl_kernel_type& kernel, } // namespace ocl_stream::ocl_stream(const ocl_engine &engine, const ExecutionConfig& config) - : stream(config.m_queue_type, stream::get_expected_sync_method(config)) + : stream(config.get_queue_type(), stream::get_expected_sync_method(config)) , _engine(engine) { auto context = engine.get_cl_context(); auto device = engine.get_cl_device(); ocl::command_queues_builder queue_builder; - queue_builder.set_profiling(config.m_enable_profiling); + queue_builder.set_profiling(config.get_enable_profiling()); queue_builder.set_out_of_order(m_queue_type == QueueTypes::out_of_order); OPENVINO_ASSERT(m_sync_method != SyncMethods::none || m_queue_type == QueueTypes::in_order, "[GPU] Unexpected sync method (none) is specified for out_of_order queue"); bool priorty_extensions = engine.extension_supported("cl_khr_priority_hints") && engine.extension_supported("cl_khr_create_command_queue"); - 
queue_builder.set_priority_mode(config.m_queue_priority, priorty_extensions); + queue_builder.set_priority_mode(config.get_queue_priority(), priorty_extensions); bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_throttle_mode(config.m_queue_throttle, throttle_extensions); + queue_builder.set_throttle_mode(config.get_queue_throttle(), throttle_extensions); bool queue_families_extension = engine.get_device_info().supports_queue_families; queue_builder.set_supports_queue_families(queue_families_extension); diff --git a/src/plugins/intel_gpu/src/runtime/stream.cpp b/src/plugins/intel_gpu/src/runtime/stream.cpp index 2043afb9f3869c..913d84d8f476f5 100644 --- a/src/plugins/intel_gpu/src/runtime/stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/stream.cpp @@ -20,8 +20,8 @@ QueueTypes stream::detect_queue_type(engine_types engine_type, void* queue_handl } SyncMethods stream::get_expected_sync_method(const ExecutionConfig& config) { - auto profiling = config.m_enable_profiling; - auto queue_type = config.m_queue_type; + auto profiling = config.get_enable_profiling(); + auto queue_type = config.get_queue_type(); return profiling ? SyncMethods::events : queue_type == QueueTypes::out_of_order ? SyncMethods::barriers : SyncMethods::none; } From f57e7a1661e12f30b64ff221b30c178e19587367 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 24 Dec 2024 09:38:23 +0400 Subject: [PATCH 12/44] Options visibility update Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 44 +++++++++++-------- src/inference/src/dev/plugin_config.cpp | 30 +++---------- src/inference/tests/unit/config_test.cpp | 25 +++++------ src/plugins/intel_gpu/src/plugin/plugin.cpp | 11 ++--- .../src/runtime/execution_config.cpp | 18 ++++---- .../unit/fusions/convolution_fusion_test.cpp | 2 +- .../fusions/fully_connected_fusion_test.cpp | 8 ++-- .../graph_manipulation_gpu_test.cpp | 2 +- .../passes/prepare_primitive_fusing_test.cpp | 2 +- .../remove_redundant_reorders_tests.cpp | 12 ++--- .../test_cases/concatenation_gpu_test.cpp | 6 +-- .../unit/test_cases/reorder_gpu_test.cpp | 2 +- .../tests/unit/test_utils/test_utils.cpp | 6 +-- 13 files changed, 79 insertions(+), 89 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 9e566b216590cb..36785d071bfc8e 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -78,12 +78,32 @@ namespace ov { -enum class OptionVisibility { - RELEASE = 0, // Option can be set for any build type via public interface, environment and config file - RELEASE_INTERNAL = 1, // Option can be set for any build type via environment and config file only - DEBUG = 2, // Option can be set for debug builds only via environment and config file +enum class OptionVisibility : uint8_t { + RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file + RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only + DEBUG = 1 << 2, // Option can be set for debug builds only via environment and config file +#ifdef ENABLE_DEBUG_CAPS + ANY = 0x07, // Any visibility is valid including DEBUG +#else + ANY = 0x03, // Any visibility is valid excluding DEBUG +#endif }; +inline OptionVisibility operator&(OptionVisibility a, 
OptionVisibility b) {
+    typedef std::underlying_type<OptionVisibility>::type underlying_type;
+    return static_cast<OptionVisibility>(static_cast<underlying_type>(a) & static_cast<underlying_type>(b));
+}
+
+inline OptionVisibility operator|(OptionVisibility a, OptionVisibility b) {
+    typedef std::underlying_type<OptionVisibility>::type underlying_type;
+    return static_cast<OptionVisibility>(static_cast<underlying_type>(a) | static_cast<underlying_type>(b));
+}
+
+inline OptionVisibility operator~(OptionVisibility a) {
+    typedef std::underlying_type<OptionVisibility>::type underlying_type;
+    return static_cast<OptionVisibility>(~static_cast<underlying_type>(a));
+}
+
 inline std::ostream& operator<<(std::ostream& os, const OptionVisibility& visibility) {
     switch (visibility) {
     case OptionVisibility::RELEASE: os << "RELEASE"; break;
@@ -185,23 +205,14 @@ class OPENVINO_RUNTIME_API PluginConfig {
     PluginConfig(PluginConfig&& other) = delete;
     PluginConfig& operator=(PluginConfig&& other) = delete;

-    void set_property(const ov::AnyMap& properties);
-    Any get_property(const std::string& name) const;
+    void set_property(const ov::AnyMap& properties, OptionVisibility allowed_visibility = OptionVisibility::ANY, bool throw_on_error = true);
+    Any get_property(const std::string& name, OptionVisibility allowed_visibility = OptionVisibility::ANY) const;

     template <typename... Properties>
     util::EnableIfAllStringAny<void, Properties...> set_property(Properties&&... properties) {
         set_property(ov::AnyMap{std::forward<Properties>(properties)...});
     }

-    template <typename T, PropertyMutability mutability>
-    T get_property(const ov::Property<T, mutability>& property) const {
-        if (is_set_by_user(property)) {
-            return m_user_properties.at(property.name()).template as<T>();
-        }
-        OPENVINO_ASSERT(m_options_map.find(property.name()) != m_options_map.end(), "Property not found: ", property.name());
-        return static_cast<ConfigOption<T>*>(m_options_map.at(property.name()))->value;
-    }
-
     std::string to_string() const;

     void finalize(std::shared_ptr<IRemoteContext> context, const ov::RTMap& rt_info);
@@ -236,9 +247,6 @@ class OPENVINO_RUNTIME_API PluginConfig {
         }
     }

-    ov::Any get_property(const std::string& name, const std::vector<OptionVisibility>& allowed_visibility) const;
-    void set_property(const ov::AnyMap& properties, const std::vector<OptionVisibility>& allowed_visibility, bool throw_on_error);
-
     ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const;
     ov::AnyMap read_env(const std::vector<std::string>& prefixes) const;
     void cleanup_unsupported(ov::AnyMap& config) const;
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index b21547f40a57df..bbfe88d8737f80 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -48,31 +48,21 @@ size_t get_terminal_width() {

 namespace ov {

-ov::Any PluginConfig::get_property(const std::string& name) const {
-    const static std::vector<OptionVisibility> allowed_visibility = {OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL};
-    return get_property(name, allowed_visibility);
-}
-
-ov::Any PluginConfig::get_property(const std::string& name, const std::vector<OptionVisibility>& allowed_visibility) const {
+ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility allowed_visibility) const {
     if (m_user_properties.find(name) != m_user_properties.end()) {
         return m_user_properties.at(name);
     }

     auto option = get_option_ptr(name);
-    if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) {
+    // The option's visibility bits must be fully contained in the allowed mask
+    if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
         OPENVINO_THROW("Couldn't get unknown property: ", name);
     }

     return option->get_any();
 }

-void PluginConfig::set_property(const AnyMap& config) {
-    const static std::vector<OptionVisibility> allowed_visibility =
{OptionVisibility::RELEASE, OptionVisibility::RELEASE_INTERNAL, OptionVisibility::DEBUG};
-    const bool throw_on_error = true;
-    set_property(config, allowed_visibility, throw_on_error);
-}
-
-void PluginConfig::set_property(const ov::AnyMap& config, const std::vector<OptionVisibility>& allowed_visibility, bool throw_on_error) {
+void PluginConfig::set_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) {
     OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited");

     for (auto& kv : config) {
@@ -80,7 +70,7 @@ void PluginConfig::set_property(const ov::AnyMap& config, const std::vector<OptionVisibility>& allowed_visibility, bool throw_on_error)
-        if (std::find(allowed_visibility.begin(), allowed_visibility.end(), option->get_visibility()) == allowed_visibility.end()) {
+        if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
             if (throw_on_error)
                 OPENVINO_THROW("Couldn't set unknown property: ", name);
             else
@@ -134,25 +124,17 @@ bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) const {
 }

 void PluginConfig::apply_debug_options(std::shared_ptr<IRemoteContext> context) {
-    static std::vector<OptionVisibility> allowed_visibility = {
-        OptionVisibility::RELEASE,
-        OptionVisibility::RELEASE_INTERNAL,
-        OptionVisibility::DEBUG
-#ifdef ENABLE_DEBUG_CAPS
-#endif
-    };
-
     const bool throw_on_error = false;

     if (context) {
         ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name());
         cleanup_unsupported(config_properties);
-        set_property(config_properties, allowed_visibility, throw_on_error);
+        set_property(config_properties, OptionVisibility::ANY, throw_on_error);
     }

     ov::AnyMap env_properties = read_env({"OV_"});
     cleanup_unsupported(env_properties);
-    set_property(env_properties, allowed_visibility, throw_on_error);
+    set_property(env_properties, OptionVisibility::ANY, throw_on_error);
 }

 ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const {
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index 42b7fba115a273..d2c99585ab015b 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -40,7 +40,6 @@ struct NotEmptyTestConfig : public ov::PluginConfig {
     OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "")
     OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "")
     #undef OV_CONFIG_OPTION
-
     }

     NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() {
         for (const auto& kv : other.m_options_map) {
             m_options_map.at(kv.first)->set_any(kv.second->get_any());
         }
     }

-    #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__)
+    #define OV_CONFIG_OPTION(...)
OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__)
     OV_CONFIG_RELEASE_OPTION(, bool_property, true, "")
     OV_CONFIG_RELEASE_OPTION(, int_property, -1, "")
     OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "")
@@ -97,26 +96,26 @@ TEST(plugin_config, can_create_not_empty_config) {

 TEST(plugin_config, can_set_get_property) {
     NotEmptyTestConfig cfg;
-    ASSERT_NO_THROW(cfg.get_property(bool_property));
-    ASSERT_EQ(cfg.get_property(bool_property), true);
+    ASSERT_NO_THROW(cfg.get_bool_property());
+    ASSERT_EQ(cfg.get_bool_property(), true);
     ASSERT_NO_THROW(cfg.set_property(bool_property(false)));
-    ASSERT_EQ(cfg.get_property(bool_property), false);
+    ASSERT_EQ(cfg.get_bool_property(), false);
 }

 TEST(plugin_config, throw_for_unsupported_property) {
     NotEmptyTestConfig cfg;
-    ASSERT_ANY_THROW(cfg.get_property(unsupported_property));
+    ASSERT_ANY_THROW(cfg.get_property(unsupported_property.name()));
     ASSERT_ANY_THROW(cfg.set_property(unsupported_property(10.0f)));
 }

 TEST(plugin_config, can_direct_access_to_properties) {
     NotEmptyTestConfig cfg;
-    ASSERT_EQ(cfg.m_int_property.value, cfg.get_property(int_property));
+    ASSERT_EQ(cfg.m_int_property.value, cfg.get_int_property());
     ASSERT_NO_THROW(cfg.set_property(int_property(1)));
     ASSERT_EQ(cfg.m_int_property.value, -1); // user property doesn't impact member value until finalize() is called

     cfg.m_int_property.value = 2;
-    ASSERT_EQ(cfg.get_property(int_property), 1); // still 1 as user property was set previously
+    ASSERT_EQ(cfg.get_int_property(), 1); // still 1 as user property was set previously
 }

 TEST(plugin_config, finalization_updates_member) {
@@ -132,19 +131,19 @@ TEST(plugin_config, finalization_updates_member) {

 TEST(plugin_config, get_property_before_finalization_returns_user_property_if_set) {
     NotEmptyTestConfig cfg;
-    ASSERT_EQ(cfg.get_property(bool_property), true); // default value
+    ASSERT_EQ(cfg.get_bool_property(), true); // default value
     ASSERT_EQ(cfg.m_bool_property.value, true); // default value

     cfg.m_bool_property.value = false; // update member directly
-    ASSERT_EQ(cfg.get_property(bool_property), false); // OK, return the class member value as no user property was set
+    ASSERT_EQ(cfg.get_bool_property(), false); // OK, return the class member value as no user property was set

     ASSERT_NO_THROW(cfg.set_property(bool_property(true)));
     ASSERT_TRUE(cfg.is_set_by_user(bool_property));
-    ASSERT_EQ(cfg.get_property(bool_property), true); // now user property value is returned
+    ASSERT_EQ(cfg.get_bool_property(), true); // now user property value is returned
     ASSERT_EQ(cfg.m_bool_property.value, false); // but class member is not updated

     cfg.finalize(nullptr, {});
-    ASSERT_EQ(cfg.get_property(bool_property), cfg.m_bool_property.value); // equal after finalization
+    ASSERT_EQ(cfg.get_bool_property(), cfg.m_bool_property.value); // equal after finalization
     ASSERT_FALSE(cfg.is_set_by_user(bool_property)); // and user property is cleared
 }

@@ -194,7 +193,7 @@ TEST(plugin_config, can_copy_config) {
     ASSERT_EQ(cfg2.m_high_level_property.value, "value1");
     ASSERT_EQ(cfg2.m_low_level_property.value, "value2");
     ASSERT_EQ(cfg2.m_int_property.value, 1);
-    ASSERT_EQ(cfg2.get_property(bool_property), false); // ensure user properties are copied too
+    ASSERT_EQ(cfg2.get_bool_property(), false); // ensure user properties are copied too

     // check that cfg1 modification doesn't impact a copy
     cfg1.set_property(high_level_property("value3"));
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp
b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 2370d60dda808d..937077886b10ad 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -39,6 +39,7 @@ #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/performance_heuristics.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/util/common_util.hpp" #include "openvino/util/weights_path.hpp" @@ -219,7 +220,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(orig_config); + config.set_property(orig_config, OptionVisibility::RELEASE); config.finalize(context, get_rt_info(*model)); auto transformed_model = clone_and_transform_model(model, config, context); @@ -268,7 +269,7 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params) void Plugin::set_property(const ov::AnyMap &config) { auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) { - config.set_property(user_config); + config.set_property(user_config, OptionVisibility::RELEASE); // Check that custom layers config can be loaded if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { CustomLayerMap custom_layers; @@ -303,7 +304,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& auto ctx = get_default_context(device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(orig_config); + config.set_property(orig_config, OptionVisibility::RELEASE); config.finalize(ctx, get_rt_info(*model)); ProgramBuilder prog(ctx->get_engine(), config); @@ -358,7 +359,7 @@ std::shared_ptr Plugin::import_model(std::istream& model, } ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(_orig_config); + config.set_property(_orig_config, OptionVisibility::RELEASE); config.finalize(context_impl, {}); ov::CacheMode cache_mode = config.get_cache_mode(); @@ -466,7 +467,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] get_property: Couldn't find config for GPU with id ", device_id); const auto& c = m_configs_map.at(device_id); - return c.get_property(name); + return c.get_property(name, OptionVisibility::RELEASE); } auto StringRightTrim = [](std::string string, std::string substring, bool case_sensitive = true) { diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 529e0543f6de67..f185a2f321489d 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -52,7 +52,7 @@ void ExecutionConfig::apply_rt_info(std::shared_ptr context, con // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not // using that mechanism. 
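
Every public entry point in plugin.cpp now passes OptionVisibility::RELEASE, so RELEASE_INTERNAL and DEBUG options are rejected when they arrive through the public API, while apply_debug_options() still reaches them with OptionVisibility::ANY via the config file and OV_-prefixed environment variables (and with throw_on_error = false there). A behavioral sketch; the debug property name below is hypothetical:

    ExecutionConfig cfg;
    cfg.set_property({{ov::enable_profiling.name(), true}}, OptionVisibility::RELEASE);  // OK: RELEASE option
    cfg.set_property({{"GPU_DUMP_GRAPHS_PATH", "/tmp"}}, OptionVisibility::RELEASE);     // throws: not a RELEASE option
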
- if (get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + if (get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { apply_rt_info_property(ov::weights_path, rt_info); } } @@ -71,7 +71,7 @@ void ExecutionConfig::finalize_impl(std::shared_ptr context) { if (info.supports_immad) { m_use_onednn = true; } - if (get_property(ov::intel_gpu::use_onednn)) { + if (get_use_onednn()) { m_queue_type = QueueTypes::in_order; } @@ -98,7 +98,7 @@ void ExecutionConfig::apply_hints(const cldnn::device_info& info) { void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::execution_mode)) { - const auto mode = get_property(ov::hint::execution_mode); + const auto mode = get_execution_mode(); if (!is_set_by_user(ov::hint::inference_precision)) { if (mode == ov::hint::ExecutionMode::ACCURACY) { m_inference_precision = ov::element::undefined; @@ -114,7 +114,7 @@ void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::performance_mode)) { - const auto mode = get_property(ov::hint::performance_mode); + const auto mode = get_performance_mode(); if (!is_set_by_user(ov::num_streams)) { if (mode == ov::hint::PerformanceMode::LATENCY) { m_num_streams = 1; @@ -124,18 +124,18 @@ void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { } } - if (get_property(ov::num_streams) == ov::streams::AUTO) { + if (get_num_streams() == ov::streams::AUTO) { int32_t n_streams = std::max(info.num_ccs, 2); m_num_streams = n_streams; } - if (get_property(ov::internal::exclusive_async_requests)) { + if (get_exclusive_async_requests()) { m_num_streams = 1; } // Allow kernels reuse only for single-stream scenarios - if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { - if (get_property(ov::num_streams) != 1) { + if (get_enable_kernels_reuse()) { + if (get_num_streams() != 1) { m_enable_kernels_reuse = false; } } @@ -143,7 +143,7 @@ void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { if (is_set_by_user(ov::hint::model_priority)) { - const auto priority = get_property(ov::hint::model_priority); + const auto priority = get_model_priority(); if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { m_queue_priority = priority; } diff --git a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp index 6d8f231b93576a..6fb20181eb1936 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp @@ -318,7 +318,7 @@ class WeightsPrimitiveFusingTestOneDNN : public BaseFusingTestget_layout_optimizer().set_implementation_forcing(config.get_property(ov::intel_gpu::force_implementations)); + prog->get_layout_optimizer().set_implementation_forcing(config.get_force_implementations()); program_wrapper::apply_opt_pass(*prog); ASSERT_TRUE(!has_node(*prog, "permute")); diff --git a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp index 1fbeab7e67ac2d..b46033f15d77db 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp @@ -136,7 +136,7 @@ 
TEST(remove_redundant_reorders, skip_reorder_fusing_when_sibling_not_support_pad auto prog = program::build_program(engine, topology, config, false, true); config.set_property(ov::intel_gpu::optimize_data(true)); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -165,7 +165,7 @@ TEST(remove_redundant_reorders, not_to_fuse_reshape_with_fused_prims) { auto prog = program::build_program(engine, topology, config, false, true); program_wrapper::apply_opt_pass(*prog); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -204,7 +204,7 @@ TEST(remove_redundant_reorders, not_to_fuse_permute) { auto prog = program::build_program(engine, topology, config, false, true); ASSERT_NE(prog, nullptr); - bool opt_data = config.get_property(ov::intel_gpu::optimize_data); + bool opt_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog); program_wrapper::apply_opt_pass(*prog, opt_data); @@ -266,7 +266,7 @@ TEST(remove_redundant_reorders, remove_fused) { auto prog = program::build_program(engine, topology, config, false, true); program_wrapper::apply_opt_pass(*prog); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -293,7 +293,7 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_mvn_dyn) { config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config, false, true); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -336,7 +336,7 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_concat_dyn) { config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config, false, true); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp index b145fa603063bc..1b2d0be5907293 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp @@ -1422,7 +1422,7 @@ struct concat_gpu_4d_implicit : public concat_gpu { } auto outputs = concat_network->execute(); - bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); + bool concat_opt_enabled = config.get_optimize_data(); bool concat_opt_result = std::static_pointer_cast(concat_network->get_primitive("concat"))->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); @@ -1642,7 +1642,7 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu { } auto outputs = concat_network.execute(); - bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); + bool concat_opt_enabled = config.get_optimize_data(); bool concat_opt_result = 
std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); @@ -1805,7 +1805,7 @@ struct concat_gpu_4d_explicit : public concat_gpu { } auto outputs = concat_network.execute(); - bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); + bool concat_opt_enabled = config.get_optimize_data(); bool concat_opt_result = std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); // If sibling is using onednn impl and batch > 1, the onednn impl cannot process the implicit concat'ed buffer. diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp index 8caad2b576d734..8fd32877291d44 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp @@ -1913,7 +1913,7 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant) auto outputs = net.execute(); auto executed_primitives = net.get_executed_primitives(); - if (config.get_property(ov::intel_gpu::queue_type) != QueueTypes::out_of_order) + if (config.get_queue_type() != QueueTypes::out_of_order) GTEST_SKIP(); ASSERT_TRUE(executed_primitives.count("in") == 1); diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp index 6af8572d323907..23641dbdba7a63 100644 --- a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp @@ -54,14 +54,14 @@ void generic_test::run_single_test(bool is_caching_test) { } } std::string input_name = "input" + std::to_string(i); - if ((i == 0) && generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { + if ((i == 0) && generic_params->network_config.get_optimize_data()) { // Add reorder after the first input in case of optimize data flag since it might change the input layout. input_name = "input0_init"; } // First input is provided to the network as input_layout. // Other inputs are provided as input_layout if optimize data flag is off. Otherwise they are provided as data. - if ((i == 0) || !generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { + if ((i == 0) || !generic_params->network_config.get_optimize_data()) { topology.add(input_layout(input_name, input_mems[i]->get_layout())); input_layouts_names.push_back(input_name); } else { @@ -74,7 +74,7 @@ void generic_test::run_single_test(bool is_caching_test) { } } - if (generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) { + if (generic_params->network_config.get_optimize_data()) { // Add reorder after the first input in case of optimize data flag since it might change the input layout. 
        topology.add(reorder("input0", input_info("input0_init"), input_mems[0]->get_layout()));
    }

From d84741e7d0481dd6bbe39655e1b9ff4c731697b9 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Tue, 24 Dec 2024 10:33:25 +0400
Subject: [PATCH 13/44] Fixes and visit_attributes method impl

Signed-off-by: Vladimir Paramuzov
---
 .../openvino/runtime/plugin_config.hpp        |  2 +-
 src/inference/src/dev/plugin_config.cpp       | 38 ++++++++++---------
 .../include/intel_gpu/runtime/options.inl     |  2 +-
 src/plugins/intel_gpu/src/plugin/graph.cpp    | 33 +++++++++++++++-
 4 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
index 36785d071bfc8e..2eacd157ec8b94 100644
--- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp
+++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
@@ -217,7 +217,7 @@ class OPENVINO_RUNTIME_API PluginConfig {

     void finalize(std::shared_ptr context, const ov::RTMap& rt_info);

-    bool visit_attributes(ov::AttributeVisitor& visitor) const;
+    bool visit_attributes(ov::AttributeVisitor& visitor);

 protected:
     virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {}
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index bbfe88d8737f80..bebcd891251616 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -55,7 +55,7 @@ ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility all
     }

     auto option = get_option_ptr(name);
-    if ((allowed_visibility & option->get_visibility()) == option->get_visibility()) {
+    if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
         OPENVINO_THROW("Couldn't get unknown property: ", name);
     }

@@ -70,7 +70,7 @@ void PluginConfig::set_property(const ov::AnyMap& config, OptionVisibility allow
         auto& val = kv.second;

         auto option = get_option_ptr(name);
-        if ((allowed_visibility & option->get_visibility()) == option->get_visibility()) {
+        if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
             if (throw_on_error)
                 OPENVINO_THROW("Couldn't set unknown property: ", name);
             else
@@ -111,14 +111,16 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R
     m_is_finalized = true;
 }

-bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) const {
-    // for (const auto& prop : m_user_properties) {
-    //     visitor.on_attribute(prop.first + "__user", prop.second.as<std::string>());
-    // }
-    // for (const auto& prop : m_options_map) {
-    //     visitor.on_attribute(prop.first + "__internal", prop.second->get_any().as<std::string>());
-    // }
-    // visitor.on_attribute("is_finalized", m_is_finalized);
+bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) {
+    for (const auto& prop : m_user_properties) {
+        auto val = prop.second.as<std::string>();
+        visitor.on_attribute(prop.first + "__user", val);
+    }
+    for (const auto& prop : m_options_map) {
+        auto val = prop.second->get_any().as<std::string>();
+        visitor.on_attribute(prop.first + "__internal", val);
+    }
+    visitor.on_attribute("is_finalized", m_is_finalized);
     return true;
 }

@@ -212,20 +214,20 @@ void PluginConfig::cleanup_unsupported(ov::AnyMap& config) const {
 }

 std::string PluginConfig::to_string() const {
-    std::stringstream s;
+    std::stringstream ss;

-    s << "-----------------------------------------\n";
-    s << "PROPERTIES:\n";
+    ss << "-----------------------------------------\n";
+    ss << "PROPERTIES:\n";

     for (const auto& option : m_options_map) {
-        s << "\t" << option.first << ": " << option.second->get_any().as<std::string>() << std::endl;
+        ss << "\t" << option.first << ": " << option.second->get_any().as<std::string>() << std::endl;
     }
-    s << "USER PROPERTIES:\n";
+    ss << "USER PROPERTIES:\n";
     for (const auto& user_prop : m_user_properties) {
-        s << "\t" << user_prop.first << ": " << user_prop.second.as<std::string>() << std::endl;
+        ss << "\t" << user_prop.first << ": " << user_prop.second.as<std::string>() << std::endl;
     }
-    return s.str();
+    return ss.str();
 }

 void PluginConfig::print_help() const {
@@ -276,7 +278,7 @@ void PluginConfig::print_help() const {
     const size_t max_name_width = static_cast<size_t>(std::get<0>(*max_name_length_item).size() + std::get<1>(*max_name_length_item).size());
     const size_t terminal_width = get_terminal_width();
-    ss << std::left << std::setw(max_name_width) << ("Option name") << " | " << " Description " << "\n";
+    ss << std::left << std::setw(max_name_width) << "Option name" << " | " << " Description " << "\n";
     ss << std::left << std::setw(terminal_width) << std::setfill('-') << "" << "\n";
     for (auto& kv : options_desc) {
         ss << format_text(std::get<0>(kv), std::get<1>(kv), std::get<2>(kv), max_name_width, terminal_width) << "\n";
     }
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
index 1941aaec69b2bf..1e4f7076887a3e 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
@@ -45,6 +45,7 @@ OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, partial_build_program, false, "
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "Switch between new and old shape inference flow. Shall be removed soon")
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "Threshold for preallocation feature in case when it uses ratio policy")
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disable onednn for usage for particular model/platform")
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into batch for more efficient ocl compilation")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 0 - Disabled, 4 - Maximum verbosity")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file")
@@ -58,7 +59,6 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, "", "Save intermediate in/ou
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, "", "Save csv file with memory pool info to specified folder")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, "", "Space separated list of iterations where other dump options should be enabled")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measure and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into batch for more efficient ocl compilation")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asynchronously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime inplace optimizations for operations like concat and crop")
diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp
index 4f1f541b725090..a28d986ce46160 100644
--- a/src/plugins/intel_gpu/src/plugin/graph.cpp
+++ b/src/plugins/intel_gpu/src/plugin/graph.cpp
@@ -2,7 +2,9 @@
 // SPDX-License-Identifier: Apache-2.0
 //
+#include "intel_gpu/graph/serialization/helpers.hpp"
 #include "intel_gpu/runtime/layout.hpp"
+#include "openvino/runtime/plugin_config.hpp"
 #include "openvino/runtime/threading/executor_manager.hpp"
 #include "openvino/runtime/exec_model_info.hpp"
 #include "openvino/pass/serialize.hpp"
@@ -33,6 +35,34 @@
 #include

 namespace ov::intel_gpu {
+namespace {
+
+class OstreamAttributeVisitor : public ov::AttributeVisitor {
+    cldnn::BinaryOutputBuffer& os;
+
+    template <typename T>
+    void append_attribute(const std::string& name, const T& value) {
+        os << name;
+        os << value;
+    }
+public:
+    OstreamAttributeVisitor(cldnn::BinaryOutputBuffer& os) : os(os) {}
+
+    void on_adapter(const std::string& name, ov::ValueAccessor<void>& adapter) override {
+        OPENVINO_THROW("Attribute ", name, " can't be processed\n");
+    }
+
+    void on_adapter(const std::string& name, ov::ValueAccessor<bool>& adapter) override {
+        append_attribute(name, adapter.get());
+    }
+
+    void on_adapter(const std::string& name, ov::ValueAccessor<std::string>& adapter) override {
+        append_attribute(name, adapter.get());
+    }
+};
+
+} // namespace

 Graph::Graph(std::shared_ptr<ov::Model> model, const RemoteContextImpl::Ptr& context, const ExecutionConfig& config, uint16_t stream_id)
     : m_context(context)
@@ -512,7 +542,8 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) {
             ob << perf_item.second.second.parentPrimitive;
         }
     }
-    // ob << m_config;
+    OstreamAttributeVisitor visitor(ob);
+    m_config.visit_attributes(visitor);
     ob.set_stream(m_network->get_stream_ptr().get());
     m_network->get_program()->save(ob);

From 29c978ec90ed060896adc7d05939e49c20a81f5e Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Tue, 24 Dec 2024 11:28:55 +0400
Subject: [PATCH 14/44] Refactor debug knobs

Signed-off-by: Vladimir Paramuzov
---
 .../intel_gpu/runtime/internal_properties.hpp | 56 ++++++++++++++++---
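
[Editor's note, illustration only -- not part of the patch. export_model() above streams each config attribute as a name/value pair through OstreamAttributeVisitor. A matching read-side visitor for the import path could be a minimal sketch like the one below; it assumes cldnn::BinaryInputBuffer exposes operator>> for std::string and bool, that both sides enumerate attributes in the same order, and the class name IstreamAttributeVisitor is hypothetical.]

    class IstreamAttributeVisitor : public ov::AttributeVisitor {
        cldnn::BinaryInputBuffer& ib;
    public:
        explicit IstreamAttributeVisitor(cldnn::BinaryInputBuffer& ib) : ib(ib) {}

        void on_adapter(const std::string& name, ov::ValueAccessor<void>& adapter) override {
            OPENVINO_THROW("Attribute ", name, " can't be processed\n");
        }

        void on_adapter(const std::string& name, ov::ValueAccessor<bool>& adapter) override {
            std::string key;  // consume the attribute name written by the export side
            bool val;
            ib >> key >> val;
            adapter.set(val);
        }

        void on_adapter(const std::string& name, ov::ValueAccessor<std::string>& adapter) override {
            std::string key, val;
            ib >> key >> val;
            adapter.set(val);
        }
    };

Calling m_config.visit_attributes() with such a visitor on load would restore every option in place, mirroring the save path.
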
.../include/intel_gpu/runtime/options.inl | 18 +++--- .../intel_gpu/src/graph/layout_optimizer.cpp | 5 -- src/plugins/intel_gpu/src/graph/program.cpp | 3 +- .../src/graph/program_dump_graph.cpp | 2 +- 5 files changed, 60 insertions(+), 24 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index 77a00294e8076f..ec36c924c5d185 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -35,6 +35,40 @@ inline std::ostream& operator<<(std::ostream& os, const QueueTypes& val) { return os; } +enum class DumpFormat : uint8_t { + binary = 0, + text = 1, + text_raw = 2, +}; + +inline std::ostream& operator<<(std::ostream& os, const DumpFormat& val) { + switch (val) { + case DumpFormat::binary: os << "binary"; break; + case DumpFormat::text: os << "text"; break; + case DumpFormat::text_raw: os << "text_raw"; break; + default: os << "unknown"; + } + + return os; +} + +enum class DumpTensors : uint8_t { + all = 0, + in = 1, + out = 2, +}; + +inline std::ostream& operator<<(std::ostream& os, const DumpTensors& val) { + switch (val) { + case DumpTensors::all: os << "all"; break; + case DumpTensors::in: os << "in"; break; + case DumpTensors::out: os << "out"; break; + default: os << "unknown"; + } + + return os; +} + /** * @brief Defines queue type that must be used for model execution */ @@ -55,15 +89,19 @@ static constexpr Property use_onednn{"USE_ONEDNN"} static constexpr Property help{"HELP"}; static constexpr Property verbose{"VERBOSE"}; -static constexpr Property log_to_file{"LOG_TO_FILE"}; -static constexpr Property disable_usm{"DISABLE_USM"}; +static constexpr Property log_to_file{"GPU_LOG_TO_FILE"}; +static constexpr Property disable_usm{"GPU_DISABLE_USM"}; static constexpr Property disable_onednn_post_ops_opt{"DISABLE_ONEDNN_POST_OPS_OPT"}; -static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; -static constexpr Property dump_profiling_data{"DUMP_PROFILING_DATA"}; -static constexpr Property dump_sources{"DUMP_SOURCES"}; -static constexpr Property dump_tensors{"DUMP_TENSORS"}; -static constexpr Property dump_memory_pool{"DUMP_MEMORY_POOL"}; -static constexpr Property dump_iterations{"DUMP_ITERATIONS"}; +static constexpr Property dump_graphs_path{"GPU_DUMP_GRAPHS_PATH"}; +static constexpr Property dump_profiling_data_path{"GPU_DUMP_PROFILING_DATA_PATH"}; +static constexpr Property dump_sources_path{"GPU_DUMP_SOURCES_PATH"}; +static constexpr Property dump_tensors_path{"GPU_DUMP_TENSORS_PATH"}; +static constexpr Property dump_tensors{"DUMP_TENSORS"}; +static constexpr Property dump_layers{"GPU_DUMP_LAYERS"}; +static constexpr Property dump_tensors_format{"DUMP_TENSORS_FORMAT"}; +static constexpr Property dump_memory_pool_path{"GPU_DUMP_MEMORY_POOL_PATH"}; +static constexpr Property dump_batch_limit{"GPU_DUMP_BATCH_LIMIT"}; +static constexpr Property, ov::PropertyMutability::RW> dump_iterations{"GPU_DUMP_ITERATIONS"}; static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; @@ -75,7 +113,7 @@ static constexpr Property disable_fake_alignme static constexpr Property use_usm_host{"USE_USM_HOST"}; static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; static constexpr Property 
mem_prealloc_options{"MEM_PREALLOC_OPTIONS"}; -static constexpr Property load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; +static constexpr Property, ov::PropertyMutability::RW> load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 1e4f7076887a3e..93b4653034ab92 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -52,12 +52,16 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to spec OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, help, false, "Print help message for all config options") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable optimization pass for onednn post-ops") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data, "", "Save csv file with per-stage and per-primitive profiling data to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs, "", "Save intermediate graph representations during model compilation pipeline to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources, "", "Save generated sources for each kernel to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, "", "Save intermediate in/out tensors of each primitive to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, "", "Save csv file with memory pool info to specified folder") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, "", "Space separated list of iterations where other dump options should be enabled") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data_path, "", "Save csv file with per-stage and per-primitive profiling data to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs_path, "", "Save intermediate graph representations during model compilation pipeline to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources_path, "", "Save generated sources for each kernel to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_path, "", "Save intermediate in/out tensors of each primitive to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, ov::intel_gpu::DumpTensors::all, "Tensor types to dump. Supported values: all, inputs, outputs") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_format, ov::intel_gpu::DumpFormat::text, "Format of the tensors dump. 
Supported values: binary, text, text_raw")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_layers, "", "Activate dump for specified layers only")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool_path, "", "Save csv file with memory pool info to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, std::set<int64_t>{}, "Space separated list of iterations where other dump options should be enabled")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits<int64_t>::max(), "Max number of batch elements to dump")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measure and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asynchronously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation")
@@ -68,4 +72,4 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fa
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usage for all allocations")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "Preallocation setting")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, "", "List of layers to load raw binary")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, std::vector<std::string>{}, "List of layers to load raw binary")
diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
index b64a4744360071..067ca8c8f5fb0d 100644
--- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
+++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
@@ -439,11 +439,6 @@ bool should_use_winograd_2x3_s1(const convolution_node& node,
     if (disable_winograd_conv)
         return false;

-    // cases when NOT to use winograd
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(debug_config->disable_winograd_conv == 1)
-        return false;
-
     auto prim = node.get_primitive();
     if (input_layout.data_type != data_types::f16
         || (input_layout.is_static() && input_layout.feature() % 64 != 0)  // current algorithm is effective for ifm to be multiply of 64
diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
index 98c61b021477d5..82e6f7b708d8ca 100644
--- a/src/plugins/intel_gpu/src/graph/program.cpp
+++ b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -1397,8 +1397,7 @@ program::primitives_info program::get_current_stage_info() const {
 }

 void program::save_pass_info(std::string pass_name) {
-    // TODO: Directory path here can be probably changed to some bool flag
-    if (!_config.get_dump_graphs().empty())
+    if (!_config.get_dump_graphs_path().empty())
         optimizer_passes_info.emplace_back(pass_name, get_current_stage_info());
 }
diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp
index aeae56173f4fd0..61daf949e762f0 100644
--- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp
+++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp
@@ -157,7 +157,7 @@ void
dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - auto path = config.get_dump_graphs(); + auto path = config.get_dump_graphs_path(); if (path.empty()) { return {}; } From e830be276e7571cda545c059d840880cea577c5f Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 24 Dec 2024 15:12:36 +0400 Subject: [PATCH 15/44] split set_prop and set_user_prop again Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 5 +- src/inference/src/dev/plugin_config.cpp | 27 ++++- .../include/intel_gpu/graph/program.hpp | 1 - .../intel_gpu/plugin/program_builder.hpp | 10 +- .../intel_gpu/runtime/internal_properties.hpp | 1 + .../include/intel_gpu/runtime/options.inl | 3 +- .../graph_optimizer/prepare_buffer_fusing.cpp | 7 +- .../prepare_primitive_fusing.cpp | 8 +- .../prepare_primitive_fusing_through.cpp | 3 + .../graph/graph_optimizer/reorder_inputs.cpp | 2 +- .../src/graph/impls/ocl/kernels_cache.cpp | 22 +--- .../src/graph/impls/ocl/kernels_cache.hpp | 1 - .../impls/onednn/concatenation_onednn.hpp | 3 +- .../graph/impls/onednn/convolution_onednn.hpp | 3 +- .../impls/onednn/deconvolution_onednn.hpp | 3 +- .../impls/onednn/fully_connected_onednn.hpp | 3 +- .../src/graph/impls/onednn/gemm_onednn.hpp | 3 +- .../graph/impls/onednn/lstm_seq_onednn.hpp | 4 +- .../src/graph/impls/onednn/pooling_onednn.hpp | 3 +- .../impls/onednn/primitive_onednn_base.h | 6 +- .../src/graph/impls/onednn/reduce_onednn.hpp | 3 +- .../src/graph/impls/onednn/reorder_onednn.hpp | 3 +- .../src/graph/include/primitive_inst.h | 2 + .../src/graph/include/program_node.h | 1 + src/plugins/intel_gpu/src/graph/network.cpp | 5 +- .../intel_gpu/src/graph/primitive_inst.cpp | 15 +-- src/plugins/intel_gpu/src/graph/program.cpp | 24 +--- .../intel_gpu/src/graph/program_node.cpp | 3 +- .../fully_connected_kernel_bf_tiled.cpp | 20 ---- src/plugins/intel_gpu/src/plugin/graph.cpp | 6 +- .../intel_gpu/src/plugin/ops/condition.cpp | 2 +- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 3 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 112 ++++++++++++++++-- .../intel_gpu/src/plugin/program_builder.cpp | 45 +------ .../src/plugin/sync_infer_request.cpp | 7 +- .../dynamic_quantize_fully_connected.cpp | 6 +- .../dynamic_quantize_fully_connected.hpp | 2 +- .../src/plugin/transformations_pipeline.cpp | 15 +-- .../src/runtime/execution_config.cpp | 2 +- .../intel_gpu/src/runtime/ocl/ocl_device.cpp | 3 - 40 files changed, 209 insertions(+), 188 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 2eacd157ec8b94..08dd148b6d1c36 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -77,7 +77,7 @@ OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) namespace ov { - +#define ENABLE_DEBUG_CAPS enum class OptionVisibility : uint8_t { RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only @@ -205,7 +205,8 @@ class OPENVINO_RUNTIME_API PluginConfig { PluginConfig(PluginConfig&& other) = delete; PluginConfig& operator=(PluginConfig&& other) = delete; - void set_property(const ov::AnyMap& properties, OptionVisibility allowed_visibility = OptionVisibility::ANY, bool throw_on_error = true); + 
void set_property(const ov::AnyMap& properties); + void set_user_property(const ov::AnyMap& properties, OptionVisibility allowed_visibility = OptionVisibility::ANY, bool throw_on_error = true); Any get_property(const std::string& name, OptionVisibility allowed_visibility = OptionVisibility::ANY) const; template diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index bebcd891251616..b756894d5e414f 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -62,7 +62,18 @@ ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility all return option->get_any(); } -void PluginConfig::set_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) { +void PluginConfig::set_property(const ov::AnyMap& config) { + OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited"); + + for (auto& kv : config) { + auto& name = kv.first; + auto& val = kv.second; + + get_option_ptr(name)->set_any(val); + } +} + +void PluginConfig::set_user_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) { OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited"); for (auto& kv : config) { @@ -131,12 +142,22 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) if (context) { ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); cleanup_unsupported(config_properties); - set_property(config_properties, OptionVisibility::ANY, throw_on_error); +#ifdef ENABLE_DEBUG_CAPS + for (auto& prop : config_properties) { + std::cout << "Non default config value for " << prop.first << " = " << prop.second.as() << std::endl; + } +#endif + set_user_property(config_properties, OptionVisibility::ANY, throw_on_error); } ov::AnyMap env_properties = read_env({"OV_"}); cleanup_unsupported(env_properties); - set_property(env_properties, OptionVisibility::ANY, throw_on_error); +#ifdef ENABLE_DEBUG_CAPS + for (auto& prop : env_properties) { + std::cout << "Non default env value for " << prop.first << " = " << prop.second.as() << std::endl; + } +#endif + set_user_property(env_properties, OptionVisibility::ANY, throw_on_error); } ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index bec721ad3938a6..96bfff48820aaa 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -314,7 +314,6 @@ struct program { // if subgraph can be optimized if it consists of only inputs and corresponding outputs bool _can_be_optimized; std::unique_ptr _impls_cache; - const size_t _impls_cache_capacity = 300; std::shared_ptr _compilation_context; bool _loaded_from_cache = false; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 51087405f09769..6b28c02930d530 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -79,7 +79,7 @@ struct PerfCounter { class ProgramBuilder final { public: - ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, bool partialBuild = false, + 
ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, std::shared_ptr task_executor = nullptr, std::shared_ptr compilation_context = nullptr, bool innerProgram = false); @@ -137,8 +137,8 @@ class ProgramBuilder final { void add_primitive(const ov::Node& op, std::shared_ptr prim, std::vector aliases = {}); - bool use_new_shape_infer() const { return allow_new_shape_infer; } - bool requires_new_shape_infer(const std::shared_ptr& op) const; + bool use_new_shape_infer() const { return m_config.get_allow_new_shape_infer(); } + bool is_inner_program() const { return m_is_inner_program; } bool is_query_mode() { return queryMode; } @@ -156,8 +156,6 @@ class ProgramBuilder final { std::shared_ptr m_topology; CustomLayerMap m_custom_layers; - bool allow_new_shape_infer = false; - bool queryMode; std::shared_ptr m_task_executor; @@ -172,7 +170,7 @@ class ProgramBuilder final { void cleanup_build(); // TODO(eunsoo): remove createTopolpgyOnly argument and add another method to create topology from ngraph function - std::shared_ptr build(const std::vector>& ops, bool partialBuild = false, bool innerProgram = false); + std::shared_ptr build(const std::vector>& ops, bool innerProgram = false); void CreateSingleLayerPrimitive(const std::shared_ptr& op); }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index ec36c924c5d185..5924457193765d 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -109,6 +109,7 @@ static constexpr Property disable_runtime_buff static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property disable_fc_swiglu_fusion{"DISABLE_FC_SWIGLU_FUSION"}; static constexpr Property disable_fake_alignment{"DISABLE_FAKE_ALIGNMENT"}; static constexpr Property use_usm_host{"USE_USM_HOST"}; static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 93b4653034ab92..9d63b6b15e3368 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -46,6 +46,7 @@ OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_new_shape_infer, false, " OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, buffers_preallocation_ratio, 1.1f, "Threshold for preallocation feature in case when it uses ratio policy") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disable onednn for usage for particular model/platform") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into batch for more efficient ocl compilation") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 
0 - Disabled, 4 - Maximum verbosity")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file")
@@ -63,11 +64,11 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool_path, "", "Save csv file
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, std::set<int64_t>{}, "Space separated list of iterations where other dump options should be enabled")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits<int64_t>::max(), "Max number of batch elements to dump")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measure and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asynchronously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime inplace optimizations for operations like concat and crop")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "Disable fusions of operations as post-ops/fused-ops")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "Disable pass which merges QKV projections into single MatMul")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fc_swiglu_fusion, false, "Disable pass which merges FC and SwiGLU ops")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fake alignment feature which tries to keep gpu friendly memory alignment for arbitrary tensor shapes")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usage for all allocations")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations")
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
index 03e4af4d16359b..8e8cac35fa1fb0 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
@@ -80,8 +80,8 @@ bool concat_in_place_optimization::match(const program_node& concat_node,
     if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph())
         return false;
     bool do_runtime_buffer_fusing = true;
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) {
+    const auto& config = concat_node.get_config();
+    GPU_DEBUG_IF(config.get_disable_runtime_buffer_fusing()) {
         do_runtime_buffer_fusing = false;
     }

@@ -522,8 +522,7 @@ bool crop_in_place_optimization::match(const program_node& node,
         return false;

     if (node.get_users().size() > 0) {
-        GPU_DEBUG_GET_INSTANCE(debug_config);
-        GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing && node.is_dynamic()) {
+        GPU_DEBUG_IF(node.get_config().get_disable_runtime_buffer_fusing() && node.is_dynamic()) {
             return false;
         }
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
index ce5333f95a1b59..cc3b5ff2260ea6 100644
---
a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -56,6 +56,9 @@ using namespace cldnn; void prepare_primitive_fusing::run(program& p) { + if (p.get_config().get_disable_post_ops_fusions()) + return; + fuse_reorders(p); remove_redundant_reshape(p); fuse_swiglu(p); @@ -165,10 +168,7 @@ void prepare_primitive_fusing::fuse_reorders(program &p) { } void prepare_primitive_fusing::fuse_swiglu(program &p) { - GPU_DEBUG_GET_INSTANCE(debug_config); - bool disable_fc_swiglu_fusion = false; - GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) - disable_fc_swiglu_fusion = true; + bool disable_fc_swiglu_fusion = p.get_config().get_disable_fc_swiglu_fusion(); // Apply only for high performant GPU if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128) return; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index f63f1bf4efbe21..78b494c52645de 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -16,6 +16,9 @@ using namespace cldnn; void prepare_primitive_fusing_through::run(program& p) { + if (p.get_config().get_disable_post_ops_fusions()) + return; + auto try_fuse_through = [&](program_node& node) -> std::vector { // This function tries to fuse peer_node to first non reorder or reshape previous primitive. // It returns chain of primitives (reshapes and reorders) including potential fused_node (e.g. Conv, FC, etc) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index 7b3cbdabe639a0..269a88052e7bb0 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -704,7 +704,7 @@ void reorder_inputs::run(program& p, reorder_factory& rf) { GPU_DEBUG_LOG_PASS << " " << node_ptr->id() << " " << fmt_to_str(fmt) << std::endl; } - GPU_DEBUG_IF(debug_config->verbose >= 2) { + GPU_DEBUG_IF(p.get_config().get_verbose() >= 2) { reorder_cnt total_reorder_count = std::accumulate(p.get_processing_order().begin(), p.get_processing_order().end(), diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 0d90dc31e691f9..e5c1fe016b96df 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -131,14 +131,6 @@ bool kernels_cache::is_cache_enabled() const { return !_config.get_cache_dir().empty(); } -size_t kernels_cache::get_max_kernels_per_batch() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->max_kernels_per_batch >= 1) { - return static_cast(debug_config->max_kernels_per_batch); - } - return _config.get_max_kernels_per_batch(); -} - void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::vector* all_batches) const { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "KernelsCache::BuildAll::GetProgramSource"); std::map>> program_buckets; @@ -205,7 +197,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, // Create new kernels batch 
when the limit is reached // and current kernel's entry_point is duplicated in this kernels batch - if (current_bucket.back().kernels_counter >= get_max_kernels_per_batch() + if (current_bucket.back().kernels_counter >= _config.get_max_kernels_per_batch() || current_bucket.back().entry_point_to_id.find(entry_point) != current_bucket.back().entry_point_to_id.end() || need_separate_batch(entry_point)) { const auto& batch_id = static_cast(current_bucket.size()); @@ -247,9 +239,8 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, b.hash_value = std::hash()(full_code); std::string dump_sources_dir = ""; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { - dump_sources_dir = debug_config->dump_sources; + GPU_DEBUG_IF(!_config.get_dump_sources_path().empty()) { + dump_sources_dir = _config.get_dump_sources_path(); } // Add -g -s to build options to allow IGC assembly dumper to associate assembler sources with corresponding OpenCL kernel code lines @@ -307,10 +298,9 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co bool dump_sources = batch.dump_custom_program; std::string dump_sources_dir = ""; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { + GPU_DEBUG_IF(!_config.get_dump_sources_path().empty()) { dump_sources = true; - dump_sources_dir = debug_config->dump_sources; + dump_sources_dir = _config.get_dump_sources_path(); } std::string err_log; // accumulated build log from all program's parts (only contains messages from parts which @@ -385,7 +375,7 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co if (is_cache_enabled()) { // If kernels caching is enabled, then we save compiled bucket to binary file with name ${code_hash_value}.cl_cache // Note: Bin file contains full bucket, not separate kernels, so kernels reuse across different models is quite limited - // Bucket size can be changed in get_max_kernels_per_batch() method, but forcing it to 1 will lead to much longer + // Bucket size can be changed by max_kernels_per_batch config option, but forcing it to 1 will lead to much longer // compile time. 
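        // (Editor's note, illustrative arithmetic only -- not part of the patch: with the
        // default max_kernels_per_batch of 8, a program object that collects e.g. 20
        // kernels ends up with ceil(20 / 8) = 3 batches, i.e. 3 separate OpenCL program
        // builds; raising the limit means fewer builds, each compiling more kernels.)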
std::lock_guard lock(cacheAccessMutex); ov::intel_gpu::save_binary(cached_bin_name, getProgramBinaries(program)); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp index 56459b93d2481c..b45226f44bd3e9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp @@ -113,7 +113,6 @@ class kernels_cache { std::string get_cache_path() const; bool is_cache_enabled() const; - size_t get_max_kernels_per_batch() const; bool _reuse_kernels = false; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp index 9e0a3fa5cfb390..64b92a15d1f4ba 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp @@ -18,8 +18,9 @@ struct ConcatenationImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; static const std::vector supported_types = { ov::element::f16, ov::element::u8, ov::element::i8 }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp index c3f599fc5db9f6..430c42dee57f75 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp @@ -23,8 +23,9 @@ struct ConvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& conv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp index 039cf36261caa0..238214f82dc6fb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp @@ -19,8 +19,9 @@ struct DeconvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& deconv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp index c4dc5f7faa6531..731a83372a9dfc 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp @@ -21,8 +21,9 @@ struct FullyConnectedImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& fc_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp index 6c576d177043ee..3d64d2009490c0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp @@ -18,8 +18,9 @@ struct GemmImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& gemm_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp index 6fd16a4dd04acf..4b2615c62e2747 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp @@ -22,10 +22,10 @@ struct LSTMSeqImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (info.arch == gpu_arch::unknown) + if (info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; - const auto& lstm_seq_node = node.as(); const auto& in_layout = lstm_seq_node.get_input_layout(0); const auto& out_layout = lstm_seq_node.get_output_layout(0); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp index 4710b0c77b83c7..ced0316e13a08f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp @@ -19,8 +19,9 @@ struct PoolingImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& in_layout = node.get_input_layout(0); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 4aca436bdd34d8..fe5920355e29c7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -51,8 +51,7 @@ struct typed_primitive_onednn_impl : public 
typed_primitive_impl { _scratchpad_md = _pd.scratchpad_desc(); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 4) { + GPU_DEBUG_IF(config.get_verbose() >= 4) { if (_scratchpad_md.get_size() > 0) { static std::atomic_llong total{0}; int64_t size = _scratchpad_md.get_size() / 1048576; @@ -71,8 +70,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _pd(), _prim() { _enable_profiling = config.get_enable_profiling(); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!config.get_dump_profiling_data_path().empty()) { _enable_profiling = true; } } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp index 68d963fd9e369f..4a4a4c60df032d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp @@ -48,8 +48,9 @@ struct ReduceImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& reduce_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp index c4117058da88e9..824069f56b9583 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp @@ -56,8 +56,9 @@ struct ReorderImplementationManager : public ImplementationManager { if (output_fmt == format::custom) return true; + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; if (!one_of(input_fmt.value, supported_formats) || !one_of(output_fmt.value, supported_formats)) diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 01286a1c6d04bc..619693f3b1a6fc 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -199,6 +199,8 @@ class primitive_inst { program_node const& get_node() const { return *_node; } network& get_network() const { return _network; } uint32_t get_network_id() const; + const ExecutionConfig& get_config() const { return get_network().get_config(); } + virtual event::ptr set_output_memory(memory::ptr mem, bool check = true, size_t idx = 0); void check_memory_to_set(const memory& mem, const layout& layout) const; const std::list& get_users() const { return _node->get_users(); } diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 269a3c30fd293c..229dec6a80c77e 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -158,6 +158,7 @@ struct program_node { program& get_program() { return myprog; } program& get_program() const { return myprog; 
} + const ExecutionConfig& get_config() const { return myprog.get_config(); } primitive_impl* get_selected_impl() const { return selected_impl.get(); } void set_selected_impl(std::unique_ptr impl); diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 1c61b2c3fba82c..d6642ac27e56c3 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -238,9 +238,8 @@ network::~network() { if (_program != nullptr) _program->cancel_compilation_context(); _memory_pool->clear_pool_for_network(net_id); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - dump_perf_data_raw(debug_config->dump_profiling_data + "/perf_raw" + std::to_string(net_id) + ".csv", _exec_order); + GPU_DEBUG_IF(!_config.get_dump_profiling_data_path().empty()) { + dump_perf_data_raw(_config.get_dump_profiling_data_path() + "/perf_raw" + std::to_string(net_id) + ".csv", _exec_order); } } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 53932dfd91a466..22f6ef4571b3c7 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -1082,8 +1082,7 @@ void primitive_inst::realloc_if_needed(bool prev_execution_skipped) { } bool primitive_inst::use_async_compilation() { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_async_compilation) { + GPU_DEBUG_IF(get_config().get_disable_async_compilation()) { return false; } @@ -1581,8 +1580,7 @@ void primitive_inst::do_runtime_in_place_concat() { return false; }; OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_concat: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) { return; } if (update_shape_done_by_other) { @@ -1691,8 +1689,7 @@ void primitive_inst::do_runtime_skip_scatter_update() { void primitive_inst::do_runtime_in_place_crop() { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_crop: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) { return; } @@ -1985,8 +1982,7 @@ void primitive_inst::execute() { set_out_event(_impl->execute(_impl_params->dep_events, *this)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!get_config().get_dump_profiling_data_path().empty()) { auto ev = _impl_params->out_event; get_network().get_stream().wait_for_events({ev}); @@ -2323,8 +2319,7 @@ void primitive_inst::update_weights() { reorder_impl->set_arguments(*reorder_inst, args); add_dep_event(reorder_impl->execute({}, *reorder_inst)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!get_config().get_dump_profiling_data_path().empty()) { stream.wait_for_events(_impl_params->dep_events); } diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 82e6f7b708d8ca..a455c1e07b18f4 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -240,12 +240,7 @@ void program::init_program() { 
_layout_optimizer = std::make_unique(); - size_t impls_cache_capacity = _impls_cache_capacity; - GPU_DEBUG_IF(debug_config->impls_cache_capacity >= 0) { - impls_cache_capacity = debug_config->impls_cache_capacity; - } - - _impls_cache = std::make_unique(impls_cache_capacity); + _impls_cache = std::make_unique(get_config().get_impls_cache_capacity()); // Remove items of compilation context's internal queue when some impl is popped in kernels_cache // compilation context's queue check duplication of inserted task _impls_cache->set_remove_item_callback([this](ImplementationsCache::ItemType& item) { @@ -548,26 +543,13 @@ void program::pre_optimize_graph(bool is_internal) { reorder_factory rf; if (optimize_data) { - GPU_DEBUG_GET_INSTANCE(debug_config); -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { -#else - { -#endif - apply_opt_pass(); - } + apply_opt_pass(); apply_opt_pass(); apply_opt_pass(); -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { -#else - { -#endif - apply_opt_pass(); - } + apply_opt_pass(); apply_opt_pass(); diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index b7767c14f9abff..65df228d6c733f 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -1851,8 +1851,7 @@ void program_node::create_onednn_primitive_attributes( // Trying to combine multiplications and additions which are placed one after another. // We do it in the cycle because some optimization cases can be simplified again from time to time do { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_onednn_opt_post_ops) + GPU_DEBUG_IF(get_config().get_disable_onednn_post_ops_opt()) break; optimized_post_ops = try_optimize_post_ops(fused_ops, optimized_post_ops, attrs, optimization_is_finished); } while (!optimization_is_finished); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 25558be18c481a..a6b798bde97b9e 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -89,26 +89,6 @@ static bool is_per_token_dynamic_quantize(const fully_connected_params& params) static size_t get_dynamic_quantize_group_size(const fully_connected_params& params) { auto dynamic_quantization_group_size = params.dynamic_quantization_group_size; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; - - // Specify which Fully-connected layer would be dynamic-quantized - GPU_DEBUG_IF(!debug_config->dynamic_quantize_layers_without_onednn.empty()) { - auto layers = debug_config->dynamic_quantize_layers_without_onednn; - auto iter = std::find_if(layers.begin(), layers.end(), [&](const std::string& pattern){ - return debug_config->is_layer_name_matched(params.layerID, pattern); - }); - - if (iter != layers.end()) { - dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; - GPU_DEBUG_COUT << "Found specified Fully-connected layer [" << params.layerID << "]. Enable Dynamic-quantize." 
<< std::endl; - } else { - dynamic_quantization_group_size = 0; - } - } - } - size_t scale_group_size = get_scale_group_size(params); size_t zp_group_num = params.decompression_zero_point.Feature().v; size_t zp_group_size = 0; diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index a28d986ce46160..8fca999126fa1e 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -68,7 +68,7 @@ Graph::Graph(std::shared_ptr model, const RemoteContextImpl::Ptr& con : m_context(context) , m_config(config) , m_stream_id(stream_id) { - auto program_builder = std::make_shared(model, get_engine(), config, false); + auto program_builder = std::make_shared(model, get_engine(), config); m_config = program_builder->get_config(); build(program_builder->get_compiled_program()); @@ -212,12 +212,12 @@ void Graph::build(std::shared_ptr program) { exit(0); } - GPU_DEBUG_IF(!debug_config->dump_graphs.empty() && m_stream_id == 0) { + GPU_DEBUG_IF(!m_config.get_dump_graphs_path().empty() && m_stream_id == 0) { static int net_id = 0; auto steps_info = get_network()->get_optimizer_passes_info(); size_t step_idx = 0; for (auto& step : steps_info) { - auto xml_path = debug_config->dump_graphs + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; + auto xml_path = m_config.get_dump_graphs_path() + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; ov::pass::Serialize(xml_path, "").run_on_model(get_runtime_model(step.second, true)); step_idx++; } diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 20690957fea5e6..3d287eb46c465e 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -26,7 +26,7 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); config.finalize(p.get_engine()); - ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); + ProgramBuilder prog(internal_body, p.get_engine(), config, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); auto& input_map = branch.input_map; diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 9665918f88d0be..08b014d72206c6 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -299,11 +299,10 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr& op) { + if (op->is_dynamic()) { + return true; + } + + // HACK: SearchSorted has specific shape requirements. + // E.g. static input shapes: sorted:[8], values:[2,3,4] are perfectly fine, + // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid. + if (ov::is_type(op)) + return true; + + if (ov::is_type(op)) + return true; + + if (ov::is_type(op)) { + const auto body_function = std::static_pointer_cast(op)->get_function(); + if (body_function->is_dynamic()) + return true; + } + + if (ov::is_type(op) || ov::is_type(op)) { + return true; + } + // When an input node has a dynamic shape with 4 dimensions, this function may return false + // because op.is_dynamic(), which only checks input shapes, returns false.
+ // So, in the case of input data, we need to check the output shape. + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).is_dynamic()) + return true; + } + + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).size() > 6) + return true; + } + + for (size_t i = 0; i < op->get_input_size(); i++) { + if (op->get_input_partial_shape(i).size() > 6) + return true; + } + + return false; +} + +void set_model_properties(const ov::Model& model, ExecutionConfig& config) { + const auto& ops = model.get_ordered_ops(); + // In the case of an inner program, the allow_new_shape_infer flag is set from outside the program. + // So, do not check allow_new_shape_infer when building an inner program. + for (const auto& op : ops) { + if (requires_new_shape_infer(op)) { + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + break; + } + } + bool is_dynamic = false; + for (const auto& op : ops) { + if (op->is_dynamic()) { + is_dynamic = true; + break; + } + } + bool has_lstm = false; + for (const auto& op : ops) { + if (ov::is_type(op)) { + has_lstm = true; + break; + } + } + + // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, + // fewer kernels are built compared to static models. + // So having a smaller batch size is even better for dynamic models as we can do more parallel builds. + if (is_dynamic) { + config.set_property(ov::intel_gpu::max_kernels_per_batch(4)); + } else { + config.set_property(ov::intel_gpu::max_kernels_per_batch(8)); + } + + config.set_property(ov::intel_gpu::optimize_data(true)); + + if (has_lstm) + config.set_property(ov::intel_gpu::use_onednn(true)); +} + } // namespace #define FACTORY_DECLARATION(op_version, op_name) \ @@ -145,14 +231,13 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p const ExecutionConfig& config, const std::shared_ptr& context) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::clone_and_transform_model"); - GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_DEFINE_MEM_LOGGER("Plugin::clone_and_transform_model"); auto cloned_model = model->clone(); OPENVINO_ASSERT(cloned_model != nullptr, "[GPU] Failed to clone model!"); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name(); + GPU_DEBUG_IF(!config.get_dump_graphs_path().empty()) { + auto path_base = config.get_dump_graphs_path() + "/" + cloned_model->get_name(); ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } @@ -171,8 +256,8 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p new_res->set_friendly_name(old_res->get_friendly_name()); } - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name() + "_" + "transformed_func"; + GPU_DEBUG_IF(!config.get_dump_graphs_path().empty()) { + auto path_base = config.get_dump_graphs_path() + "/" + cloned_model->get_name() + "_" + "transformed_func"; ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } return cloned_model; @@ -220,7 +305,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(orig_config, OptionVisibility::RELEASE); + config.set_user_property(orig_config,
OptionVisibility::RELEASE); + set_model_properties(*model, config); config.finalize(context, get_rt_info(*model)); auto transformed_model = clone_and_transform_model(model, config, context); @@ -239,6 +325,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); + set_model_properties(*model, config); config.finalize(context_impl, get_rt_info(*model)); auto transformed_model = clone_and_transform_model(model, config, context_impl); @@ -269,7 +356,7 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params) void Plugin::set_property(const ov::AnyMap &config) { auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) { - config.set_property(user_config, OptionVisibility::RELEASE); + config.set_user_property(user_config, OptionVisibility::RELEASE); // Check that custom layers config can be loaded if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { CustomLayerMap custom_layers; @@ -304,7 +391,8 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& auto ctx = get_default_context(device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(orig_config, OptionVisibility::RELEASE); + config.set_user_property(orig_config, OptionVisibility::RELEASE); + set_model_properties(*model, config); config.finalize(ctx, get_rt_info(*model)); ProgramBuilder prog(ctx->get_engine(), config); @@ -359,7 +447,7 @@ std::shared_ptr Plugin::import_model(std::istream& model, } ExecutionConfig config = m_configs_map.at(device_id); - config.set_property(_orig_config, OptionVisibility::RELEASE); + config.set_user_property(_orig_config, OptionVisibility::RELEASE); config.finalize(context_impl, {}); ov::CacheMode cache_mode = config.get_cache_mode(); @@ -680,7 +768,9 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { auto device_id = get_property(ov::device::id.name(), options).as(); auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); - const auto& config = m_configs_map.at(device_id); + auto config = m_configs_map.at(device_id); + config.set_property(ov::intel_gpu::partial_build_program(true)); + config.finalize(context, {}); uint32_t n_streams = static_cast(config.get_num_streams()); uint64_t occupied_device_mem = 0; auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>(); @@ -798,7 +888,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { TransformationsPipeline transformations(config, context); transformations.apply(cloned_model); - program = std::make_shared(cloned_model, engine, config, true); + program = std::make_shared(cloned_model, engine, config); std::pair device_memory_usage = program->get_compiled_program()->get_estimated_device_mem_usage(); if (device_memory_usage.first == static_cast(-1L) && device_memory_usage.second == static_cast(-1L)) { return static_cast(max_batch_size); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index cd69e741e435de..f30bd9464a4238 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -13,7 +13,7 @@ #include "openvino/op/search_sorted.hpp" #include "openvino/op/stft.hpp" #include 
"openvino/runtime/properties.hpp" -#include "ov_ops/dynamic_quantize.hpp" + #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/plugin/program_builder.hpp" @@ -63,7 +63,6 @@ std::string layer_type_name_ID(const std::shared_ptr& op) { } ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, - bool partial_build, std::shared_ptr task_executor, std::shared_ptr compilation_context, bool is_inner_program) @@ -111,7 +110,7 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); auto ops = model->get_ordered_ops(); - m_program = build(ops, partial_build, is_inner_program); + m_program = build(ops, is_inner_program); } ProgramBuilder::ProgramBuilder(cldnn::engine& engine, const ExecutionConfig& config) @@ -141,43 +140,8 @@ void ProgramBuilder::cleanup_build() { #endif } -std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool partial_build, bool is_inner_program) { +std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool is_inner_program) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::build"); - // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. - // So, do not check allow_new_shape_infer for inner program build - for (const auto& op : ops) { - if (requires_new_shape_infer(op)) { - allow_new_shape_infer = true; - break; - } - } - bool is_dynamic = false; - for (const auto& op : ops) { - if (op->is_dynamic()) { - is_dynamic = true; - break; - } - } - - if (is_inner_program) { - allow_new_shape_infer = (m_config.get_allow_new_shape_infer() || allow_new_shape_infer); - } - - // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, - // smaller # of kernels are built compared to static models. - // So having smaller batch size is even better for dynamic model as we can do more parallel build. - if (is_dynamic) { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(4));; - } else { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(8));; - } - - m_config.set_property(ov::intel_gpu::partial_build_program(partial_build)); - m_config.set_property(ov::intel_gpu::optimize_data(true)); - m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer)); - //if (has_lstm) - m_config.set_property(ov::intel_gpu::use_onednn(true)); - m_config.finalize(m_engine); prepare_build(); { @@ -223,7 +187,6 @@ bool ProgramBuilder::is_op_supported(const std::shared_ptr& op) { if (!data_types_are_supported(op.get())) return false; - allow_new_shape_infer = requires_new_shape_infer(op); CreateSingleLayerPrimitive(op); cleanup_build(); DisableQueryMode(); @@ -280,7 +243,7 @@ std::vector ProgramBuilder::GetInputInfo(const std::shared_pt // Note: Currently Split/Variadic Split are divided to multiple crops // LSTMCell contains its own body network, and each output has a unique pid // But there is no need to maintain output port index for the next node e.g. 
Result - bool is_legacy_multiple_outputs = !allow_new_shape_infer + bool is_legacy_multiple_outputs = !use_new_shape_infer() || ov::is_type(prevOp) || ov::is_type(prevOp) || ov::is_type(prevOp); diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 7c3a1b0e452fa8..22b0262c62b2d3 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -317,15 +317,16 @@ void SyncInferRequest::enqueue() { m_internal_outputs = network->execute(dependencies); auto network_enqueue_end = std::chrono::high_resolution_clock::now(); + const auto& config = network->get_config(); + // If dump layers path is set, only runs first inference. - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0 && debug_config->dump_iteration.empty()) { + GPU_DEBUG_IF(!config.get_dump_tensors_path().empty() && config.get_dump_iterations().empty()) { GPU_DEBUG_INFO << "Only run first inference to dump layers." << std::endl; exit(0); } auto enqueue_end = std::chrono::high_resolution_clock::now(); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) { + GPU_DEBUG_IF(config.get_host_time_profiling()) { network_enqueue_time = std::chrono::duration_cast(network_enqueue_end - network_enqueue_start).count(); const uint64_t total_time = std::chrono::duration_cast(enqueue_end - enqueue_start).count(); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp index 6c0d50be96e7ae..b269fbc2c9eb4d 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp @@ -16,7 +16,7 @@ namespace ov::intel_gpu { -DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size) +DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric) : ov::pass::MatcherPass() { GPU_DEBUG_GET_INSTANCE(debug_config); using namespace ov::pass::pattern; @@ -55,9 +55,7 @@ DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size config.scale_dt = element::f16; config.group_sizes = shape_group_size; - // AZP does not support grouped size dyn-quan - // XXX: This is currently wrapped as GPU_DEBUG_IF as dynamic_quantize_asym is not exposed through public API. 
- GPU_DEBUG_IF(debug_config->dynamic_quantize_asym && group_size == UINT64_MAX) { + if (asymmetric && group_size == UINT64_MAX) { config.quantization_type = QuantizationType::Asymmetric; config.quantization_dt = element::u8; config.zp_dt = element::u8; // it supports u8 only now diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp index 85d32fbfdcea84..f8b13685389f1d 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp @@ -11,7 +11,7 @@ namespace ov::intel_gpu { class DynamicQuantizeFullyConnected: public ov::pass::MatcherPass { public: OPENVINO_MATCHER_PASS_RTTI("DynamicQuantizeFullyConnected"); - DynamicQuantizeFullyConnected(uint64_t group_size); + DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric = false); }; } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 339ee80ba7a3eb..6a1b09d1b4f329 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -988,13 +988,9 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - bool disable_horizontal_fc_fusion = false; - bool disable_fc_swiglu_fusion = false; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_horizontal_fc_fusion == 1) - disable_horizontal_fc_fusion = true; - GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) - disable_fc_swiglu_fusion = true; + bool disable_horizontal_fc_fusion = config.get_disable_horizontal_fc_fusion(); + bool disable_fc_swiglu_fusion = config.get_disable_fc_swiglu_fusion(); + // mlp fusion is only supported for cldnn on high performant GPUis bool fuse_mlp_swiglu = !device_info.supports_immad && device_info.execution_units_count >= 128 && @@ -1052,6 +1048,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { + bool asymmetric_dyn_quant = config.get_asym_dynamic_quantization(); auto dynamic_quantization_group_size = config.get_dynamic_quantization_group_size(); pass_config->set_callback([=](const_node_ptr& root) -> bool { for (size_t i = 0 ; i < root->get_input_node_shared_ptr(0)->get_output_size(); ++i) { @@ -1070,7 +1067,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // AZP does not support 8bit weight // XXX: This is currently wrapped as GPU_DEBUG_IF as dynamic_quantize_asym is not exposed through public API. - GPU_DEBUG_IF(debug_config->dynamic_quantize_asym + GPU_DEBUG_IF(asymmetric_dyn_quant && (root->get_input_element_type(1) == ov::element::i8 || root->get_input_element_type(1) == ov::element::u8)) { GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: asym quantization does not support 8bit weight" << std::endl; return true; @@ -1094,7 +1091,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { return false; }); - manager.register_pass(dynamic_quantization_group_size); + manager.register_pass(dynamic_quantization_group_size, asymmetric_dyn_quant); } // Remove Pad in front of MaxPool if both the pads_begin and pads_end are zero. 
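The hunks above all repeat one pattern: a read of the process-global debug_configuration singleton is replaced by a typed getter on the ExecutionConfig that travels with the program/network. Below is a minimal self-contained sketch of that pattern; ConfigOption and the getter names mirror this series, but the scaffolding is illustrative, not the actual OpenVINO classes:

#include <iostream>
#include <string>

// Illustrative stand-in for the ConfigOption<T> declared in plugin_config.hpp.
template <typename T>
struct ConfigOption {
    T value;
};

// Illustrative stand-in for ov::intel_gpu::ExecutionConfig: each option is a
// typed member exposed through a generated get_*() accessor.
struct MiniExecutionConfig {
    ConfigOption<bool> m_disable_runtime_buffer_fusing{false};
    ConfigOption<std::string> m_dump_profiling_data_path{std::string{}};

    bool get_disable_runtime_buffer_fusing() const { return m_disable_runtime_buffer_fusing.value; }
    const std::string& get_dump_profiling_data_path() const { return m_dump_profiling_data_path.value; }
};

int main() {
    MiniExecutionConfig config;
    config.m_dump_profiling_data_path.value = "/tmp/perf";

    // Old call sites: GPU_DEBUG_GET_INSTANCE(debug_config);
    //                 GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { ... }
    // New call sites read the per-model config, so two models compiled in one
    // process can run with different debug settings:
    if (!config.get_dump_profiling_data_path().empty())
        std::cout << "dumping profiling data to " << config.get_dump_profiling_data_path() << "\n";
    if (config.get_disable_runtime_buffer_fusing())
        std::cout << "runtime buffer fusing disabled\n";
    return 0;
}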
diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index f185a2f321489d..19d841036cd6de 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -68,7 +68,7 @@ void ExecutionConfig::finalize_impl(std::shared_ptr context) { if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { m_enable_lp_transformations = info.supports_imad || info.supports_immad; } - if (info.supports_immad) { + if (!is_set_by_user(ov::intel_gpu::use_onednn) && info.supports_immad) { m_use_onednn = true; } if (get_use_onednn()) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp index 441494b41c4f8f..520cb9bd22e073 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp @@ -295,9 +295,6 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex GPU_DEBUG_INFO << "GPU version: " << static_cast(info.gfx_ver.major) << "." << static_cast(info.gfx_ver.minor) << "." << static_cast(info.gfx_ver.revision) << (info.has_separate_cache ? " with separate cache" : "") << std::endl; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_onednn) - info.supports_immad = false; } else if (nv_device_attr_supported) { info.gfx_ver = {static_cast(device.getInfo()), static_cast(device.getInfo()), From 730f70a21f5d2fe7bf8a3d69ecf365cefe237bac Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 24 Dec 2024 15:13:09 +0400 Subject: [PATCH 16/44] extended bool any parsing options Signed-off-by: Vladimir Paramuzov --- src/core/src/any.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 54c6b4f9d868f5..8de26ad7c4d173 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -6,6 +6,7 @@ #include #include +#include "openvino/util/common_util.hpp" namespace { template bool contains_type_index(Container&& types, const std::type_info& user_type) { @@ -202,9 +203,14 @@ namespace util { void Read::operator()(std::istream& is, bool& value) const { std::string str; is >> str; - if (str == "YES") { + + std::set off = {"0", "false", "off", "no"}; + std::set on = {"1", "true", "on", "yes"}; + str = util::to_lower(str); + + if (on.count(str)) { value = true; - } else if (str == "NO") { + } else if (off.count(str)) { value = false; } else { OPENVINO_THROW("Could not convert to bool from string " + str); From 797f894dc021c56a79044ee7148d9d6e10887f5a Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 14 Jan 2025 10:13:37 +0400 Subject: [PATCH 17/44] debug properties wip Signed-off-by: Vladimir Paramuzov --- .../include/intel_gpu/runtime/internal_properties.hpp | 1 + src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl | 1 + src/plugins/intel_gpu/src/graph/layout_optimizer.cpp | 1 + src/plugins/intel_gpu/src/graph/program.cpp | 5 ++--- src/plugins/intel_gpu/src/plugin/graph.cpp | 5 ++--- src/plugins/intel_gpu/src/plugin/plugin.cpp | 5 ----- 6 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index 5924457193765d..23b48ac4d9a920 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ 
b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -96,6 +96,7 @@ static constexpr Property dump_graphs_path{ static constexpr Property dump_profiling_data_path{"GPU_DUMP_PROFILING_DATA_PATH"}; static constexpr Property dump_sources_path{"GPU_DUMP_SOURCES_PATH"}; static constexpr Property dump_tensors_path{"GPU_DUMP_TENSORS_PATH"}; +static constexpr Property dry_run_path{"GPU_DRY_RUN_PATH"}; static constexpr Property dump_tensors{"DUMP_TENSORS"}; static constexpr Property dump_layers{"GPU_DUMP_LAYERS"}; static constexpr Property dump_tensors_format{"DUMP_TENSORS_FORMAT"}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 9d63b6b15e3368..126a01a75acee5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -74,3 +74,4 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usa OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "Preallocation setting") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, std::vector{}, "List of layers to load raw binary") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dry_run_path, "", "Enables mode which partially compiles a model and stores runtime model into specified directory") diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 067ca8c8f5fb0d..78e669f0e1152f 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1131,6 +1131,7 @@ bool layout_optimizer::is_primitive_implemented_for_onednn(program_node& node) { impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node) { #ifdef GPU_DEBUG_CONFIG + GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->forced_impl_types.empty()) { // Forcing impl type of one primitive diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index a455c1e07b18f4..dbadc079c0da82 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -491,9 +491,8 @@ void program::build_program(bool is_internal) { run_graph_compilation(); { post_optimize_graph(is_internal); } - GPU_DEBUG_GET_INSTANCE(debug_config); #ifdef GPU_DEBUG_CONFIG - if (debug_config->dry_run_path.empty() || is_internal) { + if (get_config().get_dry_run_path().empty() || is_internal) { #else { #endif @@ -608,7 +607,7 @@ void program::post_optimize_graph(bool is_internal) { auto partial_build = _config.get_partial_build_program(); #ifdef GPU_DEBUG_CONFIG GPU_DEBUG_GET_INSTANCE(debug_config); - if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) { + if (!is_internal && (!partial_build || !_config.get_dry_run_path().empty())) { #else if (!is_internal && !partial_build) { #endif diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 8fca999126fa1e..d04adc77ee67af 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -206,9 +206,8 @@ void Graph::build(std::shared_ptr program) { m_network = std::make_shared(program, m_stream_id); } - GPU_DEBUG_GET_INSTANCE(debug_config); - 
GPU_DEBUG_IF(!debug_config->dry_run_path.empty()) { - ov::pass::Serialize(debug_config->dry_run_path, "").run_on_model(get_runtime_model()); + GPU_DEBUG_IF(!m_config.get_dry_run_path().empty()) { + ov::pass::Serialize(m_config.get_dry_run_path(), "").run_on_model(get_runtime_model()); exit(0); } diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index bb5ff6d37542ee..fd0f2482bc4712 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -829,11 +829,6 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { std::shared_ptr program; - GPU_DEBUG_IF(debug_config->base_batch_for_memory_estimation > 0) { - size_t user_specified_base_batch_size = debug_config->base_batch_for_memory_estimation; - base_batch_size = (user_specified_base_batch_size != base_batch_size) ? user_specified_base_batch_size : base_batch_size; - } - auto cloned_model = model->clone(); try { From 7d05f99ca6588dde4fb374509f3c9e6a7a22580f Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 14 Jan 2025 10:14:09 +0400 Subject: [PATCH 18/44] fix apply rt info Signed-off-by: Vladimir Paramuzov --- src/inference/dev_api/openvino/runtime/plugin_config.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 08dd148b6d1c36..0101733b6517d5 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -243,7 +243,7 @@ class OPENVINO_RUNTIME_API PluginConfig { if (!is_set_by_user(property)) { auto rt_info_val = rt_info.find(property.name()); if (rt_info_val != rt_info.end()) { - set_property(property(rt_info_val->second.template as())); + set_user_property(property(rt_info_val->second.template as())); } } } From c652884c4859d396f9599c5b816b1be0aa2d452e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 15 Jan 2025 16:11:20 +0400 Subject: [PATCH 19/44] wip Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 51 ++++++++++++++----- src/inference/src/dev/plugin_config.cpp | 40 +++++++-------- src/inference/tests/unit/config_test.cpp | 43 ++++++++++++---- .../intel_gpu/runtime/execution_config.hpp | 13 +++-- .../include/intel_gpu/runtime/options.inl | 2 +- .../src/runtime/execution_config.cpp | 21 ++++++-- .../intel_gpu/src/runtime/ocl/ocl_event.cpp | 10 ---- 7 files changed, 117 insertions(+), 63 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 0101733b6517d5..a598d81165eea6 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -11,6 +11,10 @@ #include "openvino/runtime/properties.hpp" #include "openvino/core/except.hpp" +#ifndef EXPAND + #define EXPAND(N) N +#endif + #ifndef COUNT_N #define COUNT_N(_1, _2, _3, _4, _5, N, ...) N #endif @@ -23,14 +27,11 @@ #define CAT(a, b) a ## b #endif -#ifndef EXPAND - #define EXPAND(N) N -#endif - #define GET_EXCEPT_LAST_IMPL(N, ...) CAT(GET_EXCEPT_LAST_IMPL_, N)(__VA_ARGS__) #define GET_EXCEPT_LAST_IMPL_2(_0, _1) _0 #define GET_EXCEPT_LAST_IMPL_3(_0, _1, _2) _0, _1 #define GET_EXCEPT_LAST_IMPL_4(_0, _1, _2, _3) _0, _1, _2 +#define GET_EXCEPT_LAST_IMPL_5(_0, _1, _2, _3, _4) _0, _1, _2, _3 #define GET_EXCEPT_LAST(...) 
EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) @@ -45,10 +46,25 @@ #define GET_LAST(...) GET_LAST_IMPL(COUNT(__VA_ARGS__), _, __VA_ARGS__ ,,,,,,,,,,,) -#define OV_CONFIG_DECLARE_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ +#define OV_CONFIG_DECLARE_LOCAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ ConfigOption m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; +#define OV_CONFIG_DECLARE_GLOBAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ + static ConfigOption m_ ## PropertyVar; + +#define OV_CONFIG_DECLARE_LOCAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ + const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const { \ + if (m_is_finalized) { \ + return m_ ## PropertyVar.value; \ + } else { \ + if (m_user_properties.find(PropertyNamespace::PropertyVar.name()) != m_user_properties.end()) { \ + return m_user_properties.at(PropertyNamespace::PropertyVar.name()).as(); \ + } else { \ + return m_ ## PropertyVar.value; \ + } \ + } \ + } -#define OV_CONFIG_DECLARE_GETTERS(PropertyNamespace, PropertyVar, Visibility, ...) \ +#define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const { \ if (m_is_finalized) { \ return m_ ## PropertyVar.value; \ @@ -68,13 +84,16 @@ { #PropertyNamespace "::" #PropertyVar, PropertyNamespace::PropertyVar.name(), GET_LAST(__VA_ARGS__)}, #define OV_CONFIG_RELEASE_OPTION(PropertyNamespace, PropertyVar, ...) \ - OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__) + OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__) #define OV_CONFIG_RELEASE_INTERNAL_OPTION(PropertyNamespace, PropertyVar, ...) \ - OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE_INTERNAL, __VA_ARGS__) + OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE_INTERNAL, __VA_ARGS__) #define OV_CONFIG_DEBUG_OPTION(PropertyNamespace, PropertyVar, ...) \ - OV_CONFIG_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) + OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) + +#define OV_CONFIG_DEBUG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG_GLOBAL, __VA_ARGS__) namespace ov { #define ENABLE_DEBUG_CAPS @@ -82,8 +101,9 @@ enum class OptionVisibility : uint8_t { RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only DEBUG = 1 << 2, // Option can be set for debug builds only via environment and config file + DEBUG_GLOBAL = 1 << 3, // Global option can be set for debug builds only via environment and config file #ifdef ENABLE_DEBUG_CAPS - ANY = 0x07, // Any visibility is valid including DEBUG + ANY = 0x0F, // Any visibility is valid including DEBUG #else ANY = 0x03, // Any visibility is valid excluding DEBUG #endif @@ -213,6 +233,10 @@ class OPENVINO_RUNTIME_API PluginConfig { util::EnableIfAllStringAny set_property(Properties&&... properties) { set_property(ov::AnyMap{std::forward(properties)...}); } + template + util::EnableIfAllStringAny set_user_property(Properties&&... 
properties) { + set_user_property(ov::AnyMap{std::forward(properties)...}); + } std::string to_string() const; @@ -243,13 +267,14 @@ class OPENVINO_RUNTIME_API PluginConfig { if (!is_set_by_user(property)) { auto rt_info_val = rt_info.find(property.name()); if (rt_info_val != rt_info.end()) { - set_user_property(property(rt_info_val->second.template as())); + set_user_property({property(rt_info_val->second.template as())}, OptionVisibility::RELEASE | OptionVisibility::RELEASE_INTERNAL); } } } ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; - ov::AnyMap read_env(const std::vector& prefixes) const; + ov::AnyMap read_env() const; + ov::Any read_env(const std::string& s) const; void cleanup_unsupported(ov::AnyMap& config) const; std::map m_options_map; @@ -265,6 +290,8 @@ class OPENVINO_RUNTIME_API PluginConfig { void print_help() const; bool m_is_finalized = false; + + const char* m_allowed_env_prefix = "OV_"; }; } // namespace ov diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index b756894d5e414f..01a6f6811c9829 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -150,7 +150,7 @@ void PluginConfig::apply_debug_options(std::shared_ptr context) set_user_property(config_properties, OptionVisibility::ANY, throw_on_error); } - ov::AnyMap env_properties = read_env({"OV_"}); + ov::AnyMap env_properties = read_env(); cleanup_unsupported(env_properties); #ifdef ENABLE_DEBUG_CAPS for (auto& prop : env_properties) { @@ -190,32 +190,30 @@ ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std return config; } -ov::AnyMap PluginConfig::read_env(const std::vector& prefixes) const { +ov::AnyMap PluginConfig::read_env() const { ov::AnyMap config; for (auto& kv : m_options_map) { - for (auto& prefix : prefixes) { - auto var_name = prefix + kv.first; - const auto& val = ov::util::getenv_string(var_name.c_str()); - - if (!val.empty()) { - if (dynamic_cast*>(kv.second) != nullptr) { - const std::set off = {"0", "false", "off", "no"}; - const std::set on = {"1", "true", "on", "yes"}; - - const auto& val_lower = ov::util::to_lower(val); - if (off.count(val_lower)) { - config[kv.first] = false; - } else if (on.count(val_lower)) { - config[kv.first] = true; - } else { - OPENVINO_THROW("Unexpected value for boolean property: ", val); - } + auto var_name = m_allowed_env_prefix + kv.first; + const auto& val = ov::util::getenv_string(var_name.c_str()); + + if (!val.empty()) { + if (dynamic_cast*>(kv.second) != nullptr) { + const std::set off = {"0", "false", "off", "no"}; + const std::set on = {"1", "true", "on", "yes"}; + + const auto& val_lower = ov::util::to_lower(val); + if (off.count(val_lower)) { + config[kv.first] = false; + } else if (on.count(val_lower)) { + config[kv.first] = true; } else { - config[kv.first] = val; + OPENVINO_THROW("Unexpected value for boolean property: ", val); } - break; + } else { + config[kv.first] = val; } + break; } } diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index d2c99585ab015b..6f385473b1c627 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -18,6 +18,7 @@ static constexpr Property high_level_proper static constexpr Property low_level_property{"LOW_LEVEL_PROPERTY"}; static constexpr Property release_internal_property{"RELEASE_INTERNAL_PROPERTY"}; static constexpr Property 
debug_property{"DEBUG_PROPERTY"}; +static constexpr Property debug_global_property{"DEBUG_GLOBAL_PROPERTY"}; struct EmptyTestConfig : public ov::PluginConfig { @@ -32,14 +33,17 @@ struct EmptyTestConfig : public ov::PluginConfig { struct NotEmptyTestConfig : public ov::PluginConfig { NotEmptyTestConfig() { - #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "") OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "") OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "") - #undef OV_CONFIG_OPTION + OV_CONFIG_DEBUG_GLOBAL_OPTION(, debug_global_property, 4, "") + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION } NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() { @@ -49,14 +53,17 @@ struct NotEmptyTestConfig : public ov::PluginConfig { } } - #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__) + #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__) OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "") OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "") OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "") - #undef OV_CONFIG_OPTION + OV_CONFIG_DEBUG_GLOBAL_OPTION(, debug_global_property, 4, "") + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION std::vector get_supported_properties() const { std::vector supported_properties; @@ -80,6 +87,15 @@ struct NotEmptyTestConfig : public ov::PluginConfig { using ov::PluginConfig::is_set_by_user; }; +#define OV_CONFIG_LOCAL_OPTION(...) +#define OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) 
\ + ConfigOption NotEmptyTestConfig::m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; + + OV_CONFIG_DEBUG_GLOBAL_OPTION(, debug_global_property, 4, "") + +#undef OV_CONFIG_LOCAL_OPTION +#undef OV_CONFIG_GLOBAL_OPTION + TEST(plugin_config, can_create_empty_config) { ASSERT_NO_THROW( EmptyTestConfig cfg; @@ -90,7 +106,7 @@ TEST(plugin_config, can_create_empty_config) { TEST(plugin_config, can_create_not_empty_config) { ASSERT_NO_THROW( NotEmptyTestConfig cfg; - ASSERT_EQ(cfg.get_supported_properties().size(), 6); + ASSERT_EQ(cfg.get_supported_properties().size(), 7); ); } @@ -111,7 +127,7 @@ TEST(plugin_config, throw_for_unsupported_property) { TEST(plugin_config, can_direct_access_to_properties) { NotEmptyTestConfig cfg; ASSERT_EQ(cfg.m_int_property.value, cfg.get_int_property()); - ASSERT_NO_THROW(cfg.set_property(int_property(1))); + ASSERT_NO_THROW(cfg.set_user_property(int_property(1))); ASSERT_EQ(cfg.m_int_property.value, -1); // user property doesn't impact member value until finalize() is called cfg.m_int_property.value = 2; @@ -120,7 +136,7 @@ TEST(plugin_config, can_direct_access_to_properties) { TEST(plugin_config, finalization_updates_member) { NotEmptyTestConfig cfg; - ASSERT_NO_THROW(cfg.set_property(bool_property(false))); + ASSERT_NO_THROW(cfg.set_user_property(bool_property(false))); ASSERT_EQ(cfg.m_bool_property.value, true); // user property doesn't impact member value until finalize() is called cfg.finalize(nullptr, {}); @@ -137,7 +153,7 @@ TEST(plugin_config, get_property_before_finalization_returns_user_property_if_se cfg.m_bool_property.value = false; // update member directly ASSERT_EQ(cfg.get_bool_property(), false); // OK, return the class member value as no user property was set - ASSERT_NO_THROW(cfg.set_property(bool_property(true))); + ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); ASSERT_TRUE(cfg.is_set_by_user(bool_property)); ASSERT_EQ(cfg.get_bool_property(), true); // now user property value is returned ASSERT_EQ(cfg.m_bool_property.value, false); // but class member is not updated @@ -150,7 +166,7 @@ TEST(plugin_config, get_property_before_finalization_returns_user_property_if_se TEST(plugin_config, finalization_updates_dependant_properties) { NotEmptyTestConfig cfg; - cfg.set_property(high_level_property("value1")); + cfg.set_user_property(high_level_property("value1")); ASSERT_TRUE(cfg.is_set_by_user(high_level_property)); ASSERT_FALSE(cfg.is_set_by_user(low_level_property)); @@ -204,8 +220,8 @@ TEST(plugin_config, can_copy_config) { TEST(plugin_config, set_property_throw_for_non_release_options) { NotEmptyTestConfig cfg; - ASSERT_ANY_THROW(cfg.set_property(release_internal_property(10))); - ASSERT_ANY_THROW(cfg.set_property(debug_property(10))); + ASSERT_ANY_THROW(cfg.set_user_property({release_internal_property(10)}, OptionVisibility::RELEASE)); + ASSERT_ANY_THROW(cfg.set_user_property({debug_property(10)}, OptionVisibility::RELEASE)); } TEST(plugin_config, visibility_is_correct) { @@ -214,3 +230,8 @@ TEST(plugin_config, visibility_is_correct) { ASSERT_EQ(cfg.get_option_ptr(debug_property.name())->get_visibility(), OptionVisibility::DEBUG); ASSERT_EQ(cfg.get_option_ptr(int_property.name())->get_visibility(), OptionVisibility::RELEASE); } + +TEST(plugin_config, can_get_global_property) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_debug_global_property(), 4); +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 
f8f639d8f67a25..be8a60b65d023f 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -24,9 +24,12 @@ struct ExecutionConfig : public ov::PluginConfig { void finalize(cldnn::engine& engine); using ov::PluginConfig::finalize; - #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_GETTERS(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__) + #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__) #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION + + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION protected: void finalize_impl(std::shared_ptr context) override; @@ -39,9 +42,11 @@ struct ExecutionConfig : public ov::PluginConfig { void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - #define OV_CONFIG_OPTION(...) OV_CONFIG_DECLARE_OPTION(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__) + #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__) #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION }; } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 126a01a75acee5..46bfc496ea8411 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -48,7 +48,7 @@ OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disa OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into batch for more efficient ocl compilation") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls capacity of LRU implementations cache that is created for each program object for dynamic models") -OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 0 - Disabled, 4 - Maximum verbosity") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 0 - Disabled, 4 - Maximum verbosity") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, help, false, "Print help message for all config options") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem") diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 19d841036cd6de..7e2c82f84b7d7a 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -13,10 +13,21 @@ namespace ov::intel_gpu { +#define OV_CONFIG_LOCAL_OPTION(...) +#define OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ + ConfigOption ExecutionConfig::m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; + +#include "intel_gpu/runtime/options.inl" + +#undef OV_CONFIG_LOCAL_OPTION +#undef OV_CONFIG_GLOBAL_OPTION + ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { - #define OV_CONFIG_OPTION(...) 
OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION } ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { @@ -152,9 +163,11 @@ void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const { static ov::PluginConfig::OptionsDesc help_map { - #define OV_CONFIG_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) + #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) #include "intel_gpu/runtime/options.inl" - #undef OV_CONFIG_OPTION + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION }; return help_map; } diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp index dbf2a01aa4eadf..d722b5f3bc1bfd 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp @@ -219,16 +219,6 @@ bool ocl_events::get_profiling_info_impl(std::listprint_multi_kernel_perf) { - if (period.stage == instrumentation::profiling_stage::executing) { - GPU_DEBUG_TRACE << "Multi-kernel time: "; - for (auto& duration : all_durations[period.stage]) - GPU_DEBUG_TRACE << " " << (duration.second - duration.first) / 1000; - GPU_DEBUG_TRACE << " Total " << sum / 1000 << std::endl; - } - } - info.push_back(get_profiling_interval(period.stage, 0, sum)); } From c53bb9b281345e41de294ffe620efaf636092882 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 20 Jan 2025 11:58:36 +0400 Subject: [PATCH 20/44] [GPU] Global static vars. 
Removed old debug config Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 17 +- src/inference/src/dev/plugin_config.cpp | 46 +- .../intel_gpu/plugin/program_builder.hpp | 3 +- .../intel_gpu/runtime/debug_configuration.hpp | 115 +--- .../intel_gpu/runtime/internal_properties.hpp | 14 +- .../include/intel_gpu/runtime/memory_pool.hpp | 6 +- .../include/intel_gpu/runtime/options.inl | 26 +- .../include/intel_gpu/runtime/profiling.hpp | 3 +- .../intel_gpu/runtime/shape_predictor.hpp | 35 +- .../intel_gpu/src/graph/debug_helper.cpp | 239 ++++--- .../intel_gpu/src/graph/debug_helper.hpp | 4 - .../graph_optimizer/post_optimize_weights.cpp | 4 - .../graph/graph_optimizer/reorder_inputs.cpp | 2 - .../graph/impls/onednn/convolution_onednn.cpp | 2 - .../intel_gpu/src/graph/layout_optimizer.cpp | 62 -- src/plugins/intel_gpu/src/graph/network.cpp | 33 +- .../intel_gpu/src/graph/primitive_inst.cpp | 60 +- src/plugins/intel_gpu/src/graph/program.cpp | 4 +- .../src/graph/program_dump_graph.cpp | 1 - .../dynamic_quantize_kernel_ref.cpp | 1 - src/plugins/intel_gpu/src/plugin/graph.cpp | 4 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 6 +- .../intel_gpu/src/plugin/program_builder.cpp | 45 -- .../src/plugin/sync_infer_request.cpp | 20 +- .../dynamic_quantize_fully_connected.cpp | 1 - .../transformations/fc_horizontal_fusion.cpp | 1 - .../src/plugin/transformations_pipeline.cpp | 8 +- .../src/runtime/debug_configuration.cpp | 601 +----------------- src/plugins/intel_gpu/src/runtime/device.cpp | 3 +- src/plugins/intel_gpu/src/runtime/engine.cpp | 3 +- .../intel_gpu/src/runtime/memory_pool.cpp | 36 +- .../intel_gpu/src/runtime/shape_predictor.cpp | 10 +- .../module_tests/shape_predictor_test.cpp | 8 +- .../unit/test_cases/debug_config_gpu_test.cpp | 2 +- 34 files changed, 333 insertions(+), 1092 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index a598d81165eea6..8c9252eb78e48c 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -65,16 +65,11 @@ } #define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ - const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const { \ - if (m_is_finalized) { \ + static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ + auto v = read_env(PropertyNamespace::PropertyVar.name(), m_allowed_env_prefix, &m_ ## PropertyVar); \ + if (v.empty()) \ return m_ ## PropertyVar.value; \ - } else { \ - if (m_user_properties.find(PropertyNamespace::PropertyVar.name()) != m_user_properties.end()) { \ - return m_user_properties.at(PropertyNamespace::PropertyVar.name()).as(); \ - } else { \ - return m_ ## PropertyVar.value; \ - } \ - } \ + return v.as(); \ } #define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) 
\ @@ -274,7 +269,7 @@ class OPENVINO_RUNTIME_API PluginConfig { ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const; ov::AnyMap read_env() const; - ov::Any read_env(const std::string& s) const; + static ov::Any read_env(const std::string& option_name, const std::string& prefix, const ConfigOptionBase* option); void cleanup_unsupported(ov::AnyMap& config) const; std::map m_options_map; @@ -291,7 +286,7 @@ class OPENVINO_RUNTIME_API PluginConfig { bool m_is_finalized = false; - const char* m_allowed_env_prefix = "OV_"; + inline static const std::string m_allowed_env_prefix = "OV_"; }; } // namespace ov diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 01a6f6811c9829..cd4d13968e67fd 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -190,30 +190,38 @@ ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std return config; } +ov::Any PluginConfig::read_env(const std::string& option_name, const std::string& prefix, const ConfigOptionBase* option) { + auto var_name = prefix + option_name; + const auto& val = ov::util::getenv_string(var_name.c_str()); + + if (!val.empty()) { + if (dynamic_cast*>(option) != nullptr) { + const std::set off = {"0", "false", "off", "no"}; + const std::set on = {"1", "true", "on", "yes"}; + + const auto& val_lower = ov::util::to_lower(val); + if (off.count(val_lower)) { + return false; + } else if (on.count(val_lower)) { + return true; + } else { + OPENVINO_THROW("Unexpected value for boolean property: ", val); + } + } else { + return val; + } + } else { + return ov::Any(); + } +} + ov::AnyMap PluginConfig::read_env() const { ov::AnyMap config; for (auto& kv : m_options_map) { - auto var_name = m_allowed_env_prefix + kv.first; - const auto& val = ov::util::getenv_string(var_name.c_str()); - + auto val = read_env(kv.first, m_allowed_env_prefix, kv.second); if (!val.empty()) { - if (dynamic_cast*>(kv.second) != nullptr) { - const std::set off = {"0", "false", "off", "no"}; - const std::set on = {"1", "true", "on", "yes"}; - - const auto& val_lower = ov::util::to_lower(val); - if (off.count(val_lower)) { - config[kv.first] = false; - } else if (on.count(val_lower)) { - config[kv.first] = true; - } else { - OPENVINO_THROW("Unexpected value for boolean property: ", val); - } - } else { - config[kv.first] = val; - } - break; + config[kv.first] = val; } } diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 6b28c02930d530..99220ec3087221 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -138,7 +138,6 @@ class ProgramBuilder final { void add_primitive(const ov::Node& op, std::shared_ptr prim, std::vector aliases = {}); bool use_new_shape_infer() const { return m_config.get_allow_new_shape_infer(); } - bool is_inner_program() const { return m_is_inner_program; } bool is_query_mode() { return queryMode; } @@ -156,6 +155,8 @@ class ProgramBuilder final { std::shared_ptr m_topology; CustomLayerMap m_custom_layers; + bool allow_new_shape_infer = false; + bool queryMode; std::shared_ptr m_task_executor; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index 2ce1397c44bb68..489399c1255ce0 100644 
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp
index 6b28c02930d530..99220ec3087221 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp
@@ -138,7 +138,6 @@ class ProgramBuilder final {
     void add_primitive(const ov::Node& op, std::shared_ptr prim, std::vector aliases = {});

     bool use_new_shape_infer() const { return m_config.get_allow_new_shape_infer(); }
-    bool is_inner_program() const { return m_is_inner_program; }
     bool is_query_mode() { return queryMode; }
@@ -156,6 +155,8 @@
     std::shared_ptr m_topology;
     CustomLayerMap m_custom_layers;

+    bool allow_new_shape_infer = false;
+
     bool queryMode;

     std::shared_ptr m_task_executor;
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp
index 2ce1397c44bb68..489399c1255ce0 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include "intel_gpu/runtime/execution_config.hpp"

 namespace ov::intel_gpu {

 // Verbose log levels:
@@ -39,6 +40,7 @@ enum class LogLevel : int8_t {
     TRACE_DETAIL = 4
 };

+std::ostream& get_verbose_stream();
 }  // namespace ov::intel_gpu

 #ifdef GPU_DEBUG_CONFIG
@@ -47,23 +49,26 @@ enum class LogLevel : int8_t {
 #else
 #define SEPARATE '/'
 #endif
+
+#define GPU_PREFIX "GPU_Debug: "
+
 #define GPU_FILENAME (strrchr(__FILE__, SEPARATE) ? strrchr(__FILE__, SEPARATE) + 1 : __FILE__)
 #define GPU_DEBUG_IF(cond) if (cond)
 #define GPU_DEBUG_CODE(...) __VA_ARGS__
 #define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) \
-    cldnn::instrumentation::mem_usage_logger mem_logger{stage, cldnn::debug_configuration::get_instance()->verbose >= 2};
+    cldnn::instrumentation::mem_usage_logger mem_logger{stage, ov::intel_gpu::ExecutionConfig::get_verbose() >= 2};
 #define GPU_DEBUG_PROFILED_STAGE(stage) \
     auto stage_prof = cldnn::instrumentation::profiled_stage(\
-    !cldnn::debug_configuration::get_instance()->dump_profiling_data.empty(), *this, stage)
+    !get_config().get_dump_profiling_data_path().empty(), *this, stage)
 #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val) stage_prof.set_cache_hit(val)
 #define GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO(info) stage_prof.add_memalloc_info(info)
-#define GPU_DEBUG_LOG_RAW_INT(min_verbose_level) if (cldnn::debug_configuration::get_instance()->verbose >= min_verbose_level) \
-    ((cldnn::debug_configuration::get_instance()->verbose_color == 0) ? GPU_DEBUG_LOG_PREFIX : GPU_DEBUG_LOG_COLOR_PREFIX)
+#define GPU_DEBUG_LOG_RAW_INT(min_verbose_level) if (ov::intel_gpu::ExecutionConfig::get_verbose() >= min_verbose_level) \
+    (ov::intel_gpu::ExecutionConfig::get_verbose_color() ? GPU_DEBUG_LOG_COLOR_PREFIX : GPU_DEBUG_LOG_PREFIX)
 #define GPU_DEBUG_LOG_RAW(min_verbose_level) GPU_DEBUG_LOG_RAW_INT(static_cast::type>(min_verbose_level))
 #define GPU_DEBUG_LOG_PREFIX \
-    *cldnn::debug_configuration::verbose_stream << cldnn::debug_configuration::prefix << GPU_FILENAME << ":" <<__LINE__ << ":" << __func__ << ": "
-#define GPU_DEBUG_LOG_COLOR_PREFIX *cldnn::debug_configuration::verbose_stream << DARK_GRAY << cldnn::debug_configuration::prefix << \
+    ov::intel_gpu::get_verbose_stream() << GPU_PREFIX << GPU_FILENAME << ":" << __LINE__ << ":" << __func__ << ": "
+#define GPU_DEBUG_LOG_COLOR_PREFIX ov::intel_gpu::get_verbose_stream() << DARK_GRAY << GPU_PREFIX << \
     BLUE << GPU_FILENAME << ":" << PURPLE << __LINE__ << ":" << CYAN << __func__ << ": " << RESET
 #define DARK_GRAY "\033[1;30m"
 #define BLUE "\033[1;34m"
@@ -77,107 +82,11 @@ enum class LogLevel : int8_t {
 #define GPU_DEBUG_PROFILED_STAGE(stage)
 #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val)
 #define GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO(info)
-#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) *cldnn::debug_configuration::verbose_stream << cldnn::debug_configuration::prefix
+#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) ov::intel_gpu::get_verbose_stream() << GPU_PREFIX
 #endif

-// Macro below is inserted to avoid unused variable warning when GPU_DEBUG_CONFIG is OFF
-#define GPU_DEBUG_GET_INSTANCE(name) auto name = cldnn::debug_configuration::get_instance(); (void)(name);
-
 #define GPU_DEBUG_COUT GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::DISABLED)
 #define GPU_DEBUG_INFO GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::INFO)
 #define GPU_DEBUG_LOG GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::LOG)
 #define GPU_DEBUG_TRACE GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::TRACE)
 #define GPU_DEBUG_TRACE_DETAIL GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::TRACE_DETAIL)
-
-namespace cldnn {
-
-class debug_configuration {
-private:
-    debug_configuration();
-
-public:
-    static const char *prefix;
-    int help;                               // Print help messages
-    int verbose;                            // Verbose execution
-    int verbose_color;                      // Print verbose color
-    std::string verbose_file;               // Verbose log to file
-    int list_layers;                        // Print list layers
-    int print_multi_kernel_perf;            // Print execution time of each kernel in multi-kernel primitive
-    int print_input_data_shapes;            // Print the input data_shape for benchmark_app.
-    int disable_usm;                        // Disable usm usage
-    int disable_onednn;                     // Disable onednn for discrete GPU (no effect for integrated GPU)
-    int disable_onednn_opt_post_ops;        // Disable onednn optimize post operators
-    std::string dump_profiling_data;        // Enables dump of extended performance profiling to specified dir
-    int dump_profiling_data_per_iter;       // Enables dump of extended performance profiling to specified dir for each iteration
-    int host_time_profiling;                // Enables measurement of scheduling time spent on the host
-    std::string dump_graphs;                // Dump optimized graph
-    std::string dump_sources;               // Dump opencl sources
-    std::string dump_layers_path;           // Enable dumping intermediate buffers and set the dest path
-    std::vector dump_layers;                // Dump intermediate buffers of specified layers only
-    std::string dry_run_path;               // Dry run and serialize execution graph into the specified path
-    int dump_layers_dst_only;               // Dump only output of layers
-    int dump_layers_result;                 // Dump result layers
-    int dump_layers_input;                  // Dump input layers
-    int dump_layers_limit_batch;            // Limit the size of batch to dump
-    int dump_layers_raw;                    // Dump raw data.
-    int dump_layers_binary;                 // Dump binary data.
-    int dump_memory_pool;                   // Dump memory pool status at each iteration
-    std::set dump_memory_pool_iters;        // List of iteration's memory pool status
-    std::string dump_memory_pool_path;      // Enable dumping memory pool status to csv file and set the dest path
-    int base_batch_for_memory_estimation;   // Base batch size to be used in memory estimation
-    std::vector after_proc;                 // Start inference after the listed processes
-    int serialize_compile;                  // Serialize creating primitives and compiling kernels
-    std::vector forced_impl_types;          // Force implementation type either ocl or onednn
-    int max_kernels_per_batch;              // Maximum number of kernels in a batch during compiling kernels
-    int impls_cache_capacity;               // The maximum number of entries in the kernel impl cache
-    int enable_sdpa;                        // Allows to control SDPA decomposition
-    int disable_async_compilation;          // Disable async compilation
-    int disable_winograd_conv;              // Disable Winograd conv
-    int disable_dynamic_impl;               // Disable dynamic implementation
-    int disable_runtime_buffer_fusing;      // Disable runtime buffer fusing
-    int disable_memory_reuse;               // Disable memory reuse among layers
-    int disable_build_time_weight_reorder_for_dynamic_nodes;  // Disable build time weight reordering for dynamic nodes
-    int disable_runtime_skip_reorder;       // Disable runtime skip reorder
-    int disable_primitive_fusing;           // Disable primitive fusing
-    int disable_fake_alignment;             // Disable fake alignment
-    int use_usm_host;                       // Set explicit usm_host usage for network input and output
-    std::vector dynamic_quantize_layers_without_onednn;  // Specify Fully-connected layers which enable Dynamic quantization
-    int use_kv_cache_compression;           // Enable KV-cache compression
-    int dynamic_quantize_group_size;        // Enable Dynamic quantization for fully connected primitive by specified group size
-    int dynamic_quantize_asym;              // Use asymmetric dynamic quantization
-    int disable_horizontal_fc_fusion;       // Disable fc horizontal fusion
-    int disable_fc_swiglu_fusion;           // Disable swiglu fusion to fc
-    std::set dump_iteration;                // Dump n-th execution of network.
-    std::vector load_layers_raw_dump;       // List of layers to load dumped raw binary and filenames
-    static const debug_configuration *get_instance();
-    bool is_target_dump_prof_data_iteration(int64_t iteration) const;
-    std::vector get_filenames_for_matched_layer_loading_binaries(const std::string& id) const;
-    std::string get_name_for_dump(const std::string& file_name) const;
-    bool is_layer_for_dumping(const std::string& layerName, bool is_output = false, bool is_input = false) const;
-    bool is_target_iteration(int64_t iteration) const;
-    std::string get_matched_from_filelist(const std::vector& file_names, std::string pattern) const;
-    bool is_layer_name_matched(const std::string& layer_name, const std::string& pattern) const;
-
-    struct memory_preallocation_params {
-        bool is_initialized = false;
-
-        // Iterations mode preallocation
-        size_t next_iters_preallocation_count = 0;
-        size_t max_per_iter_size = 0;
-        size_t max_per_dim_diff = 0;
-
-        // Percentage mode preallocation
-        float buffers_preallocation_ratio = 0.0f;
-    } mem_preallocation_params;
-
-    struct dump_profiling_data_iter_params {
-        bool is_enabled = false;
-        int64_t start = 0;
-        int64_t end = 0;
-    } dump_prof_data_iter_params;
-
-    static std::ostream* verbose_stream;
-    static const int DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET = -2;
-};
-
-}  // namespace cldnn
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp
index 23b48ac4d9a920..f00cf86da5e50b 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp
@@ -4,6 +4,7 @@

 #pragma once

+#include "intel_gpu/runtime/shape_predictor.hpp"
 #include "openvino/runtime/properties.hpp"
 #include "openvino/runtime/intel_gpu/properties.hpp"

@@ -89,21 +90,24 @@ static constexpr Property use_onednn{"USE_ONEDNN"}

 static constexpr Property help{"HELP"};
 static constexpr Property verbose{"VERBOSE"};
+static constexpr Property verbose_color{"VERBOSE_COLOR"};
 static constexpr Property log_to_file{"GPU_LOG_TO_FILE"};
 static constexpr Property disable_usm{"GPU_DISABLE_USM"};
 static constexpr Property disable_onednn_post_ops_opt{"DISABLE_ONEDNN_POST_OPS_OPT"};
 static constexpr Property dump_graphs_path{"GPU_DUMP_GRAPHS_PATH"};
 static constexpr Property dump_profiling_data_path{"GPU_DUMP_PROFILING_DATA_PATH"};
+static constexpr Property dump_profiling_data_per_iter{"GPU_DUMP_PROFILING_DATA_PER_ITER"};
 static constexpr Property dump_sources_path{"GPU_DUMP_SOURCES_PATH"};
 static constexpr Property dump_tensors_path{"GPU_DUMP_TENSORS_PATH"};
 static constexpr Property dry_run_path{"GPU_DRY_RUN_PATH"};
 static constexpr Property dump_tensors{"DUMP_TENSORS"};
-static constexpr Property dump_layers{"GPU_DUMP_LAYERS"};
+static constexpr Property<std::vector<std::string>, ov::PropertyMutability::RW> dump_layer_names{"GPU_DUMP_LAYER_NAMES"};
 static constexpr Property dump_tensors_format{"DUMP_TENSORS_FORMAT"};
 static constexpr Property dump_memory_pool_path{"GPU_DUMP_MEMORY_POOL_PATH"};
+static constexpr Property dump_memory_pool{"GPU_DUMP_MEMORY_POOL"};
 static constexpr Property dump_batch_limit{"GPU_DUMP_BATCH_LIMIT"};
 static constexpr Property<std::set<int64_t>, ov::PropertyMutability::RW> dump_iterations{"GPU_DUMP_ITERATIONS"};
-static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"};
+static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"};
 static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"};
 static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"};
 static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"};
@@ -112,10 +116,12 @@ static constexpr Property disable_post_ops_fus
 static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"};
 static constexpr Property disable_fc_swiglu_fusion{"DISABLE_FC_SWIGLU_FUSION"};
 static constexpr Property disable_fake_alignment{"DISABLE_FAKE_ALIGNMENT"};
-static constexpr Property use_usm_host{"USE_USM_HOST"};
+static constexpr Property disable_runtime_skip_reorder{"DISABLE_RUNTIME_SKIP_REORDER"};
+static constexpr Property usm_policy{"USM_POLICY"};
 static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"};
-static constexpr Property mem_prealloc_options{"MEM_PREALLOC_OPTIONS"};
+static constexpr Property shape_predictor_settings{"SHAPE_PREDICTOR_SETTINGS"};
 static constexpr Property<std::vector<std::string>, ov::PropertyMutability::RW> load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"};
+static constexpr Property<std::vector<std::string>, ov::PropertyMutability::RW> start_after_processes{"START_AFTER_PROCESSES"};

 }  // namespace ov::intel_gpu
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp
index 87e0b2990b7902..1d27eaf63efb86 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp
@@ -4,6 +4,7 @@

 #pragma once

+#include "intel_gpu/runtime/execution_config.hpp"
 #include "layout.hpp"
 #include "memory_caps.hpp"
 #include "utils.hpp"
@@ -110,8 +111,6 @@ struct padded_pool_comparer {
 // - Improve memory consumption

 class memory_pool {
-    memory_pool();
-
     memory_ptr alloc_memory(const layout& layout, allocation_type type, bool reset = true);
     static bool has_conflict(const memory_set&, const std::unordered_set&, uint32_t network_id);

@@ -119,9 +118,10 @@ class memory_pool {
     std::map, padded_pool_comparer> _padded_pool;
     std::multimap _no_reusable_pool;
     engine* _engine;
+    const ExecutionConfig& _config;

 public:
-    explicit memory_pool(engine& engine);
+    explicit memory_pool(engine& engine, const ExecutionConfig& config);
     ~memory_pool();
     memory_ptr get_memory(const layout& layout,
                           const primitive_id& id,
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
index 46bfc496ea8411..3b77804979f6f8 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
@@ -15,7 +15,6 @@ OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::
 OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy")
 OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application")
 OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not")
-
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property")
@@ -34,7 +33,7 @@ OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floati
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file")
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_lp_transformations, false, "Enable/Disable Low precision transformations set")
-
+OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, shape_predictor_settings, {10, 16 * 1024, 2, 1.1f}, "Preallocation settings")
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. May be in-order or out-of-order")
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, optimize_data, false, "Enable/Disable data flow optimizations for cldnn::program")
 OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_memory_pool, true, "Enable/Disable memory pool usage")
@@ -43,15 +42,19 @@ OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, custom_outputs, std::vector
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump")
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_profiling_data_per_iter, false, "Save profiling data w/o per-iteration aggregation")
+OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file")
+
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable optimization pass for onednn post-ops")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data_path, "", "Save csv file with per-stage and per-primitive profiling data to specified folder")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs_path, "", "Save intermediate graph representations during model compilation pipeline to specified folder")
@@ -59,19 +62,20 @@ OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources_path, "", "Save generated sou
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_path, "", "Save intermediate in/out tensors of each primitive to specified folder")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, ov::intel_gpu::DumpTensors::all, "Tensor types to dump. Supported values: all, inputs, outputs")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_format, ov::intel_gpu::DumpFormat::text, "Format of the tensors dump. Supported values: binary, text, text_raw")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_layers, "", "Activate dump for specified layers only")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_layer_names, std::vector<std::string>{}, "Activate dump for specified layers only")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool_path, "", "Save csv file with memory pool info to specified folder")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, false, "Enable verbose output for memory pool")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, std::set<int64_t>{}, "Space separated list of iterations where other dump options should be enabled")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, false, "Measure and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, 0, "Measure and print host time spent from the beginning of the infer until all host work is done and plugin is ready to block thread on the final clFinish() call")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable feature that allows to asynchronously prepare static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime inplace optimizations for operations like concat and crop")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "Disable fusions of operations as post-ops/fused-ops")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "Disable pass which merges QKV projections into single MatMul")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fc_swiglu_fusion, false, "Disable pass which merges FC and SwiGLU ops")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fake alignment feature which tries to keep gpu friendly memory alignment for arbitrary tensor shapes")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, use_usm_host, false, "Enforce USM host usage for all allocations")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "Disable memory reuse for activation tensors")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_skip_reorder, false, "Disable skip reorder optimization applied in runtime")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations")
-OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, mem_prealloc_options, "", "Preallocation setting")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, std::vector<std::string>{}, "List of layers to load raw binary")
+OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, start_after_processes, std::vector<std::string>{}, "Start inference after specified list of processes")
 OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dry_run_path, "", "Enables mode which partially compiles a model and stores runtime model into specified directory")
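With the option table above in place, a debug knob can be driven either from the environment (the "OV_" prefix plus the property name, e.g. OV_GPU_DUMP_MEMORY_POOL=1 or OV_GPU_DUMP_ITERATIONS="0 1 2") or programmatically. A hedged sketch of the programmatic route, assuming ExecutionConfig exposes the inherited PluginConfig::set_property:

    #include <cstdint>
    #include <set>

    #include "intel_gpu/runtime/execution_config.hpp"
    #include "intel_gpu/runtime/internal_properties.hpp"

    // Illustrative only: enable memory pool dumps for the first three iterations.
    void enable_memory_pool_dumps(ov::intel_gpu::ExecutionConfig& config) {
        config.set_property(ov::intel_gpu::dump_memory_pool(true));
        config.set_property(ov::intel_gpu::dump_iterations(std::set<int64_t>{0, 1, 2}));
    }

ov::Property::operator() packs the value into an ov::AnyMap entry, so this should go through the same validation path as any user-supplied property.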
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp
index b3eca9a78fba89..6d5d91bb5b783a 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include "intel_gpu/runtime/execution_config.hpp"

 #if defined(_WIN32)
 #ifndef NOMINMAX
@@ -157,7 +158,7 @@ class profiled_stage {
     , _obj(obj)
     , _stage(stage) {
     GPU_DEBUG_IF(profiling_enabled) {
-        _per_iter_mode = cldnn::debug_configuration::get_instance()->dump_profiling_data_per_iter != 0;
+        _per_iter_mode = ov::intel_gpu::ExecutionConfig::get_dump_profiling_data_per_iter();
         _start = std::chrono::high_resolution_clock::now();
     }
 }
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp
index 469c676b4b0311..a246c6d1fdda8f 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp
@@ -14,23 +14,20 @@ class engine;

 struct ShapePredictor {
 public:
-    using Ptr = std::shared_ptr;
-    ShapePredictor(const engine* engine, float buffers_preallocation_ratio)
-        : _engine(engine)
-        , _buffers_preallocation_ratio(buffers_preallocation_ratio) {
-        static_assert(_max_deque_size >= 2, "[GPU] Deque is supposed to contain at least 2 elements for prediction");
-    }
+    struct Settings {
+        // Iterations mode preallocation
+        size_t next_iters_preallocation_count = 10;
+        size_t max_per_iter_size = 16 * 1024;
+        size_t max_per_dim_diff = 2;

-    ShapePredictor(const engine* engine,
-                   size_t next_iters_preallocation_count,
-                   size_t max_per_iter_size,
-                   size_t max_per_dim_diff,
-                   float buffers_preallocation_ratio)
+        // Percentage mode preallocation
+        float buffers_preallocation_ratio = 1.1f;
+    };
+
+    using Ptr = std::shared_ptr;
+    ShapePredictor(const engine* engine, const Settings& settings)
         : _engine(engine)
-        , _next_iters_preallocation_count(next_iters_preallocation_count)
-        , _max_per_iter_size(max_per_iter_size)
-        , _max_per_dim_diff(max_per_dim_diff)
-        , _buffers_preallocation_ratio(buffers_preallocation_ratio) {
+        , _settings(settings) {
         static_assert(_max_deque_size >= 2, "[GPU] Deque is supposed to contain at least 2 elements for prediction");
     }
@@ -73,13 +70,7 @@ struct ShapePredictor {
     std::map> _shapes_info;
     const engine* _engine;

-    // Iterations mode preallocation
-    const size_t _next_iters_preallocation_count = 10;
-    const size_t _max_per_iter_size = 16 * 1024;  // 16KB => maximum preallocation size is 16KB * 10iters = 160KB
-    const size_t _max_per_dim_diff = 2;
-
-    // Percentage mode preallocation
-    const float _buffers_preallocation_ratio = 1.0f;
+    const Settings _settings;
 };

 }  // namespace cldnn
diff --git a/src/plugins/intel_gpu/src/graph/debug_helper.cpp b/src/plugins/intel_gpu/src/graph/debug_helper.cpp
index b69d10e137010e..3cd7767d5fe21b 100644
--- a/src/plugins/intel_gpu/src/graph/debug_helper.cpp
+++ b/src/plugins/intel_gpu/src/graph/debug_helper.cpp
@@ -3,6 +3,9 @@
 //

 #include "debug_helper.hpp"
+#include
+#include "intel_gpu/runtime/execution_config.hpp"
+#include "intel_gpu/runtime/internal_properties.hpp"
 #include "openvino/util/file_util.hpp"

 #ifdef GPU_DEBUG_CONFIG
@@ -44,8 +47,7 @@ template
 void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) {
     auto&& size = mem->get_layout().get_tensor();

-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1);
+    auto batch_size = std::max(std::min(ExecutionConfig::get_dump_batch_limit(), size.batch[0]), 1);
     tensor tmp_size(size);
     tmp_size.batch[0] = batch_size;
     if (tmp_size == size) {
@@ -121,8 +123,7 @@ void unpack(cldnn::data_types type, uint8_t input, int8_t &v0, int8_t &v1) {
 void dump_i4u4(cldnn::data_types type, memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) {
     auto&& size = mem->get_layout().get_tensor();

-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1);
+    auto batch_size = std::max(std::min(ExecutionConfig::get_dump_batch_limit(), size.batch[0]), 1);
     tensor tmp_size(size);
     tmp_size.batch[0] = batch_size;
     if (tmp_size == size) {
@@ -160,11 +161,16 @@ void dump_i4u4(cldnn::data_types type, memory::ptr mem, stream& stream, std::ofs
     file_stream << buffer.str();
 }

-void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std::string layerName, bool dump_raw) {
-    std::cout << "Dump " << (dump_raw ? "raw " : "") << layerName << std::endl;
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    std::string filename = debug_config->get_name_for_dump(layerName);
-    filename = debug_config->dump_layers_path + filename + ".txt";
+std::string get_name_for_dump(const std::string& file_name) {
+    std::string filename = file_name;
+    std::replace(filename.begin(), filename.end(), '\\', '_');
+    std::replace(filename.begin(), filename.end(), '/', '_');
+    std::replace(filename.begin(), filename.end(), ' ', '_');
+    std::replace(filename.begin(), filename.end(), ':', '_');
+    return filename;
+}
+
+void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std::string filename, bool dump_raw) {
     std::ofstream file_stream(filename);
     if (!mem) {
         file_stream << "Empty" << std::endl;
@@ -195,9 +201,7 @@ void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std
         std::cout << "Dump for this data type is not supported: " << dt_to_str(mem_dt) << std::endl;
 }

-}  // namespace
-
-static std::string get_file_path_for_binary_dump(cldnn::layout layout, std::string name) {
+std::string get_file_path_for_binary_dump(cldnn::layout layout, const std::string& name, const std::string& dump_layers_path) {
     std::string filename;
     std::string data_type = ov::element::Type(layout.data_type).get_type_name();
     std::string format = layout.format.to_string();
@@ -207,29 +211,128 @@ static std::string get_file_path_for_binary_dump(cldnn::layout layout, std::stri
         tensor += ("_" + to_string(dims[r]));
     }

-#ifdef GPU_DEBUG_CONFIG
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    std::string layer_name = debug_config->get_name_for_dump(name);
-    filename = debug_config->dump_layers_path + layer_name
-                + "__" + data_type + "_" + tensor + "__" + format + ".bin";
-#endif
+    std::string layer_name = get_name_for_dump(name);
+    filename = dump_layers_path + layer_name + "__" + data_type + "_" + tensor + "__" + format + ".bin";
     return filename;
 }

+bool is_target_iteration(int64_t iteration, const std::set<int64_t> dump_iteration) {
+    if (iteration < 0)
+        return true;
+
+    if (dump_iteration.empty())
+        return true;
+
+    if (dump_iteration.find(iteration) == std::end(dump_iteration))
+        return false;
+
+    return true;
+}
+
+std::string get_matched_from_filelist(const std::vector<std::string>& file_names, std::string pattern) {
+    for (const auto& file : file_names) {
+        auto found = file.find(pattern);
+        if (found != std::string::npos) {
+            return file;
+        }
+    }
+
+    return std::string();
+}
+
+bool is_layer_name_matched(const std::string& layer_name, const std::string& pattern) {
+    auto upper_layer_name = std::string(layer_name.length(), '\0');
+    std::transform(layer_name.begin(), layer_name.end(), upper_layer_name.begin(), ::toupper);
+    auto upper_pattern = std::string(pattern.length(), '\0');
+    std::transform(pattern.begin(), pattern.end(), upper_pattern.begin(), ::toupper);
+
+    // Check pattern from exec_graph
+    size_t pos = upper_layer_name.find(':');
+    auto upper_exec_graph_name = upper_layer_name.substr(pos + 1, upper_layer_name.size());
+    if (upper_exec_graph_name.compare(upper_pattern) == 0) {
+        return true;
+    }
+
+    // Check pattern with regular expression
+    std::regex re(upper_pattern);
+    return std::regex_match(upper_layer_name, re);
+}
+
+bool is_layer_for_dumping(const ExecutionConfig& config, const std::string& layer_name, bool is_output = false, bool is_input = false) {
+    bool dump_out = config.get_dump_tensors() == ov::intel_gpu::DumpTensors::all || config.get_dump_tensors() == ov::intel_gpu::DumpTensors::out;
+    bool dump_in = config.get_dump_tensors() == ov::intel_gpu::DumpTensors::all || config.get_dump_tensors() == ov::intel_gpu::DumpTensors::in;
+
+    // Dump result layer
+    if (is_output == true && dump_out && (layer_name.find("constant:") == std::string::npos))
+        return true;
+
+    // Dump all layers
+    if (config.get_dump_layer_names().empty() && !dump_out && !dump_in)
+        return true;
+
+    // Dump input layers
+    size_t pos = layer_name.find(':');
+    auto type = layer_name.substr(0, pos);
+    if (is_input == true && type == "parameter" && dump_in)
+        return true;
+
+    auto dump_layers = config.get_dump_layer_names();
+
+    auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){
+        return is_layer_name_matched(layer_name, dl);
+    });
+    return (iter != dump_layers.end());
+}
+
+std::vector<std::string> get_filenames_for_matched_layer_loading_binaries(const ExecutionConfig& config, const std::string& id) {
+    std::vector<std::string> file_names;
+    if (config.get_load_dump_raw_binary().empty())
+        return file_names;
+
+    for (const auto& load_layer : config.get_load_dump_raw_binary()) {
+        size_t file = load_layer.rfind(":");
+        if (file != std::string::npos) {
+            if (id == load_layer.substr(0, file)) {
+                auto file_name_str = load_layer.substr(file + 1);
+                size_t head = 0;
+                size_t found = 0;
+                do {
+                    found = file_name_str.find(",", head);
+                    if (found != std::string::npos)
+                        file_names.push_back(file_name_str.substr(head, (found - head)));
+                    else
+                        file_names.push_back(file_name_str.substr(head));
+
+                    head = found+1;
+                    GPU_DEBUG_LOG << " Layer name loading raw dump : " << load_layer.substr(0, file) << " / the dump file : "
+                                  << file_names.back() << std::endl;
+                } while (found != std::string::npos);
+
+                return file_names;
+            }
+        }
+    }
+
+    return file_names;
+}
+
+
+}  // namespace
+
 NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst)
     : m_inst(inst)
     , m_stream(inst.get_network().get_stream())
     , m_network(inst.get_network())
     , m_program(inst.get_network().get_program().get())
     , m_iter(m_network.iteration) {
+    const auto& config = m_network.get_config();
     // Load binary dump for input layers
-    if (!debug_config->load_layers_raw_dump.empty()) {
+    if (!config.get_load_dump_raw_binary().empty()) {
         const std::string layer_name = m_inst.id();
-        auto files = debug_config->get_filenames_for_matched_layer_loading_binaries(layer_name);
+        auto files = get_filenames_for_matched_layer_loading_binaries(config, layer_name);
         if (!files.empty()) {
             if (m_inst.is_input()) {
                 // Loading binary dumps for output tensors of input-layers : only one output exists or index(dstN) exists
-                auto dump_file = debug_config->get_matched_from_filelist(files, "_dst0__");
+                auto dump_file = get_matched_from_filelist(files, "_dst0__");
                 OPENVINO_ASSERT((files.size() == 1 || dump_file.length() != 0), "Unexpected binary dump for input layer");

                 OPENVINO_ASSERT(files.size() == m_inst.outputs_memory_count(), "Mis-match dump file count");
@@ -238,7 +341,7 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst)
                     auto dump_file = files[0];
                     if (files.size() > 1 || m_inst.outputs_memory_count() != 1) {
                         std::string pattern = "_dst" + std::to_string(i) + "__";
-                        dump_file = debug_config->get_matched_from_filelist(files, pattern);
+                        dump_file = get_matched_from_filelist(files, pattern);
                     }
                     OPENVINO_ASSERT((dump_file.length() > 0), "Could not find expected pattern '_dst[N]__' for binary dump");
                     GPU_DEBUG_COUT << " Load binary dump : " << dump_file << " for " << layer_name << std::endl;
@@ -253,18 +356,18 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst)
                     output_mem->copy_from(m_stream, static_cast(&bin[0]), true);
                 }
             } else {
-                auto check_dst = debug_config->get_matched_from_filelist(files, "_dst0__");
+                auto check_dst = get_matched_from_filelist(files, "_dst0__");
                 OPENVINO_ASSERT(check_dst.length() == 0, "Expected to load binaries for inputs of " + layer_name);

                 // Loading input tensors for any layer
-                auto dump_file = debug_config->get_matched_from_filelist(files, "_src0__");
+                auto dump_file = get_matched_from_filelist(files, "_src0__");
                 OPENVINO_ASSERT(dump_file.length() != 0, "Could not find expected pattern '_src[N]__' for binary dump input : " + layer_name);

                 for (size_t i = 0; i < m_inst.dependencies().size(); i++) {
                     auto dump_file = files[0];
                     if (files.size() > 1 || m_inst.dependencies().size() != 1) {
                         std::string pattern = "_src" + std::to_string(i) + "__";
-                        dump_file = debug_config->get_matched_from_filelist(files, pattern);
+                        dump_file = get_matched_from_filelist(files, pattern);
                     }
                     if (dump_file.length() == 0) {
                         GPU_DEBUG_COUT << " Skip loading for input(" << i << ") of " << layer_name << std::endl;
@@ -290,11 +393,11 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst)
     }

     // Dump input buffers of 'inst'
-    if (debug_config->dump_layers_path.length() > 0) {
-        const std::string layer_name = inst.id();
+    if (config.get_dump_tensors_path().length() > 0) {
+        const std::string& layer_name = inst.id();

-        if (debug_config->is_target_iteration(m_iter) &&
-            debug_config->dump_layers_dst_only == 0 && debug_config->is_layer_for_dumping(layer_name)) {
+        if (is_target_iteration(m_iter, config.get_dump_iterations()) &&
+            config.get_dump_tensors() != ov::intel_gpu::DumpTensors::out && is_layer_for_dumping(config, layer_name)) {
             std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + layer_name + ":";
             for (size_t i = 0; i < m_inst.dependencies().size(); i++) {
                 std::string name = get_file_prefix() + "_src" + std::to_string(i);
@@ -306,24 +409,27 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst)
                 auto dep = m_inst.dependencies().at(i);
                 auto input_layout = dep.first->get_output_layout(dep.second);

-                GPU_DEBUG_IF(debug_config->dump_layers_binary) {
+                if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary) {
                     // Binary dump : raw
-                    auto filename = get_file_path_for_binary_dump(input_layout, name);
+                    auto filename = get_file_path_for_binary_dump(input_layout, name, config.get_dump_tensors_path());

                     mem_lock lock(input_mem, m_stream);
                     ov::util::save_binary(filename, lock.data(), input_mem->size());
-                    GPU_DEBUG_COUT << " Dump layer src : " << layer_name << " to " << filename << std::endl;
+                    GPU_DEBUG_COUT << " Dump layer src : " << layer_name << " to " << filename << std::endl;
                     debug_str_for_bin_load += (filename + ",");
                 } else {
+                    const bool dump_raw = config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::text_raw;
+                    GPU_DEBUG_COUT << " Dump " << (dump_raw ? "raw " : "") << name << std::endl;
+                    auto filename = config.get_dump_tensors_path() + get_name_for_dump(name) + ".txt";
                     log_memory_to_file(input_mem,
                                        input_layout,
                                        m_stream,
-                                       name,
-                                       debug_config->dump_layers_raw);
+                                       filename,
+                                       dump_raw);
                 }
             }

-            if (debug_config->dump_layers_binary && !inst.is_input()) {
+            if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary && !inst.is_input()) {
                 debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"';
                 GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl;
             }
         }
     }
 }

@@ -333,13 +439,14 @@
 NodeDebugHelper::~NodeDebugHelper() {
+    const auto& config = m_network.get_config();
     // Dump output buffers of 'inst'
-    if (debug_config->dump_layers_path.length() > 0) {
+    if (config.get_dump_tensors_path().length() > 0) {
         m_stream.finish();
         const std::string layer_name = m_inst.id();

-        GPU_DEBUG_IF(debug_config->is_target_iteration(m_iter) &&
-                     debug_config->is_layer_for_dumping(layer_name, m_inst.is_output(), m_inst.is_input())) {
+        if (is_target_iteration(m_iter, config.get_dump_iterations()) &&
+            is_layer_for_dumping(config, layer_name, m_inst.is_output(), m_inst.is_input())) {
             std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + layer_name + ":";

             for (size_t i = 0; i < m_inst.outputs_memory_count(); i++) {
@@ -350,22 +457,29 @@ NodeDebugHelper::~NodeDebugHelper() {
                     continue;
                 }

-                GPU_DEBUG_IF(debug_config->dump_layers_binary) {
+                if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary) {
                     // Binary dump : raw
                     auto output_layout = m_inst.get_output_layout(i);
-                    auto filename = get_file_path_for_binary_dump(output_layout, name);
+                    auto filename = get_file_path_for_binary_dump(output_layout, name, config.get_dump_tensors_path());

                     mem_lock lock(output_mem, m_stream);
                     ov::util::save_binary(filename, lock.data(), output_mem->size());
                     GPU_DEBUG_COUT << " Dump layer dst : " << layer_name << " to " << filename << std::endl;
                     debug_str_for_bin_load += (filename + ",");
                 } else {
+                    const bool dump_raw = config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::text_raw;
+                    GPU_DEBUG_COUT << " Dump " << (dump_raw ? "raw " : "") << name << std::endl;
+                    auto filename = config.get_dump_tensors_path() + get_name_for_dump(name) + ".txt";
                     // Text dump
-                    log_memory_to_file(output_mem, m_inst.get_output_layout(i), m_stream, name, debug_config->dump_layers_raw);
+                    log_memory_to_file(output_mem,
+                                       m_inst.get_output_layout(i),
+                                       m_stream,
+                                       filename,
+                                       dump_raw);
                 }
             }

-            GPU_DEBUG_IF(debug_config->dump_layers_binary && m_inst.is_input()) {
+            if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary && m_inst.is_input()) {
                 debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"';
                 GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl;
             }
@@ -377,13 +491,14 @@ NetworkDebugHelper::NetworkDebugHelper(const network& net)
     : m_network(net)
     , m_iter(net.iteration) {
     auto net_id = m_network.get_id();
-    GPU_DEBUG_IF(debug_config->dump_memory_pool > 0) {
-        auto& iters = debug_config->dump_memory_pool_iters;
+    const auto& config = m_network.get_config();
+    if (config.get_dump_memory_pool()) {
+        auto& iters = config.get_dump_iterations();
         if (iters.empty() || iters.find(m_iter) != iters.end()) {
             GPU_DEBUG_COUT << "============================================================================" << std::endl;
             GPU_DEBUG_COUT << "Start network execution (net_id : " << net_id << ", iter :" << m_iter << ")" << std::endl;
             if (m_iter == 0 && net_id > 0) {
-                dump_memory_pool(debug_config->dump_memory_pool_path, m_iter);
+                dump_memory_pool(config.get_dump_memory_pool_path(), m_iter);
                 GPU_DEBUG_COUT << "============================================================================" << std::endl;
             }
         }
@@ -391,38 +506,14 @@ NetworkDebugHelper::NetworkDebugHelper(const network& net)
         GPU_DEBUG_TRACE << "============================================================================" << std::endl;
         GPU_DEBUG_TRACE << "Start network execution (net_id : " << net_id << ", iter :" << m_iter << ")" << std::endl;
     }
-
-    if (debug_config->list_layers == 1) {
-        for (auto& inst : m_network._exec_order) {
-            GPU_DEBUG_COUT << inst->id() << std::endl;
-            if (inst->get_node().is_type()) {
-                auto& loop_node = inst->get_node().as();
-                for (auto& prim : loop_node.get_body_program()->get_processing_order()) {
-                    GPU_DEBUG_COUT << "\t" << prim->id() << std::endl;
-                }
-            } else if (inst->get_node().is_type()) {
-                auto& cond_node = inst->get_node().as();
-                GPU_DEBUG_COUT << "* Branch_True" << std::endl;
-                for (auto& prim : cond_node.get_branch_true().inner_program->get_processing_order()) {
-                    GPU_DEBUG_COUT << "\t" << prim->id() << std::endl;
-                }
-                GPU_DEBUG_COUT << "* Branch_False" << std::endl;
-                for (auto& prim : cond_node.get_branch_false().inner_program->get_processing_order()) {
-                    GPU_DEBUG_COUT << "\t" << prim->id() << std::endl;
-                }
-            }
-        }
-
-        if (!m_network.is_internal())
-            exit(0);
-    }
 }

 NetworkDebugHelper::~NetworkDebugHelper() {
     auto prog = m_network.get_program().get();
     auto net_id = m_network.get_id();
+    const auto& config = prog->get_config();
     // print '-data_shape' option for benchmark_app
-    if (debug_config->print_input_data_shapes == 1) {
+    if (config.get_verbose() >= 4) {
         std::stringstream data_shape_str;
         auto add_string = [&data_shape_str](std::string str) {
             data_shape_str << ((data_shape_str.rdbuf()->in_avail() == 0) ? " -data_shape " : ",") << str;
@@ -443,7 +534,7 @@ NetworkDebugHelper::~NetworkDebugHelper() {
                        << data_shape_str.str() << std::endl;
     }

-    if (!debug_config->dump_graphs.empty() && debug_config->is_target_iteration(m_iter)) {
+    if (!config.get_dump_graphs_path().empty() && is_target_iteration(m_iter, config.get_dump_iterations())) {
         auto get_fixed_str = [](int value, int length = 2) -> std::string {
             std::ostringstream ss;
             ss << std::setw(length) << std::setfill('0') << std::to_string(value);
@@ -459,10 +550,10 @@ NetworkDebugHelper::~NetworkDebugHelper() {
         }
     }

-    if (debug_config->dump_memory_pool > 0) {
-        auto& iters = debug_config->dump_memory_pool_iters;
+    if (config.get_dump_memory_pool()) {
+        auto& iters = config.get_dump_iterations();
         if (iters.empty() || iters.find(m_iter) != iters.end()) {
-            dump_memory_pool(debug_config->dump_memory_pool_path, m_iter);
+            dump_memory_pool(config.get_dump_memory_pool_path(), m_iter);
             GPU_DEBUG_COUT << "============================================================================" << std::endl;
         }
     }
diff --git a/src/plugins/intel_gpu/src/graph/debug_helper.hpp b/src/plugins/intel_gpu/src/graph/debug_helper.hpp
index 61572433cea494..e2137733cf73fc 100644
--- a/src/plugins/intel_gpu/src/graph/debug_helper.hpp
+++ b/src/plugins/intel_gpu/src/graph/debug_helper.hpp
@@ -39,8 +39,6 @@ class NodeDebugHelper {
     const network& m_network;
     const program* m_program;
     const size_t m_iter;
-
-    const debug_configuration* debug_config = cldnn ::debug_configuration ::get_instance();
 };

 class NetworkDebugHelper {
@@ -52,8 +50,6 @@ class NetworkDebugHelper {
     void dump_memory_pool(std::string dump_path, int64_t curr_iter) const;
     const network& m_network;
     const size_t m_iter;
-
-    const debug_configuration* debug_config = cldnn ::debug_configuration ::get_instance();
 };

 #define NETWORK_DEBUG(net) NetworkDebugHelper __network_debug_helper(net)
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp
index 4d21869dfa3953..2786a9e8e85b99 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp
@@ -43,10 +43,6 @@ void post_optimize_weights::optimize_weights(T& node, program& p) {
         return;

     if (impl->is_dynamic()) {
-        GPU_DEBUG_GET_INSTANCE(debug_config);
-        GPU_DEBUG_IF(debug_config->disable_build_time_weight_reorder_for_dynamic_nodes) {
-            return;
-        }
         // TODO: To relax current limitation w.r.t the future optimization of weight reorder process
         // In dynamic shape, selected weight format can change in runtime. However reordering blocked format to blocked format is not fully verified yet.
         // So we need to enable other primitives such as convolution with verifying reorder b/w the possible layouts
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
index 269a88052e7bb0..088afd84f5ff6a 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
@@ -679,8 +679,6 @@ void insert_reorders(program& p, const std::map& fm
 }  // namespace

 void reorder_inputs::run(program& p, reorder_factory& rf) {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-
     auto& lo = p.get_layout_optimizer();
     auto fmt_map = get_preferred_formats(p, lo);
diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp
index 6f8502423047b6..0f8d271bc3a6cd 100644
--- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp
@@ -157,7 +157,6 @@ struct convolution_onednn : typed_primitive_onednn_impl {
             dnnl::memory::desc desc = onednn::layout_to_memory_desc(a_zp->get_layout(), dnnl::memory::format_tag::a, true);
             args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC, a_zp->get_onednn_memory(desc)});

-            GPU_DEBUG_GET_INSTANCE(debug_config);
             GPU_DEBUG_TRACE_DETAIL << instance.id() << " activations_zero_points: "
                                    << " " << a_zp->get_layout().to_short_string() << std::endl;
         }
@@ -167,7 +166,6 @@ struct convolution_onednn : typed_primitive_onednn_impl {
             dnnl::memory::desc desc = onednn::layout_to_memory_desc(w_zp->get_layout(), dnnl::memory::format_tag::a, true);
             args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS, w_zp->get_onednn_memory(desc)});

-            GPU_DEBUG_GET_INSTANCE(debug_config);
             GPU_DEBUG_TRACE_DETAIL << instance.id() << " weights_zero_points: "
                                    << " " << w_zp->get_layout().to_short_string() << std::endl;
         }
diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
index 78e669f0e1152f..dfd65cd9b58067 100644
--- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
+++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
@@ -1129,74 +1129,12 @@ bool layout_optimizer::is_primitive_implemented_for_onednn(program_node& node) {
     return false;
 }

-impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node) {
-#ifdef GPU_DEBUG_CONFIG
-
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(!debug_config->forced_impl_types.empty()) {
-        // Forcing impl type of one primitive
-        for (const auto& forced_impl_type : debug_config->forced_impl_types) {
-            if (node.is_type()) {
-                if (forced_impl_type == "fc:ocl")
-                    return impl_types::ocl;
-                else if (forced_impl_type == "fc:onednn")
-                    return impl_types::onednn;
-            } else if (node.is_type()) {
-                if (forced_impl_type == "gemm:ocl")
-                    return impl_types::ocl;
-                else if (forced_impl_type == "gemm:onednn")
-                    return impl_types::onednn;
-            } else if (node.is_type()) {
-                if (forced_impl_type == "do:cpu")
-                    return impl_types::cpu;
-                else if (forced_impl_type == "do:ocl")
-                    return impl_types::ocl;
-            } else if (node.is_type()) {
-                if (forced_impl_type == "reduce:ocl")
-                    return impl_types::ocl;
-                else if (forced_impl_type == "reduce:onednn")
-                    return impl_types::onednn;
-            } else if (node.is_type()) {
-                if (forced_impl_type == "concat:ocl")
-                    return impl_types::ocl;
-                else if (forced_impl_type == "concat:onednn")
-                    return impl_types::onednn;
-            }
-
-            // Forcing one layer
-            size_t found_type = forced_impl_type.rfind(":");
-            if (found_type != std::string::npos) {
-                impl_types preferred_type = impl_types::any;
-                auto impl_type = forced_impl_type.substr(found_type + 1);
-                if (impl_type == "ocl")
-                    preferred_type = impl_types::ocl;
-                else if (impl_type == "onednn")
-                    preferred_type = impl_types::onednn;
-                else if (impl_type == "cpu")
-                    preferred_type = impl_types::cpu;
-
-                if (node.id() == forced_impl_type.substr(0, found_type)) {
-                    GPU_DEBUG_LOG << " Forced implementation type : " << forced_impl_type.substr(0, found_type) << " : "
-                                  << forced_impl_type.substr(found_type + 1) << std::endl;
-                    return preferred_type;
-                }
-            }
-        }
-    }
-#endif
-
-    return impl_types::any;
-}
-
 impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format preferred_format) {
     if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
         auto forced_impl = _forcing_map.at(node.id()).second;
         if (forced_impl != impl_types::any)
             return forced_impl;
     }
-    auto forced_impl = get_forced_impl_type_by_config(node);
-    if (forced_impl != impl_types::any)
-        return forced_impl;

     auto shape_type = shape_types::any;
diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp
index d6642ac27e56c3..66d890b51e4692 100644
--- a/src/plugins/intel_gpu/src/graph/network.cpp
+++ b/src/plugins/intel_gpu/src/graph/network.cpp
@@ -60,7 +60,7 @@ namespace cldnn {
 namespace {

 #ifdef GPU_DEBUG_CONFIG
-void dump_perf_data_raw(std::string dump_path, const std::list>& exec_order) {
+void dump_perf_data_raw(std::string dump_path, bool per_iter_mode, const std::list>& exec_order) {
     auto layouts_to_str = [](const std::vector& layouts) -> std::string {
         std::stringstream s;
         for (size_t i = 0; i < layouts.size(); i++) {
@@ -71,7 +71,6 @@ void dump_perf_data_raw(std::string dump_path, const std::list
         return s.str();
     };

-    const bool per_iter_mode = cldnn::debug_configuration::get_instance()->dump_profiling_data_per_iter != 0;
     const std::string perf_raw_csv_header = per_iter_mode ? "prim_id,prim_type,stage,net_in_shapes,in_shapes,out_shapes,impl,iter,time_usec\n"
                                                           : "prim_id,prim_type,stage,net_in_shapes,in_shapes,out_shapes,impl,iters,time_usec\n";

     std::ofstream of(dump_path);
@@ -139,13 +138,12 @@ void dump_perf_data_raw(std::string dump_path, const std::list
-void wait_for_the_turn() {
+void wait_for_the_turn(const std::vector& pids) {
     bool need_to_wait;
     do {
         need_to_wait = false;
         struct stat buffer;
-        for (auto pid : debug_config->after_proc) {
+        for (auto pid : pids) {
             auto path = "/proc/" + pid;
             std::cout << "check " + path << std::endl;
             if (stat(path.c_str(), &buffer) == 0) {
@@ -158,8 +156,8 @@ void wait_for_the_turn() {
 }

 #else
-void dump_perf_data_raw(std::string, const std::list>&) {}
-void wait_for_the_turn() {}
+void dump_perf_data_raw(std::string, bool per_iter_mode, const std::list>&) {}
+void wait_for_the_turn(const std::vector& pids) {}
 #endif
 }  // namespace
@@ -177,30 +175,19 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo
     , _config(program->get_config())
     , _engine(program->get_engine())
     , _stream(stream)
-    , _memory_pool(new memory_pool(program->get_engine()))
+    , _memory_pool(new memory_pool(program->get_engine(), program->get_config()))
     , _internal(is_internal)
     , _is_primary_stream(is_primary_stream)
     , _enable_profiling(program->get_config().get_enable_profiling())
     , _reset_arguments(true)
-    , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_buffers_preallocation_ratio())) {
+    , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_shape_predictor_settings())) {
     if (!_internal) {
         net_id = get_unique_net_id();
     }

-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(debug_config->after_proc.size() != 0) {
-        wait_for_the_turn();
+    GPU_DEBUG_IF(get_config().get_start_after_processes().size() != 0) {
+        wait_for_the_turn(get_config().get_start_after_processes());
     }
-
-    GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) {
-        auto& mem_preallocation_params = debug_config->mem_preallocation_params;
-        _shape_predictor.reset(new ShapePredictor(&program->get_engine(),
-                                                  mem_preallocation_params.next_iters_preallocation_count,
-                                                  mem_preallocation_params.max_per_iter_size,
-                                                  mem_preallocation_params.max_per_dim_diff,
-                                                  mem_preallocation_params.buffers_preallocation_ratio));
-    }
-
     calculate_weights_cache_capacity();
     allocate_primitives();
     configure_primitives_second_output();
@@ -239,7 +226,7 @@ network::~network() {
     _program->cancel_compilation_context();
     _memory_pool->clear_pool_for_network(net_id);
     GPU_DEBUG_IF(!_config.get_dump_profiling_data_path().empty()) {
-        dump_perf_data_raw(_config.get_dump_profiling_data_path() + "/perf_raw" + std::to_string(net_id) + ".csv", _exec_order);
+        dump_perf_data_raw(_config.get_dump_profiling_data_path() + "/perf_raw" + std::to_string(net_id) + ".csv", false, _exec_order);
     }
 }
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index 22f6ef4571b3c7..9bb8f96fd25c65 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -557,7 +557,6 @@ void primitive_inst::clear_output_memory() {
 void primitive_inst::realloc_if_needed(bool prev_execution_skipped) {
     OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("realloc_if_needed: " + id()));
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::memory_allocation);

     const auto& users = get_user_insts();
@@ -836,11 +835,6 @@ void primitive_inst::realloc_if_needed(bool prev_execution_skipped) {
     }

     int32_t tmp_prealloc_count = get_prealloc_iter_num();
-    GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) {
-        // If debug config is set, respect the config most
-        tmp_prealloc_count = -1;
-    }
-
     // If we allocated too large memory, reclaim the memory.
     for (size_t i = 0; i < updated_layouts.size(); ++i) {
         bool reclaim = 0;
@@ -1274,8 +1268,7 @@ void primitive_inst::update_paddings() {

 void primitive_inst::do_runtime_skip_reorder() {
     OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_skip_reorder: " + id()));
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(debug_config->disable_runtime_skip_reorder) {
+    GPU_DEBUG_IF(get_config().get_disable_runtime_skip_reorder()) {
         return;
     }
     if (can_be_optimized())
@@ -2748,42 +2741,31 @@ bool primitive_inst::is_valid_fusion() const {
 }

 void primitive_inst::add_profiling_data(instrumentation::pipeline_stage stage, bool cache_hit, std::string memalloc_info, int64_t time, bool per_iter_mode) {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-#ifdef GPU_DEBUG_CONFIG
-    int64_t curr_iter = -1;
-    GPU_DEBUG_IF(debug_config->dump_prof_data_iter_params.is_enabled) {
-        curr_iter = get_network().get_current_iteration_num();
-    }
-    GPU_DEBUG_IF(curr_iter < 0 || debug_config->is_target_dump_prof_data_iteration(curr_iter)) {
-#else
-    {
-#endif
-        instrumentation::perf_counter_key key {
-            _network.get_input_layouts(),
-            _impl_params->input_layouts,
-            _impl_params->output_layouts,
-            get_implementation_name(),
-            stage,
+    instrumentation::perf_counter_key key {
+        _network.get_input_layouts(),
+        _impl_params->input_layouts,
+        _impl_params->output_layouts,
+        get_implementation_name(),
+        stage,
 #ifdef GPU_DEBUG_CONFIG
-            per_iter_mode ? get_network().get_current_iteration_num() : 0,
+        per_iter_mode ? get_network().get_current_iteration_num() : 0,
 #else
-            0,
+        0,
 #endif
-            cache_hit,
-            memalloc_info
-        };
-
-        auto hash = instrumentation::perf_counter_hash()(key);
-        auto& d = _profiling_data[hash];
-        if (_profiling_info.find(hash) == _profiling_info.end()) {
-            _profiling_info.emplace(hash, key);
-        }
+        cache_hit,
+        memalloc_info
+    };

-        auto& total_time = std::get<0>(d);
-        auto& total_iter = std::get<1>(d);
-        total_time += time;
-        total_iter++;
+    auto hash = instrumentation::perf_counter_hash()(key);
+    auto& d = _profiling_data[hash];
+    if (_profiling_info.find(hash) == _profiling_info.end()) {
+        _profiling_info.emplace(hash, key);
     }
+
+    auto& total_time = std::get<0>(d);
+    auto& total_iter = std::get<1>(d);
+    total_time += time;
+    total_iter++;
 }

 std::string primitive_inst::get_implementation_name() const {
diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
index dbadc079c0da82..ad384306b9e081 100644
--- a/src/plugins/intel_gpu/src/graph/program.cpp
+++ b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -221,7 +221,6 @@ program::~program() {
 }

 void program::init_program() {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     set_options();

     pm = std::unique_ptr(new pass_manager(*this));
@@ -606,7 +605,6 @@ void program::post_optimize_graph(bool is_internal) {
     auto partial_build = _config.get_partial_build_program();

 #ifdef GPU_DEBUG_CONFIG
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     if (!is_internal && (!partial_build || !_config.get_dry_run_path().empty())) {
 #else
     if (!is_internal && !partial_build) {
@@ -1650,7 +1648,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {

 std::pair program::get_estimated_device_mem_usage() {
     auto max_alloc_size = get_engine().get_device_info().max_alloc_mem_size;
-    memory_pool pool(get_engine());
+    memory_pool pool(get_engine(), get_config());
     int64_t const_sum = 0;

 #ifdef __unix__
diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp
index 61daf949e762f0..0e735683db2533 100644
--- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp
+++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp
@@ -139,7 +139,6 @@ void close_stream(std::ofstream& graph) { graph.close(); }
 std::string get_node_id(const program_node* ptr) { return "node_" + std::to_string(reinterpret_cast(ptr)); }

 void dump_full_node(std::ofstream& out, const program_node* node) {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     try {
         out << node->type()->to_string(*node);
     } catch(const std::exception& e) {
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp
index a103a159faaf5d..72f3cc9120b9f1 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp
@@ -70,7 +70,6 @@ JitConstants DynamicQuantizeKernelRef::GetJitConstants(const dynamic_quantize_pa
 }

 CommonDispatchData DynamicQuantizeKernelRef::SetDefault(const dynamic_quantize_params& params) const {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     CommonDispatchData dispatchData;

     OPENVINO_ASSERT(params.outputs[0].GetLayout() == DataLayout::bfyx, "It supports only 4d tensor");
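The restructured add_profiling_data above relies on the counter key to separate per-iteration records: when per_iter_mode is set, the current iteration number is baked into perf_counter_key, otherwise every execution hashes to the same entry and times accumulate. A compact sketch of that keying idea with illustrative types (the real key also carries layouts, the impl name, the cache-hit flag and memalloc info):

    #include <cstdint>
    #include <map>
    #include <string>
    #include <tuple>
    #include <utility>

    using PerfKey = std::tuple<std::string /*stage*/, int64_t /*iteration, or 0*/>;
    using PerfData = std::map<PerfKey, std::pair<int64_t /*total_time*/, int64_t /*total_iters*/>>;

    void record(PerfData& data, const std::string& stage, int64_t iteration,
                bool per_iter_mode, int64_t time_usec) {
        // With per_iter_mode the key differs every iteration; without it,
        // the constant 0 collapses all iterations into one aggregated entry.
        PerfKey key{stage, per_iter_mode ? iteration : 0};
        auto& [total_time, total_iters] = data[key];
        total_time += time_usec;
        total_iters++;
    }

This also mirrors why the per-iteration CSV header uses "iter" while the aggregated one uses "iters" in dump_perf_data_raw.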
--- a/src/plugins/intel_gpu/src/plugin/graph.cpp
+++ b/src/plugins/intel_gpu/src/plugin/graph.cpp
@@ -136,8 +136,8 @@ Graph::Graph(std::shared_ptr<Graph> graph, uint16_t stream_id)
 }
 
 Graph::~Graph() {
-    GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) {
-        const auto log_level = cldnn::debug_configuration::get_instance()->host_time_profiling;
+    GPU_DEBUG_IF(m_config.get_host_time_profiling()) {
+        const auto log_level = m_config.get_host_time_profiling();
 
         auto get_time_str = [](int64_t time_mcs, int64_t iters_num = 1) {
             double time = static_cast<double>(time_mcs);
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index fd0f2482bc4712..52b47ad52be0ae 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -81,7 +81,8 @@ bool requires_new_shape_infer(const std::shared_ptr<ov::Node>& op) {
     // HACK: SearchSorted has specific shape requirements.
     // E.g. static input shapes: sorted:[8], values:[2,3,4] are perfectly fine,
     // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid.
-    if (ov::is_type<ov::op::v15::SearchSorted>(op))
+    // Similar case for STFT.
+    if (ov::is_type<ov::op::v15::SearchSorted>(op) || ov::is_type<ov::op::v15::STFT>(op))
         return true;
 
     if (ov::is_type(op))
@@ -589,8 +590,6 @@ bool Plugin::is_metric(const std::string& name) const {
 ov::Any Plugin::get_metric(const std::string& name, const ov::AnyMap& options) const {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::get_metric");
 
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-
     auto device_id = get_property(ov::device::id.name(), options).as<std::string>();
 
     auto iter = m_device_map.find(std::to_string(cldnn::device_query::device_id));
@@ -764,7 +763,6 @@ std::vector<std::string> Plugin::get_device_capabilities(const cldnn::device_inf
 }
 
 uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     auto device_id = get_property(ov::device::id.name(), options).as<std::string>();
     auto context = get_default_contexts().at(device_id);
     const auto& device_info = context->get_engine().get_device_info();
diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
index f30bd9464a4238..5d9871d1fff29a 100644
--- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp
+++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
@@ -327,51 +327,6 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::primitive> prim) {
     m_topology->add_primitive(prim);
 }
 
-bool ProgramBuilder::requires_new_shape_infer(const std::shared_ptr<ov::Node>& op) const {
-    if (op->is_dynamic()) {
-        return true;
-    }
-
-    // HACK: SearchSorted has specific shape requirements.
-    // E.g. static input shapes: sorted:[8], values:[2,3,4] are perfectly fine,
-    // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid.
-    // Similar case for STFT.
-    if (ov::is_type<ov::op::v15::SearchSorted>(op) || ov::is_type<ov::op::v15::STFT>(op))
-        return true;
-
-    if (ov::is_type(op))
-        return true;
-
-    if (ov::is_type(op)) {
-        const auto body_function = std::static_pointer_cast(op)->get_function();
-        if (body_function->is_dynamic())
-            return true;
-    }
-
-    if (ov::is_type(op) || ov::is_type(op)) {
-        return true;
-    }
-    // When an input node has a dynamic shape with 4 dimensions, this function returns false,
-    // because op.is_dynamic() only checks input shapes and therefore returns false.
-    // So, in the case of input data, we need to check output shapes as well.
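The output-shape loop that follows is the key subtlety of this helper: a node can report is_dynamic() == false while still producing a dynamic output, because is_dynamic() inspects input shapes only, as the comment above notes. A minimal illustration with the public ov API; this snippet is not part of the patch, and NonZero merely serves as a well-known op whose output shape depends on runtime data:

    #include "openvino/op/non_zero.hpp"
    #include "openvino/op/parameter.hpp"

    auto param   = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{2, 3});
    auto nonzero = std::make_shared<ov::op::v3::NonZero>(param);
    bool by_inputs = nonzero->is_dynamic();                              // false: the input shape is static
    bool by_output = nonzero->get_output_partial_shape(0).is_dynamic();  // true: output is {2, ?}, data-dependent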
- for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).is_dynamic()) - return true; - } - - for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).size() > 6) - return true; - } - - for (size_t i = 0; i < op->get_input_size(); i++) { - if (op->get_input_partial_shape(i).size() > 6) - return true; - } - - return false; -} - int64_t ProgramBuilder::get_parameter_index(const std::shared_ptr& parameter) const { return m_model->get_parameter_index(parameter); } diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 22b0262c62b2d3..9189941ea8b3a6 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -33,9 +33,8 @@ namespace { inline bool can_use_usm_host(const cldnn::engine& engine, const uint64_t total_output_bytes) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->use_usm_host == 1) { return true; } - GPU_DEBUG_IF(debug_config->use_usm_host == 2) { return false; } + GPU_DEBUG_IF(ov::intel_gpu::ExecutionConfig::get_usm_policy() == 1) { return true; } + GPU_DEBUG_IF(ov::intel_gpu::ExecutionConfig::get_usm_policy() == 2) { return false; } auto can_use_usm = engine.use_unified_shared_memory(); // When output size is large, it is better not to write to usm_host directly @@ -113,20 +112,9 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c : ov::ISyncInferRequest(compiled_model) , m_graph(compiled_model->get_graph(0)) , m_context(std::static_pointer_cast(compiled_model->get_context_impl())) - , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_buffers_preallocation_ratio())) + , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_shape_predictor_settings())) , m_enable_profiling(m_graph->get_config().get_enable_profiling()) , m_use_external_queue(m_graph->use_external_queue()) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { - auto& mem_preallocation_params = debug_config->mem_preallocation_params; - m_shape_predictor.reset( - new cldnn::ShapePredictor(&m_graph->get_engine(), - mem_preallocation_params.next_iters_preallocation_count, - mem_preallocation_params.max_per_iter_size, - mem_preallocation_params.max_per_dim_diff, - mem_preallocation_params.buffers_preallocation_ratio)); - } - init_mappings(); allocate_inputs(); allocate_outputs(); @@ -504,7 +492,7 @@ void SyncInferRequest::wait() { } auto wait_end = std::chrono::high_resolution_clock::now(); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) { + GPU_DEBUG_IF(m_graph->get_config().get_host_time_profiling()) { auto& exec_time_info = m_graph->host_exec_times.back(); const uint64_t total_time = std::chrono::duration_cast(wait_end - wait_start).count(); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp index b269fbc2c9eb4d..66fe9d9c9e0fc6 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp @@ -18,7 +18,6 @@ namespace ov::intel_gpu { DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric) : 
ov::pass::MatcherPass() { - GPU_DEBUG_GET_INSTANCE(debug_config); using namespace ov::pass::pattern; using QuantizationType = ov::op::internal::DynamicQuantize::QuantizationType; diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp index e2090a4d2b5eb8..29e82e4acac904 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp @@ -20,7 +20,6 @@ namespace ov::intel_gpu { FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion(bool fuse_mlp_swiglu) { using namespace ov::pass::pattern; - GPU_DEBUG_GET_INSTANCE(debug_config); // Three FCs connected to the same input size_t min_num_fcs_to_fuse = 3; // Note: diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 6a1b09d1b4f329..72054ec3b1fd44 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -412,10 +412,6 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); pass_config->set_callback([&](const std::shared_ptr node){ - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->enable_sdpa != -1) { - GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1); - } - if (!config.get_enable_sdpa_optimization()) return false; @@ -926,7 +922,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { pass_config->disable(); pass_config->disable(); - float activations_scale_factor = config.get_property(ov::hint::activations_scale_factor); + float activations_scale_factor = config.get_activations_scale_factor(); if (activations_scale_factor > 0.f && infer_precision == ov::element::f16 && !enableInt8) { using namespace ov::pass::low_precision; @@ -1100,7 +1096,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // This is supposed to be the last pass to ensure that we don't have name collisions until // GPU plugin stops using friendly names for program creation manager.register_pass(true); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->verbose >= 1) { + GPU_DEBUG_IF(config.get_verbose() >= 1) { manager.register_pass(); } manager.run_passes(func); diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index 550d740f772a16..ab72354626b5cb 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -3,604 +3,21 @@ // #include "intel_gpu/runtime/debug_configuration.hpp" -#include -#include +#include "intel_gpu/runtime/execution_config.hpp" #include -#include -#include -#include -#include #include -namespace cldnn { -const char *debug_configuration::prefix = "GPU_Debug: "; -std::ostream* debug_configuration::verbose_stream; +namespace ov::intel_gpu { -// Default policy is that dump_configuration will override other configuration from IE. 
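Everything removed from this point to the end of the file is the hand-rolled OV_GPU_* environment parser; its options move to typed entries on ov::intel_gpu::ExecutionConfig declared in options.inl, whose generated static getters still consult the environment in debug-caps builds. A rough sketch of the resulting call-site pattern, built from getters this patch actually uses (the surrounding control flow is illustrative only):

    // was: GPU_DEBUG_GET_INSTANCE(debug_config); if (debug_config->disable_usm) { ... }
    GPU_DEBUG_IF(ExecutionConfig::get_disable_usm()) {
        return false;  // skip USM allocations entirely
    }

    // was: *debug_configuration::verbose_stream << ...; the stream is now resolved on demand:
    get_verbose_stream() << "network compiled" << std::endl;  // file if log_to_file is set, else std::cout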
- -#ifdef GPU_DEBUG_CONFIG - -#define GPU_DEBUG_COUT_ std::cout << cldnn::debug_configuration::prefix - -template -void print_option(std::string option_name, T option_value) { - GPU_DEBUG_COUT_ << "Config " << option_name << " = " << option_value << std::endl; -} - -static std::string to_upper_case(const std::string& var) { - std::stringstream s; - - for (size_t i = 0; i < var.size(); i++) { - if (std::isupper(var[i])) { - if (i != 0) { - s << "_"; - } - s << var[i]; - } else { - s << static_cast(std::toupper(var[i])); - } - } - - return s.str(); -} - -static std::vector get_possible_option_names(const std::string& var, std::vector allowed_option_prefixes) { - std::vector result; - - for (auto& prefix : allowed_option_prefixes) { - result.push_back(prefix + var); - result.push_back(prefix + to_upper_case(var)); - } - - return result; -} - -template -T convert_to(const std::string &str) { - std::istringstream ss(str); - T res; - ss >> res; - return res; -} - -template <> -std::string convert_to(const std::string &str) { - return str; -} - -static std::set parse_int_set(std::string& str) { - std::set int_array; - // eliminate '"' from string to avoid parsing error - str.erase(std::remove_if(str.begin(), str.end(), [](char c) { - return c == '\"'; }), str.end()); - if (str.size() > 0) { - str = " " + str + " "; - std::istringstream ss(str); - std::string token; - while (ss >> token) { - try { - int_array.insert(static_cast(std::stol(token))); - } catch(const std::exception &) { - int_array.clear(); - GPU_DEBUG_COUT << "Argument was ignored. It cannot be parsed to integer array: " << str << std::endl; - break; - } - } - } - return int_array; -} - -template -void get_debug_env_var(const std::string &var, T &val, std::vector allowed_option_prefixes) { - bool found = false; - for (auto o : get_possible_option_names(var, allowed_option_prefixes)) { - if (const auto env_var = std::getenv(o.c_str())) { - val = convert_to(env_var); - found = true; - } - } - - if (found) { - print_option(var, val); - } -} - -template -void get_gpu_debug_env_var(const std::string &var, T &val) { - return get_debug_env_var(var, val, {"OV_GPU_"}); -} - -template -void get_common_debug_env_var(const std::string &var, T &val) { - // The list below should be prioritized from lowest to highest prefix priority - // If an option is set several times with different prefixes, version with the highest priority will be actually used. 
- // This may allow to enable global option with some value and override this value for GPU plugin - // For example: OV_GPU_Verbose=2 OV_Verbose=1 ./my_app => this->verbose == 2 - // In that case we enable Verbose (with level = 1) for all OV components that support this option, but for GPU plugin we increase verbose level to 2 - std::vector allowed_option_prefixes = { - "OV_", - "OV_GPU_" - }; - - return get_debug_env_var(var, val, allowed_option_prefixes); -} - -static void print_help_messages() { - std::vector> message_list; - message_list.emplace_back("OV_GPU_Help", "Print help messages"); - message_list.emplace_back("OV_GPU_Verbose", "Verbose execution"); - message_list.emplace_back("OV_GPU_VerboseColor", "Print verbose color"); - message_list.emplace_back("OV_GPU_VerboseFile", "Filename to dump verbose log"); - message_list.emplace_back("OV_GPU_ListLayers", "Print layers names"); - message_list.emplace_back("OV_GPU_PrintMultiKernelPerf", "Print execution time of each kernel in multi-kernel primitimive"); - message_list.emplace_back("OV_GPU_PrintInputDataShapes", "Print data_shapes of input layers for benchmark_app."); - message_list.emplace_back("OV_GPU_DisableUsm", "Disable usm usage"); - message_list.emplace_back("OV_GPU_DisableOnednn", "Disable onednn for discrete GPU (no effect for integrated GPU)"); - message_list.emplace_back("OV_GPU_DisableOnednnOptPostOps", "Disable onednn optimize post operators"); - message_list.emplace_back("OV_GPU_DumpProfilingData", "Enables dump of extended profiling information to specified directory." - " Please use OV_GPU_DumpProfilingDataPerIter=1 env variable to collect performance per iteration." - " Note: Performance impact may be significant as this option enforces host side sync after each primitive"); - message_list.emplace_back("OV_GPU_DumpProfilingDataIteration", "Enable collecting profiling data only at iterations with requested range. " - "For example for dump profiling data only when iteration is from 10 to 20, you can use " - "OV_GPU_DumpProfilingDataIteration='10..20'. Additionally, you can dump profiling data only " - "from one specific iteration by giving the same values for the start and end, and the open " - "ended range is also available by range from given start to the last iteration as -1. e.g. " - "OV_GPU_DumpProfilingDataIteration='10..-1'"); - message_list.emplace_back("OV_GPU_HostTimeProfiling", "Enable collecting of model enqueue time spent on the host"); - message_list.emplace_back("OV_GPU_DumpGraphs", "1) dump ngraph before and after transformation. 2) dump graph in model compiling." - "3) dump graph in execution."); - message_list.emplace_back("OV_GPU_DumpSources", "Dump opencl sources"); - message_list.emplace_back("OV_GPU_DumpLayersPath", "Enable dumping intermediate buffers and set the dest path"); - message_list.emplace_back("OV_GPU_DumpLayers", "Dump intermediate buffers of specified layers only, separated by space." - " Support case-insensitive and regular expression. 
For example .*conv.*"); - message_list.emplace_back("OV_GPU_DumpLayersResult", "Dump output buffers of result layers only"); - message_list.emplace_back("OV_GPU_DumpLayersInput", "Dump intermediate buffers of input layers only"); - message_list.emplace_back("OV_GPU_DumpLayersDstOnly", "Dump only output of layers"); - message_list.emplace_back("OV_GPU_DumpLayersLimitBatch", "Limit the size of batch to dump"); - message_list.emplace_back("OV_GPU_DumpLayersRaw", "If true, dump data is stored in raw memory format."); - message_list.emplace_back("OV_GPU_DumpLayersRawBinary", "If true, dump data is stored in binary format."); - message_list.emplace_back("OV_GPU_DryRunPath", "Dry run and serialize execution graph into the specified path"); - message_list.emplace_back("OV_GPU_BaseBatchForMemEstimation", "Base batch size to be used in memory estimation"); - message_list.emplace_back("OV_GPU_AfterProc", "Run inference after the specified process PIDs are finished, separated by space." - " Supported on only on linux."); - message_list.emplace_back("OV_GPU_SerialCompile", "Serialize creating primitives and compiling kernels"); - message_list.emplace_back("OV_GPU_ForceImplTypes", "Force implementation type of a target primitive or layer. [primitive or layer_name]:[impl_type]" - " For example fc:onednn gemm:onednn reduce:ocl do:cpu" - " For primitives fc, gemm, do, reduce, concat are supported. Separated by space."); - message_list.emplace_back("OV_GPU_MaxKernelsPerBatch", "Maximum number of kernels in a batch during compiling kernels"); - message_list.emplace_back("OV_GPU_ImplsCacheCapacity", "The maximum number of entries in the kernel impl cache"); - message_list.emplace_back("OV_GPU_DisableAsyncCompilation", "Disable async compilation"); - message_list.emplace_back("OV_GPU_DisableWinogradConv", "Disable Winograd convolution"); - message_list.emplace_back("OV_GPU_DisableDynamicImpl", "Disable dynamic implementation"); - message_list.emplace_back("OV_GPU_DisableRuntimeBufferFusing", "Disable runtime buffer fusing"); - message_list.emplace_back("OV_GPU_DisableMemoryReuse", "Disable memory reuse"); - message_list.emplace_back("OV_GPU_EnableSDPA", "This allows the enforcement of SDPA decomposition logic: 0 completely disables SDPA kernel usage, " - "and 1 enables it for all the cases."); - message_list.emplace_back("OV_GPU_DumpMemoryPool", "Dump memory pool contents of each iteration"); - message_list.emplace_back("OV_GPU_DumpMemoryPoolIters", "List of iterations to dump memory pool status, separated by space."); - message_list.emplace_back("OV_GPU_DumpMemoryPoolPath", "Enable dumping memory pool status to csv file and set the dest path"); - message_list.emplace_back("OV_GPU_DisableBuildTimeWeightReorderForDynamicNodes", "Disable build time weight reorder for dynmaic nodes."); - message_list.emplace_back("OV_GPU_DisableRuntimeSkipReorder", "Disable runtime skip reorder."); - message_list.emplace_back("OV_GPU_DisablePrimitiveFusing", "Disable primitive fusing"); - message_list.emplace_back("OV_GPU_DisableFakeAlignment", "Disable fake alignment"); - message_list.emplace_back("OV_GPU_UseUsmHost", "Set explicit policy for usm host usage for network input/output. " - "0: default, 1: use usm_host, 2: do not use usm_host"); - message_list.emplace_back("OV_GPU_KVCacheCompression", "Enable/Disable KV-cache compression"); - message_list.emplace_back("OV_GPU_DynamicQuantizeLayersWithoutOnednn", "Enable Dynamic quantization for specified Fully connected layers only, " - "separated by space. 
Support case-insensitive and regular expression. For example .*fully_connected.*"); - message_list.emplace_back("OV_GPU_DynamicQuantizeGroupSize", "Specify a group size of dynamic quantization to enable " - "dynamic quantization for Fully-connected primitive."); - message_list.emplace_back("OV_GPU_DynamicQuantizeAsym", "Enable asymmetric dynamic quantization when set as 1."); - message_list.emplace_back("OV_GPU_DisableHorizontalFCFusion", "Disable horizontal fc fusion"); - message_list.emplace_back("OV_GPU_DisableFCSwigluFusion", "Disable fc + swiglu fusion"); - message_list.emplace_back("OV_GPU_DumpIteration", "Dump n-th execution of network, separated by space."); - message_list.emplace_back("OV_GPU_MemPreallocationOptions", "Controls buffer pre-allocation feature. Expects 4 values separated by space in " - "the following order: number of iterations for pre-allocation(int), max size of single iteration in bytes(int), " - "max per-dim allowed diff(int), unconditional buffers preallocation ratio(float). For example for disabling memory " - "preallocation at all, you can use OV_GPU_MemPreallocationOptions='0 0 0 1.0'"); - message_list.emplace_back("OV_GPU_LoadDumpRawBinary", - "Specified layers which are loading dumped binary files generated by OV_GPU_DumpLayersRawBinary debug-config." - " Currently, other layers except input-layer('parameter' type) are loading binaries for only input." - " Different input or output tensors are seperated by ','. Different layers are separated by space. For example, " - " \"[input_layer_name1]:[binary_dumped_file1],[binary_dump_file2] [input_layer_name2]:[binary_dump_1],[binary_dump_2]\""); - - auto max_name_length_item = std::max_element(message_list.begin(), message_list.end(), - [](std::pair& a, std::pair& b){ - return a.first.size() < b.first.size(); - }); - int name_width = static_cast(max_name_length_item->first.size()) + 2; - - GPU_DEBUG_COUT_ << "Supported environment variables for debugging" << std::endl; - for (auto& p : message_list) { - GPU_DEBUG_COUT_ << " - " << std::left << std::setw(name_width) << p.first + " " << p.second << std::endl; - } -} - -#endif - -debug_configuration::debug_configuration() - : help(0) - , verbose(0) - , verbose_color(0) - , verbose_file() - , list_layers(0) - , print_multi_kernel_perf(0) - , print_input_data_shapes(0) - , disable_usm(0) - , disable_onednn(0) - , disable_onednn_opt_post_ops(0) - , dump_profiling_data(std::string("")) - , dump_profiling_data_per_iter(0) - , host_time_profiling(0) - , dump_graphs(std::string()) - , dump_sources(std::string()) - , dump_layers_path(std::string()) - , dry_run_path(std::string()) - , dump_layers_dst_only(0) - , dump_layers_result(0) - , dump_layers_input(0) - , dump_layers_limit_batch(std::numeric_limits::max()) - , dump_layers_raw(0) - , dump_layers_binary(0) - , dump_memory_pool(0) - , dump_memory_pool_path(std::string()) - , base_batch_for_memory_estimation(-1) - , serialize_compile(0) - , max_kernels_per_batch(0) - , impls_cache_capacity(-1) - , enable_sdpa(-1) - , disable_async_compilation(0) - , disable_winograd_conv(0) - , disable_dynamic_impl(0) - , disable_runtime_buffer_fusing(0) - , disable_memory_reuse(0) - , disable_build_time_weight_reorder_for_dynamic_nodes(0) - , disable_runtime_skip_reorder(0) - , disable_primitive_fusing(0) - , disable_fake_alignment(0) - , use_usm_host(0) - , use_kv_cache_compression(-1) - , dynamic_quantize_group_size(DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) - , dynamic_quantize_asym(0) - , disable_horizontal_fc_fusion(0) - , 
disable_fc_swiglu_fusion(0) { -#ifdef GPU_DEBUG_CONFIG - get_gpu_debug_env_var("Help", help); - get_common_debug_env_var("Verbose", verbose); - get_gpu_debug_env_var("VerboseColor", verbose_color); - get_gpu_debug_env_var("VerboseFile", verbose_file); - get_gpu_debug_env_var("ListLayers", list_layers); - get_gpu_debug_env_var("PrintMultiKernelPerf", print_multi_kernel_perf); - get_gpu_debug_env_var("PrintInputDataShapes", print_input_data_shapes); - get_gpu_debug_env_var("DisableUsm", disable_usm); - get_gpu_debug_env_var("DumpGraphs", dump_graphs); - get_gpu_debug_env_var("DumpSources", dump_sources); - get_gpu_debug_env_var("DumpLayersPath", dump_layers_path); - get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch); - get_gpu_debug_env_var("DumpLayersRaw", dump_layers_raw); - get_gpu_debug_env_var("DumpLayersRawBinary", dump_layers_binary); - get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only); - get_gpu_debug_env_var("DumpLayersResult", dump_layers_result); - get_gpu_debug_env_var("DumpLayersInput", dump_layers_input); - get_gpu_debug_env_var("DisableOnednn", disable_onednn); - get_gpu_debug_env_var("DisableOnednnOptPostOps", disable_onednn_opt_post_ops); - get_gpu_debug_env_var("DumpProfilingData", dump_profiling_data); - get_gpu_debug_env_var("DumpProfilingDataPerIter", dump_profiling_data_per_iter); - get_gpu_debug_env_var("HostTimeProfiling", host_time_profiling); - std::string dump_prof_data_iter_str; - get_gpu_debug_env_var("DumpProfilingDataIteration", dump_prof_data_iter_str); - get_gpu_debug_env_var("DryRunPath", dry_run_path); - get_gpu_debug_env_var("DumpMemoryPool", dump_memory_pool); - std::string dump_runtime_memory_pool_iters_str; - get_gpu_debug_env_var("DumpMemoryPoolIters", dump_runtime_memory_pool_iters_str); - get_gpu_debug_env_var("DumpMemoryPoolPath", dump_memory_pool_path); - get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation); - std::string dump_layers_str; - get_gpu_debug_env_var("DumpLayers", dump_layers_str); - std::string after_proc_str; - get_gpu_debug_env_var("AfterProc", after_proc_str); - get_gpu_debug_env_var("SerialCompile", serialize_compile); - std::string forced_impl_types_str; - get_gpu_debug_env_var("ForceImplTypes", forced_impl_types_str); - get_gpu_debug_env_var("MaxKernelsPerBatch", max_kernels_per_batch); - get_gpu_debug_env_var("ImplsCacheCapacity", impls_cache_capacity); - get_gpu_debug_env_var("EnableSDPA", enable_sdpa); - get_gpu_debug_env_var("DisableAsyncCompilation", disable_async_compilation); - get_gpu_debug_env_var("DisableWinogradConv", disable_winograd_conv); - get_gpu_debug_env_var("DisableDynamicImpl", disable_dynamic_impl); - get_gpu_debug_env_var("DisableRuntimeBufferFusing", disable_runtime_buffer_fusing); - get_gpu_debug_env_var("DisableMemoryReuse", disable_memory_reuse); - get_gpu_debug_env_var("DisableBuildTimeWeightReorderForDynamicNodes", disable_build_time_weight_reorder_for_dynamic_nodes); - get_gpu_debug_env_var("DisableRuntimeSkipReorder", disable_runtime_skip_reorder); - get_gpu_debug_env_var("DisablePrimitiveFusing", disable_primitive_fusing); - get_gpu_debug_env_var("DisableFakeAlignment", disable_fake_alignment); - get_gpu_debug_env_var("UseUsmHost", use_usm_host); - get_gpu_debug_env_var("KVCacheCompression", use_kv_cache_compression); - get_gpu_debug_env_var("DynamicQuantizeGroupSize", dynamic_quantize_group_size); - get_gpu_debug_env_var("DynamicQuantizeAsym", dynamic_quantize_asym); - get_gpu_debug_env_var("DisableHorizontalFCFusion", 
disable_horizontal_fc_fusion); - get_gpu_debug_env_var("DisableFCSwigluFusion", disable_fc_swiglu_fusion); - std::string dump_iteration_str; - get_gpu_debug_env_var("DumpIteration", dump_iteration_str); - std::string mem_preallocation_params_str; - get_gpu_debug_env_var("MemPreallocationOptions", mem_preallocation_params_str); - std::string load_dump_raw_bin_str; - get_gpu_debug_env_var("LoadDumpRawBinary", load_dump_raw_bin_str); - std::string dynamic_quantize_layers_without_onednn_str; - get_gpu_debug_env_var("DynamicQuantizeLayersWithoutOnednn", dynamic_quantize_layers_without_onednn_str); - - if (help > 0) { - print_help_messages(); - exit(0); - } - - if (verbose_file.length() > 0) { +std::ostream& get_verbose_stream() { + if (ExecutionConfig::get_log_to_file().length() > 0) { static std::ofstream fout; - fout.open(verbose_file); - verbose_stream = &fout; + if (!fout.is_open()) + fout.open(ExecutionConfig::get_log_to_file()); + return fout; } else { - verbose_stream = &std::cout; - } - - if (dump_prof_data_iter_str.length() > 0) { - dump_prof_data_iter_str = " " + dump_prof_data_iter_str + " "; - std::istringstream iss(dump_prof_data_iter_str); - char dot; - int64_t start, end; - bool is_valid_range = false; - if (iss >> start >> dot >> dot >> end) { - if (start <= end || end == -1) { - try { - is_valid_range = true; - dump_prof_data_iter_params.start = start; - dump_prof_data_iter_params.end = end; - } catch(const std::exception &) { - is_valid_range = false; - } - } - } - if (!is_valid_range) - std::cout << "OV_GPU_DumpProfilingDataIteration was ignored. It cannot be parsed to valid iteration range." << std::endl; - dump_prof_data_iter_params.is_enabled = is_valid_range; - } - - if (dump_layers_str.length() > 0) { - // Insert delimiter for easier parsing when used - dump_layers_str = " " + dump_layers_str + " "; - std::stringstream ss(dump_layers_str); - std::string layer; - while (ss >> layer) { - dump_layers.push_back(layer); - } - } - - if (dynamic_quantize_layers_without_onednn_str.length() > 0) { - // Insert delimiter for easier parsing when used - dynamic_quantize_layers_without_onednn_str = " " + dynamic_quantize_layers_without_onednn_str + " "; - std::stringstream ss(dynamic_quantize_layers_without_onednn_str); - std::string layer; - while (ss >> layer) { - dynamic_quantize_layers_without_onednn.push_back(layer); - } + return std::cout; } - - if (forced_impl_types_str.length() > 0) { - forced_impl_types_str = " " + forced_impl_types_str + " "; - std::stringstream ss(forced_impl_types_str); - std::string type; - while (ss >> type) { - forced_impl_types.push_back(type); - } - } - - // Parsing for loading binary files - if (load_dump_raw_bin_str.length() > 0) { - load_dump_raw_bin_str = " " + load_dump_raw_bin_str + " "; - std::stringstream ss(load_dump_raw_bin_str); - std::string type; - while (ss >> type) { - load_layers_raw_dump.push_back(type); - } - } - - if (dump_iteration_str.size() > 0) { - dump_iteration = parse_int_set(dump_iteration_str); - } - - if (dump_runtime_memory_pool_iters_str.size() > 0) { - dump_memory_pool_iters = parse_int_set(dump_runtime_memory_pool_iters_str); - } - - if (mem_preallocation_params_str.size() > 0) { - mem_preallocation_params_str = " " + mem_preallocation_params_str + " "; - std::istringstream ss(mem_preallocation_params_str); - std::vector params; - std::string param; - while (ss >> param) - params.push_back(param); - - bool correct_params = params.size() == 4; - if (correct_params) { - try { - 
mem_preallocation_params.next_iters_preallocation_count = std::stol(params[0]); - mem_preallocation_params.max_per_iter_size = std::stol(params[1]); - mem_preallocation_params.max_per_dim_diff = std::stol(params[2]); - mem_preallocation_params.buffers_preallocation_ratio = std::stof(params[3]); - } catch(const std::exception &) { - correct_params = false; - } - } - - if (!correct_params) - GPU_DEBUG_COUT_ << "OV_GPU_MemPreallocationOptions were ignored, because they cannot be parsed.\n"; - - mem_preallocation_params.is_initialized = correct_params; - } - - if (after_proc_str.length() > 0) { -#ifdef _WIN32 - GPU_DEBUG_COUT_ << "Warning: OV_GPU_AfterProc is supported only on linux" << std::endl; -#else - after_proc_str = " " + after_proc_str + " "; // Insert delimiter for easier parsing when used - std::stringstream ss(after_proc_str); - std::string pid; - while (ss >> pid) { - after_proc.push_back(pid); - } -#endif - } -#endif -} - -const debug_configuration *debug_configuration::get_instance() { - static std::unique_ptr instance(nullptr); -#ifdef GPU_DEBUG_CONFIG - static std::mutex _m; - std::lock_guard lock(_m); - if (nullptr == instance) - instance.reset(new debug_configuration()); - return instance.get(); -#else - return nullptr; -#endif -} - -bool debug_configuration::is_target_dump_prof_data_iteration(int64_t iteration) const { -#ifdef GPU_DEBUG_CONFIG - if (iteration < 0) - return true; - - if (dump_prof_data_iter_params.start > iteration) - return false; - - if (dump_prof_data_iter_params.start <= dump_prof_data_iter_params.end && - dump_prof_data_iter_params.end < iteration) - return false; - - return true; -#else - return false; -#endif } -std::vector debug_configuration::get_filenames_for_matched_layer_loading_binaries(const std::string& id) const { - std::vector file_names; -#ifdef GPU_DEBUG_CONFIG - if (load_layers_raw_dump.empty()) - return file_names; - - for (const auto& load_layer : load_layers_raw_dump) { - size_t file = load_layer.rfind(":"); - if (file != std::string::npos) { - if (id == load_layer.substr(0, file)) { - auto file_name_str = load_layer.substr(file + 1); - size_t head = 0; - size_t found = 0; - do { - found = file_name_str.find(",", head); - if (found != std::string::npos) - file_names.push_back(file_name_str.substr(head, (found - head))); - else - file_names.push_back(file_name_str.substr(head)); - - head = found+1; - GPU_DEBUG_LOG << " Layer name loading raw dump : " << load_layer.substr(0, file) << " / the dump file : " - << file_names.back() << std::endl; - } while (found != std::string::npos); - - return file_names; - } - } - } -#endif - - return file_names; -} - -std::string debug_configuration::get_matched_from_filelist(const std::vector& file_names, std::string pattern) const { -#ifdef GPU_DEBUG_CONFIG - for (const auto& file : file_names) { - auto found = file.find(pattern); - if (found != std::string::npos) { - return file; - } - } -#endif - return std::string(); -} - -std::string debug_configuration::get_name_for_dump(const std::string& file_name) const { - std::string filename = file_name; -#ifdef GPU_DEBUG_CONFIG - std::replace(filename.begin(), filename.end(), '\\', '_'); - std::replace(filename.begin(), filename.end(), '/', '_'); - std::replace(filename.begin(), filename.end(), ' ', '_'); - std::replace(filename.begin(), filename.end(), ':', '_'); -#endif - return filename; -} - -bool debug_configuration::is_layer_name_matched(const std::string& layer_name, const std::string& pattern) const { -#ifdef GPU_DEBUG_CONFIG - auto upper_layer_name 
= std::string(layer_name.length(), '\0'); - std::transform(layer_name.begin(), layer_name.end(), upper_layer_name.begin(), ::toupper); - auto upper_pattern = std::string(pattern.length(), '\0'); - std::transform(pattern.begin(), pattern.end(), upper_pattern.begin(), ::toupper); - - // Check pattern from exec_graph - size_t pos = upper_layer_name.find(':'); - auto upper_exec_graph_name = upper_layer_name.substr(pos + 1, upper_layer_name.size()); - if (upper_exec_graph_name.compare(upper_pattern) == 0) { - return true; - } - - // Check pattern with regular expression - std::regex re(upper_pattern); - return std::regex_match(upper_layer_name, re); -#else - return false; -#endif -} - -bool debug_configuration::is_layer_for_dumping(const std::string& layer_name, bool is_output, bool is_input) const { -#ifdef GPU_DEBUG_CONFIG - // Dump result layer - if (is_output == true && dump_layers_result == 1 && - (layer_name.find("constant:") == std::string::npos)) - return true; - // Dump all layers - if (dump_layers.empty() && dump_layers_result == 0 && dump_layers_input == 0) - return true; - - // Dump input layers - size_t pos = layer_name.find(':'); - auto type = layer_name.substr(0, pos); - if (is_input == true && type == "parameter" && dump_layers_input == 1) - return true; - - auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){ - return is_layer_name_matched(layer_name, dl); - }); - return (iter != dump_layers.end()); -#else - return false; -#endif -} - -bool debug_configuration::is_target_iteration(int64_t iteration) const { -#ifdef GPU_DEBUG_CONFIG - if (iteration < 0) - return true; - - if (dump_iteration.empty()) - return true; - - if (dump_iteration.find(iteration) == std::end(dump_iteration)) - return false; - - return true; -#else - return false; -#endif -} -} // namespace cldnn +} // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/device.cpp b/src/plugins/intel_gpu/src/runtime/device.cpp index fa027ebe9e2e33..428d18f6c51775 100644 --- a/src/plugins/intel_gpu/src/runtime/device.cpp +++ b/src/plugins/intel_gpu/src/runtime/device.cpp @@ -65,8 +65,7 @@ float device::get_gops(cldnn::data_types dt) const { } bool device::use_unified_shared_memory() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_usm) { + GPU_DEBUG_IF(ExecutionConfig::get_disable_usm()) { return false; } if (get_mem_caps().supports_usm()) { diff --git a/src/plugins/intel_gpu/src/runtime/engine.cpp b/src/plugins/intel_gpu/src/runtime/engine.cpp index b5ec7da3fab705..73bceb8bea8659 100644 --- a/src/plugins/intel_gpu/src/runtime/engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/engine.cpp @@ -71,8 +71,7 @@ const device::ptr engine::get_device() const { } bool engine::use_unified_shared_memory() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_usm) { + GPU_DEBUG_IF(ExecutionConfig::get_disable_usm()) { return false; } if (_device->get_mem_caps().supports_usm()) { diff --git a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp index 1417680c985632..5ddd70f8c6bd3b 100644 --- a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp +++ b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp @@ -52,7 +52,6 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive auto type = mem->get_allocation_type(); const auto _layout_bytes_count = _layout.bytes_count(); - GPU_DEBUG_GET_INSTANCE(debug_config); { auto it = 
_non_padded_pool.lower_bound(_layout_bytes_count); @@ -67,7 +66,7 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive } if (it->second._users.empty()) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = it->first; total_mem_size_non_padded_pool -= released_mem_size; if (type == allocation_type::usm_host) @@ -104,7 +103,7 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive } if (list_itr->_users.empty()) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = mem->size(); total_mem_size_padded_pool -= released_mem_size; if (type == allocation_type::usm_host) @@ -128,14 +127,14 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive } } #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto iter = std::find_if(_no_reusable_mems.begin(), _no_reusable_mems.end(), [&](const cldnn::memory_record& r) { return (network_id == r._network_id && type == r._type && mem->get_internal_params().mem == r._memory->get_internal_params().mem); }); if (iter != _no_reusable_mems.end()) { - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = iter->_users.begin()->_mem_size; total_mem_size_no_reusable -= released_mem_size; if (type == allocation_type::usm_host) @@ -183,8 +182,7 @@ memory::ptr memory_pool::get_from_non_padded_pool(const layout& layout, memory_record({{MEM_USER(unique_id, network_id, prim_id, layout_bytes_count)}}, mem, network_id, type)); #ifdef GPU_DEBUG_CONFIG { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { total_mem_size_non_padded_pool += layout_bytes_count; if (type == allocation_type::usm_host) mem_size_non_padded_pool_host += layout_bytes_count; @@ -225,8 +223,7 @@ memory::ptr memory_pool::get_from_padded_pool(const layout& layout, memory_record({{MEM_USER(unique_id, network_id, prim_id, mem->size())}}, mem, network_id, type)); #ifdef GPU_DEBUG_CONFIG { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { const auto allocated_mem_size = mem->size(); total_mem_size_padded_pool += allocated_mem_size; if (type == allocation_type::usm_host) @@ -242,8 +239,7 @@ memory::ptr memory_pool::get_from_padded_pool(const layout& layout, _padded_pool.emplace(layout, std::move(list)); #ifdef GPU_DEBUG_CONFIG { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { const auto allocated_mem_size = mem->size(); total_mem_size_padded_pool += allocated_mem_size; if (type == allocation_type::usm_host) @@ -300,8 +296,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, bool reset, bool is_dynamic) { bool do_reuse = reusable_across_network; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_memory_reuse) { + GPU_DEBUG_IF(_config.get_disable_memory_reuse()) { do_reuse = false; } if (do_reuse) { @@ -316,7 +311,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, // images (reuse not yet implemented) auto mem = alloc_memory(layout, type, reset); #ifdef GPU_DEBUG_CONFIG - 
GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto allocated_mem_size = mem->size(); _no_reusable_mems.push_back( memory_record({{MEM_USER(unique_id, network_id, prim_id, allocated_mem_size)}}, mem, network_id, type)); @@ -330,7 +325,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, } else { auto mem = alloc_memory(layout, type, reset); #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto allocated_mem_size = mem->size(); _no_reusable_mems.push_back( memory_record({{MEM_USER(unique_id, network_id, prim_id, allocated_mem_size)}}, mem, network_id, type)); @@ -344,7 +339,6 @@ memory::ptr memory_pool::get_memory(const layout& layout, } void memory_pool::clear_pool_for_network(uint32_t network_id) { - GPU_DEBUG_GET_INSTANCE(debug_config); // free up _non_padded_pool for this network { auto itr = _non_padded_pool.begin(); @@ -354,7 +348,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { if (record._network_id == network_id) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = itr->first; total_mem_size_non_padded_pool -= released_mem_size; if (record._type == allocation_type::usm_host) @@ -388,7 +382,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { if (list.empty()) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = itr->first.bytes_count(); total_mem_size_padded_pool -= released_mem_size; if (type == allocation_type::usm_host) @@ -404,12 +398,12 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { #ifdef GPU_DEBUG_CONFIG // free up _no_reusable_mems for this network - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto itr = _no_reusable_mems.begin(); while (itr != _no_reusable_mems.end()) { auto& record = *itr; if (itr->_network_id == network_id) { - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = itr->_users.begin()->_mem_size; total_mem_size_no_reusable -= released_mem_size; if (record._type == allocation_type::usm_host) @@ -439,7 +433,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { } } -memory_pool::memory_pool(engine& engine) : _engine(&engine) { } +memory_pool::memory_pool(engine& engine, const ExecutionConfig& config) : _engine(&engine), _config(config) { } #ifdef GPU_DEBUG_CONFIG inline std::string get_mb_size(size_t size) { diff --git a/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp b/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp index 3b925f07361fff..3a028218a3f62f 100644 --- a/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp +++ b/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp @@ -67,7 +67,7 @@ std::pair ShapePredictor::predict_preallocation_shape(const std int32_t custom_prealloc_dim) { size_t next_iters_prealloc_count = custom_next_iters_prealloc_count > 0 ? 
static_cast(custom_next_iters_prealloc_count) - : _next_iters_preallocation_count; + : _settings.next_iters_preallocation_count; const auto& current_shape = layout.get_shape(); auto dt_bitwidth = ov::element::Type(layout.data_type).bitwidth(); @@ -122,7 +122,7 @@ std::pair ShapePredictor::predict_preallocation_shape(const std // to avoid huge unexpected memory preallocations if (can_use_iterations_preallocation) { for (size_t i = 0; i < diffs[0].size(); ++i) { - if (diffs[0][i] > _max_per_dim_diff) { + if (diffs[0][i] > _settings.max_per_dim_diff) { can_use_iterations_preallocation = false; break; } @@ -132,7 +132,7 @@ std::pair ShapePredictor::predict_preallocation_shape(const std for (size_t i = 0; i < current_shape.size(); ++i) single_iter_shape.push_back(diffs[0][i] == 0 ? current_shape[i] : 1); - if (ceil_div(ov::shape_size(single_iter_shape) * dt_bitwidth, 8) > _max_per_iter_size) + if (ceil_div(ov::shape_size(single_iter_shape) * dt_bitwidth, 8) > _settings.max_per_iter_size) can_use_iterations_preallocation = false; } @@ -142,13 +142,13 @@ std::pair ShapePredictor::predict_preallocation_shape(const std auto preallocation_shape = diffs[0] * mul_shape; auto new_shape = current_shape + preallocation_shape; return {true, new_shape}; - } else if (_buffers_preallocation_ratio > 1.0f) { + } else if (_settings.buffers_preallocation_ratio > 1.0f) { if (format::is_blocked(layout.format)) return {false, {}}; // Apply percentage buffer preallocation auto current_shape_size = ov::shape_size(current_shape); ov::Shape new_shape_size(current_shape.size(), 1); - new_shape_size[0] = static_cast(current_shape_size * _buffers_preallocation_ratio); + new_shape_size[0] = static_cast(current_shape_size * _settings.buffers_preallocation_ratio); return {true, new_shape_size}; } } diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp index 637add35fd4fd8..c89cd5d93b709f 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp @@ -23,7 +23,7 @@ TEST_P(shape_predictor_tests, prediction) { auto& expected_predicted_shape = p.expected_predicted_shape; auto& engine = get_test_engine(); - ShapePredictor sp(&engine, p.buffers_preallocation_ratio); + ShapePredictor sp(&engine, ShapePredictor::Settings{ .buffers_preallocation_ratio = p.buffers_preallocation_ratio }); std::pair result; for (auto& shape : in_shapes) @@ -74,7 +74,7 @@ TEST_P(shape_predictor_tests_b_fs_yx_fsv16, prediction) { auto& expected_predicted_shape = p.expected_predicted_shape; auto& engine = get_test_engine(); - ShapePredictor sp(&engine, p.buffers_preallocation_ratio); + ShapePredictor sp(&engine, ShapePredictor::Settings{ .buffers_preallocation_ratio = p.buffers_preallocation_ratio }); std::pair result; for (auto& shape : in_shapes) @@ -121,8 +121,8 @@ INSTANTIATE_TEST_SUITE_P(smoke, shape_predictor_tests_b_fs_yx_fsv16, TEST(shape_predictor_tests, check_max_buffer_size) { auto& engine = get_test_engine(); - const auto& buffers_preallocation_ratio = 1.1; - ShapePredictor sp(&engine, buffers_preallocation_ratio); + const auto& buffers_preallocation_ratio = 1.1f; + ShapePredictor sp(&engine, ShapePredictor::Settings{ .buffers_preallocation_ratio = buffers_preallocation_ratio }); const auto max_alloc_mem_size = engine.get_device_info().max_alloc_mem_size; auto layout = cldnn::layout({static_cast(max_alloc_mem_size)}, ov::element::u8, 
format::bfyx); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp index 6a96b694eea1c5..7c08fe85c07eeb 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp @@ -10,7 +10,7 @@ using namespace ::tests; TEST(debug_config_test, check_debug_config_off_on_release) { #ifdef NDEBUG - GPU_DEBUG_GET_INSTANCE(debug_config); + auto config = get_test_default_config(get_test_engine()); GPU_DEBUG_IF(1) { GTEST_FAIL(); /* This should be disabled in case of release build */ } From db3e55ba7e68e2c32986d23f7e54ccbc6cafae50 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 21 Jan 2025 11:30:05 +0400 Subject: [PATCH 21/44] fix visitors Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 92 +++++++++++++++++++ src/inference/src/dev/plugin_config.cpp | 10 +- .../intel_gpu/plugin/program_builder.hpp | 2 - src/plugins/intel_gpu/src/plugin/graph.cpp | 35 +------ 4 files changed, 100 insertions(+), 39 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 8c9252eb78e48c..b7e759fb3cd6a6 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -289,4 +289,96 @@ class OPENVINO_RUNTIME_API PluginConfig { inline static const std::string m_allowed_env_prefix = "OV_"; }; +template <> +class OPENVINO_API AttributeAdapter + : public DirectValueAccessor { +public: + AttributeAdapter(ConfigOptionBase*& value) : DirectValueAccessor(value) {} + + OPENVINO_RTTI("AttributeAdapter"); +}; + +template <> +class OPENVINO_API AttributeAdapter + : public DirectValueAccessor { +public: + AttributeAdapter(ov::AnyMap& value) : DirectValueAccessor(value) {} + + OPENVINO_RTTI("AttributeAdapter"); +}; + +template +class OstreamAttributeVisitor : public ov::AttributeVisitor { + OStreamType& os; + +public: + OstreamAttributeVisitor(OStreamType& os) : os(os) {} + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + os << adapter.get(); + } + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + if (auto a = ov::as_type>(&adapter)) { + return handle_option(a->get()); + } else if (auto a = ov::as_type>(&adapter)) { + const auto& props = a->get(); + os << props.size(); + for (auto& kv : props) { + os << kv.first << kv.second.as(); + } + } else { + OPENVINO_THROW("Attribute ", name, " can't be processed\n"); + } + } + + void handle_option(ConfigOptionBase* option) { + if (option->get_visibility() == OptionVisibility::RELEASE || option->get_visibility() == OptionVisibility::RELEASE_INTERNAL) + os << option->get_any().as(); + } +}; + +template +class IstreamAttributeVisitor : public ov::AttributeVisitor { + IStreamType& is; + +public: + IstreamAttributeVisitor(IStreamType& is) : is(is) {} + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + if (auto a = ov::as_type>(&adapter)) { + return handle_option(a->get()); + } else if (auto a = ov::as_type>(&adapter)) { + size_t size; + is >> size; + ov::AnyMap props; + for (size_t i = 0; i < size; i++) { + std::string name, val; + is >> name; + is >> val; + props[name] = val; + + } + a->set(props); + } else { + OPENVINO_THROW("Attribute ", name, " can't be processed\n"); + } + } + + void on_adapter(const 
std::string& name, ov::ValueAccessor& adapter) override { + bool val; + is >> val; + adapter.set(val); + } + + void handle_option(ConfigOptionBase* option) { + if (option->get_visibility() == OptionVisibility::RELEASE || option->get_visibility() == OptionVisibility::RELEASE_INTERNAL) { + std::string s; + is >> s; + if (option->is_valid_value(s)) + option->set_any(s); + } + } +}; + } // namespace ov diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index cd4d13968e67fd..e950a164a41002 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -123,13 +123,9 @@ void PluginConfig::finalize(std::shared_ptr context, const ov::R } bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) { - for (const auto& prop : m_user_properties) { - auto val = prop.second.as(); - visitor.on_attribute(prop.first + "__user", val); - } - for (const auto& prop : m_options_map) { - auto val = prop.second->get_any().as(); - visitor.on_attribute(prop.first + "__internal", val); + visitor.on_attribute("m_user_properties", m_user_properties); + for (auto& prop : m_options_map) { + visitor.on_attribute(prop.first + "__internal", prop.second); } visitor.on_attribute("is_finalized", m_is_finalized); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 99220ec3087221..553f16152cb2bb 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -155,8 +155,6 @@ class ProgramBuilder final { std::shared_ptr m_topology; CustomLayerMap m_custom_layers; - bool allow_new_shape_infer = false; - bool queryMode; std::shared_ptr m_task_executor; diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 0be6f52aa500f3..318e36cc5c8a09 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -4,6 +4,7 @@ #include "intel_gpu/graph/serialization/helpers.hpp" #include "intel_gpu/runtime/layout.hpp" +#include "openvino/core/any.hpp" #include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/threading/executor_manager.hpp" #include "openvino/runtime/exec_model_info.hpp" @@ -35,34 +36,6 @@ #include namespace ov::intel_gpu { -namespace { - - -class OstreamAttributeVisitor : public ov::AttributeVisitor { - cldnn::BinaryOutputBuffer& os; - - template - void append_attribute(const std::string& name, const T& value) { - os << name; - os << value; - } -public: - OstreamAttributeVisitor(cldnn::BinaryOutputBuffer& os) : os(os) {} - - void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { - OPENVINO_THROW("Attribute ", name, " can't be processed\n"); - } - - void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { - append_attribute(name, adapter.get()); - } - - void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { - append_attribute(name, adapter.get()); - } -}; - -} // namespace Graph::Graph(std::shared_ptr model, const RemoteContextImpl::Ptr& context, const ExecutionConfig& config, uint16_t stream_id) : m_context(context) @@ -115,7 +88,9 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context ib >> perfEntry.parentPrimitive; } } - // ib >> m_config; + + IstreamAttributeVisitor visitor(ib); + m_config.visit_attributes(visitor); auto imported_prog 
= std::make_shared<cldnn::program>(get_engine(), m_config);
     imported_prog->load(ib);
@@ -541,7 +516,7 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) {
                 ob << perf_item.second.second.parentPrimitive;
             }
         }
-    OstreamAttributeVisitor visitor(ob);
+    OstreamAttributeVisitor visitor(ob);
     m_config.visit_attributes(visitor);
 
     ob.set_stream(m_network->get_stream_ptr().get());

From abdec311eb4c986650953462c003d757bf4d4429 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Tue, 21 Jan 2025 13:00:56 +0400
Subject: [PATCH 22/44] build fixes

Signed-off-by: Vladimir Paramuzov
---
 src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl   | 1 +
 src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
index 3b77804979f6f8..7e7651ede111e7 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl
@@ -15,6 +15,7 @@ OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::
 OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy")
 OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "Hint that provides number of requests populated by the application")
 OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not")
+OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_reservation, false, "CPU reservation reserves CPUs so that they will not be used by other plugins or compiled models")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property")
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 72054ec3b1fd44..841551c73b671f 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -1070,7 +1070,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
             }
 
             // AZP does not support grouped size dyn-quan
-            GPU_DEBUG_IF(debug_config->dynamic_quantize_asym && (dynamic_quantization_group_size != UINT64_MAX)) {
+            GPU_DEBUG_IF(asymmetric_dyn_quant && (dynamic_quantization_group_size != UINT64_MAX)) {
                 GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: asym quantization does not support grouped quantization"
                                 << " ('DynamicQuantizeAsym' is enabled with grouped size dyn-quan)" << std::endl;
                 return true;

From 81ca005744984d7ee352f07899b82926cbad9bad Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Thu, 23 Jan 2025 14:33:00 +0400
Subject: [PATCH 23/44] minor fixes

Signed-off-by: Vladimir Paramuzov
---
 src/inference/CMakeLists.txt                     |   4 +
 .../openvino/runtime/internal_properties.hpp    |   6 +
 .../openvino/runtime/plugin_config.hpp          |  23 ++-
 src/inference/src/dev/plugin_config.cpp         |  21 ++-
src/inference/tests/unit/config_test.cpp | 38 +++- src/plugins/intel_cpu/src/config.cpp | 2 +- .../intel_cpu/src/internal_properties.hpp | 7 +- .../intel_gpu/plugin/program_builder.hpp | 1 - .../intel_gpu/plugin/remote_context.hpp | 1 + .../intel_gpu/runtime/debug_configuration.hpp | 57 +++--- .../intel_gpu/runtime/execution_config.hpp | 8 +- .../intel_gpu/runtime/internal_properties.hpp | 1 - .../include/intel_gpu/runtime/options.inl | 4 +- .../graph_optimizer/prepare_buffer_fusing.cpp | 3 +- .../select_preferred_formats.cpp | 2 +- src/plugins/intel_gpu/src/plugin/graph.cpp | 2 + src/plugins/intel_gpu/src/plugin/plugin.cpp | 174 +++--------------- .../src/plugin/sync_infer_request.cpp | 2 +- .../src/runtime/execution_config.cpp | 156 +++++++++++++++- .../concurrency/gpu_reservation_test.cpp | 6 +- 20 files changed, 295 insertions(+), 223 deletions(-) diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt index 7e6cae62b85b67..9b2967e9f80fae 100644 --- a/src/inference/CMakeLists.txt +++ b/src/inference/CMakeLists.txt @@ -76,6 +76,10 @@ target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_OPENVINO_RUNTIME_API $<$:PROXY_PLUGIN_ENABLED>) +if(ENABLE_DEBUG_CAPS) + target_compile_definitions(${TARGET_NAME}_obj PUBLIC ENABLE_DEBUG_CAPS) +endif() + target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE $ $<$:$>) diff --git a/src/inference/dev_api/openvino/runtime/internal_properties.hpp b/src/inference/dev_api/openvino/runtime/internal_properties.hpp index 9b2f08c17a7fe0..e26024622580e7 100644 --- a/src/inference/dev_api/openvino/runtime/internal_properties.hpp +++ b/src/inference/dev_api/openvino/runtime/internal_properties.hpp @@ -90,5 +90,11 @@ static constexpr Property compiled_model_runtime_p */ static constexpr Property query_model_ratio{"QUERY_MODEL_RATIO"}; +/** + * @brief Allow execution of low precision transformations in plugin's pipelines + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; + } // namespace internal } // namespace ov diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index b7e759fb3cd6a6..531bbda8d346f8 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -1,10 +1,9 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once -#include #include #include "openvino/core/attribute_visitor.hpp" #include "openvino/runtime/iremote_context.hpp" @@ -64,6 +63,7 @@ } \ } +#ifdef ENABLE_DEBUG_CAPS #define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ auto v = read_env(PropertyNamespace::PropertyVar.name(), m_allowed_env_prefix, &m_ ## PropertyVar); \ @@ -71,6 +71,12 @@ return m_ ## PropertyVar.value; \ return v.as(); \ } +#else +#define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ + static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ + return m_ ## PropertyVar.value; \ + } +#endif #define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) 
\ m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; @@ -91,16 +97,15 @@ OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG_GLOBAL, __VA_ARGS__) namespace ov { -#define ENABLE_DEBUG_CAPS enum class OptionVisibility : uint8_t { RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only DEBUG = 1 << 2, // Option can be set for debug builds only via environment and config file DEBUG_GLOBAL = 1 << 3, // Global option can be set for debug builds only via environment and config file #ifdef ENABLE_DEBUG_CAPS - ANY = 0x0F, // Any visibility is valid including DEBUG + ANY = 0x0F, // Any visibility is valid including DEBUG & DEBUG_GLOBAL #else - ANY = 0x03, // Any visibility is valid excluding DEBUG + ANY = 0x03, // Any visibility is valid excluding DEBUG & DEBUG_GLOBAL #endif }; @@ -235,14 +240,14 @@ class OPENVINO_RUNTIME_API PluginConfig { std::string to_string() const; - void finalize(std::shared_ptr context, const ov::RTMap& rt_info); + void finalize(const IRemoteContext* context, const ov::Model* model); bool visit_attributes(ov::AttributeVisitor& visitor); protected: - virtual void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) {} - virtual void apply_debug_options(std::shared_ptr context); - virtual void finalize_impl(std::shared_ptr context) {} + virtual void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) {} + virtual void apply_debug_options(const IRemoteContext* context); + virtual void finalize_impl(const IRemoteContext* context) {} template bool is_set_by_user(const ov::Property& property) const { diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index e950a164a41002..4b0a3266b52f71 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -35,29 +35,28 @@ size_t get_terminal_width() { } else { return default_width; } -#else +#elif __linux__ struct winsize w; if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { return w.ws_col; } else { return default_width; } -#endif // _WIN32 +#else + return default_width; +#endif } } namespace ov { - ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility allowed_visibility) const { if (m_user_properties.find(name) != m_user_properties.end()) { return m_user_properties.at(name); } auto option = get_option_ptr(name); - if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) { - OPENVINO_THROW("Couldn't get unknown property: ", name); - } + OPENVINO_ASSERT((allowed_visibility & option->get_visibility()) == option->get_visibility(), "Couldn't get unknown property: ", name); return option->get_any(); } @@ -98,11 +97,13 @@ void PluginConfig::set_user_property(const ov::AnyMap& config, OptionVisibility } } -void PluginConfig::finalize(std::shared_ptr context, const ov::RTMap& rt_info) { +void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* model) { if (m_is_finalized) return; - apply_rt_info(context, rt_info); + if (model) + apply_model_specific_options(context, *model); + apply_debug_options(context); // Copy internal properties before applying hints to ensure that // a property set by hint won't be 
overriden by a value in user config. @@ -132,7 +133,7 @@ bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) { return true; } -void PluginConfig::apply_debug_options(std::shared_ptr context) { +void PluginConfig::apply_debug_options(const IRemoteContext* context) { const bool throw_on_error = false; if (context) { diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index 6f385473b1c627..68b8cb3b617fd3 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -1,8 +1,11 @@ -// Copyright (C) 2018-2024 Intel Corporation +// Copyright (C) 2018-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "openvino/core/any.hpp" +#include "openvino/core/model.hpp" +#include "openvino/core/node_vector.hpp" +#include "openvino/op/parameter.hpp" #include "openvino/runtime/plugin_config.hpp" #include @@ -18,7 +21,7 @@ static constexpr Property high_level_proper static constexpr Property low_level_property{"LOW_LEVEL_PROPERTY"}; static constexpr Property release_internal_property{"RELEASE_INTERNAL_PROPERTY"}; static constexpr Property debug_property{"DEBUG_PROPERTY"}; -static constexpr Property debug_global_property{"DEBUG_GLOBAL_PROPERTY"}; +static constexpr Property debug_global_property{"DEBUG_GLOBAL_PROPERTY"}; struct EmptyTestConfig : public ov::PluginConfig { @@ -73,14 +76,14 @@ struct NotEmptyTestConfig : public ov::PluginConfig { return supported_properties; } - void finalize_impl(std::shared_ptr context) override { + void finalize_impl(const IRemoteContext* context) override { if (!is_set_by_user(low_level_property)) { m_low_level_property.value = m_high_level_property.value; } } - void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override { - apply_rt_info_property(high_level_property, rt_info); + void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) override { + apply_rt_info_property(high_level_property, model.get_rt_info("runtime_options")); } using ov::PluginConfig::get_option_ptr; @@ -185,12 +188,17 @@ TEST(plugin_config, can_set_property_from_rt_info) { {int_property.name(), 10} // int_property is not applied from rt info }; + auto p1 = std::make_shared(); + auto r1 = std::make_shared(p1); + ov::Model m(ov::OutputVector{r1}, ov::ParameterVector{p1}); + m.set_rt_info(rt_info, {"runtime_options"}); + // default values ASSERT_EQ(cfg.m_high_level_property.value, ""); ASSERT_EQ(cfg.m_low_level_property.value, ""); ASSERT_EQ(cfg.m_int_property.value, -1); - cfg.finalize(nullptr, rt_info); + cfg.finalize(nullptr, &m); ASSERT_EQ(cfg.m_high_level_property.value, "value1"); ASSERT_EQ(cfg.m_low_level_property.value, "value1"); // dependant is updated too @@ -235,3 +243,21 @@ TEST(plugin_config, can_get_global_property) { NotEmptyTestConfig cfg; ASSERT_EQ(cfg.get_debug_global_property(), 4); } + +TEST(plugin_config, global_property_read_env_on_each_call) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_debug_global_property(), 4); +#ifdef ENABLE_DEBUG_CAPS + std::string env_var1 = "OV_DEBUG_GLOBAL_PROPERTY=10"; + ::putenv(env_var1.data()); + ASSERT_EQ(cfg.get_debug_global_property(), 10); + + std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; + ::putenv(env_var2.data()); + ASSERT_EQ(cfg.get_debug_global_property(), 20); +#else + std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; + ::putenv(env_var2.data()); + ASSERT_EQ(cfg.get_debug_global_property(), 4); // no effect for build w/o debug caps +#endif +} diff --git 
a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index db53bb0c531b1a..8627f82b28574d 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -210,7 +210,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { ov::internal::exclusive_async_requests.name(), ". Expected only true/false"); } - } else if (key == ov::intel_cpu::lp_transforms_mode.name()) { + } else if (key == ov::internal::enable_lp_transformations.name()) { try { lpTransformsMode = val.as() ? LPTransformsMode::On : LPTransformsMode::Off; } catch (ov::Exception&) { diff --git a/src/plugins/intel_cpu/src/internal_properties.hpp b/src/plugins/intel_cpu/src/internal_properties.hpp index 320539721ca09a..3a806e13a30ea1 100644 --- a/src/plugins/intel_cpu/src/internal_properties.hpp +++ b/src/plugins/intel_cpu/src/internal_properties.hpp @@ -16,11 +16,6 @@ namespace intel_cpu { */ static constexpr Property cpu_runtime_cache_capacity{"CPU_RUNTIME_CACHE_CAPACITY"}; -/** - * @brief Allow low precision transform. - */ -static constexpr Property lp_transforms_mode{"LP_TRANSFORMS_MODE"}; - /** * @brief Enum to define possible snippets mode hints. */ @@ -69,4 +64,4 @@ inline std::istream& operator>>(std::istream& is, SnippetsMode& mode) { static constexpr Property snippets_mode{"SNIPPETS_MODE"}; } // namespace intel_cpu -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 553f16152cb2bb..389c7f7e2b54a1 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -168,7 +168,6 @@ class ProgramBuilder final { void prepare_build(); void cleanup_build(); - // TODO(eunsoo): remove createTopolpgyOnly argument and add another method to create topology from ngraph function std::shared_ptr build(const std::vector>& ops, bool innerProgram = false); void CreateSingleLayerPrimitive(const std::shared_ptr& op); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp index 8440bd6824ef9e..98e74b76a8b502 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp @@ -37,6 +37,7 @@ class RemoteContextImpl : public ov::IRemoteContext { ov::SoPtr create_tensor(const ov::element::Type& type, const ov::Shape& shape, const ov::AnyMap& params) override; cldnn::engine& get_engine() { return *m_engine; } + const cldnn::engine& get_engine() const { return *m_engine; } ov::intel_gpu::gpu_handle_param get_external_queue() const { return m_external_queue; } cldnn::memory::ptr try_get_cached_memory(size_t hash); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index 489399c1255ce0..08134ce420a4f6 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -4,11 +4,8 @@ #pragma once #include -#include -#include -#include -#include #include +#include #include "intel_gpu/runtime/execution_config.hpp" namespace ov::intel_gpu { @@ -44,37 +41,47 @@ std::ostream& get_verbose_stream(); } // namespace ov::intel_gpu #ifdef 
GPU_DEBUG_CONFIG -#if defined(_WIN32) -#define SEPARATE '\\' -#else -#define SEPARATE '/' -#endif -#define GPU_PREFIX "GPU_Debug: " +namespace color { +static constexpr const char dark_gray[] = "\033[1;30m"; +static constexpr const char blue[] = "\033[1;34m"; +static constexpr const char purple[] = "\033[1;35m"; +static constexpr const char cyan[] = "\033[1;36m"; +static constexpr const char reset[] = "\033[0m"; +} // namespace color + +static constexpr const char prefix[] = "GPU_Debug: "; -#define GPU_FILENAME (strrchr(__FILE__, SEPARATE) ? strrchr(__FILE__, SEPARATE) + 1 : __FILE__) #define GPU_DEBUG_IF(cond) if (cond) #define GPU_DEBUG_CODE(...) __VA_ARGS__ + #define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) \ cldnn::instrumentation::mem_usage_logger mem_logger{stage, ov::intel_gpu::ExecutionConfig::get_verbose() >= 2}; -#define GPU_DEBUG_PROFILED_STAGE(stage) \ - auto stage_prof = cldnn::instrumentation::profiled_stage(\ + +#define GPU_DEBUG_PROFILED_STAGE(stage) \ + auto stage_prof = cldnn::instrumentation::profiled_stage( \ !get_config().get_dump_profiling_data_path().empty(), *this, stage) + #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val) stage_prof.set_cache_hit(val) #define GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO(info) stage_prof.add_memalloc_info(info) +#define GPU_DEBUG_LOG_PREFIX ov::intel_gpu::get_verbose_stream() \ + << prefix \ + << std::filesystem::path(__FILE__).filename().generic_string() << ":" \ + << std::to_string(__LINE__) << ":" \ + << __func__ << ": " + +#define GPU_DEBUG_LOG_COLOR_PREFIX ov::intel_gpu::get_verbose_stream() \ + << color::dark_gray << std::string(prefix) \ + << color::blue << std::filesystem::path(__FILE__).filename().generic_string() << ":" \ + << color::purple << std::to_string(__LINE__) << ":" \ + << color::cyan << __func__ << ": " << color::reset + #define GPU_DEBUG_LOG_RAW_INT(min_verbose_level) if (ov::intel_gpu::ExecutionConfig::get_verbose() >= min_verbose_level) \ - (ov::intel_gpu::ExecutionConfig::get_verbose_color() ? GPU_DEBUG_LOG_PREFIX : GPU_DEBUG_LOG_COLOR_PREFIX) -#define GPU_DEBUG_LOG_RAW(min_verbose_level) GPU_DEBUG_LOG_RAW_INT(static_cast::type>(min_verbose_level)) -#define GPU_DEBUG_LOG_PREFIX \ - ov::intel_gpu::get_verbose_stream() << GPU_PREFIX << GPU_FILENAME << ":" << __LINE__ << ":" << __func__ << ": " -#define GPU_DEBUG_LOG_COLOR_PREFIX ov::intel_gpu::get_verbose_stream() << DARK_GRAY << GPU_PREFIX << \ - BLUE << GPU_FILENAME << ":" << PURPLE << __LINE__ << ":" << CYAN << __func__ << ": " << RESET -#define DARK_GRAY "\033[1;30m" -#define BLUE "\033[1;34m" -#define PURPLE "\033[1;35m" -#define CYAN "\033[1;36m" -#define RESET "\033[0m" + (ov::intel_gpu::ExecutionConfig::get_verbose_color() ? GPU_DEBUG_LOG_COLOR_PREFIX : GPU_DEBUG_LOG_PREFIX) + +#define GPU_DEBUG_LOG_RAW(min_verbose_level) \ + GPU_DEBUG_LOG_RAW_INT(static_cast::type>(min_verbose_level)) #else #define GPU_DEBUG_IF(cond) if (0) #define GPU_DEBUG_CODE(...) 
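The release branch above compiles every debug construct away: GPU_DEBUG_IF(cond) becomes "if (0)" and the condition is discarded before it ever reaches the compiler, while the guarded block still has to parse, which keeps debug-only code from bit-rotting. A minimal standalone sketch of this pattern, using hypothetical names rather than the exact OpenVINO macros:

    #include <cstdlib>
    #include <iostream>

    #ifdef MY_DEBUG_CAPS
    #define MY_DEBUG_IF(cond) if (cond)
    #else
    // Release build: the condition tokens are dropped entirely, so the guarded
    // block becomes dead code and is eliminated by the compiler.
    #define MY_DEBUG_IF(cond) if (0)
    #endif

    int main() {
        MY_DEBUG_IF(std::getenv("MY_VERBOSE") != nullptr) {
            std::cout << "printed only in debug-caps builds\n";
        }
        return 0;
    }

Because the macro argument disappears in the release expansion, anything referenced only inside the condition never has to exist in a release build.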
@@ -82,7 +89,7 @@ std::ostream& get_verbose_stream(); #define GPU_DEBUG_PROFILED_STAGE(stage) #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val) #define GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO(info) -#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) ov::intel_gpu::get_verbose_stream() << GPU_PREFIX +#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) ov::intel_gpu::get_verbose_stream() #endif #define GPU_DEBUG_COUT GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::DISABLED) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index be8a60b65d023f..bce3e3101e2160 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -4,6 +4,7 @@ #pragma once +#include "openvino/core/any.hpp" #include "openvino/runtime/plugin_config.hpp" #include "intel_gpu/runtime/device_info.hpp" #include "intel_gpu/runtime/internal_properties.hpp" @@ -31,9 +32,12 @@ struct ExecutionConfig : public ov::PluginConfig { #undef OV_CONFIG_LOCAL_OPTION #undef OV_CONFIG_GLOBAL_OPTION + const ov::AnyMap& get_user_properties() const { return m_user_properties; } + protected: - void finalize_impl(std::shared_ptr context) override; - void apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) override; + void finalize_impl(const IRemoteContext* context) override; + void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) override; + void apply_rt_info(const IRemoteContext* context, const ov::RTMap& rt_info, bool is_llm); const ov::PluginConfig::OptionsDesc& get_options_desc() const override; void apply_user_properties(const cldnn::device_info& info); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index f00cf86da5e50b..afb32d3393d6b4 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -83,7 +83,6 @@ static constexpr Property allow_new_shape_infer{"G static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; static constexpr Property config_file{"CONFIG_FILE"}; -static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; static constexpr Property buffers_preallocation_ratio{"GPU_BUFFERS_PREALLOCATION_RATIO"}; static constexpr Property max_kernels_per_batch{"GPU_MAX_KERNELS_PER_BATCH"}; static constexpr Property use_onednn{"USE_ONEDNN"}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 7e7651ede111e7..61e4315807449c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -31,9 +31,9 @@ OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, " OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") +OV_CONFIG_RELEASE_OPTION(ov::internal, 
enable_lp_transformations, false, "Enable/Disable Low precision transformations set") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file") -OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_lp_transformations, false, "Enable/Disable Low precision transformations set") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, shape_predictor_settings, {10, 16 * 1024, 2, 1.1f}, "Preallocation settings") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. May be in-order or out-of-order") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, optimize_data, false, "Enable/Disable data flow optimizations for cldnn::program") @@ -49,7 +49,7 @@ OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Con OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, help, false, "Print help message for all config options") OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher value the more verbose output. 0 - Disabled, 4 - Maximum verbosity") -OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, verbose_color, false, "Enable coloring for verbose logs") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, verbose_color, true, "Enable coloring for verbose logs") OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem") OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, usm_policy, 0, "0: default, 1: use usm_host, 2: do not use usm_host") OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump") diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 8e8cac35fa1fb0..1c79ab27101808 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -80,8 +80,7 @@ bool concat_in_place_optimization::match(const program_node& concat_node, if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph()) return false; bool do_runtime_buffer_fusing = true; - const auto& config = concat_node.get_config(); - GPU_DEBUG_IF(config.get_disable_runtime_buffer_fusing()) { + GPU_DEBUG_IF(concat_node.get_config().get_disable_runtime_buffer_fusing()) { do_runtime_buffer_fusing = false; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index a5d98be69771a9..8b3a73c74f3aa6 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -121,7 +121,7 @@ void select_preferred_formats::run(program& p) { optimize_conv_permute(*n); } } catch (std::exception& exception) { - GPU_DEBUG_INFO << "WARNING(select_preferred_formats): " << exception.what() << std::endl; + GPU_DEBUG_LOG << "WARNING(select_preferred_formats): " << exception.what() << std::endl; } print_selected_formats(*n); } diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 318e36cc5c8a09..bcf6a26420dc57 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -91,6 +91,8 @@ 
Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context IstreamAttributeVisitor visitor(ib); m_config.visit_attributes(visitor); + m_config.set_user_property(config.get_user_properties()); // Copy user properties if those were modified on import call + m_config.finalize(context.get(), nullptr); auto imported_prog = std::make_shared(get_engine(), m_config); imported_prog->load(ib); diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 52b47ad52be0ae..3f6b88ff238030 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -28,12 +27,7 @@ #include "intel_gpu/runtime/itt.hpp" #include "openvino/core/any.hpp" #include "openvino/core/deprecated.hpp" -#include "openvino/op/gather.hpp" -#include "openvino/op/concat.hpp" -#include "openvino/op/paged_attention.hpp" #include "openvino/pass/manager.hpp" -#include "openvino/pass/pattern/op/wrap_type.hpp" -#include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/visualize_tree.hpp" #include "openvino/runtime/device_id_parser.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" @@ -42,8 +36,6 @@ #include "openvino/runtime/performance_heuristics.hpp" #include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/properties.hpp" -#include "openvino/util/common_util.hpp" -#include "ov_ops/dynamic_quantize.hpp" #include "openvino/util/weights_path.hpp" #include "transformations/common_optimizations/dimension_tracking.hpp" #include "transformations/init_node_info.hpp" @@ -60,106 +52,6 @@ using Time = std::chrono::high_resolution_clock; namespace ov::intel_gpu { -namespace { - -ov::RTMap get_rt_info(const ov::Model& model) { - ov::RTMap rt_info; - if (model.has_rt_info("runtime_options")) - rt_info = model.get_rt_info("runtime_options"); - - if (model.has_rt_info("__weights_path")) { - rt_info[ov::weights_path.name()] = model.get_rt_info("__weights_path"); - } - return rt_info; -} - -bool requires_new_shape_infer(const std::shared_ptr& op) { - if (op->is_dynamic()) { - return true; - } - - // HACK: SearchSorted has specific shape requirements. - // E.g. static input shapes: sorted:[8], values:[2,3,4] are prefectly fine, - // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid. - // Similar case for STFT. - if (ov::is_type(op) || ov::is_type(op)) - return true; - - if (ov::is_type(op)) - return true; - - if (ov::is_type(op)) { - const auto body_function = std::static_pointer_cast(op)->get_function(); - if (body_function->is_dynamic()) - return true; - } - - if (ov::is_type(op) || ov::is_type(op)) { - return true; - } - // When input node has dynamic shape with 4 dimension, this function return false - // because op.is_dynamic() which only checks input shapes return false. - // So, in the case of input data, we need to check output shape. 
- for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).is_dynamic()) - return true; - } - - for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).size() > 6) - return true; - } - - for (size_t i = 0; i < op->get_input_size(); i++) { - if (op->get_input_partial_shape(i).size() > 6) - return true; - } - - return false; -} - -void set_model_properties(const ov::Model& model, ExecutionConfig& config) { - const auto& ops = model.get_ordered_ops(); - // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. - // So, do not check allow_new_shape_infer for inner program build - for (const auto& op : ops) { - if (requires_new_shape_infer(op)) { - config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); - break; - } - } - bool is_dynamic = false; - for (const auto& op : ops) { - if (op->is_dynamic()) { - is_dynamic = true; - break; - } - } - bool has_lstm = false; - for (const auto& op : ops) { - if (ov::is_type(op)) { - has_lstm = true; - break; - } - } - - // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, - // smaller # of kernels are built compared to static models. - // So having smaller batch size is even better for dynamic model as we can do more parallel build. - if (is_dynamic) { - config.set_property(ov::intel_gpu::max_kernels_per_batch(4)); - } else { - config.set_property(ov::intel_gpu::max_kernels_per_batch(8)); - } - - config.set_property(ov::intel_gpu::optimize_data(true)); - - if (has_lstm) - config.set_property(ov::intel_gpu::use_onednn(true)); -} - -} // namespace - #define FACTORY_DECLARATION(op_version, op_name) \ void __register ## _ ## op_name ## _ ## op_version(); @@ -170,33 +62,6 @@ void set_model_properties(const ov::Model& model, ExecutionConfig& config) { #include "intel_gpu/plugin/primitives_list.hpp" #undef REGISTER_FACTORY -const auto is_llm = [](const std::shared_ptr& model) -> bool { - using namespace ov::pass::pattern; - - auto past = wrap_type(); - auto convert_past = wrap_type({past}); - auto gather_input = std::make_shared(OutputVector{past, convert_past}); - auto beam_idx = wrap_type(); - auto gather_past = wrap_type({gather_input, beam_idx, wrap_type()}); - auto gather_convert = wrap_type({gather_past}); - auto concat_past_input = std::make_shared(OutputVector{past, convert_past, gather_past, gather_convert}); - auto concat = wrap_type({concat_past_input, any_input()}); - auto convert_present = wrap_type({concat}); - auto present_input = std::make_shared(OutputVector{concat, convert_present}); - auto present = wrap_type({present_input}); - - auto kvcache_matcher = std::make_shared(present, "KVCacheMatcher"); - - for (auto& op : model->get_ordered_ops()) { - if (kvcache_matcher->match(op) || - ov::is_type(op)) { - return true; - } - } - - return false; -}; - void Plugin::register_primitives() const { #define REGISTER_FACTORY(op_version, op_name) FACTORY_CALL(op_version, op_name) #include "intel_gpu/plugin/primitives_list.hpp" @@ -237,12 +102,24 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p auto cloned_model = model->clone(); OPENVINO_ASSERT(cloned_model != nullptr, "[GPU] Failed to clone model!"); - GPU_DEBUG_IF(!config.get_dump_graphs_path().empty()) { - auto path_base = config.get_dump_graphs_path() + "/" + cloned_model->get_name(); + // Here we create a copy of the config to finalize it and ensure that transformation pipe can use correct options values + // This is 
mainly needed to correctly update lower-level properties when a higher-level option is set by the user.
+    // For example, transformations use the inference_precision hint, which may be updated by the execution_mode property.
+    // The update itself happens at the finalization stage, so we must call finalize() here to get the correct pass flow.
+    // The reason why we can't do finalization once and then just run all graph transformations is that
+    // some of the transformations may actually impact some properties. For example, the presence of an LSTMSequence op
+    // impacts the value of the use_onednn property. But in order to understand if there's an op of this type, we have to run
+    // common optimizations, which may fuse a subgraph into an LSTMSequence op. So basically, the final value of the use_onednn
+    // property can be computed for the transformed model only.
+    auto config_copy = config;
+    config_copy.finalize(context.get(), model.get());
+
+    GPU_DEBUG_IF(!config_copy.get_dump_graphs_path().empty()) {
+        auto path_base = config_copy.get_dump_graphs_path() + "/" + cloned_model->get_name();
         ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model);
     }

-    transform_model(cloned_model, config, context);
+    transform_model(cloned_model, config_copy, context);

     // Transformations for some reason may drop output tensor names, so here we copy those from the original model
     auto new_results = cloned_model->get_results();
@@ -257,8 +134,8 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p
         new_res->set_friendly_name(old_res->get_friendly_name());
     }

-    GPU_DEBUG_IF(!config.get_dump_graphs_path().empty()) {
-        auto path_base = config.get_dump_graphs_path() + "/" + cloned_model->get_name() + "_" + "transformed_func";
+    GPU_DEBUG_IF(!config_copy.get_dump_graphs_path().empty()) {
+        auto path_base = config_copy.get_dump_graphs_path() + "/" + cloned_model->get_name() + "_" + "transformed_func";
         ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model);
     }
     return cloned_model;
@@ -307,10 +184,10 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     ExecutionConfig config = m_configs_map.at(device_id);
     config.set_user_property(orig_config, OptionVisibility::RELEASE);
-    set_model_properties(*model, config);
-    config.finalize(context, get_rt_info(*model));

     auto transformed_model = clone_and_transform_model(model, config, context);
+
+    config.finalize(context.get(), transformed_model.get());
     {
         OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model::CreateCompiledModel");
         return std::make_shared(transformed_model, shared_from_this(), context, config);
@@ -326,10 +203,12 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id);

     ExecutionConfig config = m_configs_map.at(device_id);
-    set_model_properties(*model, config);
-    config.finalize(context_impl, get_rt_info(*model));
+    config.set_user_property(orig_config, OptionVisibility::RELEASE);

     auto transformed_model = clone_and_transform_model(model, config, context_impl);
+
+    config.finalize(context_impl.get(), transformed_model.get());
+
     return std::make_shared(transformed_model, shared_from_this(), context_impl, config);
 }

@@ -393,8 +272,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr&
     ExecutionConfig config = m_configs_map.at(device_id);
     config.set_user_property(orig_config, OptionVisibility::RELEASE);
-    set_model_properties(*model, config);
-    config.finalize(ctx, get_rt_info(*model));
+    config.finalize(ctx.get(), model.get());

     ProgramBuilder prog(ctx->get_engine(), config);
@@ -449,7 +327,6 @@ std::shared_ptr Plugin::import_model(std::istream& model,
     ExecutionConfig config = m_configs_map.at(device_id);
     config.set_user_property(_orig_config, OptionVisibility::RELEASE);
-    config.finalize(context_impl, {});

     ov::CacheMode cache_mode = config.get_cache_mode();
     ov::EncryptionCallbacks encryption_callbacks = config.get_cache_encryption_callbacks();
@@ -768,7 +645,6 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const {
     const auto& device_info = context->get_engine().get_device_info();
     auto config = m_configs_map.at(device_id);
     config.set_property(ov::intel_gpu::partial_build_program(true));
-    config.finalize(context, {});
     uint32_t n_streams = static_cast(config.get_num_streams());
     uint64_t occupied_device_mem = 0;
     auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>();
@@ -821,6 +697,8 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const {
         OPENVINO_THROW("[GPU_MAX_BATCH_SIZE] ov::hint::model should be std::shared_ptr type");
     }

+    config.finalize(context.get(), model.get());
+
     size_t base_batch_size = 16; // empirically decided for DG1
     auto& engine = get_default_context(device_id)->get_engine();

diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
index 9189941ea8b3a6..1633bb022480a5 100644
--- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
+++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
@@ -305,7 +305,7 @@ void SyncInferRequest::enqueue() {
     m_internal_outputs = network->execute(dependencies);
     auto network_enqueue_end = std::chrono::high_resolution_clock::now();

-    const auto& config = network->get_config();
+    [[maybe_unused]] const auto& config = network->get_config();

     // If dump layers path is set, only runs first inference.
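    // NOTE: the [[maybe_unused]] above is needed because, in builds without debug caps,
    // GPU_DEBUG_IF(cond) expands to "if (0)" and discards its argument, so 'config'
    // would otherwise trigger an unused-variable warning.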
GPU_DEBUG_IF(!config.get_dump_tensors_path().empty() && config.get_dump_iterations().empty()) {

diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
index 7e2c82f84b7d7a..5b90ef246846a5 100644
--- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp
+++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
@@ -5,6 +5,20 @@
 #include "intel_gpu/runtime/execution_config.hpp"
 #include "intel_gpu/plugin/remote_context.hpp"
 #include "openvino/core/any.hpp"
+#include "openvino/core/model.hpp"
+#include "openvino/op/concat.hpp"
+#include "openvino/op/convert.hpp"
+#include "openvino/op/gather.hpp"
+#include "openvino/op/loop.hpp"
+#include "openvino/op/lstm_sequence.hpp"
+#include "openvino/op/paged_attention.hpp"
+#include "openvino/op/search_sorted.hpp"
+#include "openvino/op/stft.hpp"
+#include "openvino/pass/pattern/matcher.hpp"
+#include "openvino/pass/pattern/op/label.hpp"
+#include "openvino/pass/pattern/op/or.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "ov_ops/dynamic_quantize.hpp"
 #include "openvino/runtime/internal_properties.hpp"
 #include "intel_gpu/runtime/internal_properties.hpp"
 #include "openvino/runtime/plugin_config.hpp"
@@ -13,6 +27,93 @@

 namespace ov::intel_gpu {

+namespace {
+
+ov::RTMap get_rt_info(const ov::Model& model) {
+    ov::RTMap rt_info;
+    if (model.has_rt_info("runtime_options"))
+        rt_info = model.get_rt_info("runtime_options");
+
+    if (model.has_rt_info("__weights_path")) {
+        rt_info[ov::weights_path.name()] = model.get_rt_info("__weights_path");
+    }
+    return rt_info;
+}
+
+
+bool requires_new_shape_infer(const std::shared_ptr& op) {
+    if (op->is_dynamic()) {
+        return true;
+    }
+
+    // HACK: SearchSorted has specific shape requirements.
+    // E.g. static input shapes: sorted:[8], values:[2,3,4] are perfectly fine,
+    // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid.
+    // Similar case for STFT.
+    if (ov::is_type(op) || ov::is_type(op))
+        return true;
+
+    if (ov::is_type(op))
+        return true;
+
+    if (ov::is_type(op)) {
+        const auto body_function = std::static_pointer_cast(op)->get_function();
+        if (body_function->is_dynamic())
+            return true;
+    }
+
+    if (ov::is_type(op) || ov::is_type(op)) {
+        return true;
+    }
+    // When an input node has a dynamic shape with 4 dimensions, this function would return false,
+    // because op->is_dynamic(), which only checks the input shapes, returns false.
+    // So, in the case of input data, we need to check the output shapes as well.
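+    // (The checks below return true for any dynamic output shape and for any input/output rank above 6.)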
+ for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).is_dynamic()) + return true; + } + + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).size() > 6) + return true; + } + + for (size_t i = 0; i < op->get_input_size(); i++) { + if (op->get_input_partial_shape(i).size() > 6) + return true; + } + + return false; +} + +bool is_llm(const ov::Model& model) { + using namespace ov::pass::pattern; + + auto past = wrap_type(); + auto convert_past = wrap_type({past}); + auto gather_input = std::make_shared(OutputVector{past, convert_past}); + auto beam_idx = wrap_type(); + auto gather_past = wrap_type({gather_input, beam_idx, wrap_type()}); + auto gather_convert = wrap_type({gather_past}); + auto concat_past_input = std::make_shared(OutputVector{past, convert_past, gather_past, gather_convert}); + auto concat = wrap_type({concat_past_input, any_input()}); + auto convert_present = wrap_type({concat}); + auto present_input = std::make_shared(OutputVector{concat, convert_present}); + auto present = wrap_type({present_input}); + + auto kvcache_matcher = std::make_shared(present, "KVCacheMatcher"); + + for (auto& op : model.get_ordered_ops()) { + if (kvcache_matcher->match(op) || ov::is_type(op)) { + return true; + } + } + + return false; +}; + +} // namespace + #define OV_CONFIG_LOCAL_OPTION(...) #define OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ ConfigOption ExecutionConfig::m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; @@ -49,15 +150,17 @@ ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { void ExecutionConfig::finalize(cldnn::engine& engine) { auto ctx = std::make_shared("GPU", std::vector{engine.get_device()}); - PluginConfig::finalize(ctx, {}); + PluginConfig::finalize(ctx.get(), nullptr); } -void ExecutionConfig::apply_rt_info(std::shared_ptr context, const ov::RTMap& rt_info) { - const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); +void ExecutionConfig::apply_rt_info(const IRemoteContext* context, const ov::RTMap& rt_info, bool is_llm) { + const auto& info = dynamic_cast(context)->get_engine().get_device_info(); if (!info.supports_immad) { apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); } + if (!info.supports_immad || !is_llm) + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with @@ -68,15 +171,54 @@ void ExecutionConfig::apply_rt_info(std::shared_ptr context, con } } -void ExecutionConfig::finalize_impl(std::shared_ptr context) { +void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) { + apply_rt_info(context, get_rt_info(model), is_llm(model)); + + const auto& ops = model.get_ops(); + + auto process_op = [this](std::shared_ptr op) { + if (requires_new_shape_infer(op)) { + m_allow_new_shape_infer = true; + } + // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, + // smaller # of kernels are built compared to static models. + // So having smaller batch size is even better for dynamic model as we can do more parallel build. 
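+        // (Static models simply keep the option's default value.)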
+ if (op->is_dynamic()) { + m_max_kernels_per_batch = 4; + } + + // Allow using onednn for models with LSTMSequence op as it's much more performant than existing ocl impl + if (ov::is_type(op)) { + m_use_onednn = true; + } + }; + + // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. + // So, do not check allow_new_shape_infer for inner program build + for (const auto& op : ops) { + if (auto multi_subgraph_op = ov::as_type_ptr(op)) { + for (const auto& sub_graph : multi_subgraph_op->get_functions()) { + for (auto& sub_op : sub_graph->get_ops()) { + process_op(sub_op); + } + } + } else { + process_op(op); + } + } + + m_optimize_data = true; +} + +void ExecutionConfig::finalize_impl(const IRemoteContext* context) { if (m_help) { print_help(); exit(-1); } - const auto& info = std::dynamic_pointer_cast(context)->get_engine().get_device_info(); + const auto& info = dynamic_cast(context)->get_engine().get_device_info(); apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { + if (!is_set_by_user(ov::internal::enable_lp_transformations)) { m_enable_lp_transformations = info.supports_imad || info.supports_immad; } if (!is_set_by_user(ov::intel_gpu::use_onednn) && info.supports_immad) { diff --git a/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp b/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp index fb9711e7605859..c78b472b9c54cb 100644 --- a/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp +++ b/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp @@ -28,6 +28,10 @@ TEST_F(GpuReservationTest, Mutiple_CompiledModel_Reservation) { models.emplace_back(ov::test::utils::make_multi_single_conv()); auto core = ov::test::utils::PluginCache::get().core(); + + auto available_devices = core->get_available_devices(); + if (std::find(available_devices.begin(), available_devices.end(), ov::test::utils::DEVICE_CPU) == available_devices.end()) + GTEST_SKIP(); core->set_property(target_devices[1], config); ov::AnyMap property_config = {{ov::num_streams.name(), 1}, @@ -54,4 +58,4 @@ TEST_F(GpuReservationTest, Mutiple_CompiledModel_Reservation) { if (thread.joinable()) thread.join(); } -} \ No newline at end of file +} From ea6f10fbec21da6a17cd5923c8fb4c431006d13e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 23 Jan 2025 15:27:28 +0400 Subject: [PATCH 24/44] cut off debug properties for release build Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 29 +++++++++---------- src/inference/tests/unit/config_test.cpp | 8 ++--- .../intel_gpu/runtime/debug_configuration.hpp | 2 ++ .../include/intel_gpu/runtime/profiling.hpp | 2 +- .../prepare_primitive_fusing.cpp | 4 +-- .../prepare_primitive_fusing_through.cpp | 2 +- .../src/graph/impls/ocl/kernels_cache.cpp | 10 ++----- src/plugins/intel_gpu/src/graph/network.cpp | 13 +++++---- src/plugins/intel_gpu/src/graph/program.cpp | 2 +- .../src/graph/program_dump_graph.cpp | 3 +- src/plugins/intel_gpu/src/plugin/graph.cpp | 15 +++++----- src/plugins/intel_gpu/src/plugin/plugin.cpp | 9 +++--- .../src/plugin/transformations_pipeline.cpp | 6 ++-- .../src/runtime/debug_configuration.cpp | 5 ++-- .../src/runtime/execution_config.cpp | 2 +- 15 files changed, 54 insertions(+), 58 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 531bbda8d346f8..51f5e8571daa38 100644 --- 
a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -63,21 +63,6 @@ } \ } -#ifdef ENABLE_DEBUG_CAPS -#define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ - static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ - auto v = read_env(PropertyNamespace::PropertyVar.name(), m_allowed_env_prefix, &m_ ## PropertyVar); \ - if (v.empty()) \ - return m_ ## PropertyVar.value; \ - return v.as(); \ - } -#else -#define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ - static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ - return m_ ## PropertyVar.value; \ - } -#endif - #define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) \ m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; @@ -90,12 +75,24 @@ #define OV_CONFIG_RELEASE_INTERNAL_OPTION(PropertyNamespace, PropertyVar, ...) \ OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE_INTERNAL, __VA_ARGS__) +#ifdef ENABLE_DEBUG_CAPS +#define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ + static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ + auto v = read_env(PropertyNamespace::PropertyVar.name(), m_allowed_env_prefix, &m_ ## PropertyVar); \ + if (v.empty()) \ + return m_ ## PropertyVar.value; \ + return v.as(); \ + } #define OV_CONFIG_DEBUG_OPTION(PropertyNamespace, PropertyVar, ...) \ OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) #define OV_CONFIG_DEBUG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, ...) \ OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG_GLOBAL, __VA_ARGS__) - +#else +#define OV_CONFIG_DEBUG_OPTION(...) +#define OV_CONFIG_DEBUG_GLOBAL_OPTION(...) +#define OV_CONFIG_DECLARE_GLOBAL_GETTER(...) 
+#endif namespace ov { enum class OptionVisibility : uint8_t { RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index 68b8cb3b617fd3..75336542cb7828 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -239,6 +239,7 @@ TEST(plugin_config, visibility_is_correct) { ASSERT_EQ(cfg.get_option_ptr(int_property.name())->get_visibility(), OptionVisibility::RELEASE); } +#ifdef ENABLE_DEBUG_CAPS TEST(plugin_config, can_get_global_property) { NotEmptyTestConfig cfg; ASSERT_EQ(cfg.get_debug_global_property(), 4); @@ -247,7 +248,6 @@ TEST(plugin_config, can_get_global_property) { TEST(plugin_config, global_property_read_env_on_each_call) { NotEmptyTestConfig cfg; ASSERT_EQ(cfg.get_debug_global_property(), 4); -#ifdef ENABLE_DEBUG_CAPS std::string env_var1 = "OV_DEBUG_GLOBAL_PROPERTY=10"; ::putenv(env_var1.data()); ASSERT_EQ(cfg.get_debug_global_property(), 10); @@ -255,9 +255,5 @@ TEST(plugin_config, global_property_read_env_on_each_call) { std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; ::putenv(env_var2.data()); ASSERT_EQ(cfg.get_debug_global_property(), 20); -#else - std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; - ::putenv(env_var2.data()); - ASSERT_EQ(cfg.get_debug_global_property(), 4); // no effect for build w/o debug caps -#endif } +#endif diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index 08134ce420a4f6..4367d2062d7325 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -53,6 +53,7 @@ static constexpr const char reset[] = "\033[0m"; static constexpr const char prefix[] = "GPU_Debug: "; #define GPU_DEBUG_IF(cond) if (cond) +#define GPU_DEBUG_VALUE_OR(debug_value, release_value) debug_value #define GPU_DEBUG_CODE(...) __VA_ARGS__ #define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) \ @@ -84,6 +85,7 @@ static constexpr const char prefix[] = "GPU_Debug: "; GPU_DEBUG_LOG_RAW_INT(static_cast::type>(min_verbose_level)) #else #define GPU_DEBUG_IF(cond) if (0) +#define GPU_DEBUG_VALUE_OR(debug_value, release_value) release_value #define GPU_DEBUG_CODE(...) 
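GPU_DEBUG_VALUE_OR is the companion trick that lets call sites read debug-only options and still compile when those options are cut out of release builds: a macro parameter that is never used in the replacement list is never substituted into the program, so the tokens of the debug expression never have to name a real function. A small standalone sketch of the mechanism, with hypothetical names rather than the exact OpenVINO macros:

    #include <iostream>
    #include <string>

    #ifdef MY_DEBUG_CAPS
    #define MY_VALUE_OR(debug_value, release_value) debug_value
    std::string get_dump_path() { return "/tmp/dumps"; }  // getter exists only in debug builds
    #else
    // Release build: the debug_value tokens are discarded by the preprocessor,
    // so the missing getter causes no compile error.
    #define MY_VALUE_OR(debug_value, release_value) release_value
    #endif

    int main() {
        std::string path = MY_VALUE_OR(get_dump_path(), std::string{});
        std::cout << (path.empty() ? "dumping disabled" : path) << "\n";
        return 0;
    }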
#define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) #define GPU_DEBUG_PROFILED_STAGE(stage) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp index 6d5d91bb5b783a..7d0dba80e7c017 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp @@ -158,7 +158,7 @@ class profiled_stage { , _obj(obj) , _stage(stage) { GPU_DEBUG_IF(profiling_enabled) { - _per_iter_mode = ov::intel_gpu::ExecutionConfig::get_dump_profiling_data_per_iter(); + _per_iter_mode = GPU_DEBUG_VALUE_OR(ov::intel_gpu::ExecutionConfig::get_dump_profiling_data_per_iter(), false); _start = std::chrono::high_resolution_clock::now(); } } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index cc3b5ff2260ea6..622b7cff4101ad 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -56,7 +56,7 @@ using namespace cldnn; void prepare_primitive_fusing::run(program& p) { - if (p.get_config().get_disable_post_ops_fusions()) + GPU_DEBUG_IF(p.get_config().get_disable_post_ops_fusions()) return; fuse_reorders(p); @@ -168,7 +168,7 @@ void prepare_primitive_fusing::fuse_reorders(program &p) { } void prepare_primitive_fusing::fuse_swiglu(program &p) { - bool disable_fc_swiglu_fusion = p.get_config().get_disable_fc_swiglu_fusion(); + bool disable_fc_swiglu_fusion = GPU_DEBUG_VALUE_OR(p.get_config().get_disable_fc_swiglu_fusion(), false); // Apply only for high performant GPU if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128) return; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index 78b494c52645de..33320126a9d910 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -16,7 +16,7 @@ using namespace cldnn; void prepare_primitive_fusing_through::run(program& p) { - if (p.get_config().get_disable_post_ops_fusions()) + GPU_DEBUG_IF(p.get_config().get_disable_post_ops_fusions()) return; auto try_fuse_through = [&](program_node& node) -> std::vector { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index e5c1fe016b96df..c02ad09044ae32 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -238,10 +238,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, b.hash_value = std::hash()(full_code); - std::string dump_sources_dir = ""; - GPU_DEBUG_IF(!_config.get_dump_sources_path().empty()) { - dump_sources_dir = _config.get_dump_sources_path(); - } + std::string dump_sources_dir = GPU_DEBUG_VALUE_OR(_config.get_dump_sources_path(), ""); // Add -g -s to build options to allow IGC assembly dumper to associate assembler sources with corresponding OpenCL kernel code lines // Should be used with the IGC_ShaderDump option @@ -297,10 +294,9 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co auto& cl_build_device = 
dynamic_cast(*_device); bool dump_sources = batch.dump_custom_program; - std::string dump_sources_dir = ""; - GPU_DEBUG_IF(!_config.get_dump_sources_path().empty()) { + std::string dump_sources_dir = GPU_DEBUG_VALUE_OR(_config.get_dump_sources_path(), ""); + GPU_DEBUG_IF(!dump_sources_dir.empty()) { dump_sources = true; - dump_sources_dir = _config.get_dump_sources_path(); } std::string err_log; // accumulated build log from all program's parts (only contains messages from parts which diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 66d890b51e4692..214778c397dbbc 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -157,7 +157,6 @@ void wait_for_the_turn(const std::vector& pids) { #else void dump_perf_data_raw(std::string, bool per_iter_mode, const std::list>&) {} -void wait_for_the_turn(const std::vector& pids) {} #endif } // namespace @@ -185,9 +184,10 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo net_id = get_unique_net_id(); } - GPU_DEBUG_IF(get_config().get_start_after_processes().size() != 0) { - wait_for_the_turn(get_config().get_start_after_processes()); - } + GPU_DEBUG_CODE( + if (get_config().get_start_after_processes().size() != 0) { + wait_for_the_turn(get_config().get_start_after_processes()); + }); calculate_weights_cache_capacity(); allocate_primitives(); configure_primitives_second_output(); @@ -225,8 +225,9 @@ network::~network() { if (_program != nullptr) _program->cancel_compilation_context(); _memory_pool->clear_pool_for_network(net_id); - GPU_DEBUG_IF(!_config.get_dump_profiling_data_path().empty()) { - dump_perf_data_raw(_config.get_dump_profiling_data_path() + "/perf_raw" + std::to_string(net_id) + ".csv", false, _exec_order); + std::string dump_path = GPU_DEBUG_VALUE_OR(_config.get_dump_profiling_data_path(), ""); + GPU_DEBUG_IF(!dump_path.empty()) { + dump_perf_data_raw(dump_path + "/perf_raw" + std::to_string(net_id) + ".csv", false, _exec_order); } } diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index ad384306b9e081..6bbb0f2512905d 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -1376,7 +1376,7 @@ program::primitives_info program::get_current_stage_info() const { } void program::save_pass_info(std::string pass_name) { - if (!_config.get_dump_graphs_path().empty()) + GPU_DEBUG_IF(!_config.get_dump_graphs_path().empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index 0e735683db2533..eb5c152a361a16 100644 --- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -3,6 +3,7 @@ // #include "program_dump_graph.h" +#include "intel_gpu/runtime/debug_configuration.hpp" #include "to_string_utils.h" #include "data_inst.h" #include "condition_inst.h" @@ -156,7 +157,7 @@ void dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - auto path = config.get_dump_graphs_path(); + std::string path = GPU_DEBUG_VALUE_OR(config.get_dump_graphs_path(), ""); if (path.empty()) { return {}; } diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 
bcf6a26420dc57..cba0d6aab9276d 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -113,9 +113,8 @@ Graph::Graph(std::shared_ptr graph, uint16_t stream_id) } Graph::~Graph() { - GPU_DEBUG_IF(m_config.get_host_time_profiling()) { - const auto log_level = m_config.get_host_time_profiling(); - + auto log_level = GPU_DEBUG_VALUE_OR(m_config.get_host_time_profiling(), 0); + GPU_DEBUG_IF(log_level) { auto get_time_str = [](int64_t time_mcs, int64_t iters_num = 1) { double time = static_cast(time_mcs); time /= iters_num; @@ -183,17 +182,19 @@ void Graph::build(std::shared_ptr program) { m_network = std::make_shared(program, m_stream_id); } - GPU_DEBUG_IF(!m_config.get_dry_run_path().empty()) { - ov::pass::Serialize(m_config.get_dry_run_path(), "").run_on_model(get_runtime_model()); + std::string dry_run_path = GPU_DEBUG_VALUE_OR(m_config.get_dry_run_path(), ""); + std::string dump_graphs_path = GPU_DEBUG_VALUE_OR(m_config.get_dump_graphs_path(), ""); + GPU_DEBUG_IF(!dry_run_path.empty()) { + ov::pass::Serialize(dry_run_path, "").run_on_model(get_runtime_model()); exit(0); } - GPU_DEBUG_IF(!m_config.get_dump_graphs_path().empty() && m_stream_id == 0) { + GPU_DEBUG_IF(!dump_graphs_path.empty() && m_stream_id == 0) { static int net_id = 0; auto steps_info = get_network()->get_optimizer_passes_info(); size_t step_idx = 0; for (auto& step : steps_info) { - auto xml_path = m_config.get_dump_graphs_path() + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; + auto xml_path = dump_graphs_path + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; ov::pass::Serialize(xml_path, "").run_on_model(get_runtime_model(step.second, true)); step_idx++; } diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 3f6b88ff238030..a6c53bd539bc49 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -114,8 +114,9 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p auto config_copy = config; config_copy.finalize(context.get(), model.get()); - GPU_DEBUG_IF(!config_copy.get_dump_graphs_path().empty()) { - auto path_base = config_copy.get_dump_graphs_path() + "/" + cloned_model->get_name(); + std::string dump_path = GPU_DEBUG_VALUE_OR(config_copy.get_dump_graphs_path(), ""); + GPU_DEBUG_IF(!dump_path.empty()) { + auto path_base = dump_path + "/" + cloned_model->get_name(); ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } @@ -134,8 +135,8 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p new_res->set_friendly_name(old_res->get_friendly_name()); } - GPU_DEBUG_IF(!config_copy.get_dump_graphs_path().empty()) { - auto path_base = config_copy.get_dump_graphs_path() + "/" + cloned_model->get_name() + "_" + "transformed_func"; + GPU_DEBUG_IF(!dump_path.empty()) { + auto path_base = dump_path + "/" + cloned_model->get_name() + "_" + "transformed_func"; ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } return cloned_model; diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 841551c73b671f..0d6d83f2f2982e 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -984,8 +984,8 @@ void 
TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - bool disable_horizontal_fc_fusion = config.get_disable_horizontal_fc_fusion(); - bool disable_fc_swiglu_fusion = config.get_disable_fc_swiglu_fusion(); + bool disable_horizontal_fc_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_horizontal_fc_fusion(), false); + bool disable_fc_swiglu_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_fc_swiglu_fusion(), false); // mlp fusion is only supported for cldnn on high performance GPUs bool fuse_mlp_swiglu = !device_info.supports_immad && @@ -1044,7 +1044,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { - bool asymmetric_dyn_quant = config.get_asym_dynamic_quantization(); + bool asymmetric_dyn_quant = GPU_DEBUG_VALUE_OR(config.get_asym_dynamic_quantization(), false); auto dynamic_quantization_group_size = config.get_dynamic_quantization_group_size(); pass_config->set_callback([=](const_node_ptr& root) -> bool { for (size_t i = 0 ; i < root->get_input_node_shared_ptr(0)->get_output_size(); ++i) { diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index ab72354626b5cb..7dcb5020f56cc1 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -2,13 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // +#ifdef GPU_DEBUG_CONFIG #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/execution_config.hpp" #include #include namespace ov::intel_gpu { - std::ostream& get_verbose_stream() { if (ExecutionConfig::get_log_to_file().length() > 0) { static std::ofstream fout; if (!fout.is_open()) @@ -19,5 +19,6 @@ std::ostream& get_verbose_stream() { return std::cout; } } - } // namespace ov::intel_gpu + +#endif diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 5b90ef246846a5..ffbd977dad42da 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -211,7 +211,7 @@ void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context } void ExecutionConfig::finalize_impl(const IRemoteContext* context) { - if (m_help) { + GPU_DEBUG_IF(m_help) { print_help(); exit(-1); } From b7dc614c45a41e99a7bad158ea944011d90f28c1 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 24 Jan 2025 15:12:06 +0400 Subject: [PATCH 25/44] config clone. 
Visibility fixes Signed-off-by: Vladimir Paramuzov --- .../openvino/runtime/plugin_config.hpp | 16 ++++++------ src/inference/src/dev/plugin_config.cpp | 9 +++++-- src/plugins/intel_gpu/CMakeLists.txt | 1 + .../intel_gpu/plugin/remote_context.hpp | 1 - .../intel_gpu/runtime/execution_config.hpp | 1 + .../include/intel_gpu/runtime/options.inl | 2 +- .../graph_optimizer/propagate_constants.cpp | 2 +- .../intel_gpu/src/plugin/compiled_model.cpp | 3 ++- .../intel_gpu/src/plugin/ops/condition.cpp | 2 +- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 2 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 2 +- .../src/runtime/debug_configuration.cpp | 7 ++--- .../src/runtime/execution_config.cpp | 26 +++++++++++++------ 13 files changed, 46 insertions(+), 28 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index 51f5e8571daa38..ef4d810e0a4bce 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -99,11 +99,7 @@ enum class OptionVisibility : uint8_t { RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only DEBUG = 1 << 2, // Option can be set for debug builds only via environment and config file DEBUG_GLOBAL = 1 << 3, // Global option can be set for debug builds only via environment and config file -#ifdef ENABLE_DEBUG_CAPS - ANY = 0x0F, // Any visibility is valid including DEBUG & DEBUG_GLOBAL -#else - ANY = 0x03, // Any visibility is valid excluding DEBUG & DEBUG_GLOBAL -#endif + ANY = 0xFF, // Any visibility is valid }; inline OptionVisibility operator&(OptionVisibility a, OptionVisibility b) { @@ -126,6 +122,8 @@ inline std::ostream& operator<<(std::ostream& os, const OptionVisibility& visibi case OptionVisibility::RELEASE: os << "RELEASE"; break; case OptionVisibility::RELEASE_INTERNAL: os << "RELEASE_INTERNAL"; break; case OptionVisibility::DEBUG: os << "DEBUG"; break; + case OptionVisibility::DEBUG_GLOBAL: os << "DEBUG_GLOBAL"; break; + case OptionVisibility::ANY: os << "ANY"; break; default: os << "UNKNOWN"; break; } @@ -181,11 +179,13 @@ struct ConfigOption : public ConfigOptionBase { return *this; } - bool operator==(const T& val) const { - return value == val; + template>> + bool operator==(const U& val) const { + return value == static_cast(val); } - bool operator!=(const T& val) const { + template>> + bool operator!=(const U& val) const { return !(*this == val); } diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 4b0a3266b52f71..2b568ab5bbe310 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -135,6 +135,11 @@ bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) { void PluginConfig::apply_debug_options(const IRemoteContext* context) { const bool throw_on_error = false; +#ifdef ENABLE_DEBUG_CAPS + constexpr const auto allowed_visibility = OptionVisibility::ANY; +#else + constexpr const auto allowed_visibility = OptionVisibility::RELEASE; +#endif if (context) { ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name()); @@ -144,7 +149,7 @@ void PluginConfig::apply_debug_options(const IRemoteContext* context) { std::cout << "Non default config value for " << prop.first << " = " << prop.second.as() << std::endl; } #endif - set_user_property(config_properties, OptionVisibility::ANY, throw_on_error); + 
set_user_property(config_properties, allowed_visibility, throw_on_error); } ov::AnyMap env_properties = read_env(); @@ -154,7 +159,7 @@ void PluginConfig::apply_debug_options(const IRemoteContext* context) { std::cout << "Non default env value for " << prop.first << " = " << prop.second.as() << std::endl; } #endif - set_user_property(env_properties, OptionVisibility::ANY, throw_on_error); + set_user_property(env_properties, allowed_visibility, throw_on_error); } ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const { diff --git a/src/plugins/intel_gpu/CMakeLists.txt b/src/plugins/intel_gpu/CMakeLists.txt index 3a847c292c30ba..3322f2f3f1a875 100644 --- a/src/plugins/intel_gpu/CMakeLists.txt +++ b/src/plugins/intel_gpu/CMakeLists.txt @@ -51,6 +51,7 @@ endif() if(ENABLE_GPU_DEBUG_CAPS) add_definitions(-DGPU_DEBUG_CONFIG=1) + add_definitions(-DENABLE_DEBUG_CAPS=1) endif() set(INTEL_GPU_TARGET_OCL_VERSION "200" CACHE STRING "Target version of OpenCL which should be used by GPU plugin") diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp index 98e74b76a8b502..8f7e7ac45bca2e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp @@ -19,7 +19,6 @@ #include #include #include -#include namespace ov::intel_gpu { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index bce3e3101e2160..68e4ea69642a00 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -21,6 +21,7 @@ struct ExecutionConfig : public ov::PluginConfig { ExecutionConfig(const ExecutionConfig& other); ExecutionConfig& operator=(const ExecutionConfig& other); + ExecutionConfig clone() const; void finalize(cldnn::engine& engine); using ov::PluginConfig::finalize; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 61e4315807449c..6d513ef1cb2eda 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -32,8 +32,8 @@ OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "") OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") OV_CONFIG_RELEASE_OPTION(ov::internal, enable_lp_transformations, false, "Enable/Disable Low precision transformations set") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file") -OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, shape_predictor_settings, {10, 16 * 1024, 2, 1.1f}, "Preallocation settings") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. 
May be in-order or out-of-order") OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, optimize_data, false, "Enable/Disable data flow optimizations for cldnn::program") diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index c632019cf91b01..95c746d5d59791 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -143,7 +143,7 @@ propagate_constants::calculate(engine& engine, if (!has_non_trivial_constants) return {}; - ExecutionConfig cf_config = config; + ExecutionConfig cf_config = config.clone(); cf_config.set_property(ov::intel_gpu::optimize_data(false)); cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); cf_config.finalize(engine); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 3fc1439d56a727..5b8edb9e7d08c9 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -5,6 +5,7 @@ #include "openvino/runtime/iplugin.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/util/weights_path.hpp" #include "intel_gpu/graph/serialization/binary_buffer.hpp" @@ -298,7 +299,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const { return decltype(ov::execution_devices)::value_type{m_context->get_device_name()}; } - return m_config.get_property(name); + return m_config.get_property(name, OptionVisibility::RELEASE); } std::shared_ptr CompiledModel::create_sync_infer_request() const { diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 3d287eb46c465e..da080544363d00 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -21,7 +21,7 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ << internal_body->get_friendly_name() << ", num inputs: " << op->get_input_size() << std::endl; - auto config = p.get_config(); + auto config = p.get_config().clone(); config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); config.finalize(p.get_engine()); diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 08b014d72206c6..556738c5df52ea 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -297,7 +297,7 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p // impacts the value of the use_onednn property. But in order to understand if there's an op of this type, we have to run // common optimizations which may do subgraph fusion to LSTMSequence op. So basically, the final value of the use_onednn // property can be computed for the transformed model only. 
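// [Editor's note, sketch under assumptions] Per the ExecutionConfig::clone() definition added
// later in this patch, a plain copy/assignment now preserves m_is_finalized, while clone() resets
// it; code paths that still need to adjust and re-finalize a config (as here and in
// propagate_constants above) are therefore switched to clone(), e.g.:
//
//   auto cfg = config.clone();                              // copy with m_is_finalized reset
//   cfg.set_property(ov::intel_gpu::optimize_data(false));  // permitted again on the clone
//   cfg.finalize(engine);                                   // re-finalize for the new use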
- auto config_copy = config; + auto config_copy = config.clone(); config_copy.finalize(context.get(), model.get()); std::string dump_path = GPU_DEBUG_VALUE_OR(config_copy.get_dump_graphs_path(), ""); diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index 7dcb5020f56cc1..cb36a8e0349457 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#ifdef GPU_DEBUG_CONFIG #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/execution_config.hpp" #include @@ -10,6 +9,7 @@ namespace ov::intel_gpu { std::ostream& get_verbose_stream() { +#ifdef GPU_DEBUG_CONFIG if (ExecutionConfig::get_log_to_file().length() > 0) { static std::ofstream fout; if (!fout.is_open()) @@ -18,7 +18,8 @@ std::ostream& get_verbose_stream() { } else { return std::cout; } +#else + return std::cout; +#endif } } // namespace ov::intel_gpu - -#endif diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index ffbd977dad42da..bef767da04186c 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -133,7 +133,7 @@ ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { m_user_properties = other.m_user_properties; - m_is_finalized = false; // copy is not automatically finalized + m_is_finalized = other.m_is_finalized; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } @@ -141,13 +141,19 @@ ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { m_user_properties = other.m_user_properties; - m_is_finalized = false; // copy is not automatically finalized + m_is_finalized = other.m_is_finalized; for (const auto& kv : other.m_options_map) { m_options_map.at(kv.first)->set_any(kv.second->get_any()); } return *this; } +ExecutionConfig ExecutionConfig::clone() const { + ExecutionConfig new_config = *this; + new_config.m_is_finalized = false; + return new_config; +} + void ExecutionConfig::finalize(cldnn::engine& engine) { auto ctx = std::make_shared("GPU", std::vector{engine.get_device()}); PluginConfig::finalize(ctx.get(), nullptr); @@ -166,7 +172,7 @@ void ExecutionConfig::apply_rt_info(const IRemoteContext* context, const ov::RTM // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not // using that mechanism. 
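// [Editor's note, assumed helper shape] apply_rt_info_property() itself is not shown in this
// patch; it presumably copies a value from the model's rt_info into the config only when the key
// is present, along the lines of:
//
//   template <typename T, ov::PropertyMutability M>
//   void apply_rt_info_property(const ov::Property<T, M>& property, const ov::RTMap& rt_info) {
//       auto it = rt_info.find(property.name());
//       if (it != rt_info.end())
//           set_property(ov::AnyMap{{property.name(), it->second}});  // skip silently if absent
//   }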
- if (get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { + if (m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE) { apply_rt_info_property(ov::weights_path, rt_info); } } @@ -224,17 +230,21 @@ void ExecutionConfig::finalize_impl(const IRemoteContext* context) { if (!is_set_by_user(ov::intel_gpu::use_onednn) && info.supports_immad) { m_use_onednn = true; } - if (get_use_onednn()) { + if (m_use_onednn) { m_queue_type = QueueTypes::in_order; } - // Enable KV-cache compression by default for non-systolic platforms - if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { - m_kv_cache_precision = ov::element::i8; + if (!is_set_by_user(ov::hint::kv_cache_precision) || m_kv_cache_precision == ov::element::undefined) { + if (info.supports_immad) { // MFDNN-11755 + m_kv_cache_precision = m_inference_precision; + } else { + // Enable KV-cache compression by default for non-systolic platforms only + m_kv_cache_precision = ov::element::i8; + } } // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && m_dynamic_quantization_group_size == 0 && !info.supports_immad) { m_dynamic_quantization_group_size = 32; } From a00e1e83dbc59b1b86a24f479a8c99e57941abce Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 24 Jan 2025 17:02:14 +0400 Subject: [PATCH 26/44] added comment about config copy Signed-off-by: Vladimir Paramuzov --- src/inference/tests/unit/config_test.cpp | 4 +++- .../intel_gpu/include/intel_gpu/runtime/execution_config.hpp | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index 75336542cb7828..e112e00e1e4241 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -21,8 +21,10 @@ static constexpr Property high_level_proper static constexpr Property low_level_property{"LOW_LEVEL_PROPERTY"}; static constexpr Property release_internal_property{"RELEASE_INTERNAL_PROPERTY"}; static constexpr Property debug_property{"DEBUG_PROPERTY"}; -static constexpr Property debug_global_property{"DEBUG_GLOBAL_PROPERTY"}; +#ifdef ENABLE_DEBUG_CAPS +static constexpr Property debug_global_property{"DEBUG_GLOBAL_PROPERTY"}; +#endif struct EmptyTestConfig : public ov::PluginConfig { std::vector get_supported_properties() const { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 68e4ea69642a00..a56d63e7119caf 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -19,6 +19,11 @@ struct ExecutionConfig : public ov::PluginConfig { explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } + // Default copy operators copy the config as-is, including the finalized flag state. + // If the config needs updates after finalization, the clone() method shall be used, as it resets the finalized flag. + // That's needed to avoid unexpected option updates, as we call finalization twice: in the transformation pipeline + // and in the cldnn::program c-tor (which is needed mainly to handle unit tests). 
The second call may cause unwanted side effects + // if the config is not marked as finalized, which could easily have happened if the copy operator reset the finalization flag. ExecutionConfig(const ExecutionConfig& other); ExecutionConfig& operator=(const ExecutionConfig& other); ExecutionConfig clone() const; From 7fe88bec30f270298f1a4c81ce97d3f0a25ec2ba Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 27 Jan 2025 09:45:36 +0400 Subject: [PATCH 27/44] fix tests Signed-off-by: Vladimir Paramuzov --- src/inference/src/dev/plugin_config.cpp | 2 +- src/inference/tests/unit/config_test.cpp | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 2b568ab5bbe310..3917b7155dbd1d 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -173,7 +173,7 @@ ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std nlohmann::json json_config; try { ifs >> json_config; - } catch (const std::exception& e) { + } catch (const std::exception&) { return config; } diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index e112e00e1e4241..8ee6c8c6aefe82 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -111,7 +111,11 @@ TEST(plugin_config, can_create_empty_config) { TEST(plugin_config, can_create_not_empty_config) { ASSERT_NO_THROW( NotEmptyTestConfig cfg; +#ifdef ENABLE_DEBUG_CAPS ASSERT_EQ(cfg.get_supported_properties().size(), 7); +#else + ASSERT_EQ(cfg.get_supported_properties().size(), 5); +#endif ); } @@ -237,8 +241,11 @@ TEST(plugin_config, set_property_throw_for_non_release_options) { TEST(plugin_config, visibility_is_correct) { NotEmptyTestConfig cfg; ASSERT_EQ(cfg.get_option_ptr(release_internal_property.name())->get_visibility(), OptionVisibility::RELEASE_INTERNAL); - ASSERT_EQ(cfg.get_option_ptr(debug_property.name())->get_visibility(), OptionVisibility::DEBUG); ASSERT_EQ(cfg.get_option_ptr(int_property.name())->get_visibility(), OptionVisibility::RELEASE); + +#ifdef ENABLE_DEBUG_CAPS + ASSERT_EQ(cfg.get_option_ptr(debug_property.name())->get_visibility(), OptionVisibility::DEBUG); +#endif } #ifdef ENABLE_DEBUG_CAPS From b2f9000b6eff651f354c1d0f0227bb9dfc4c0451 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 27 Jan 2025 15:51:25 +0400 Subject: [PATCH 28/44] build fixes Signed-off-by: Vladimir Paramuzov --- .../dev_api/openvino/runtime/plugin_config.hpp | 4 ++-- src/inference/tests/unit/config_test.cpp | 17 +++++++++-------- .../intel_gpu/runtime/execution_config.hpp | 8 ++++---- .../include/intel_gpu/runtime/memory_pool.hpp | 2 +- .../intel_gpu/src/runtime/execution_config.cpp | 8 ++++---- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index ef4d810e0a4bce..e55e7ab86f7c8e 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -292,7 +292,7 @@ class OPENVINO_RUNTIME_API PluginConfig { }; template <> -class OPENVINO_API AttributeAdapter +class OPENVINO_RUNTIME_API AttributeAdapter : public DirectValueAccessor { public: AttributeAdapter(ConfigOptionBase*& value) : DirectValueAccessor(value) {} @@ -301,7 +301,7 @@ class OPENVINO_API AttributeAdapter }; template <> -class OPENVINO_API AttributeAdapter +class 
OPENVINO_RUNTIME_API AttributeAdapter : public DirectValueAccessor { public: AttributeAdapter(ov::AnyMap& value) : DirectValueAccessor(value) {} diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index 8ee6c8c6aefe82..fd0a8eec04261a 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -38,8 +38,8 @@ struct EmptyTestConfig : public ov::PluginConfig { struct NotEmptyTestConfig : public ov::PluginConfig { NotEmptyTestConfig() { - #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) - #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)) OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") @@ -58,8 +58,8 @@ struct NotEmptyTestConfig : public ov::PluginConfig { } } - #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__) - #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__) OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__)) EXPAND(OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__)) EXPAND(OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__)) OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") @@ -109,13 +109,14 @@ TEST(plugin_config, can_create_empty_config) { } TEST(plugin_config, can_create_not_empty_config) { - ASSERT_NO_THROW( - NotEmptyTestConfig cfg; #ifdef ENABLE_DEBUG_CAPS - ASSERT_EQ(cfg.get_supported_properties().size(), 7); + size_t expected_options_num = 7; #else - ASSERT_EQ(cfg.get_supported_properties().size(), 5); + size_t expected_options_num = 5; #endif + ASSERT_NO_THROW( + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_supported_properties().size(), expected_options_num); ); } diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index a56d63e7119caf..ef62419dffafdd 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -31,8 +31,8 @@ struct ExecutionConfig : public ov::PluginConfig { void finalize(cldnn::engine& engine); using ov::PluginConfig::finalize; - #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__) - #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__)) #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_LOCAL_OPTION @@ -52,8 +52,8 @@ struct ExecutionConfig : public ov::PluginConfig { void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__) - #define OV_CONFIG_GLOBAL_OPTION(...) 
OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__)) #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_LOCAL_OPTION #undef OV_CONFIG_GLOBAL_OPTION diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp index 1d27eaf63efb86..ed1da835de0f29 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp @@ -118,7 +118,7 @@ class memory_pool { std::map, padded_pool_comparer> _padded_pool; std::multimap _no_reusable_pool; engine* _engine; - const ExecutionConfig& _config; + [[maybe_unused]] const ExecutionConfig& _config; public: explicit memory_pool(engine& engine, const ExecutionConfig& config); diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index bef767da04186c..0b26374aef4fa9 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -124,8 +124,8 @@ bool is_llm(const ov::Model& model) { #undef OV_CONFIG_GLOBAL_OPTION ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { - #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) - #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_OPTION_MAPPING(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)) #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_LOCAL_OPTION #undef OV_CONFIG_GLOBAL_OPTION @@ -315,8 +315,8 @@ void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const { static ov::PluginConfig::OptionsDesc help_map { - #define OV_CONFIG_LOCAL_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) - #define OV_CONFIG_GLOBAL_OPTION(...) OV_CONFIG_OPTION_HELP(__VA_ARGS__) + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_HELP(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) 
(OV_CONFIG_OPTION_HELP(__VA_ARGS__)) #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_LOCAL_OPTION #undef OV_CONFIG_GLOBAL_OPTION From e2e8682d5404310da4708af94db28916f47c4fd8 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 27 Jan 2025 16:15:19 +0400 Subject: [PATCH 29/44] cpu test fix Signed-off-by: Vladimir Paramuzov --- .../behavior/ov_plugin/properties_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp index 6d288d9f5ede8b..fc98a2659f9127 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp @@ -52,7 +52,7 @@ const std::vector cpu_inproperties = { {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), {ov::hint::num_requests.name(), "should be int"}}, {{ov::num_streams.name(), "OFF"}}, - {{ov::hint::enable_cpu_pinning.name(), "OFF"}}, + {{ov::hint::enable_cpu_pinning.name(), "NOT_BOOL"}}, }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, From 3402f1e700aebba95d16f7b6e037d36822c65278 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 27 Jan 2025 16:53:26 +0400 Subject: [PATCH 30/44] improve bool any parsing Signed-off-by: Vladimir Paramuzov --- src/core/src/any.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 8de26ad7c4d173..6d6652278e370b 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -6,6 +6,7 @@ #include #include +#include #include "openvino/util/common_util.hpp" namespace { template @@ -204,13 +205,14 @@ void Read::operator()(std::istream& is, bool& value) const { std::string str; is >> str; - std::set off = {"0", "false", "off", "no"}; - std::set on = {"1", "true", "on", "yes"}; + using namespace std::literals; + constexpr std::array off = {"0"sv, "false"sv, "off"sv, "no"sv}; + constexpr std::array on = {"1"sv, "true"sv, "on"sv, "yes"sv}; str = util::to_lower(str); - if (on.count(str)) { + if (std::find(on.begin(), on.end(), str) != on.end()) { value = true; - } else if (off.count(str)) { + } else if (std::find(off.begin(), off.end(), str) != off.end()) { value = false; } else { OPENVINO_THROW("Could not convert to bool from string " + str); From 0ea319bd6dcaa9fca2683f90145fb89bc77836f7 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 27 Jan 2025 17:01:40 +0400 Subject: [PATCH 31/44] fix cpplint Signed-off-by: Vladimir Paramuzov --- src/plugins/intel_gpu/src/runtime/execution_config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 0b26374aef4fa9..4442a72c8c2694 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -110,7 +110,7 @@ bool is_llm(const ov::Model& model) { } return false; -}; +} } // namespace From e4d85e0ff48be23032c86c284fabac23f800be1a Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 28 Jan 2025 09:25:31 +0400 Subject: [PATCH 32/44] fix build errors Signed-off-by: Vladimir Paramuzov --- src/core/src/any.cpp | 5 ++--- .../intel_gpu/include/intel_gpu/runtime/memory_pool.hpp | 2 +- 
src/plugins/intel_gpu/src/runtime/execution_config.cpp | 2 +- src/plugins/intel_gpu/src/runtime/memory_pool.cpp | 4 +++- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 6d6652278e370b..3117f69114926c 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -205,9 +205,8 @@ void Read::operator()(std::istream& is, bool& value) const { std::string str; is >> str; - using namespace std::literals; - constexpr std::array off = {"0"sv, "false"sv, "off"sv, "no"sv}; - constexpr std::array on = {"1"sv, "true"sv, "on"sv, "yes"sv}; + constexpr std::array off = {"0", "false", "off", "no"}; + constexpr std::array on = {"1", "true", "on", "yes"}; str = util::to_lower(str); if (std::find(on.begin(), on.end(), str) != on.end()) { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp index ed1da835de0f29..1d27eaf63efb86 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp @@ -118,7 +118,7 @@ class memory_pool { std::map, padded_pool_comparer> _padded_pool; std::multimap _no_reusable_pool; engine* _engine; - [[maybe_unused]] const ExecutionConfig& _config; + const ExecutionConfig& _config; public: explicit memory_pool(engine& engine, const ExecutionConfig& config); diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 4442a72c8c2694..1ea3abc9eae4a3 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -316,7 +316,7 @@ void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const { static ov::PluginConfig::OptionsDesc help_map { #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_HELP(__VA_ARGS__)) - #define OV_CONFIG_GLOBAL_OPTION(...) (OV_CONFIG_OPTION_HELP(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) 
EXPAND(OV_CONFIG_OPTION_HELP(__VA_ARGS__)) #include "intel_gpu/runtime/options.inl" #undef OV_CONFIG_LOCAL_OPTION #undef OV_CONFIG_GLOBAL_OPTION diff --git a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp index 5ddd70f8c6bd3b..a899f78eb565d3 100644 --- a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp +++ b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp @@ -433,7 +433,9 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { } } -memory_pool::memory_pool(engine& engine, const ExecutionConfig& config) : _engine(&engine), _config(config) { } +memory_pool::memory_pool(engine& engine, const ExecutionConfig& config) : _engine(&engine), _config(config) { + (void)(_config); // Silence unused warning +} #ifdef GPU_DEBUG_CONFIG inline std::string get_mb_size(size_t size) { From a694b8f0d8c351dddf3c678ec8224ea36edc5305 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 28 Jan 2025 10:00:37 +0400 Subject: [PATCH 33/44] build fixes Signed-off-by: Vladimir Paramuzov --- src/core/src/any.cpp | 1 + src/inference/dev_api/openvino/runtime/plugin_config.hpp | 4 ++-- src/inference/src/dev/plugin_config.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 3117f69114926c..0ed7d3e213aa21 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "openvino/util/common_util.hpp" namespace { template diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index e55e7ab86f7c8e..132e243cc42e79 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -26,7 +26,7 @@ #define CAT(a, b) a ## b #endif -#define GET_EXCEPT_LAST_IMPL(N, ...) CAT(GET_EXCEPT_LAST_IMPL_, N)(__VA_ARGS__) +#define GET_EXCEPT_LAST_IMPL(N, ...) EXPAND(CAT(GET_EXCEPT_LAST_IMPL_, N)(__VA_ARGS__)) #define GET_EXCEPT_LAST_IMPL_2(_0, _1) _0 #define GET_EXCEPT_LAST_IMPL_3(_0, _1, _2) _0, _1 #define GET_EXCEPT_LAST_IMPL_4(_0, _1, _2, _3) _0, _1, _2 @@ -34,7 +34,7 @@ #define GET_EXCEPT_LAST(...) EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) -#define GET_LAST_IMPL(N, ...) CAT(GET_LAST_IMPL_, N)(__VA_ARGS__) +#define GET_LAST_IMPL(N, ...) EXPAND(CAT(GET_LAST_IMPL_, N)(__VA_ARGS__)) #define GET_LAST_IMPL_0(_0, ...) _0 #define GET_LAST_IMPL_1(_0, _1, ...) _1 #define GET_LAST_IMPL_2(_0, _1, _2, ...) 
_2 diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 3917b7155dbd1d..1e1bbf3fece6a0 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -35,7 +35,7 @@ size_t get_terminal_width() { } else { return default_width; } -#elif __linux__ +#elif defined(__linux__) struct winsize w; if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { return w.ws_col; From d1cd693c521d599ceb7a9483630263c87822d85d Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 28 Jan 2025 10:29:35 +0400 Subject: [PATCH 34/44] build fix Signed-off-by: Vladimir Paramuzov --- .../tests/unit/module_tests/shape_predictor_test.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp index c89cd5d93b709f..6d4b8fd7388e76 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp @@ -23,7 +23,9 @@ TEST_P(shape_predictor_tests, prediction) { auto& expected_predicted_shape = p.expected_predicted_shape; auto& engine = get_test_engine(); - ShapePredictor sp(&engine, ShapePredictor::Settings{ .buffers_preallocation_ratio = p.buffers_preallocation_ratio }); + ShapePredictor::Settings settings; + settings.buffers_preallocation_ratio = p.buffers_preallocation_ratio; + ShapePredictor sp(&engine, settings); std::pair result; for (auto& shape : in_shapes) @@ -74,7 +76,9 @@ TEST_P(shape_predictor_tests_b_fs_yx_fsv16, prediction) { auto& expected_predicted_shape = p.expected_predicted_shape; auto& engine = get_test_engine(); - ShapePredictor sp(&engine, ShapePredictor::Settings{ .buffers_preallocation_ratio = p.buffers_preallocation_ratio }); + ShapePredictor::Settings settings; + settings.buffers_preallocation_ratio = p.buffers_preallocation_ratio; + ShapePredictor sp(&engine, settings); std::pair result; for (auto& shape : in_shapes) @@ -122,7 +126,9 @@ TEST(shape_predictor_tests, check_max_buffer_size) { auto& engine = get_test_engine(); const auto& buffers_preallocation_ratio = 1.1f; - ShapePredictor sp(&engine, ShapePredictor::Settings{ .buffers_preallocation_ratio = buffers_preallocation_ratio }); + ShapePredictor::Settings settings; + settings.buffers_preallocation_ratio = buffers_preallocation_ratio; + ShapePredictor sp(&engine, settings); const auto max_alloc_mem_size = engine.get_device_info().max_alloc_mem_size; auto layout = cldnn::layout({static_cast(max_alloc_mem_size)}, ov::element::u8, format::bfyx); From 6794136886996e9e9c9125c79dd67a6fa33cb618 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 28 Jan 2025 13:22:33 +0400 Subject: [PATCH 35/44] unit test fixes Signed-off-by: Vladimir Paramuzov --- .../include/intel_gpu/graph/network.hpp | 3 +-- src/plugins/intel_gpu/src/graph/network.cpp | 5 ++-- .../test_cases/fully_connected_gpu_test.cpp | 26 +++++++++---------- .../tests/unit/test_cases/loop_gpu_test.cpp | 1 - .../tests/unit/test_utils/program_wrapper.h | 4 ++- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index 2a99b9dfef4b76..dabfe8ce09acd6 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -197,7 +197,7 @@ struct network { void 
set_reuse_variable_mem(bool reuse = false); bool is_reuse_variable_mem() { return _reuse_variable_mem; } - const ExecutionConfig& get_config() const { return _config; } + const ExecutionConfig& get_config() const { return _program->get_config(); } std::shared_ptr get_shape_predictor() { return _shape_predictor; } void set_shape_predictor(std::shared_ptr shape_predictor) { _shape_predictor = shape_predictor; } @@ -210,7 +210,6 @@ struct network { using output_chains_map = std::map>; uint32_t net_id = 0; program::ptr _program; - ExecutionConfig _config; engine& _engine; stream::ptr _stream; std::unique_ptr _memory_pool; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 214778c397dbbc..1b310fd4542f86 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -171,7 +171,6 @@ opt pass). */ network::network(program::ptr program, stream::ptr stream, bool is_internal, bool is_primary_stream) : _program(program) - , _config(program->get_config()) , _engine(program->get_engine()) , _stream(stream) , _memory_pool(new memory_pool(program->get_engine(), program->get_config())) @@ -225,7 +224,7 @@ network::~network() { if (_program != nullptr) _program->cancel_compilation_context(); _memory_pool->clear_pool_for_network(net_id); - std::string dump_path = GPU_DEBUG_VALUE_OR(_config.get_dump_profiling_data_path(), ""); + std::string dump_path = GPU_DEBUG_VALUE_OR(get_config().get_dump_profiling_data_path(), ""); GPU_DEBUG_IF(!dump_path.empty()) { dump_perf_data_raw(dump_path + "/perf_raw" + std::to_string(net_id) + ".csv", false, _exec_order); } @@ -385,7 +384,7 @@ void network::calculate_weights_cache_capacity() { } // Sum all weights constants for each stream - required_mem_size += weights_const_size * _config.get_num_streams(); + required_mem_size += weights_const_size * get_config().get_num_streams(); // Add all other constants (shared between streams) required_mem_size += total_const_size - weights_const_size; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index 00fd36f16aaf9c..52209883247f99 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1555,7 +1555,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1643,7 +1643,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1669,7 +1669,7 @@ class 
fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1753,7 +1753,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1780,9 +1780,9 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); if (is_dyn_quan) { - config.set_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); } else { - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); } network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1923,7 +1923,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl = { in_layout.format, "", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim1", fc_impl }, { "fc_prim2", fc_impl } })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1952,7 +1952,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -2905,7 +2905,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -2931,7 +2931,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); 
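// [Editor's note, sketch] The switches from set_property() to set_user_property() in these tests
// line up with the finalize_impl() logic shown earlier in this series: platform defaults (e.g.
// dynamic quantization group size 32 on non-systolic GPUs) are applied only when is_set_by_user()
// is false, and only the user-property path marks an option as user-set. Assumed flow:
//
//   config.set_user_property(ov::hint::dynamic_quantization_group_size(0));
//   config.finalize(engine);  // is_set_by_user(...) == true, so the 0 is kept instead of 32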
config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); @@ -3031,7 +3031,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bf_tiled", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -3057,7 +3057,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); @@ -4210,7 +4210,7 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_sta this->test_compressed_int4_scale_dyn_quan_weight_i4(false, 320, 1024, 1024, 32, 32, true); } -// Test weight zp for INT8 ASYM +// Test weight zp for INT8 ASYM TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_large) { this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 320, 4096, 4096, 128, 128, true); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp index 16c35e04aa3f17..fc20272b2dec6b 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp @@ -1105,7 +1105,6 @@ static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape bod auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx}); - std::vector body_input_layouts; for (size_t i = 0; i < body_input_layout.size(); i++) { if (body_input_layout[i].is_dynamic()) diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h b/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h index 77b519d8e3cf5f..09c081abbb6fe6 100644 --- a/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h +++ b/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h @@ -38,7 +38,9 @@ namespace cldnn p.prepare_memory_dependencies(); } static void update_configs_properties(program& p, const ov::AnyMap& properties) { - p._config.set_property(properties); + auto config_copy = p._config.clone(); + config_copy.set_property(properties); + p._config = config_copy; } }; From 6747b23b79d368c4a298d1def42886e7f6fe0f2a Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 28 Jan 2025 13:46:56 +0400 Subject: [PATCH 36/44] fix func tests Signed-off-by: Vladimir Paramuzov --- .../intel_gpu/src/runtime/execution_config.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 
deletions(-) diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 1ea3abc9eae4a3..d69fabcb346dd8 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -172,7 +172,7 @@ void ExecutionConfig::apply_rt_info(const IRemoteContext* context, const ov::RTM // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not // using that mechanism. - if (m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE) { + if (get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { apply_rt_info_property(ov::weights_path, rt_info); } } @@ -217,7 +217,7 @@ void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context } void ExecutionConfig::finalize_impl(const IRemoteContext* context) { - GPU_DEBUG_IF(m_help) { + GPU_DEBUG_IF(get_help()) { print_help(); exit(-1); } @@ -230,13 +230,13 @@ void ExecutionConfig::finalize_impl(const IRemoteContext* context) { if (!is_set_by_user(ov::intel_gpu::use_onednn) && info.supports_immad) { m_use_onednn = true; } - if (m_use_onednn) { + if (get_use_onednn()) { m_queue_type = QueueTypes::in_order; } - if (!is_set_by_user(ov::hint::kv_cache_precision) || m_kv_cache_precision == ov::element::undefined) { + if (!is_set_by_user(ov::hint::kv_cache_precision) || get_kv_cache_precision() == ov::element::undefined) { if (info.supports_immad) { // MFDNN-11755 - m_kv_cache_precision = m_inference_precision; + m_kv_cache_precision = get_inference_precision(); } else { // Enable KV-cache compression by default for non-systolic platforms only m_kv_cache_precision = ov::element::i8; @@ -244,7 +244,7 @@ void ExecutionConfig::finalize_impl(const IRemoteContext* context) { } // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && m_dynamic_quantization_group_size == 0 && !info.supports_immad) { + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && get_dynamic_quantization_group_size() == 0 && !info.supports_immad) { m_dynamic_quantization_group_size = 32; } From 5bef3d2f1cc0e4a38512e584e81c10199654cf7d Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 28 Jan 2025 15:51:21 +0400 Subject: [PATCH 37/44] don't visit finalize flag Signed-off-by: Vladimir Paramuzov --- src/inference/src/dev/plugin_config.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index 1e1bbf3fece6a0..1fe42a147d2188 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -128,7 +128,6 @@ bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) { for (auto& prop : m_options_map) { visitor.on_attribute(prop.first + "__internal", prop.second); } - visitor.on_attribute("is_finalized", m_is_finalized); return true; } From 15051c2bb8408a0c975793810587b611577d88b9 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 29 Jan 2025 09:34:36 +0400 Subject: [PATCH 38/44] few fixes and refactor Signed-off-by: Vladimir Paramuzov --- src/inference/CMakeLists.txt | 4 +++ src/inference/src/dev/plugin_config.cpp | 32 ++++++++++------------- src/inference/tests/unit/config_test.cpp | 33 +++++++++++++++++++----- 3 files changed, 44 insertions(+), 25 deletions(-) diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt 
index 9b2967e9f80fae..40528735f80071 100644
--- a/src/inference/CMakeLists.txt
+++ b/src/inference/CMakeLists.txt
@@ -137,6 +137,10 @@ target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS}
 
 target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE)
 
+if(ENABLE_DEBUG_CAPS)
+    target_compile_definitions(${TARGET_NAME}_s PUBLIC ENABLE_DEBUG_CAPS)
+endif()
+
 set_target_properties(${TARGET_NAME}_s PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) # LTO
 
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index 1fe42a147d2188..31160a8e30f83c 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -64,10 +64,7 @@ ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility all
 void PluginConfig::set_property(const ov::AnyMap& config) {
     OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited");
 
-    for (auto& kv : config) {
-        auto& name = kv.first;
-        auto& val = kv.second;
-
+    for (auto& [name, val] : config) {
         get_option_ptr(name)->set_any(val);
     }
 }
@@ -75,10 +72,7 @@ void PluginConfig::set_property(const ov::AnyMap& config) {
 void PluginConfig::set_user_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) {
     OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited");
 
-    for (auto& kv : config) {
-        auto& name = kv.first;
-        auto& val = kv.second;
-
+    for (auto& [name, val] : config) {
         auto option = get_option_ptr(name);
         if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) {
             if (throw_on_error)
@@ -144,8 +138,8 @@ void PluginConfig::apply_debug_options(const IRemoteContext* context) {
         ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name());
         cleanup_unsupported(config_properties);
 #ifdef ENABLE_DEBUG_CAPS
-        for (auto& prop : config_properties) {
-            std::cout << "Non default config value for " << prop.first << " = " << prop.second.as<std::string>() << std::endl;
+        for (auto& [name, val] : config_properties) {
+            std::cout << "Non default config value for " << name << " = " << val.as<std::string>() << std::endl;
         }
 #endif
         set_user_property(config_properties, allowed_visibility, throw_on_error);
@@ -154,8 +148,8 @@ void PluginConfig::apply_debug_options(const IRemoteContext* context) {
     ov::AnyMap env_properties = read_env();
     cleanup_unsupported(env_properties);
 #ifdef ENABLE_DEBUG_CAPS
-    for (auto& prop : env_properties) {
-        std::cout << "Non default env value for " << prop.first << " = " << prop.second.as<std::string>() << std::endl;
+    for (auto& [name, val] : env_properties) {
+        std::cout << "Non default env value for " << name << " = " << val.as<std::string>() << std::endl;
     }
 #endif
     set_user_property(env_properties, allowed_visibility, throw_on_error);
@@ -219,10 +213,10 @@ ov::Any PluginConfig::read_env(const std::string& option_name, const std::string
 
 ov::AnyMap PluginConfig::read_env() const {
     ov::AnyMap config;
-    for (auto& kv : m_options_map) {
-        auto val = read_env(kv.first, m_allowed_env_prefix, kv.second);
+    for (auto& [name, option] : m_options_map) {
+        auto val = read_env(name, m_allowed_env_prefix, option);
         if (!val.empty()) {
-            config[kv.first] = val;
+            config[name] = val;
         }
     }
 
@@ -247,12 +241,12 @@ std::string PluginConfig::to_string() const {
     ss << "-----------------------------------------\n";
     ss << "PROPERTIES:\n";
 
-    for (const auto& option : m_options_map) {
-        ss << "\t" << option.first << ":" << option.second->get_any().as<std::string>() << std::endl;
+    for (const auto& [name, option] : m_options_map) {
+        ss << "\t" << name << ": " << option->get_any().as<std::string>() << std::endl;
     }
     ss << "USER PROPERTIES:\n";
-    for (const auto& user_prop : m_user_properties) {
-        ss << "\t" << user_prop.first << ": " << user_prop.second.as<std::string>() << std::endl;
+    for (const auto& [name, val] : m_user_properties) {
+        ss << "\t" << name << ": " << val.as<std::string>() << std::endl;
     }
 
     return ss.str();
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index fd0a8eec04261a..9a82c1f5b6fefb 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -19,10 +19,10 @@ static constexpr Property<bool> bool_property{"BOOL_PROP
 static constexpr Property<int> int_property{"INT_PROPERTY"};
 static constexpr Property<std::string> high_level_property{"HIGH_LEVEL_PROPERTY"};
 static constexpr Property<std::string> low_level_property{"LOW_LEVEL_PROPERTY"};
-static constexpr Property<int> release_internal_property{"RELEASE_INTERNAL_PROPERTY"};
-static constexpr Property<int> debug_property{"DEBUG_PROPERTY"};
+static constexpr Property<int> release_internal_property{"RELEASE_INTERNAL_PROPERTY"};
 
 #ifdef ENABLE_DEBUG_CAPS
+static constexpr Property<int> debug_property{"DEBUG_PROPERTY"};
 static constexpr Property<int> debug_global_property{"DEBUG_GLOBAL_PROPERTY"};
 #endif
 
@@ -236,7 +236,9 @@ TEST(plugin_config, can_copy_config) {
 TEST(plugin_config, set_property_throw_for_non_release_options) {
     NotEmptyTestConfig cfg;
     ASSERT_ANY_THROW(cfg.set_user_property({release_internal_property(10)}, OptionVisibility::RELEASE));
+#ifdef ENABLE_DEBUG_CAPS
     ASSERT_ANY_THROW(cfg.set_user_property({debug_property(10)}, OptionVisibility::RELEASE));
+#endif
 }
 
 TEST(plugin_config, visibility_is_correct) {
@@ -249,6 +251,26 @@ TEST(plugin_config, visibility_is_correct) {
 #endif
 }
 
+TEST(plugin_config, can_read_from_env) {
+    NotEmptyTestConfig cfg;
+    ASSERT_EQ(cfg.get_int_property(), -1);
+    std::string env_var1 = "OV_INT_PROPERTY=10";
+    ::putenv(env_var1.data());
+    ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only
+
+#ifdef ENABLE_DEBUG_CAPS
+    std::string env_var2 = "OV_DEBUG_PROPERTY=20";
+    ::putenv(env_var2.data());
+    ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option
+#endif
+
+    cfg.finalize(nullptr, nullptr);
+    ASSERT_EQ(cfg.get_int_property(), 10);
+#ifdef ENABLE_DEBUG_CAPS
+    ASSERT_EQ(cfg.get_debug_property(), 20);
+#endif
+}
+
 #ifdef ENABLE_DEBUG_CAPS
 TEST(plugin_config, can_get_global_property) {
     NotEmptyTestConfig cfg;
@@ -256,14 +278,13 @@ TEST(plugin_config, can_get_global_property) {
 }
 
 TEST(plugin_config, global_property_read_env_on_each_call) {
-    NotEmptyTestConfig cfg;
-    ASSERT_EQ(cfg.get_debug_global_property(), 4);
+    ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 4);
     std::string env_var1 = "OV_DEBUG_GLOBAL_PROPERTY=10";
     ::putenv(env_var1.data());
-    ASSERT_EQ(cfg.get_debug_global_property(), 10);
+    ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 10);
 
     std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20";
     ::putenv(env_var2.data());
-    ASSERT_EQ(cfg.get_debug_global_property(), 20);
+    ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 20);
 }
 #endif
 
From 69d6dc426fe4bf86ff06a70db201eaa433291e6c Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Wed, 29 Jan 2025 10:23:57 +0400
Subject: [PATCH 39/44] debug caps fixes and tests

Signed-off-by: Vladimir Paramuzov
---
 src/inference/src/dev/plugin_config.cpp  |  5 ++
 src/inference/tests/unit/config_test.cpp | 76 +++++++++++++++++++++++-
 2 files changed, 78 insertions(+), 3 deletions(-)
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index 31160a8e30f83c..7eafba4b8bbb28 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -98,7 +98,12 @@ void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* mode
     if (model)
         apply_model_specific_options(context, *model);
 
+#ifdef ENABLE_DEBUG_CAPS
+    // For now we apply env/config only for build with debug caps, but it can be updated in the future to allow
+    // reading release options for any build type
     apply_debug_options(context);
+#endif // ENABLE_DEBUG_CAPS
+
     // Copy internal properties before applying hints to ensure that
     // a property set by hint won't be overriden by a value in user config.
     // E.g num_streams=AUTO && hint=THROUGHPUT
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index 9a82c1f5b6fefb..a5c7a092a5f2a7 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -3,13 +3,17 @@
 //
 
 #include "openvino/core/any.hpp"
+#include "openvino/core/except.hpp"
 #include "openvino/core/model.hpp"
 #include "openvino/core/node_vector.hpp"
 #include "openvino/op/parameter.hpp"
+#include "openvino/runtime/iremote_context.hpp"
 #include "openvino/runtime/plugin_config.hpp"
 
 #include <gtest/gtest.h>
+#include <nlohmann/json.hpp>
 #include <fstream>
+#include <filesystem>
 
 using namespace ::testing;
 using namespace ov;
@@ -26,6 +30,38 @@ static constexpr Property<int> debug_property{"DEBUG
 static constexpr Property<int> debug_global_property{"DEBUG_GLOBAL_PROPERTY"};
 #endif
 
+namespace {
+void dump_config(const std::string& filename, const std::map<std::string, ov::AnyMap>& config) {
+    nlohmann::json jsonConfig;
+    for (const auto& item : config) {
+        std::string deviceName = item.first;
+        for (const auto& option : item.second) {
+            // primary property
+            std::stringstream strm;
+            option.second.print(strm);
+            auto property_string = strm.str();
+            jsonConfig[deviceName][option.first] = property_string;
+        }
+    }
+
+    std::ofstream ofs(filename);
+    if (!ofs.is_open()) {
+        throw std::runtime_error("Can't load config file \"" + filename + "\".");
+    }
+
+    ofs << jsonConfig;
+}
+
+class DummyRemoteContext : public ov::IRemoteContext {
+public:
+    const std::string& get_device_name() const override { static const std::string device_name = "SOME_DEVICE"; return device_name; }
+    const ov::AnyMap& get_property() const override { OPENVINO_NOT_IMPLEMENTED; };
+    ov::SoPtr<ov::IRemoteTensor> create_tensor(const ov::element::Type& type,
+                                               const ov::Shape& shape,
+                                               const ov::AnyMap& params = {}) override { OPENVINO_NOT_IMPLEMENTED; }
+    ov::SoPtr<ov::ITensor> create_host_tensor(const ov::element::Type type, const ov::Shape& shape) override { OPENVINO_NOT_IMPLEMENTED; }
+};
+} // namespace
+
 struct EmptyTestConfig : public ov::PluginConfig {
     std::vector<std::string> get_supported_properties() const {
         std::vector<std::string> supported_properties;
@@ -288,12 +287,12 @@ TEST(plugin_config, visibility_is_correct) {
 #endif
 }
 
-TEST(plugin_config, can_read_from_env) {
+TEST(plugin_config, can_read_from_env_with_debug_caps) {
     NotEmptyTestConfig cfg;
     ASSERT_EQ(cfg.get_int_property(), -1);
     std::string env_var1 = "OV_INT_PROPERTY=10";
     ::putenv(env_var1.data());
-    ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only
+    ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only for build with debug caps
 
 #ifdef ENABLE_DEBUG_CAPS
     std::string env_var2 = "OV_DEBUG_PROPERTY=20";
     ::putenv(env_var2.data());
@@ -265,12 +301,46 @@ TEST(plugin_config, can_read_from_env) {
 #endif
     cfg.finalize(nullptr, nullptr);
-    ASSERT_EQ(cfg.get_int_property(), 10);
+
 #ifdef ENABLE_DEBUG_CAPS
+    ASSERT_EQ(cfg.get_int_property(), 10);
     ASSERT_EQ(cfg.get_debug_property(), 20);
+#else
+    ASSERT_EQ(cfg.get_int_property(), -1); // no effect
 #endif
 }
 
+TEST(plugin_config, can_read_from_config) {
+    const std::filesystem::path filepath = "config.json";
+    try {
+        NotEmptyTestConfig cfg;
+        ov::AnyMap config {
+            int_property(10),
+    #ifdef ENABLE_DEBUG_CAPS
+            debug_property(20),
+    #endif
+        };
+
+        DummyRemoteContext ctx;
+        dump_config(filepath.generic_string(), {{ctx.get_device_name(), config }});
+
+        ASSERT_EQ(cfg.get_int_property(), -1); // config is applied after finalization only for build with debug caps
+    #ifdef ENABLE_DEBUG_CAPS
+        ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option
+    #endif
+
+        cfg.finalize(&ctx, nullptr);
+    #ifdef ENABLE_DEBUG_CAPS
+        ASSERT_EQ(cfg.get_int_property(), 10);
+        ASSERT_EQ(cfg.get_debug_property(), 20);
+    #else
+        ASSERT_EQ(cfg.get_int_property(), -1); // no effect
+    #endif
+    } catch (std::exception&) { }
+
+    std::filesystem::remove(filepath);
+}
+
 #ifdef ENABLE_DEBUG_CAPS
 TEST(plugin_config, can_get_global_property) {
     NotEmptyTestConfig cfg;
 
From e50d8bbb7ff3ce5b2e9e3a850a036473171eced9 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Wed, 29 Jan 2025 11:58:19 +0400
Subject: [PATCH 40/44] Update env/config usage

Signed-off-by: Vladimir Paramuzov
---
 .../openvino/runtime/plugin_config.hpp        |  5 +-
 src/inference/src/dev/plugin_config.cpp       | 50 ++++++-------
 src/inference/tests/unit/config_test.cpp      | 75 +++++++++++--------
 .../intel_gpu/runtime/internal_properties.hpp |  1 +
 .../include/intel_gpu/runtime/options.inl     |  1 +
 .../src/runtime/execution_config.cpp          |  6 ++
 6 files changed, 75 insertions(+), 63 deletions(-)

diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
index 132e243cc42e79..eaa9a75ae52afe 100644
--- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp
+++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp
@@ -243,7 +243,8 @@ class OPENVINO_RUNTIME_API PluginConfig {
 protected:
     virtual void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) {}
-    virtual void apply_debug_options(const IRemoteContext* context);
+    void apply_env_options();
+    void apply_config_options(std::string_view device_name, std::string_view config_path = "");
     virtual void finalize_impl(const IRemoteContext* context) {}
 
     template
@@ -269,7 +270,7 @@ class OPENVINO_RUNTIME_API PluginConfig {
         }
     }
 
-    ov::AnyMap read_config_file(const std::string& filename, const std::string& target_device_name) const;
+    ov::AnyMap read_config_file(std::string_view filename, std::string_view target_device_name) const;
     ov::AnyMap read_env() const;
     static ov::Any read_env(const std::string& option_name, const std::string& prefix, const ConfigOptionBase* option);
     void cleanup_unsupported(ov::AnyMap& config) const;
diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp
index 7eafba4b8bbb28..b608a2a3cfc969 100644
--- a/src/inference/src/dev/plugin_config.cpp
+++ b/src/inference/src/dev/plugin_config.cpp
@@ -98,12 +98,6 @@ void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* mode
     if (model)
         apply_model_specific_options(context, *model);
 
-#ifdef ENABLE_DEBUG_CAPS
-    // For now we apply env/config only for build with debug caps, but it can be updated in the future to allow
-    // reading release options for any build type
-    apply_debug_options(context);
-#endif // ENABLE_DEBUG_CAPS
-
     // Copy internal properties before applying hints to ensure that
     // a property set by hint won't be overriden by a value in user config.
     // E.g num_streams=AUTO && hint=THROUGHPUT
@@ -116,6 +110,10 @@ void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* mode
 
     finalize_impl(context);
 
+#ifdef ENABLE_DEBUG_CAPS
+    apply_env_options();
+#endif
+
     // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization
     m_user_properties.clear();
 
@@ -131,39 +129,35 @@ bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) {
     return true;
 }
 
-void PluginConfig::apply_debug_options(const IRemoteContext* context) {
-    const bool throw_on_error = false;
-#ifdef ENABLE_DEBUG_CAPS
-    constexpr const auto allowed_visibility = OptionVisibility::ANY;
-#else
-    constexpr const auto allowed_visibility = OptionVisibility::RELEASE;
-#endif
+void PluginConfig::apply_env_options() {
+    ov::AnyMap env_properties = read_env();
+    cleanup_unsupported(env_properties);
+    for (auto& [name, val] : env_properties) {
+        std::cout << "Non default env value for " << name << " = " << val.as<std::string>() << std::endl;
+    }
+    set_property(env_properties);
+}
 
-    if (context) {
-        ov::AnyMap config_properties = read_config_file("config.json", context->get_device_name());
+void PluginConfig::apply_config_options(std::string_view device_name, std::string_view config_path) {
+    if (!config_path.empty()) {
+        ov::AnyMap config_properties = read_config_file(std::string(config_path), device_name);
         cleanup_unsupported(config_properties);
 #ifdef ENABLE_DEBUG_CAPS
         for (auto& [name, val] : config_properties) {
             std::cout << "Non default config value for " << name << " = " << val.as<std::string>() << std::endl;
         }
 #endif
-        set_user_property(config_properties, allowed_visibility, throw_on_error);
+        set_property(config_properties);
     }
-
-    ov::AnyMap env_properties = read_env();
-    cleanup_unsupported(env_properties);
-#ifdef ENABLE_DEBUG_CAPS
-    for (auto& [name, val] : env_properties) {
-        std::cout << "Non default env value for " << name << " = " << val.as<std::string>() << std::endl;
-    }
-#endif
-    set_user_property(env_properties, allowed_visibility, throw_on_error);
 }
 
-ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std::string& target_device_name) const {
+ov::AnyMap PluginConfig::read_config_file(std::string_view filename, std::string_view target_device_name) const {
+    if (filename.empty())
+        return {};
+
     ov::AnyMap config;
-    std::ifstream ifs(filename);
+    std::ifstream ifs(std::string{filename});
     if (!ifs.is_open()) {
         return config;
     }
@@ -175,7 +169,7 @@ ov::AnyMap PluginConfig::read_config_file(const std::string& filename, const std
         return config;
     }
 
-    DeviceIDParser parser(target_device_name);
+    DeviceIDParser parser(std::string{target_device_name});
     for (auto item = json_config.cbegin(), end = json_config.cend(); item != end; ++item) {
         const std::string& device_name = item.key();
         if (DeviceIDParser(device_name).get_device_name() != parser.get_device_name())
diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index a5c7a092a5f2a7..adde70c139b96d 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -11,6 +11,7 @@
 #include "openvino/runtime/plugin_config.hpp"
 
 #include <gtest/gtest.h>
+#include <cstdlib>
 #include <nlohmann/json.hpp>
 #include <fstream>
 #include <filesystem>
 
 using namespace ::testing;
 using namespace ov;
@@ -31,6 +32,9 @@ static constexpr Property<int> debug_global_property{"DEBUG_GLOBAL_PROPERTY"};
 #endif
 
 namespace {
+const std::string test_config_path = "test_debug_config_path.json";
+const std::string device_name = "SOME_DEVICE";
+
 void dump_config(const std::string& filename, const std::map<std::string, ov::AnyMap>& config) {
     nlohmann::json jsonConfig;
     for (const auto& item : config) {
@@ -51,15 +55,7 @@ void dump_config(const std::string& filename, const std::map<std::string, ov::An
 
     ofs << jsonConfig;
 }
-class DummyRemoteContext : public ov::IRemoteContext {
-public:
-    const std::string& get_device_name() const override { static const std::string device_name = "SOME_DEVICE"; return device_name; }
-    const ov::AnyMap& get_property() const override { OPENVINO_NOT_IMPLEMENTED; };
-    ov::SoPtr<ov::IRemoteTensor> create_tensor(const ov::element::Type& type,
-                                               const ov::Shape& shape,
-                                               const ov::AnyMap& params = {}) override { OPENVINO_NOT_IMPLEMENTED; }
-    ov::SoPtr<ov::ITensor> create_host_tensor(const ov::element::Type type, const ov::Shape& shape) override { OPENVINO_NOT_IMPLEMENTED; }
-};
+
 } // namespace
 
@@ -118,6 +114,9 @@ struct NotEmptyTestConfig : public ov::PluginConfig {
         if (!is_set_by_user(low_level_property)) {
             m_low_level_property.value = m_high_level_property.value;
         }
+#ifdef ENABLE_DEBUG_CAPS
+        apply_config_options(device_name, test_config_path);
+#endif
     }
 
     void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) override {
@@ -287,37 +290,44 @@ TEST(plugin_config, visibility_is_correct) {
 }
 
 TEST(plugin_config, can_read_from_env_with_debug_caps) {
-    NotEmptyTestConfig cfg;
-    ASSERT_EQ(cfg.get_int_property(), -1);
-    std::string env_var1 = "OV_INT_PROPERTY=10";
-    ::putenv(env_var1.data());
-    ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only for build with debug caps
+    try {
+        NotEmptyTestConfig cfg;
+        ASSERT_EQ(cfg.get_int_property(), -1);
+        std::string env_var1 = "OV_INT_PROPERTY=10";
+        ::putenv(env_var1.data());
+        ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only for build with debug caps
 
 #ifdef ENABLE_DEBUG_CAPS
-    std::string env_var2 = "OV_DEBUG_PROPERTY=20";
-    ::putenv(env_var2.data());
-    ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option
+        std::string env_var2 = "OV_DEBUG_PROPERTY=20";
+        ::putenv(env_var2.data());
+        ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option
 #endif
 
-    cfg.finalize(nullptr, nullptr);
+        cfg.finalize(nullptr, nullptr);
 #ifdef ENABLE_DEBUG_CAPS
-    ASSERT_EQ(cfg.get_int_property(), 10);
-    ASSERT_EQ(cfg.get_debug_property(), 20);
+        ASSERT_EQ(cfg.get_int_property(), 10);
+        ASSERT_EQ(cfg.get_debug_property(), 20);
 #else
-    ASSERT_EQ(cfg.get_int_property(), -1); // no effect
+        ASSERT_EQ(cfg.get_int_property(), -1); // no effect
+#endif
+    } catch (std::exception&) {}
+
+    ::unsetenv("OV_INT_PROPERTY");
+#ifdef ENABLE_DEBUG_CAPS
+    ::unsetenv("OV_DEBUG_PROPERTY");
 #endif
 }
 
@@ -327,24 +337,23 @@ TEST(plugin_config, can_read_from_config) {
-    const std::filesystem::path filepath = "config.json";
+    const std::filesystem::path filepath = test_config_path;
     try {
         NotEmptyTestConfig cfg;
         ov::AnyMap config {
             int_property(10),
     #ifdef ENABLE_DEBUG_CAPS
             debug_property(20),
     #endif
         };
 
-        DummyRemoteContext ctx;
-        dump_config(filepath.generic_string(), {{ctx.get_device_name(), config }});
+        dump_config(filepath.generic_string(), {{device_name, config }});
 
         ASSERT_EQ(cfg.get_int_property(), -1); // config is applied after finalization only for build with debug caps
     #ifdef ENABLE_DEBUG_CAPS
         ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option
     #endif
 
-        cfg.finalize(&ctx, nullptr);
+        cfg.finalize(nullptr, nullptr);
     #ifdef ENABLE_DEBUG_CAPS
         ASSERT_EQ(cfg.get_int_property(), 10);
         ASSERT_EQ(cfg.get_debug_property(), 20);
     #else
         ASSERT_EQ(cfg.get_int_property(), -1); // no effect
     #endif
     } catch (std::exception&) { }
 
     std::filesystem::remove(filepath);
 }
 
@@ -364,8 +372,10 @@ TEST(plugin_config, can_get_global_property) {
 }
 
 TEST(plugin_config, global_property_read_env_on_each_call) {
-    ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 4);
-    std::string env_var1 = "OV_DEBUG_GLOBAL_PROPERTY=10";
"OV_DEBUG_GLOBAL_PROPERTY=10"; - ::putenv(env_var1.data()); - ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 10); - - std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; - ::putenv(env_var2.data()); - ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 20); + try { + ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 4); + std::string env_var1 = "OV_DEBUG_GLOBAL_PROPERTY=10"; + ::putenv(env_var1.data()); + ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 10); + + std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; + ::putenv(env_var2.data()); + ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 20); + } catch (std::exception&) {} + + ::unsetenv("OV_DEBUG_GLOBAL_PROPERTY"); } #endif diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index afb32d3393d6b4..f037c8b02d36be 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -90,6 +90,7 @@ static constexpr Property use_onednn{"USE_ONEDNN"} static constexpr Property help{"HELP"}; static constexpr Property verbose{"VERBOSE"}; static constexpr Property verbose_color{"VERBOSE_COLOR"}; +static constexpr Property debug_config{"DEBUG_CONFIG"}; static constexpr Property log_to_file{"GPU_LOG_TO_FILE"}; static constexpr Property disable_usm{"GPU_DISABLE_USM"}; static constexpr Property disable_onednn_post_ops_opt{"DISABLE_ONEDNN_POST_OPS_OPT"}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl index 6d513ef1cb2eda..14bd79a9c60e99 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -55,6 +55,7 @@ OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, usm_policy, 0, "0: default, 1: use OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump") OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_profiling_data_per_iter, false, "Save profiling data w/o per-iteration aggregation") OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, debug_config, "", "Path to debug config in json format") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable optimization pass for onednn post-ops") OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data_path, "", "Save csv file with per-stage and per-primitive profiling data to specified folder") diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index d69fabcb346dd8..24a66c8f5ef214 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -251,6 +251,12 @@ void ExecutionConfig::finalize_impl(const IRemoteContext* context) { if (!get_force_implementations().empty()) { m_optimize_data = true; } + +#ifdef ENABLE_DEBUG_CAPS + // For now we apply env/config only for build with debug caps, but it can be updated in the future to allow + // reading release options for any build type + apply_config_options(context->get_device_name(), get_debug_config()); +#endif // ENABLE_DEBUG_CAPS } void ExecutionConfig::apply_hints(const cldnn::device_info& info) { From 
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -12,9 +12,9 @@
 #include <gtest/gtest.h>
 #include <cstdlib>
-#include <nlohmann/json.hpp>
 #include <fstream>
 #include <filesystem>
+#include <string>
 
 using namespace ::testing;
 using namespace ov;
@@ -35,25 +35,13 @@ namespace {
 const std::string test_config_path = "test_debug_config_path.json";
 const std::string device_name = "SOME_DEVICE";
 
-void dump_config(const std::string& filename, const std::map<std::string, ov::AnyMap>& config) {
-    nlohmann::json jsonConfig;
-    for (const auto& item : config) {
-        std::string deviceName = item.first;
-        for (const auto& option : item.second) {
-            // primary property
-            std::stringstream strm;
-            option.second.print(strm);
-            auto property_string = strm.str();
-            jsonConfig[deviceName][option.first] = property_string;
-        }
-    }
-
+void dump_config(const std::string& filename, const std::string& config_content) {
     std::ofstream ofs(filename);
     if (!ofs.is_open()) {
-        throw std::runtime_error("Can't load config file \"" + filename + "\".");
+        throw std::runtime_error("Can't save config file \"" + filename + "\".");
     }
 
-    ofs << jsonConfig;
+    ofs << config_content;
 }
 
 } // namespace
@@ -308,13 +296,8 @@ TEST(plugin_config, can_read_from_config) {
     const std::filesystem::path filepath = test_config_path;
     try {
         NotEmptyTestConfig cfg;
-        ov::AnyMap config {
-            int_property(10),
-    #ifdef ENABLE_DEBUG_CAPS
-            debug_property(20),
-    #endif
-        };
+        std::string config = "{\"SOME_DEVICE\":{\"DEBUG_PROPERTY\":\"20\",\"INT_PROPERTY\":\"10\"}}";
 
-        dump_config(filepath.generic_string(), {{device_name, config }});
+        dump_config(filepath.generic_string(), config);
 
         ASSERT_EQ(cfg.get_int_property(), -1); // config is applied after finalization only for build with debug caps
 
From 1c2e0475c48daf4e98d0db35c925aa67facf9cd0 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Wed, 29 Jan 2025 13:49:18 +0400
Subject: [PATCH 42/44] fix env setting

Signed-off-by: Vladimir Paramuzov
---
 src/inference/tests/unit/config_test.cpp | 34 ++++++++++++++++--------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp
index 682ee50eb81c79..cde86d96f6b1ee 100644
--- a/src/inference/tests/unit/config_test.cpp
+++ b/src/inference/tests/unit/config_test.cpp
@@ -44,6 +44,22 @@ void dump_config(const std::string& filename, const std::string& config_content)
     ofs << config_content;
 }
 
+void set_env(const std::string& name, const std::string& value) {
+#ifdef _WIN32
+    _putenv_s(name.c_str(), value.c_str());
+#else
+    ::setenv(name.c_str(), value.c_str(), 1);
+#endif
+}
+
+void unset_env(const std::string& name) {
+#ifdef _WIN32
+    _putenv_s(name.c_str(), env_value.c_str());
+#else
+    ::unsetenv(name.c_str());
+#endif
+}
+
 } // namespace
 
@@ -294,11 +310,9 @@ TEST(plugin_config, can_read_from_env_with_debug_caps) {
     try {
         NotEmptyTestConfig cfg;
         ASSERT_EQ(cfg.get_int_property(), -1);
-        std::string env_var1 = "OV_INT_PROPERTY=10";
-        ::putenv(env_var1.data());
+        set_env("OV_INT_PROPERTY", "10");
"10"); ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only for build with debug caps #ifdef ENABLE_DEBUG_CAPS - std::string env_var2 = "OV_DEBUG_PROPERTY=20"; - ::putenv(env_var2.data()); + set_env("OV_DEBUG_PROPERTY", "20"); ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option #endif @@ -298,9 +312,9 @@ TEST(plugin_config, can_read_from_env_with_debug_caps) { #endif } catch (std::exception&) {} - ::unsetenv("OV_INT_PROPERTY"); + unset_env("OV_INT_PROPERTY"); #ifdef ENABLE_DEBUG_CAPS - ::unsetenv("OV_DEBUG_PROPERTY"); + unset_env("OV_DEBUG_PROPERTY"); #endif } @@ -338,15 +352,13 @@ TEST(plugin_config, can_get_global_property) { TEST(plugin_config, global_property_read_env_on_each_call) { try { ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 4); - std::string env_var1 = "OV_DEBUG_GLOBAL_PROPERTY=10"; - ::putenv(env_var1.data()); + set_env("OV_DEBUG_GLOBAL_PROPERTY", "10"); ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 10); - std::string env_var2 = "OV_DEBUG_GLOBAL_PROPERTY=20"; - ::putenv(env_var2.data()); + set_env("OV_DEBUG_GLOBAL_PROPERTY", "20"); ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 20); } catch (std::exception&) {} - ::unsetenv("OV_DEBUG_GLOBAL_PROPERTY"); + unset_env("OV_DEBUG_GLOBAL_PROPERTY"); } #endif From 0a113340bac8bee7a89482c9ee805f3612a37a0e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 29 Jan 2025 14:36:08 +0400 Subject: [PATCH 43/44] fix env unset Signed-off-by: Vladimir Paramuzov --- src/inference/tests/unit/config_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp index cde86d96f6b1ee..9cfb9b98c34c3f 100644 --- a/src/inference/tests/unit/config_test.cpp +++ b/src/inference/tests/unit/config_test.cpp @@ -54,7 +54,7 @@ void set_env(const std::string& name, const std::string& value) { void unset_env(const std::string& name) { #ifdef _WIN32 - _putenv_s(name.c_str(), env_value.c_str()); + _putenv_s(name.c_str(), ""); #else ::unsetenv(name.c_str()); #endif From 6797fcfad66f74be79ecc67ab561ea576657edfe Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 31 Jan 2025 10:43:05 +0400 Subject: [PATCH 44/44] apply review comments Signed-off-by: Vladimir Paramuzov --- .../dev_api/openvino/runtime/plugin_config.hpp | 8 ++++---- src/inference/src/dev/plugin_config.cpp | 16 +++++++++------- .../intel_gpu/src/runtime/execution_config.cpp | 14 ++++++-------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp index eaa9a75ae52afe..0890bd5192f98d 100644 --- a/src/inference/dev_api/openvino/runtime/plugin_config.hpp +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -134,9 +134,9 @@ struct ConfigOptionBase { explicit ConfigOptionBase() {} virtual ~ConfigOptionBase() = default; - virtual void set_any(const ov::Any any) = 0; + virtual void set_any(const ov::Any& any) = 0; virtual ov::Any get_any() const = 0; - virtual bool is_valid_value(ov::Any val) = 0; + virtual bool is_valid_value(const ov::Any& val) const = 0; virtual OptionVisibility get_visibility() const = 0; }; @@ -147,7 +147,7 @@ struct ConfigOption : public ConfigOptionBase { T value; constexpr static const auto visibility = visibility_; - void set_any(const ov::Any any) override { + void set_any(const ov::Any& any) override { if (validator) OPENVINO_ASSERT(validator(any.as()), 
"Invalid value: ", any.as()); value = any.as(); @@ -157,7 +157,7 @@ struct ConfigOption : public ConfigOptionBase { return ov::Any(value); } - bool is_valid_value(ov::Any val) override { + bool is_valid_value(const ov::Any& val) const override { try { auto v = val.as(); return validator ? validator(v) : true; diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp index b608a2a3cfc969..eda2d85670d845 100644 --- a/src/inference/src/dev/plugin_config.cpp +++ b/src/inference/src/dev/plugin_config.cpp @@ -11,6 +11,8 @@ #include #include #include +#include +#include #ifdef JSON_HEADER # include @@ -190,16 +192,16 @@ ov::Any PluginConfig::read_env(const std::string& option_name, const std::string if (!val.empty()) { if (dynamic_cast*>(option) != nullptr) { - const std::set off = {"0", "false", "off", "no"}; - const std::set on = {"1", "true", "on", "yes"}; + constexpr std::array off = {"0", "false", "off", "no"}; + constexpr std::array on = {"1", "true", "on", "yes"}; + const auto& val_lower = util::to_lower(val); - const auto& val_lower = ov::util::to_lower(val); - if (off.count(val_lower)) { - return false; - } else if (on.count(val_lower)) { + if (std::find(on.begin(), on.end(), val_lower) != on.end()) { return true; + } else if (std::find(off.begin(), off.end(), val_lower) != off.end()) { + return false; } else { - OPENVINO_THROW("Unexpected value for boolean property: ", val); + OPENVINO_THROW("Unexpected value for boolean property: " + val); } } else { return val; diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 24a66c8f5ef214..5641aeb96abd84 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -164,7 +164,7 @@ void ExecutionConfig::apply_rt_info(const IRemoteContext* context, const ov::RTM if (!info.supports_immad) { apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); } - if (!info.supports_immad || !is_llm) + if (!is_llm) apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); @@ -182,7 +182,7 @@ void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context const auto& ops = model.get_ops(); - auto process_op = [this](std::shared_ptr op) { + std::function)> process_op = [&, this](std::shared_ptr op) { if (requires_new_shape_infer(op)) { m_allow_new_shape_infer = true; } @@ -197,20 +197,18 @@ void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context if (ov::is_type(op)) { m_use_onednn = true; } - }; - // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. - // So, do not check allow_new_shape_infer for inner program build - for (const auto& op : ops) { if (auto multi_subgraph_op = ov::as_type_ptr(op)) { for (const auto& sub_graph : multi_subgraph_op->get_functions()) { for (auto& sub_op : sub_graph->get_ops()) { process_op(sub_op); } } - } else { - process_op(op); } + }; + + for (const auto& op : ops) { + process_op(op); } m_optimize_data = true;