diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 54c6b4f9d868f5..0ed7d3e213aa21 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -6,6 +6,9 @@ #include #include +#include +#include +#include "openvino/util/common_util.hpp" namespace { template bool contains_type_index(Container&& types, const std::type_info& user_type) { @@ -202,9 +205,14 @@ namespace util { void Read::operator()(std::istream& is, bool& value) const { std::string str; is >> str; - if (str == "YES") { + + constexpr std::array off = {"0", "false", "off", "no"}; + constexpr std::array on = {"1", "true", "on", "yes"}; + str = util::to_lower(str); + + if (std::find(on.begin(), on.end(), str) != on.end()) { value = true; - } else if (str == "NO") { + } else if (std::find(off.begin(), off.end(), str) != off.end()) { value = false; } else { OPENVINO_THROW("Could not convert to bool from string " + str); diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt index 22c28c2acde6e6..40528735f80071 100644 --- a/src/inference/CMakeLists.txt +++ b/src/inference/CMakeLists.txt @@ -76,6 +76,10 @@ target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_OPENVINO_RUNTIME_API $<$:PROXY_PLUGIN_ENABLED>) +if(ENABLE_DEBUG_CAPS) + target_compile_definitions(${TARGET_NAME}_obj PUBLIC ENABLE_DEBUG_CAPS) +endif() + target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE $ $<$:$>) @@ -87,7 +91,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE # for ov_plugins.hpp $,$>,${CMAKE_CURRENT_BINARY_DIR}/$,${CMAKE_CURRENT_BINARY_DIR}>) -target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util openvino::core::dev) +target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util openvino::core::dev nlohmann_json::nlohmann_json) ov_mark_target_as_cc(${TARGET_NAME}_obj) # OpenVINO Runtime is public API => need to mark this library as important for ABI free @@ -133,6 +137,10 @@ target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE) +if(ENABLE_DEBUG_CAPS) + target_compile_definitions(${TARGET_NAME}_s PUBLIC ENABLE_DEBUG_CAPS) +endif() + set_target_properties(${TARGET_NAME}_s PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) # LTO diff --git a/src/inference/dev_api/openvino/runtime/internal_properties.hpp b/src/inference/dev_api/openvino/runtime/internal_properties.hpp index 9b2f08c17a7fe0..e26024622580e7 100644 --- a/src/inference/dev_api/openvino/runtime/internal_properties.hpp +++ b/src/inference/dev_api/openvino/runtime/internal_properties.hpp @@ -90,5 +90,11 @@ static constexpr Property compiled_model_runtime_p */ static constexpr Property query_model_ratio{"QUERY_MODEL_RATIO"}; +/** + * @brief Allow execution of low precision transformations in plugin's pipelines + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; + } // namespace internal } // namespace ov diff --git a/src/inference/dev_api/openvino/runtime/plugin_config.hpp b/src/inference/dev_api/openvino/runtime/plugin_config.hpp new file mode 100644 index 00000000000000..0890bd5192f98d --- /dev/null +++ b/src/inference/dev_api/openvino/runtime/plugin_config.hpp @@ -0,0 +1,387 @@ +// Copyright (C) 2024-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "openvino/core/attribute_visitor.hpp" +#include "openvino/runtime/iremote_context.hpp" +#include "openvino/runtime/properties.hpp" +#include 
"openvino/core/except.hpp" + +#ifndef EXPAND + #define EXPAND(N) N +#endif + +#ifndef COUNT_N + #define COUNT_N(_1, _2, _3, _4, _5, N, ...) N +#endif + +#ifndef COUNT + #define COUNT(...) EXPAND(COUNT_N(__VA_ARGS__, 5, 4, 3, 2, 1)) +#endif + +#ifndef CAT + #define CAT(a, b) a ## b +#endif + +#define GET_EXCEPT_LAST_IMPL(N, ...) EXPAND(CAT(GET_EXCEPT_LAST_IMPL_, N)(__VA_ARGS__)) +#define GET_EXCEPT_LAST_IMPL_2(_0, _1) _0 +#define GET_EXCEPT_LAST_IMPL_3(_0, _1, _2) _0, _1 +#define GET_EXCEPT_LAST_IMPL_4(_0, _1, _2, _3) _0, _1, _2 +#define GET_EXCEPT_LAST_IMPL_5(_0, _1, _2, _3, _4) _0, _1, _2, _3 + +#define GET_EXCEPT_LAST(...) EXPAND(GET_EXCEPT_LAST_IMPL(COUNT(__VA_ARGS__), __VA_ARGS__)) + +#define GET_LAST_IMPL(N, ...) EXPAND(CAT(GET_LAST_IMPL_, N)(__VA_ARGS__)) +#define GET_LAST_IMPL_0(_0, ...) _0 +#define GET_LAST_IMPL_1(_0, _1, ...) _1 +#define GET_LAST_IMPL_2(_0, _1, _2, ...) _2 +#define GET_LAST_IMPL_3(_0, _1, _2, _3, ...) _3 +#define GET_LAST_IMPL_4(_0, _1, _2, _3, _4, ...) _4 +#define GET_LAST_IMPL_5(_0, _1, _2, _3, _4, _5, ...) _5 +#define GET_LAST_IMPL_6(_0, _1, _2, _3, _4, _5, _6, ...) _6 + +#define GET_LAST(...) GET_LAST_IMPL(COUNT(__VA_ARGS__), _, __VA_ARGS__ ,,,,,,,,,,,) + +#define OV_CONFIG_DECLARE_LOCAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ + ConfigOption m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; +#define OV_CONFIG_DECLARE_GLOBAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ + static ConfigOption m_ ## PropertyVar; + +#define OV_CONFIG_DECLARE_LOCAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ + const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() const { \ + if (m_is_finalized) { \ + return m_ ## PropertyVar.value; \ + } else { \ + if (m_user_properties.find(PropertyNamespace::PropertyVar.name()) != m_user_properties.end()) { \ + return m_user_properties.at(PropertyNamespace::PropertyVar.name()).as(); \ + } else { \ + return m_ ## PropertyVar.value; \ + } \ + } \ + } + +#define OV_CONFIG_OPTION_MAPPING(PropertyNamespace, PropertyVar, ...) \ + m_options_map[PropertyNamespace::PropertyVar.name()] = & m_ ## PropertyVar; + +#define OV_CONFIG_OPTION_HELP(PropertyNamespace, PropertyVar, Visibility, DefaultValue, ...) \ + { #PropertyNamespace "::" #PropertyVar, PropertyNamespace::PropertyVar.name(), GET_LAST(__VA_ARGS__)}, + +#define OV_CONFIG_RELEASE_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE, __VA_ARGS__) + +#define OV_CONFIG_RELEASE_INTERNAL_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::RELEASE_INTERNAL, __VA_ARGS__) + +#ifdef ENABLE_DEBUG_CAPS +#define OV_CONFIG_DECLARE_GLOBAL_GETTER(PropertyNamespace, PropertyVar, Visibility, ...) \ + static const decltype(PropertyNamespace::PropertyVar)::value_type& get_##PropertyVar() { \ + auto v = read_env(PropertyNamespace::PropertyVar.name(), m_allowed_env_prefix, &m_ ## PropertyVar); \ + if (v.empty()) \ + return m_ ## PropertyVar.value; \ + return v.as(); \ + } +#define OV_CONFIG_DEBUG_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_LOCAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG, __VA_ARGS__) + +#define OV_CONFIG_DEBUG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, ...) \ + OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, OptionVisibility::DEBUG_GLOBAL, __VA_ARGS__) +#else +#define OV_CONFIG_DEBUG_OPTION(...) +#define OV_CONFIG_DEBUG_GLOBAL_OPTION(...) 
+#define OV_CONFIG_DECLARE_GLOBAL_GETTER(...) +#endif +namespace ov { +enum class OptionVisibility : uint8_t { + RELEASE = 1 << 0, // Option can be set for any build type via public interface, environment and config file + RELEASE_INTERNAL = 1 << 1, // Option can be set for any build type via environment and config file only + DEBUG = 1 << 2, // Option can be set for debug builds only via environment and config file + DEBUG_GLOBAL = 1 << 3, // Global option can be set for debug builds only via environment and config file + ANY = 0xFF, // Any visibility is valid +}; + +inline OptionVisibility operator&(OptionVisibility a, OptionVisibility b) { + typedef std::underlying_type::type underlying_type; + return static_cast(static_cast(a) & static_cast(b)); +} + +inline OptionVisibility operator|(OptionVisibility a, OptionVisibility b) { + typedef std::underlying_type::type underlying_type; + return static_cast(static_cast(a) | static_cast(b)); +} + +inline OptionVisibility operator~(OptionVisibility a) { + typedef std::underlying_type::type underlying_type; + return static_cast(~static_cast(a)); +} + +inline std::ostream& operator<<(std::ostream& os, const OptionVisibility& visibility) { + switch (visibility) { + case OptionVisibility::RELEASE: os << "RELEASE"; break; + case OptionVisibility::RELEASE_INTERNAL: os << "RELEASE_INTERNAL"; break; + case OptionVisibility::DEBUG: os << "DEBUG"; break; + case OptionVisibility::DEBUG_GLOBAL: os << "DEBUG_GLOBAL"; break; + case OptionVisibility::ANY: os << "ANY"; break; + default: os << "UNKNOWN"; break; + } + + return os; +} + +struct ConfigOptionBase { + explicit ConfigOptionBase() {} + virtual ~ConfigOptionBase() = default; + + virtual void set_any(const ov::Any& any) = 0; + virtual ov::Any get_any() const = 0; + virtual bool is_valid_value(const ov::Any& val) const = 0; + virtual OptionVisibility get_visibility() const = 0; +}; + +template +struct ConfigOption : public ConfigOptionBase { + ConfigOption(const T& default_val, std::function validator = nullptr) + : ConfigOptionBase(), value(default_val), validator(validator) {} + T value; + constexpr static const auto visibility = visibility_; + + void set_any(const ov::Any& any) override { + if (validator) + OPENVINO_ASSERT(validator(any.as()), "Invalid value: ", any.as()); + value = any.as(); + } + + ov::Any get_any() const override { + return ov::Any(value); + } + + bool is_valid_value(const ov::Any& val) const override { + try { + auto v = val.as(); + return validator ? validator(v) : true; + } catch (std::exception&) { + return false; + } + } + + OptionVisibility get_visibility() const override { + return visibility; + } + + operator T() const { + return value; + } + + ConfigOption& operator=(const T& val) { + value = val; + return *this; + } + + template>> + bool operator==(const U& val) const { + return value == static_cast(val); + } + + template>> + bool operator!=(const U& val) const { + return !(*this == val); + } + +private: + std::function validator; +}; + +// Base class for configuration of plugins +// Implementation should provide a list of properties with default values and validators (optional) +// and prepare a map string property name -> ConfigOptionBase pointer +// For the sake of efficiency, we expect that plugin properties are defined as class members of the derived class +// and accessed directly in the plugin's code (i.e. 
w/o get_property()/set_property() calls) +// get/set property members are provided to handle external property access +// The class provides a helpers to read the properties from configuration file and from environment variables +// +// Expected order of properties resolution: +// 1. Assign default value for each property per device +// 2. Save user properties passed via Core::set_property() call to user_properties +// 3. Save user properties passed via Core::compile_model() call to user_properties +// 4. Apply RT info properties to user_properties if they were not set by user +// 5. Read and apply properties from the config file as user_properties +// 6. Read and apply properties from the the environment variables as user_properties +// 7. Apply user_properties to actual plugin properties +// 8. Update dependant properties if they were not set by user either way +class OPENVINO_RUNTIME_API PluginConfig { +public: + PluginConfig() {} + virtual ~PluginConfig() = default; + + // Disable copy and move as we need to setup m_options_map properly and ensure that + // values are a part of current config object + PluginConfig(const PluginConfig& other) = delete; + PluginConfig& operator=(const PluginConfig& other) = delete; + PluginConfig(PluginConfig&& other) = delete; + PluginConfig& operator=(PluginConfig&& other) = delete; + + void set_property(const ov::AnyMap& properties); + void set_user_property(const ov::AnyMap& properties, OptionVisibility allowed_visibility = OptionVisibility::ANY, bool throw_on_error = true); + Any get_property(const std::string& name, OptionVisibility allowed_visibility = OptionVisibility::ANY) const; + + template + util::EnableIfAllStringAny set_property(Properties&&... properties) { + set_property(ov::AnyMap{std::forward(properties)...}); + } + template + util::EnableIfAllStringAny set_user_property(Properties&&... 
properties) { + set_user_property(ov::AnyMap{std::forward(properties)...}); + } + + std::string to_string() const; + + void finalize(const IRemoteContext* context, const ov::Model* model); + + bool visit_attributes(ov::AttributeVisitor& visitor); + +protected: + virtual void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) {} + void apply_env_options(); + void apply_config_options(std::string_view device_name, std::string_view config_path = ""); + virtual void finalize_impl(const IRemoteContext* context) {} + + template + bool is_set_by_user(const ov::Property& property) const { + return m_user_properties.find(property.name()) != m_user_properties.end(); + } + + ConfigOptionBase* get_option_ptr(const std::string& name) const { + auto it = m_options_map.find(name); + OPENVINO_ASSERT(it != m_options_map.end(), "Option not found: ", name); + OPENVINO_ASSERT(it->second != nullptr, "Option is invalid: ", name); + + return it->second; + } + + template + void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { + if (!is_set_by_user(property)) { + auto rt_info_val = rt_info.find(property.name()); + if (rt_info_val != rt_info.end()) { + set_user_property({property(rt_info_val->second.template as())}, OptionVisibility::RELEASE | OptionVisibility::RELEASE_INTERNAL); + } + } + } + + ov::AnyMap read_config_file(std::string_view filename, std::string_view target_device_name) const; + ov::AnyMap read_env() const; + static ov::Any read_env(const std::string& option_name, const std::string& prefix, const ConfigOptionBase* option); + void cleanup_unsupported(ov::AnyMap& config) const; + + std::map m_options_map; + + // List of properties explicitly set by user via Core::set_property() or Core::compile_model() or ov::Model's runtime info + ov::AnyMap m_user_properties; + using OptionMapEntry = decltype(m_options_map)::value_type; + + // property variable name, string name, default value, description + using OptionsDesc = std::vector>; + virtual const OptionsDesc& get_options_desc() const { static OptionsDesc empty; return empty; } + const std::string get_help_message(const std::string& name = "") const; + void print_help() const; + + bool m_is_finalized = false; + + inline static const std::string m_allowed_env_prefix = "OV_"; +}; + +template <> +class OPENVINO_RUNTIME_API AttributeAdapter + : public DirectValueAccessor { +public: + AttributeAdapter(ConfigOptionBase*& value) : DirectValueAccessor(value) {} + + OPENVINO_RTTI("AttributeAdapter"); +}; + +template <> +class OPENVINO_RUNTIME_API AttributeAdapter + : public DirectValueAccessor { +public: + AttributeAdapter(ov::AnyMap& value) : DirectValueAccessor(value) {} + + OPENVINO_RTTI("AttributeAdapter"); +}; + +template +class OstreamAttributeVisitor : public ov::AttributeVisitor { + OStreamType& os; + +public: + OstreamAttributeVisitor(OStreamType& os) : os(os) {} + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + os << adapter.get(); + } + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + if (auto a = ov::as_type>(&adapter)) { + return handle_option(a->get()); + } else if (auto a = ov::as_type>(&adapter)) { + const auto& props = a->get(); + os << props.size(); + for (auto& kv : props) { + os << kv.first << kv.second.as(); + } + } else { + OPENVINO_THROW("Attribute ", name, " can't be processed\n"); + } + } + + void handle_option(ConfigOptionBase* option) { + if (option->get_visibility() == OptionVisibility::RELEASE || 
option->get_visibility() == OptionVisibility::RELEASE_INTERNAL) + os << option->get_any().as(); + } +}; + +template +class IstreamAttributeVisitor : public ov::AttributeVisitor { + IStreamType& is; + +public: + IstreamAttributeVisitor(IStreamType& is) : is(is) {} + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + if (auto a = ov::as_type>(&adapter)) { + return handle_option(a->get()); + } else if (auto a = ov::as_type>(&adapter)) { + size_t size; + is >> size; + ov::AnyMap props; + for (size_t i = 0; i < size; i++) { + std::string name, val; + is >> name; + is >> val; + props[name] = val; + + } + a->set(props); + } else { + OPENVINO_THROW("Attribute ", name, " can't be processed\n"); + } + } + + void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { + bool val; + is >> val; + adapter.set(val); + } + + void handle_option(ConfigOptionBase* option) { + if (option->get_visibility() == OptionVisibility::RELEASE || option->get_visibility() == OptionVisibility::RELEASE_INTERNAL) { + std::string s; + is >> s; + if (option->is_valid_value(s)) + option->set_any(s); + } + } +}; + +} // namespace ov diff --git a/src/inference/src/dev/plugin_config.cpp b/src/inference/src/dev/plugin_config.cpp new file mode 100644 index 00000000000000..eda2d85670d845 --- /dev/null +++ b/src/inference/src/dev/plugin_config.cpp @@ -0,0 +1,323 @@ +// Copyright (C) 2024-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/runtime/plugin_config.hpp" +#include "openvino/core/any.hpp" +#include "openvino/core/except.hpp" +#include "openvino/runtime/device_id_parser.hpp" +#include "openvino/util/common_util.hpp" +#include "openvino/util/env_util.hpp" +#include +#include +#include +#include +#include + +#ifdef JSON_HEADER +# include +#else +# include +#endif + +#ifdef _WIN32 +#include +#else +#include +#include +#endif + +namespace { +size_t get_terminal_width() { + const size_t default_width = 120; +#ifdef _WIN32 + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) { + return csbi.srWindow.Right - csbi.srWindow.Left + 1; + } else { + return default_width; + } +#elif defined(__linux__) + struct winsize w; + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { + return w.ws_col; + } else { + return default_width; + } +#else + return default_width; +#endif +} +} + +namespace ov { + +ov::Any PluginConfig::get_property(const std::string& name, OptionVisibility allowed_visibility) const { + if (m_user_properties.find(name) != m_user_properties.end()) { + return m_user_properties.at(name); + } + + auto option = get_option_ptr(name); + OPENVINO_ASSERT((allowed_visibility & option->get_visibility()) == option->get_visibility(), "Couldn't get unknown property: ", name); + + return option->get_any(); +} + +void PluginConfig::set_property(const ov::AnyMap& config) { + OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited"); + + for (auto& [name, val] : config) { + get_option_ptr(name)->set_any(val); + } +} + +void PluginConfig::set_user_property(const ov::AnyMap& config, OptionVisibility allowed_visibility, bool throw_on_error) { + OPENVINO_ASSERT(!m_is_finalized, "Setting property after config finalization is prohibited"); + + for (auto& [name, val] : config) { + auto option = get_option_ptr(name); + if ((allowed_visibility & option->get_visibility()) != option->get_visibility()) { + if (throw_on_error) + OPENVINO_THROW("Couldn't set unknown property: ", 
name); + else + continue; + } + if (!option->is_valid_value(val)) { + if (throw_on_error) + OPENVINO_THROW("Invalid value: ", val.as(), " for property: ", name, "\nProperty description: ", get_help_message(name)); + else + continue; + } + + m_user_properties[name] = val; + } +} + +void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* model) { + if (m_is_finalized) + return; + + if (model) + apply_model_specific_options(context, *model); + + // Copy internal properties before applying hints to ensure that + // a property set by hint won't be overriden by a value in user config. + // E.g num_streams=AUTO && hint=THROUGHPUT + // If we apply hints first and then copy all values from user config to internal one, + // then we'll get num_streams=AUTO in final config while some integer number is expected. + for (const auto& prop : m_user_properties) { + auto& option = m_options_map.at(prop.first); + option->set_any(prop.second); + } + + finalize_impl(context); + +#ifdef ENABLE_DEBUG_CAPS + apply_env_options(); +#endif + + // Clear properties after finalize_impl to be able to check if a property was set by user during plugin-side finalization + m_user_properties.clear(); + + m_is_finalized = true; +} + +bool PluginConfig::visit_attributes(ov::AttributeVisitor& visitor) { + visitor.on_attribute("m_user_properties", m_user_properties); + for (auto& prop : m_options_map) { + visitor.on_attribute(prop.first + "__internal", prop.second); + } + + return true; +} + +void PluginConfig::apply_env_options() { + ov::AnyMap env_properties = read_env(); + cleanup_unsupported(env_properties); + for (auto& [name, val] : env_properties) { + std::cout << "Non default env value for " << name << " = " << val.as() << std::endl; + } + set_property(env_properties); +} + +void PluginConfig::apply_config_options(std::string_view device_name, std::string_view config_path) { + if (!config_path.empty()) { + ov::AnyMap config_properties = read_config_file(std::string(config_path), device_name); + cleanup_unsupported(config_properties); +#ifdef ENABLE_DEBUG_CAPS + for (auto& [name, val] : config_properties) { + std::cout << "Non default config value for " << name << " = " << val.as() << std::endl; + } +#endif + set_property(config_properties); + } +} + +ov::AnyMap PluginConfig::read_config_file(std::string_view filename, std::string_view target_device_name) const { + if (filename.empty()) + return {}; + + ov::AnyMap config; + + std::ifstream ifs(std::string{filename}); + if (!ifs.is_open()) { + return config; + } + + nlohmann::json json_config; + try { + ifs >> json_config; + } catch (const std::exception&) { + return config; + } + + DeviceIDParser parser(std::string{target_device_name}); + for (auto item = json_config.cbegin(), end = json_config.cend(); item != end; ++item) { + const std::string& device_name = item.key(); + if (DeviceIDParser(device_name).get_device_name() != parser.get_device_name()) + continue; + + const auto& item_value = item.value(); + for (auto option = item_value.cbegin(), item_value_end = item_value.cend(); option != item_value_end; ++option) { + config[option.key()] = option.value().get(); + } + } + + return config; +} + +ov::Any PluginConfig::read_env(const std::string& option_name, const std::string& prefix, const ConfigOptionBase* option) { + auto var_name = prefix + option_name; + const auto& val = ov::util::getenv_string(var_name.c_str()); + + if (!val.empty()) { + if (dynamic_cast*>(option) != nullptr) { + constexpr std::array off = {"0", "false", "off", "no"}; + constexpr 
std::array on = {"1", "true", "on", "yes"}; + const auto& val_lower = util::to_lower(val); + + if (std::find(on.begin(), on.end(), val_lower) != on.end()) { + return true; + } else if (std::find(off.begin(), off.end(), val_lower) != off.end()) { + return false; + } else { + OPENVINO_THROW("Unexpected value for boolean property: " + val); + } + } else { + return val; + } + } else { + return ov::Any(); + } +} + +ov::AnyMap PluginConfig::read_env() const { + ov::AnyMap config; + + for (auto& [name, option] : m_options_map) { + auto val = read_env(name, m_allowed_env_prefix, option); + if (!val.empty()) { + config[name] = val; + } + } + + return config; +} + +void PluginConfig::cleanup_unsupported(ov::AnyMap& config) const { + for (auto it = config.begin(); it != config.end();) { + auto& name = it->first; + auto opt_it = std::find_if(m_options_map.begin(), m_options_map.end(), [&](const OptionMapEntry& o) { return o.first == name; }); + if (opt_it == m_options_map.end()) { + it = config.erase(it); + } else { + ++it; + } + } +} + +std::string PluginConfig::to_string() const { + std::stringstream ss; + + ss << "-----------------------------------------\n"; + ss << "PROPERTIES:\n"; + + for (const auto& [name, option] : m_options_map) { + ss << "\t" << name << ": " << option->get_any().as() << std::endl; + } + ss << "USER PROPERTIES:\n"; + for (const auto& [name, val] : m_user_properties) { + ss << "\t" << name << ": " << val.as() << std::endl; + } + + return ss.str(); +} + +void PluginConfig::print_help() const { + auto format_text = [](const std::string& cpp_name, const std::string& str_name, const std::string& desc, size_t max_name_width, size_t max_width) { + std::istringstream words(desc); + std::ostringstream formatted_text; + std::string word; + std::vector words_vec; + + while (words >> word) { + words_vec.push_back(word); + } + + size_t j = 0; + size_t count_of_desc_lines = (desc.length() + max_width - 1) / max_width; + for (size_t i = 0 ; i < std::max(2, count_of_desc_lines); i++) { + if (i == 0) { + formatted_text << std::left << std::setw(max_name_width) << cpp_name; + } else if (i == 1) { + formatted_text << std::left << std::setw(max_name_width) << str_name; + } else { + formatted_text << std::left << std::setw(max_name_width) << ""; + } + + formatted_text << " | "; + + size_t line_length = max_name_width + 3; + for (; j < words_vec.size();) { + line_length += words_vec[j].size() + 1; + if (line_length > max_width) { + break; + } else { + formatted_text << words_vec[j] << " "; + } + j++; + } + formatted_text << "\n"; + } + return formatted_text.str(); + }; + + const auto& options_desc = get_options_desc(); + std::stringstream ss; + auto max_name_length_item = std::max_element(options_desc.begin(), options_desc.end(), + [](const OptionsDesc::value_type& a, const OptionsDesc::value_type& b){ + return std::get<0>(a).size() < std::get<0>(b).size(); + }); + + const size_t max_name_width = static_cast(std::get<0>(*max_name_length_item).size() + std::get<1>(*max_name_length_item).size()); + const size_t terminal_width = get_terminal_width(); + ss << std::left << std::setw(max_name_width) << "Option name" << " | " << " Description " << "\n"; + ss << std::left << std::setw(terminal_width) << std::setfill('-') << "" << "\n"; + for (auto& kv : options_desc) { + ss << format_text(std::get<0>(kv), std::get<1>(kv), std::get<2>(kv), max_name_width, terminal_width) << "\n"; + } + + std::cout << ss.str(); +} + +const std::string PluginConfig::get_help_message(const std::string& name) const { + const 
auto& options_desc = get_options_desc(); + auto it = std::find_if(options_desc.begin(), options_desc.end(), [&](const OptionsDesc::value_type& v) { return std::get<1>(v) == name; }); + if (it != options_desc.end()) { + return std::get<2>(*it); + } + + return ""; +} + +} // namespace ov diff --git a/src/inference/tests/unit/config_test.cpp b/src/inference/tests/unit/config_test.cpp new file mode 100644 index 00000000000000..9cfb9b98c34c3f --- /dev/null +++ b/src/inference/tests/unit/config_test.cpp @@ -0,0 +1,364 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/core/any.hpp" +#include "openvino/core/except.hpp" +#include "openvino/core/model.hpp" +#include "openvino/core/node_vector.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/runtime/iremote_context.hpp" +#include "openvino/runtime/plugin_config.hpp" + +#include +#include +#include +#include +#include + +using namespace ::testing; +using namespace ov; + +static constexpr Property unsupported_property{"UNSUPPORTED_PROPERTY"}; +static constexpr Property bool_property{"BOOL_PROPERTY"}; +static constexpr Property int_property{"INT_PROPERTY"}; +static constexpr Property high_level_property{"HIGH_LEVEL_PROPERTY"}; +static constexpr Property low_level_property{"LOW_LEVEL_PROPERTY"}; +static constexpr Property release_internal_property{"RELEASE_INTERNAL_PROPERTY"}; + +#ifdef ENABLE_DEBUG_CAPS +static constexpr Property debug_property{"DEBUG_PROPERTY"}; +static constexpr Property debug_global_property{"DEBUG_GLOBAL_PROPERTY"}; +#endif + +namespace { +const std::string test_config_path = "test_debug_config_path.json"; +const std::string device_name = "SOME_DEVICE"; + +void dump_config(const std::string& filename, const std::string& config_content) { + std::ofstream ofs(filename); + if (!ofs.is_open()) { + throw std::runtime_error("Can't save config file \"" + filename + "\"."); + } + + ofs << config_content; +} + +void set_env(const std::string& name, const std::string& value) { +#ifdef _WIN32 + _putenv_s(name.c_str(), value.c_str()); +#else + ::setenv(name.c_str(), value.c_str(), 1); +#endif +} + +void unset_env(const std::string& name) { +#ifdef _WIN32 + _putenv_s(name.c_str(), ""); +#else + ::unsetenv(name.c_str()); +#endif +} + +} // namespace + +struct EmptyTestConfig : public ov::PluginConfig { + std::vector get_supported_properties() const { + std::vector supported_properties; + for (const auto& kv : m_options_map) { + supported_properties.push_back(kv.first); + } + return supported_properties; + } +}; + +struct NotEmptyTestConfig : public ov::PluginConfig { + NotEmptyTestConfig() { + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) 
EXPAND(OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)) + OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") + OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") + OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") + OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "") + OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "") + OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "") + OV_CONFIG_DEBUG_GLOBAL_OPTION(, debug_global_property, 4, "") + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION + } + + NotEmptyTestConfig(const NotEmptyTestConfig& other) : NotEmptyTestConfig() { + m_user_properties = other.m_user_properties; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); + } + } + + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__)) EXPAND(OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__)) EXPAND(OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__)) + OV_CONFIG_RELEASE_OPTION(, bool_property, true, "") + OV_CONFIG_RELEASE_OPTION(, int_property, -1, "") + OV_CONFIG_RELEASE_OPTION(, high_level_property, "", "") + OV_CONFIG_RELEASE_OPTION(, low_level_property, "", "") + OV_CONFIG_RELEASE_INTERNAL_OPTION(, release_internal_property, 1, "") + OV_CONFIG_DEBUG_OPTION(, debug_property, 2, "") + OV_CONFIG_DEBUG_GLOBAL_OPTION(, debug_global_property, 4, "") + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION + + std::vector get_supported_properties() const { + std::vector supported_properties; + for (const auto& kv : m_options_map) { + supported_properties.push_back(kv.first); + } + return supported_properties; + } + + void finalize_impl(const IRemoteContext* context) override { + if (!is_set_by_user(low_level_property)) { + m_low_level_property.value = m_high_level_property.value; + } +#ifdef ENABLE_DEBUG_CAPS + apply_config_options(device_name, test_config_path); +#endif + } + + void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) override { + apply_rt_info_property(high_level_property, model.get_rt_info("runtime_options")); + } + + using ov::PluginConfig::get_option_ptr; + using ov::PluginConfig::is_set_by_user; +}; + +#define OV_CONFIG_LOCAL_OPTION(...) +#define OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) 
\ + ConfigOption NotEmptyTestConfig::m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; + + OV_CONFIG_DEBUG_GLOBAL_OPTION(, debug_global_property, 4, "") + +#undef OV_CONFIG_LOCAL_OPTION +#undef OV_CONFIG_GLOBAL_OPTION + +TEST(plugin_config, can_create_empty_config) { + ASSERT_NO_THROW( + EmptyTestConfig cfg; + ASSERT_EQ(cfg.get_supported_properties().size(), 0); + ); +} + +TEST(plugin_config, can_create_not_empty_config) { +#ifdef ENABLE_DEBUG_CAPS + size_t expected_options_num = 7; +#else + size_t expected_options_num = 5; +#endif + ASSERT_NO_THROW( + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_supported_properties().size(), expected_options_num); + ); +} + +TEST(plugin_config, can_set_get_property) { + NotEmptyTestConfig cfg; + ASSERT_NO_THROW(cfg.get_bool_property()); + ASSERT_EQ(cfg.get_bool_property(), true); + ASSERT_NO_THROW(cfg.set_property(bool_property(false))); + ASSERT_EQ(cfg.get_bool_property(), false); +} + +TEST(plugin_config, throw_for_unsupported_property) { + NotEmptyTestConfig cfg; + ASSERT_ANY_THROW(cfg.get_property(unsupported_property.name())); + ASSERT_ANY_THROW(cfg.set_property(unsupported_property(10.0f))); +} + +TEST(plugin_config, can_direct_access_to_properties) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.m_int_property.value, cfg.get_int_property()); + ASSERT_NO_THROW(cfg.set_user_property(int_property(1))); + ASSERT_EQ(cfg.m_int_property.value, -1); // user property doesn't impact member value until finalize() is called + + cfg.m_int_property.value = 2; + ASSERT_EQ(cfg.get_int_property(), 1); // stil 1 as user property was set previously +} + +TEST(plugin_config, finalization_updates_member) { + NotEmptyTestConfig cfg; + ASSERT_NO_THROW(cfg.set_user_property(bool_property(false))); + ASSERT_EQ(cfg.m_bool_property.value, true); // user property doesn't impact member value until finalize() is called + + cfg.finalize(nullptr, {}); + + ASSERT_EQ(cfg.m_bool_property.value, false); // now the value has changed +} + +TEST(plugin_config, get_property_before_finalization_returns_user_property_if_set) { + NotEmptyTestConfig cfg; + + ASSERT_EQ(cfg.get_bool_property(), true); // default value + ASSERT_EQ(cfg.m_bool_property.value, true); // default value + + cfg.m_bool_property.value = false; // update member directly + ASSERT_EQ(cfg.get_bool_property(), false); // OK, return the class member value as no user property was set + + ASSERT_NO_THROW(cfg.set_user_property(bool_property(true))); + ASSERT_TRUE(cfg.is_set_by_user(bool_property)); + ASSERT_EQ(cfg.get_bool_property(), true); // now user property value is returned + ASSERT_EQ(cfg.m_bool_property.value, false); // but class member is not updated + + cfg.finalize(nullptr, {}); + ASSERT_EQ(cfg.get_bool_property(), cfg.m_bool_property.value); // equal after finalization + ASSERT_FALSE(cfg.is_set_by_user(bool_property)); // and user property is cleared +} + +TEST(plugin_config, finalization_updates_dependant_properties) { + NotEmptyTestConfig cfg; + + cfg.set_user_property(high_level_property("value1")); + ASSERT_TRUE(cfg.is_set_by_user(high_level_property)); + ASSERT_FALSE(cfg.is_set_by_user(low_level_property)); + + cfg.finalize(nullptr, {}); + ASSERT_EQ(cfg.m_high_level_property.value, "value1"); + ASSERT_EQ(cfg.m_low_level_property.value, "value1"); + ASSERT_FALSE(cfg.is_set_by_user(high_level_property)); + ASSERT_FALSE(cfg.is_set_by_user(low_level_property)); +} + +TEST(plugin_config, can_set_property_from_rt_info) { + NotEmptyTestConfig cfg; + + RTMap rt_info = { + {high_level_property.name(), "value1"}, + 
{int_property.name(), 10} // int_property is not applied from rt info + }; + + auto p1 = std::make_shared(); + auto r1 = std::make_shared(p1); + ov::Model m(ov::OutputVector{r1}, ov::ParameterVector{p1}); + m.set_rt_info(rt_info, {"runtime_options"}); + + // default values + ASSERT_EQ(cfg.m_high_level_property.value, ""); + ASSERT_EQ(cfg.m_low_level_property.value, ""); + ASSERT_EQ(cfg.m_int_property.value, -1); + + cfg.finalize(nullptr, &m); + + ASSERT_EQ(cfg.m_high_level_property.value, "value1"); + ASSERT_EQ(cfg.m_low_level_property.value, "value1"); // dependant is updated too + ASSERT_EQ(cfg.m_int_property.value, -1); // still default +} + +TEST(plugin_config, can_copy_config) { + NotEmptyTestConfig cfg1; + + cfg1.m_high_level_property.value = "value1"; + cfg1.m_low_level_property.value = "value2"; + cfg1.m_int_property.value = 1; + cfg1.set_property(bool_property(false)); + + NotEmptyTestConfig cfg2 = cfg1; + ASSERT_EQ(cfg2.m_high_level_property.value, "value1"); + ASSERT_EQ(cfg2.m_low_level_property.value, "value2"); + ASSERT_EQ(cfg2.m_int_property.value, 1); + ASSERT_EQ(cfg2.get_bool_property(), false); // ensure user properties are copied too + + // check that cfg1 modification doesn't impact a copy + cfg1.set_property(high_level_property("value3")); + cfg1.m_int_property.value = 3; + ASSERT_EQ(cfg2.m_high_level_property.value, "value1"); + ASSERT_EQ(cfg2.m_int_property.value, 1); +} + +TEST(plugin_config, set_property_throw_for_non_release_options) { + NotEmptyTestConfig cfg; + ASSERT_ANY_THROW(cfg.set_user_property({release_internal_property(10)}, OptionVisibility::RELEASE)); +#ifdef ENABLE_DEBUG_CAPS + ASSERT_ANY_THROW(cfg.set_user_property({debug_property(10)}, OptionVisibility::RELEASE)); +#endif +} + +TEST(plugin_config, visibility_is_correct) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_option_ptr(release_internal_property.name())->get_visibility(), OptionVisibility::RELEASE_INTERNAL); + ASSERT_EQ(cfg.get_option_ptr(int_property.name())->get_visibility(), OptionVisibility::RELEASE); + +#ifdef ENABLE_DEBUG_CAPS + ASSERT_EQ(cfg.get_option_ptr(debug_property.name())->get_visibility(), OptionVisibility::DEBUG); +#endif +} + +TEST(plugin_config, can_read_from_env_with_debug_caps) { + try { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_int_property(), -1); + set_env("OV_INT_PROPERTY", "10"); + ASSERT_EQ(cfg.get_int_property(), -1); // env is applied after finalization only for build with debug caps + +#ifdef ENABLE_DEBUG_CAPS + set_env("OV_DEBUG_PROPERTY", "20"); + ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option +#endif + + cfg.finalize(nullptr, nullptr); + +#ifdef ENABLE_DEBUG_CAPS + ASSERT_EQ(cfg.get_int_property(), 10); + ASSERT_EQ(cfg.get_debug_property(), 20); +#else + ASSERT_EQ(cfg.get_int_property(), -1); // no effect +#endif + } catch (std::exception&) {} + + unset_env("OV_INT_PROPERTY"); +#ifdef ENABLE_DEBUG_CAPS + unset_env("OV_DEBUG_PROPERTY"); +#endif +} + +TEST(plugin_config, can_read_from_config) { + const std::filesystem::path filepath = test_config_path; + try { + NotEmptyTestConfig cfg; + std::string config = "{\"SOME_DEVICE\":{\"DEBUG_PROPERTY\":\"20\",\"INT_PROPERTY\":\"10\"}}"; + + dump_config(filepath.generic_string(), config); + + ASSERT_EQ(cfg.get_int_property(), -1); // config is applied after finalization only for build with debug caps + #ifdef ENABLE_DEBUG_CAPS + ASSERT_EQ(cfg.get_debug_property(), 2); // same for debug option + #endif + + cfg.finalize(nullptr, nullptr); + #ifdef ENABLE_DEBUG_CAPS + ASSERT_EQ(cfg.get_int_property(), 
10); + ASSERT_EQ(cfg.get_debug_property(), 20); + #else + ASSERT_EQ(cfg.get_int_property(), -1); // no effect + #endif + } catch (std::exception&) { } + + std::filesystem::remove(filepath); +} + +#ifdef ENABLE_DEBUG_CAPS +TEST(plugin_config, can_get_global_property) { + NotEmptyTestConfig cfg; + ASSERT_EQ(cfg.get_debug_global_property(), 4); +} + +TEST(plugin_config, global_property_read_env_on_each_call) { + try { + ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 4); + set_env("OV_DEBUG_GLOBAL_PROPERTY", "10"); + ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 10); + + set_env("OV_DEBUG_GLOBAL_PROPERTY", "20"); + ASSERT_EQ(NotEmptyTestConfig::get_debug_global_property(), 20); + } catch (std::exception&) {} + + unset_env("OV_DEBUG_GLOBAL_PROPERTY"); +} +#endif diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index db53bb0c531b1a..8627f82b28574d 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -210,7 +210,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { ov::internal::exclusive_async_requests.name(), ". Expected only true/false"); } - } else if (key == ov::intel_cpu::lp_transforms_mode.name()) { + } else if (key == ov::internal::enable_lp_transformations.name()) { try { lpTransformsMode = val.as() ? LPTransformsMode::On : LPTransformsMode::Off; } catch (ov::Exception&) { diff --git a/src/plugins/intel_cpu/src/internal_properties.hpp b/src/plugins/intel_cpu/src/internal_properties.hpp index 320539721ca09a..3a806e13a30ea1 100644 --- a/src/plugins/intel_cpu/src/internal_properties.hpp +++ b/src/plugins/intel_cpu/src/internal_properties.hpp @@ -16,11 +16,6 @@ namespace intel_cpu { */ static constexpr Property cpu_runtime_cache_capacity{"CPU_RUNTIME_CACHE_CAPACITY"}; -/** - * @brief Allow low precision transform. - */ -static constexpr Property lp_transforms_mode{"LP_TRANSFORMS_MODE"}; - /** * @brief Enum to define possible snippets mode hints. 
*/ @@ -69,4 +64,4 @@ inline std::istream& operator>>(std::istream& is, SnippetsMode& mode) { static constexpr Property snippets_mode{"SNIPPETS_MODE"}; } // namespace intel_cpu -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp index 6d288d9f5ede8b..fc98a2659f9127 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp @@ -52,7 +52,7 @@ const std::vector cpu_inproperties = { {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), {ov::hint::num_requests.name(), "should be int"}}, {{ov::num_streams.name(), "OFF"}}, - {{ov::hint::enable_cpu_pinning.name(), "OFF"}}, + {{ov::hint::enable_cpu_pinning.name(), "NOT_BOOL"}}, }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, diff --git a/src/plugins/intel_gpu/CMakeLists.txt b/src/plugins/intel_gpu/CMakeLists.txt index 3a847c292c30ba..3322f2f3f1a875 100644 --- a/src/plugins/intel_gpu/CMakeLists.txt +++ b/src/plugins/intel_gpu/CMakeLists.txt @@ -51,6 +51,7 @@ endif() if(ENABLE_GPU_DEBUG_CAPS) add_definitions(-DGPU_DEBUG_CONFIG=1) + add_definitions(-DENABLE_DEBUG_CAPS=1) endif() set(INTEL_GPU_TARGET_OCL_VERSION "200" CACHE STRING "Target version of OpenCL which should be used by GPU plugin") diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index 2a99b9dfef4b76..dabfe8ce09acd6 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -197,7 +197,7 @@ struct network { void set_reuse_variable_mem(bool reuse = false); bool is_reuse_variable_mem() { return _reuse_variable_mem; } - const ExecutionConfig& get_config() const { return _config; } + const ExecutionConfig& get_config() const { return _program->get_config(); } std::shared_ptr get_shape_predictor() { return _shape_predictor; } void set_shape_predictor(std::shared_ptr shape_predictor) { _shape_predictor = shape_predictor; } @@ -210,7 +210,6 @@ struct network { using output_chains_map = std::map>; uint32_t net_id = 0; program::ptr _program; - ExecutionConfig _config; engine& _engine; stream::ptr _stream; std::unique_ptr _memory_pool; diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index c775537a514dde..96bfff48820aaa 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -309,12 +309,11 @@ struct program { std::vector allocating_order; std::unique_ptr pm; std::unique_ptr _layout_optimizer; - bool is_internal; - bool _is_body_program; + bool is_internal = false; + bool _is_body_program = false; // if subgraph can be optimized if it consists of only inputs and corresponding outputs bool _can_be_optimized; std::unique_ptr _impls_cache; - const size_t _impls_cache_capacity = 300; std::shared_ptr _compilation_context; bool _loaded_from_cache = false; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index 869d32825b8761..cec27660baa2b4 100644 --- 
a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -43,7 +43,6 @@ class Plugin : public ov::IPlugin { bool is_metric(const std::string& name) const; ov::Any get_metric(const std::string& name, const ov::AnyMap& arguments) const; - void set_cache_info(const std::shared_ptr& model, ExecutionConfig& properties) const; public: Plugin(); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 51087405f09769..389c7f7e2b54a1 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -79,7 +79,7 @@ struct PerfCounter { class ProgramBuilder final { public: - ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, bool partialBuild = false, + ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, std::shared_ptr task_executor = nullptr, std::shared_ptr compilation_context = nullptr, bool innerProgram = false); @@ -137,8 +137,7 @@ class ProgramBuilder final { void add_primitive(const ov::Node& op, std::shared_ptr prim, std::vector aliases = {}); - bool use_new_shape_infer() const { return allow_new_shape_infer; } - bool requires_new_shape_infer(const std::shared_ptr& op) const; + bool use_new_shape_infer() const { return m_config.get_allow_new_shape_infer(); } bool is_inner_program() const { return m_is_inner_program; } bool is_query_mode() { return queryMode; } @@ -156,8 +155,6 @@ class ProgramBuilder final { std::shared_ptr m_topology; CustomLayerMap m_custom_layers; - bool allow_new_shape_infer = false; - bool queryMode; std::shared_ptr m_task_executor; @@ -171,8 +168,7 @@ class ProgramBuilder final { void prepare_build(); void cleanup_build(); - // TODO(eunsoo): remove createTopolpgyOnly argument and add another method to create topology from ngraph function - std::shared_ptr build(const std::vector>& ops, bool partialBuild = false, bool innerProgram = false); + std::shared_ptr build(const std::vector>& ops, bool innerProgram = false); void CreateSingleLayerPrimitive(const std::shared_ptr& op); }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp index 8440bd6824ef9e..8f7e7ac45bca2e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp @@ -19,7 +19,6 @@ #include #include #include -#include namespace ov::intel_gpu { @@ -37,6 +36,7 @@ class RemoteContextImpl : public ov::IRemoteContext { ov::SoPtr create_tensor(const ov::element::Type& type, const ov::Shape& shape, const ov::AnyMap& params) override; cldnn::engine& get_engine() { return *m_engine; } + const cldnn::engine& get_engine() const { return *m_engine; } ov::intel_gpu::gpu_handle_param get_external_queue() const { return m_external_queue; } cldnn::memory::ptr try_get_cached_memory(size_t hash); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index 2ce1397c44bb68..4367d2062d7325 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -4,12 +4,10 @@ #pragma once #include -#include -#include 
-#include -#include #include +#include +#include "intel_gpu/runtime/execution_config.hpp" namespace ov::intel_gpu { // Verbose log levels: @@ -39,145 +37,65 @@ enum class LogLevel : int8_t { TRACE_DETAIL = 4 }; +std::ostream& get_verbose_stream(); } // namespace ov::intel_gpu #ifdef GPU_DEBUG_CONFIG -#if defined(_WIN32) -#define SEPARATE '\\' -#else -#define SEPARATE '/' -#endif -#define GPU_FILENAME (strrchr(__FILE__, SEPARATE) ? strrchr(__FILE__, SEPARATE) + 1 : __FILE__) + +namespace color { +static constexpr const char dark_gray[] = "\033[1;30m"; +static constexpr const char blue[] = "\033[1;34m"; +static constexpr const char purple[] = "\033[1;35m"; +static constexpr const char cyan[] = "\033[1;36m"; +static constexpr const char reset[] = "\033[0m"; +} // namespace color + +static constexpr const char prefix[] = "GPU_Debug: "; + #define GPU_DEBUG_IF(cond) if (cond) +#define GPU_DEBUG_VALUE_OR(debug_value, release_value) debug_value #define GPU_DEBUG_CODE(...) __VA_ARGS__ + #define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) \ - cldnn::instrumentation::mem_usage_logger mem_logger{stage, cldnn::debug_configuration::get_instance()->verbose >= 2}; -#define GPU_DEBUG_PROFILED_STAGE(stage) \ - auto stage_prof = cldnn::instrumentation::profiled_stage(\ - !cldnn::debug_configuration::get_instance()->dump_profiling_data.empty(), *this, stage) + cldnn::instrumentation::mem_usage_logger mem_logger{stage, ov::intel_gpu::ExecutionConfig::get_verbose() >= 2}; + +#define GPU_DEBUG_PROFILED_STAGE(stage) \ + auto stage_prof = cldnn::instrumentation::profiled_stage( \ + !get_config().get_dump_profiling_data_path().empty(), *this, stage) + #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val) stage_prof.set_cache_hit(val) #define GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO(info) stage_prof.add_memalloc_info(info) -#define GPU_DEBUG_LOG_RAW_INT(min_verbose_level) if (cldnn::debug_configuration::get_instance()->verbose >= min_verbose_level) \ - ((cldnn::debug_configuration::get_instance()->verbose_color == 0) ? GPU_DEBUG_LOG_PREFIX : GPU_DEBUG_LOG_COLOR_PREFIX) -#define GPU_DEBUG_LOG_RAW(min_verbose_level) GPU_DEBUG_LOG_RAW_INT(static_cast::type>(min_verbose_level)) -#define GPU_DEBUG_LOG_PREFIX \ - *cldnn::debug_configuration::verbose_stream << cldnn::debug_configuration::prefix << GPU_FILENAME << ":" <<__LINE__ << ":" << __func__ << ": " -#define GPU_DEBUG_LOG_COLOR_PREFIX *cldnn::debug_configuration::verbose_stream << DARK_GRAY << cldnn::debug_configuration::prefix << \ - BLUE << GPU_FILENAME << ":" << PURPLE << __LINE__ << ":" << CYAN << __func__ << ": " << RESET -#define DARK_GRAY "\033[1;30m" -#define BLUE "\033[1;34m" -#define PURPLE "\033[1;35m" -#define CYAN "\033[1;36m" -#define RESET "\033[0m" +#define GPU_DEBUG_LOG_PREFIX ov::intel_gpu::get_verbose_stream() \ + << prefix \ + << std::filesystem::path(__FILE__).filename().generic_string() << ":" \ + << std::to_string(__LINE__) << ":" \ + << __func__ << ": " + +#define GPU_DEBUG_LOG_COLOR_PREFIX ov::intel_gpu::get_verbose_stream() \ + << color::dark_gray << std::string(prefix) \ + << color::blue << std::filesystem::path(__FILE__).filename().generic_string() << ":" \ + << color::purple << std::to_string(__LINE__) << ":" \ + << color::cyan << __func__ << ": " << color::reset + +#define GPU_DEBUG_LOG_RAW_INT(min_verbose_level) if (ov::intel_gpu::ExecutionConfig::get_verbose() >= min_verbose_level) \ + (ov::intel_gpu::ExecutionConfig::get_verbose_color() ? 
GPU_DEBUG_LOG_COLOR_PREFIX : GPU_DEBUG_LOG_PREFIX) + +#define GPU_DEBUG_LOG_RAW(min_verbose_level) \ + GPU_DEBUG_LOG_RAW_INT(static_cast::type>(min_verbose_level)) #else #define GPU_DEBUG_IF(cond) if (0) +#define GPU_DEBUG_VALUE_OR(debug_value, release_value) release_value #define GPU_DEBUG_CODE(...) #define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) #define GPU_DEBUG_PROFILED_STAGE(stage) #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val) #define GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO(info) -#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) *cldnn::debug_configuration::verbose_stream << cldnn::debug_configuration::prefix +#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) ov::intel_gpu::get_verbose_stream() #endif -// Macro below is inserted to avoid unused variable warning when GPU_DEBUG_CONFIG is OFF -#define GPU_DEBUG_GET_INSTANCE(name) auto name = cldnn::debug_configuration::get_instance(); (void)(name); - #define GPU_DEBUG_COUT GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::DISABLED) #define GPU_DEBUG_INFO GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::INFO) #define GPU_DEBUG_LOG GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::LOG) #define GPU_DEBUG_TRACE GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::TRACE) #define GPU_DEBUG_TRACE_DETAIL GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::TRACE_DETAIL) - -namespace cldnn { - -class debug_configuration { -private: - debug_configuration(); - -public: - static const char *prefix; - int help; // Print help messages - int verbose; // Verbose execution - int verbose_color; // Print verbose color - std::string verbose_file; // Verbose log to file - int list_layers; // Print list layers - int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitimive - int print_input_data_shapes; // Print the input data_shape for benchmark_app. - int disable_usm; // Disable usm usage - int disable_onednn; // Disable onednn for discrete GPU (no effect for integrated GPU) - int disable_onednn_opt_post_ops; // Disable onednn optimize post operators - std::string dump_profiling_data; // Enables dump of extended performance profiling to specified dir - int dump_profiling_data_per_iter; // Enables dump of extended performance profiling to specified dir for each iteration - int host_time_profiling; // Enables measurement of scheduling time spend on the host - std::string dump_graphs; // Dump optimized graph - std::string dump_sources; // Dump opencl sources - std::string dump_layers_path; // Enable dumping intermediate buffers and set the dest path - std::vector dump_layers; // Dump intermediate buffers of specified layers only - std::string dry_run_path; // Dry run and serialize execution graph into the specified path - int dump_layers_dst_only; // Dump only output of layers - int dump_layers_result; // Dump result layers - int dump_layers_input; // Dump input layers - int dump_layers_limit_batch; // Limit the size of batch to dump - int dump_layers_raw; // Dump raw data. - int dump_layers_binary; // Dump binary data. 
- int dump_memory_pool; // Dump memory pool status at each iteration - std::set dump_memory_pool_iters; // List of iteration's memory pool status - std::string dump_memory_pool_path; // Enable dumping memory pool status to csv file and set the dest path - int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation - std::vector after_proc; // Start inference after the listed processes - int serialize_compile; // Serialize creating primitives and compiling kernels - std::vector forced_impl_types; // Force implementation type either ocl or onednn - int max_kernels_per_batch; // Maximum number of kernels in a batch during compiling kernels - int impls_cache_capacity; // The maximum number of entries in the kernel impl cache - int enable_sdpa; // Allows to control SDPA decomposition - int disable_async_compilation; // Disable async compilation - int disable_winograd_conv; // Disable Winograd conv - int disable_dynamic_impl; // Disable dynamic implementation - int disable_runtime_buffer_fusing; // Disable runtime buffer fusing - int disable_memory_reuse; // Disable memmory reuse among layers - int disable_build_time_weight_reorder_for_dynamic_nodes; // Disable build time weight reordering for dynamic nodes - int disable_runtime_skip_reorder; // Disable runtime skip reorder - int disable_primitive_fusing; // Disable primitive fusing - int disable_fake_alignment; // Disable fake alignment - int use_usm_host; // Set explicit usm_host usage for network input and output - std::vector dynamic_quantize_layers_without_onednn; // Specify Fully-connected layers which enable Dynamic quantization - int use_kv_cache_compression; // Enable KV-cache compression - int dynamic_quantize_group_size; // Enable Dynamic quantization for fully connected primitive by specified group size - int dynamic_quantize_asym; // Use asymmetric dynamic quantization - int disable_horizontal_fc_fusion; // Disable fc horizontal fusion - int disable_fc_swiglu_fusion; // Disable swiglu fusion to fc - std::set dump_iteration; // Dump n-th execution of network. 
- std::vector load_layers_raw_dump; // List of layers to load dumped raw binary and filenames - static const debug_configuration *get_instance(); - bool is_target_dump_prof_data_iteration(int64_t iteration) const; - std::vector get_filenames_for_matched_layer_loading_binaries(const std::string& id) const; - std::string get_name_for_dump(const std::string& file_name) const; - bool is_layer_for_dumping(const std::string& layerName, bool is_output = false, bool is_input = false) const; - bool is_target_iteration(int64_t iteration) const; - std::string get_matched_from_filelist(const std::vector& file_names, std::string pattern) const; - bool is_layer_name_matched(const std::string& layer_name, const std::string& pattern) const; - - struct memory_preallocation_params { - bool is_initialized = false; - - // Iterations mode preallocation - size_t next_iters_preallocation_count = 0; - size_t max_per_iter_size = 0; - size_t max_per_dim_diff = 0; - - // Percentage mode preallocation - float buffers_preallocation_ratio = 0.0f; - } mem_preallocation_params; - - struct dump_profiling_data_iter_params { - bool is_enabled = false; - int64_t start = 0; - int64_t end = 0; - } dump_prof_data_iter_params; - - static std::ostream* verbose_stream; - static const int DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET = -2; -}; - -} // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 96e09605eaa998..ef62419dffafdd 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -1,177 +1,66 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once +#include "openvino/core/any.hpp" +#include "openvino/runtime/plugin_config.hpp" +#include "intel_gpu/runtime/device_info.hpp" #include "intel_gpu/runtime/internal_properties.hpp" -#include "intel_gpu/runtime/device.hpp" +#include "openvino/runtime/internal_properties.hpp" +#include namespace ov::intel_gpu { -enum class PropertyVisibility { - INTERNAL = 0, - PUBLIC = 1 -}; - -inline std::ostream& operator<<(std::ostream& os, const PropertyVisibility& visibility) { - switch (visibility) { - case PropertyVisibility::PUBLIC: os << "PUBLIC"; break; - case PropertyVisibility::INTERNAL: os << "INTERNAL"; break; - default: os << "UNKNOWN"; break; - } - - return os; -} - -class BaseValidator { -public: - using Ptr = std::shared_ptr; - virtual ~BaseValidator() = default; - virtual bool is_valid(const ov::Any& v) const = 0; -}; - -class FuncValidator : public BaseValidator { -public: -explicit FuncValidator(std::function func) : m_func(func) { } - bool is_valid(const ov::Any& v) const override { - return m_func(v); - } - -private: - std::function m_func; -}; - -// PropertyTypeValidator ensures that value can be converted to given property type -template -class PropertyTypeValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - try { - v.as(); - return true; - } catch (ov::Exception&) { - return false; - } - } -}; - -class ExecutionConfig { -public: +struct ExecutionConfig : public ov::PluginConfig { ExecutionConfig(); ExecutionConfig(std::initializer_list values) : ExecutionConfig() { set_property(ov::AnyMap(values)); } explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); } explicit ExecutionConfig(const 
ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); } - void set_default(); - void set_property(const ov::AnyMap& properties); - void set_user_property(const ov::AnyMap& properties); - Any get_property(const std::string& name) const; - bool is_set_by_user(const std::string& name) const; - bool is_supported(const std::string& name) const; - void register_property_impl(const std::pair& propertiy, PropertyVisibility visibility, BaseValidator::Ptr validator); - - template ::type = true> - void register_property_impl() { } + // Default operators copy the config as is, including the finalized flag state + // If the config needs updates after finalization, the clone() method shall be used, as it resets the finalized flag value. + // That's needed to avoid unexpected option updates, as we call finalization twice: in the transformation pipeline + // and in the cldnn::program c-tor (which is needed mainly to handle unit tests). This second call may cause unwanted side effects + // if the config is not marked as finalized, which could easily happen if the copy operator reset the finalization flag + ExecutionConfig(const ExecutionConfig& other); + ExecutionConfig& operator=(const ExecutionConfig& other); + ExecutionConfig clone() const; - template - void register_property_impl(const std::tuple, ValueT>& property, PropertyInitializer&&... properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared>()); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } + void finalize(cldnn::engine& engine); + using ov::PluginConfig::finalize; - template - typename std::enable_if::value, void>::type - register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property))); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_LOCAL_GETTER(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_GLOBAL_GETTER(__VA_ARGS__)) + #include "intel_gpu/runtime/options.inl" - template - typename std::enable_if, ValidatorT>::value, void>::type - register_property_impl(const std::tuple, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) { - auto p = std::get<0>(property)(std::get<1>(property)); - auto v = std::dynamic_pointer_cast(std::make_shared(std::get<2>(property))); - register_property_impl(std::move(p), visibility, std::move(v)); - register_property_impl(properties...); - } + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION - template - void register_property(PropertyInitializer&&... properties) { - register_property_impl(properties...); - } + const ov::AnyMap& get_user_properties() const { return m_user_properties; } - template - util::EnableIfAllStringAny set_property(Properties&&... properties) { - set_property(ov::AnyMap{std::forward(properties)...}); - } - - template - util::EnableIfAllStringAny set_user_property(Properties&&...
properties) { - set_user_property(ov::AnyMap{std::forward(properties)...}); - } - - template - bool is_set_by_user(const ov::Property& property) const { - return is_set_by_user(property.name()); - } - - template - T get_property(const ov::Property& property) const { - return get_property(property.name()).template as(); - } +protected: + void finalize_impl(const IRemoteContext* context) override; + void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) override; + void apply_rt_info(const IRemoteContext* context, const ov::RTMap& rt_info, bool is_llm); + const ov::PluginConfig::OptionsDesc& get_options_desc() const override; void apply_user_properties(const cldnn::device_info& info); - - // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call - // So this method should be called after setting all user properties, but before apply_user_properties() call. - void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info, const bool is_llm); - - std::string to_string() const; - -protected: void apply_hints(const cldnn::device_info& info); void apply_execution_hints(const cldnn::device_info& info); void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); - void apply_debug_options(const cldnn::device_info& info); - - template - void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { - if (!is_set_by_user(property)) { - auto rt_info_val = rt_info.find(property.name()); - if (rt_info_val != rt_info.end()) { - set_user_property(property(rt_info_val->second.template as())); - } - } - } -private: - ov::AnyMap internal_properties; - ov::AnyMap user_properties; - - std::map supported_properties; - std::map property_validators; - - bool finalized = false; + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_DECLARE_LOCAL_OPTION(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) 
EXPAND(OV_CONFIG_DECLARE_GLOBAL_OPTION(__VA_ARGS__)) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION }; } // namespace ov::intel_gpu namespace cldnn { using ov::intel_gpu::ExecutionConfig; -} // namespace cldnn +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index 765333e971842e..f037c8b02d36be 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -4,6 +4,7 @@ #pragma once +#include "intel_gpu/runtime/shape_predictor.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" @@ -35,6 +36,40 @@ inline std::ostream& operator<<(std::ostream& os, const QueueTypes& val) { return os; } +enum class DumpFormat : uint8_t { + binary = 0, + text = 1, + text_raw = 2, +}; + +inline std::ostream& operator<<(std::ostream& os, const DumpFormat& val) { + switch (val) { + case DumpFormat::binary: os << "binary"; break; + case DumpFormat::text: os << "text"; break; + case DumpFormat::text_raw: os << "text_raw"; break; + default: os << "unknown"; + } + + return os; +} + +enum class DumpTensors : uint8_t { + all = 0, + in = 1, + out = 2, +}; + +inline std::ostream& operator<<(std::ostream& os, const DumpTensors& val) { + switch (val) { + case DumpTensors::all: os << "all"; break; + case DumpTensors::in: os << "in"; break; + case DumpTensors::out: os << "out"; break; + default: os << "unknown"; + } + + return os; +} + /** * @brief Defines queue type that must be used for model execution */ @@ -45,18 +80,49 @@ static constexpr Property optimize_data{"GPU_OPTIM static constexpr Property allow_static_input_reorder{"GPU_ALLOW_STATIC_INPUT_REORDER"}; static constexpr Property partial_build_program{"GPU_PARTIAL_BUILD"}; static constexpr Property allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"}; -static constexpr Property use_only_static_kernels_for_dynamic_shape{"GPU_USE_ONLY_STATIC_KERNELS_FOR_DYNAMIC_SHAPE"}; -static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; static constexpr Property config_file{"CONFIG_FILE"}; -static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; -static constexpr Property max_dynamic_batch{"DYN_BATCH_LIMIT"}; -static constexpr Property nv12_two_inputs{"GPU_NV12_TWO_INPUTS"}; static constexpr Property buffers_preallocation_ratio{"GPU_BUFFERS_PREALLOCATION_RATIO"}; static constexpr Property max_kernels_per_batch{"GPU_MAX_KERNELS_PER_BATCH"}; static constexpr Property use_onednn{"USE_ONEDNN"}; +static constexpr Property help{"HELP"}; +static constexpr Property verbose{"VERBOSE"}; +static constexpr Property verbose_color{"VERBOSE_COLOR"}; +static constexpr Property debug_config{"DEBUG_CONFIG"}; +static constexpr Property log_to_file{"GPU_LOG_TO_FILE"}; +static constexpr Property disable_usm{"GPU_DISABLE_USM"}; +static constexpr Property disable_onednn_post_ops_opt{"DISABLE_ONEDNN_POST_OPS_OPT"}; +static constexpr Property dump_graphs_path{"GPU_DUMP_GRAPHS_PATH"}; +static constexpr Property dump_profiling_data_path{"GPU_DUMP_PROFILING_DATA_PATH"}; +static constexpr Property dump_profiling_data_per_iter{"GPU_DUMP_PROFILING_DATA_PER_ITER"}; +static constexpr Property 
dump_sources_path{"GPU_DUMP_SOURCES_PATH"}; +static constexpr Property dump_tensors_path{"GPU_DUMP_TENSORS_PATH"}; +static constexpr Property dry_run_path{"GPU_DRY_RUN_PATH"}; +static constexpr Property dump_tensors{"DUMP_TENSORS"}; +static constexpr Property, ov::PropertyMutability::RW> dump_layer_names{"GPU_DUMP_LAYER_NAMES"}; +static constexpr Property dump_tensors_format{"DUMP_TENSORS_FORMAT"}; +static constexpr Property dump_memory_pool_path{"GPU_DUMP_MEMORY_POOL_PATH"}; +static constexpr Property dump_memory_pool{"GPU_DUMP_MEMORY_POOL"}; +static constexpr Property dump_batch_limit{"GPU_DUMP_BATCH_LIMIT"}; +static constexpr Property, ov::PropertyMutability::RW> dump_iterations{"GPU_DUMP_ITERATIONS"}; +static constexpr Property host_time_profiling{"HOST_TIME_PROFILING"}; +static constexpr Property impls_cache_capacity{"IMPLS_CACHE_CAPACITY"}; +static constexpr Property disable_async_compilation{"DISABLE_ASYNC_COMPILATION"}; +static constexpr Property disable_runtime_buffer_fusing{"DISABLE_RUNTIME_BUFFER_FUSING"}; +static constexpr Property disable_memory_reuse{"DISABLE_MEMORY_REUSE"}; +static constexpr Property disable_post_ops_fusions{"DISABLE_POST_OPS_FUSIONS"}; +static constexpr Property disable_horizontal_fc_fusion{"DISABLE_HORIZONTAL_FC_FUSION"}; +static constexpr Property disable_fc_swiglu_fusion{"DISABLE_FC_SWIGLU_FUSION"}; +static constexpr Property disable_fake_alignment{"DISABLE_FAKE_ALIGNMENT"}; +static constexpr Property disable_runtime_skip_reorder{"DISABLE_RUNTIME_SKIP_REORDER"}; +static constexpr Property usm_policy{"USM_POLICY"}; +static constexpr Property asym_dynamic_quantization{"ASYM_DYNAMIC_QUANTIZATION"}; +static constexpr Property shape_predictor_settings{"SHAPE_PREDICTOR_SETTINGS"}; +static constexpr Property, ov::PropertyMutability::RW> load_dump_raw_binary{"LOAD_DUMP_RAW_BINARY"}; +static constexpr Property, ov::PropertyMutability::RW> start_after_processes{"START_AFTER_PROCESSES"}; + } // namespace ov::intel_gpu namespace cldnn { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp index 87e0b2990b7902..1d27eaf63efb86 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp @@ -4,6 +4,7 @@ #pragma once +#include "intel_gpu/runtime/execution_config.hpp" #include "layout.hpp" #include "memory_caps.hpp" #include "utils.hpp" @@ -110,8 +111,6 @@ struct padded_pool_comparer { // - Improve memory consumption class memory_pool { - memory_pool(); - memory_ptr alloc_memory(const layout& layout, allocation_type type, bool reset = true); static bool has_conflict(const memory_set&, const std::unordered_set&, uint32_t network_id); @@ -119,9 +118,10 @@ class memory_pool { std::map, padded_pool_comparer> _padded_pool; std::multimap _no_reusable_pool; engine* _engine; + const ExecutionConfig& _config; public: - explicit memory_pool(engine& engine); + explicit memory_pool(engine& engine, const ExecutionConfig& config); ~memory_pool(); memory_ptr get_memory(const layout& layout, const primitive_id& id, diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl new file mode 100644 index 00000000000000..14bd79a9c60e99 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl @@ -0,0 +1,83 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Namespace, property name, 
default value, [validator], description +OV_CONFIG_RELEASE_OPTION(ov, enable_profiling, false, "Enable profiling for the plugin") +OV_CONFIG_RELEASE_OPTION(ov::device, id, "0", "ID of the current device") +OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty") +OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Number of streams to be used for inference") +OV_CONFIG_RELEASE_OPTION(ov, compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that support parallelism") +OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, + [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; }, "Model floating-point inference precision. Supported values: { f16, f32, undefined }") +OV_CONFIG_RELEASE_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact the number of threads used for model compilation and inference as well as device queue settings") +OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact the number of streams, auto batching, etc") +OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy") +OV_CONFIG_RELEASE_OPTION(ov::hint, num_requests, 0, "Hint that provides the number of requests populated by the application") +OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_pinning, false, "Controls if CPU threads are pinned to the cores or not") +OV_CONFIG_RELEASE_OPTION(ov::hint, enable_cpu_reservation, false, "CPU reservation means reserving CPUs which will not be used by other plugins or compiled models") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, host_task_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls core types used for host tasks") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM, "Low-level hint that controls the queue throttle level") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls the queue priority property") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable Winograd convolution implementation if available") +OV_CONFIG_RELEASE_OPTION(ov::internal, exclusive_async_requests, false, "") +OV_CONFIG_RELEASE_OPTION(ov::internal, query_model_ratio, 1.0f, "") +OV_CONFIG_RELEASE_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache") +OV_CONFIG_RELEASE_OPTION(ov, cache_encryption_callbacks, EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model") +OV_CONFIG_RELEASE_OPTION(ov::hint, dynamic_quantization_group_size, 0, "") +OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint,
enable_kernels_reuse, false, "") +OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching") +OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, 0.f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision") +OV_CONFIG_RELEASE_OPTION(ov::internal, enable_lp_transformations, false, "Enable/Disable the low precision transformations set") +OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file") + +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, shape_predictor_settings, {10, 16 * 1024, 2, 1.1f}, "Preallocation settings") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. May be in-order or out-of-order") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, optimize_data, false, "Enable/Disable data flow optimizations for cldnn::program") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, enable_memory_pool, true, "Enable/Disable memory pool usage") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_static_input_reorder, false, "Controls if weight tensors can be reordered during model compilation to a more friendly layout for a specific kernel") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, custom_outputs, std::vector{}, "List of output primitive names") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, force_implementations, ImplForcingMap{}, "Specifies the list of forced implementations for the primitives") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, partial_build_program, false, "Early exit from model compilation process which allows faster execution graph dumping") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, allow_new_shape_infer, false, "Switch between new and old shape inference flow. Shall be removed soon") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, use_onednn, false, "Enable/Disable usage of oneDNN for a particular model/platform") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, max_kernels_per_batch, 8, "Controls how many kernels we combine into a batch for more efficient OCL compilation") +OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, impls_cache_capacity, 300, "Controls the capacity of the LRU implementations cache that is created for each program object for dynamic models") + +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, help, false, "Print help message for all config options") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, verbose, 0, "Enable logging for debugging purposes. The higher the value, the more verbose the output.
0 - Disabled, 4 - Maximum verbosity") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, verbose_color, true, "Enable coloring for verbose logs") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, disable_usm, false, "Disable USM memory allocations and use only cl_mem") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, usm_policy, 0, "0: default, 1: use usm_host, 2: do not use usm_host") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_batch_limit, std::numeric_limits::max(), "Max number of batch elements to dump") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, dump_profiling_data_per_iter, false, "Save profiling data w/o per-iteration aggregation") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, log_to_file, "", "Save verbose log to specified file") +OV_CONFIG_DEBUG_GLOBAL_OPTION(ov::intel_gpu, debug_config, "", "Path to debug config in json format") + +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_onednn_post_ops_opt, false, "Disable optimization pass for onednn post-ops") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_profiling_data_path, "", "Save csv file with per-stage and per-primitive profiling data to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_graphs_path, "", "Save intermediate graph representations during model compilation pipeline to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_sources_path, "", "Save generated sources for each kernel to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_path, "", "Save intermediate in/out tensors of each primitive to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors, ov::intel_gpu::DumpTensors::all, "Tensor types to dump. Supported values: all, inputs, outputs") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_tensors_format, ov::intel_gpu::DumpFormat::text, "Format of the tensors dump. 
Supported values: binary, text, text_raw") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_layer_names, std::vector{}, "Activate dump for specified layers only") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool_path, "", "Save csv file with memory pool info to specified folder") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_memory_pool, false, "Enable verbose output for memory pool") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dump_iterations, std::set{}, "Space-separated list of iterations where other dump options should be enabled") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, host_time_profiling, 0, "Measure and print host time spent from the beginning of the inference until all host work is done and the plugin is ready to block the thread on the final clFinish() call") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_async_compilation, false, "Disable the feature that asynchronously prepares static-shaped implementations for the primitives with shape-agnostic kernels selected during compilation") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_buffer_fusing, false, "Disable runtime in-place optimizations for operations like concat and crop") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_post_ops_fusions, false, "Disable fusions of operations as post-ops/fused-ops") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_horizontal_fc_fusion, false, "Disable pass which merges QKV projections into a single MatMul") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fc_swiglu_fusion, false, "Disable pass which merges FC and SwiGLU ops") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_fake_alignment, false, "Disable fake alignment feature which tries to keep GPU-friendly memory alignment for arbitrary tensor shapes") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_memory_reuse, false, "Disable memory reuse for activation tensors") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, disable_runtime_skip_reorder, false, "Disable skip reorder optimization applied at runtime") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, asym_dynamic_quantization, false, "Enforce asymmetric mode for dynamically quantized activations") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, load_dump_raw_binary, std::vector{}, "List of layers to load raw binary") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, start_after_processes, std::vector{}, "Start inference after specified list of processes") +OV_CONFIG_DEBUG_OPTION(ov::intel_gpu, dry_run_path, "", "Enables a mode which partially compiles a model and stores the runtime model in the specified directory") diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp index b3eca9a78fba89..7d0dba80e7c017 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/profiling.hpp @@ -9,6 +9,7 @@ #include #include #include +#include "intel_gpu/runtime/execution_config.hpp" #if defined(_WIN32) #ifndef NOMINMAX @@ -157,7 +158,7 @@ class profiled_stage { , _obj(obj) , _stage(stage) { GPU_DEBUG_IF(profiling_enabled) { - _per_iter_mode = cldnn::debug_configuration::get_instance()->dump_profiling_data_per_iter != 0; + _per_iter_mode = GPU_DEBUG_VALUE_OR(ov::intel_gpu::ExecutionConfig::get_dump_profiling_data_per_iter(), false); _start = std::chrono::high_resolution_clock::now(); } } diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp index 469c676b4b0311..a246c6d1fdda8f 100644 ---
a/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp @@ -14,23 +14,20 @@ class engine; struct ShapePredictor { public: - using Ptr = std::shared_ptr; - ShapePredictor(const engine* engine, float buffers_preallocation_ratio) - : _engine(engine) - , _buffers_preallocation_ratio(buffers_preallocation_ratio) { - static_assert(_max_deque_size >= 2, "[GPU] Deque is supposed to contain at least 2 elements for prediction"); - } + struct Settings { + // Iterations mode preallocation + size_t next_iters_preallocation_count = 10; + size_t max_per_iter_size = 16 * 1024; + size_t max_per_dim_diff = 2; - ShapePredictor(const engine* engine, - size_t next_iters_preallocation_count, - size_t max_per_iter_size, - size_t max_per_dim_diff, - float buffers_preallocation_ratio) + // Percentage mode preallocation + float buffers_preallocation_ratio = 1.1f; + }; + + using Ptr = std::shared_ptr; + ShapePredictor(const engine* engine, const Settings& settings) : _engine(engine) - , _next_iters_preallocation_count(next_iters_preallocation_count) - , _max_per_iter_size(max_per_iter_size) - , _max_per_dim_diff(max_per_dim_diff) - , _buffers_preallocation_ratio(buffers_preallocation_ratio) { + , _settings(settings) { static_assert(_max_deque_size >= 2, "[GPU] Deque is supposed to contain at least 2 elements for prediction"); } @@ -73,13 +70,7 @@ struct ShapePredictor { std::map> _shapes_info; const engine* _engine; - // Iterations mode preallocation - const size_t _next_iters_preallocation_count = 10; - const size_t _max_per_iter_size = 16 * 1024; // 16KB => maximum preallocation size is 16KB * 10iters = 160KB - const size_t _max_per_dim_diff = 2; - - // Percentage mode preallocation - const float _buffers_preallocation_ratio = 1.0f; + const Settings _settings; }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp index 0f69379fa8e217..95d060ef0b760e 100644 --- a/src/plugins/intel_gpu/src/graph/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp @@ -149,7 +149,7 @@ void broadcast_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index d9f6ebd8b71872..fec36fb9ff5c81 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -278,7 +278,7 @@ void crop_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
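The ShapePredictor rework shown above folds the four loose preallocation constructor arguments into a single Settings struct whose defaults mirror the {10, 16 * 1024, 2, 1.1f} value registered for shape_predictor_settings in options.inl. A minimal usage sketch under that reading; the helper function, the engine pointer and the overridden ratio are illustrative only and not part of the patch:

#include <memory>
#include "intel_gpu/runtime/shape_predictor.hpp"

std::shared_ptr<cldnn::ShapePredictor> make_predictor(const cldnn::engine* engine) {
    // Field defaults come from ShapePredictor::Settings: next_iters_preallocation_count = 10,
    // max_per_iter_size = 16 * 1024, max_per_dim_diff = 2, buffers_preallocation_ratio = 1.1f
    cldnn::ShapePredictor::Settings settings;
    settings.buffers_preallocation_ratio = 1.2f;  // hypothetical override of the percentage-mode ratio
    // The constructor now takes the whole Settings bundle instead of four separate arguments
    return std::make_shared<cldnn::ShapePredictor>(engine, settings);
}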
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = _network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout()); diff --git a/src/plugins/intel_gpu/src/graph/debug_helper.cpp b/src/plugins/intel_gpu/src/graph/debug_helper.cpp index b69d10e137010e..3cd7767d5fe21b 100644 --- a/src/plugins/intel_gpu/src/graph/debug_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/debug_helper.cpp @@ -3,6 +3,9 @@ // #include "debug_helper.hpp" +#include +#include "intel_gpu/runtime/execution_config.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/util/file_util.hpp" #ifdef GPU_DEBUG_CONFIG @@ -44,8 +47,7 @@ template void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) { auto&& size = mem->get_layout().get_tensor(); - GPU_DEBUG_GET_INSTANCE(debug_config); - auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1); + auto batch_size = std::max(std::min(ExecutionConfig::get_dump_batch_limit(), size.batch[0]), 1); tensor tmp_size(size); tmp_size.batch[0] = batch_size; if (tmp_size == size) { @@ -121,8 +123,7 @@ void unpack(cldnn::data_types type, uint8_t input, int8_t &v0, int8_t &v1) { void dump_i4u4(cldnn::data_types type, memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) { auto&& size = mem->get_layout().get_tensor(); - GPU_DEBUG_GET_INSTANCE(debug_config); - auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1); + auto batch_size = std::max(std::min(ExecutionConfig::get_dump_batch_limit(), size.batch[0]), 1); tensor tmp_size(size); tmp_size.batch[0] = batch_size; if (tmp_size == size) { @@ -160,11 +161,16 @@ void dump_i4u4(cldnn::data_types type, memory::ptr mem, stream& stream, std::ofs file_stream << buffer.str(); } -void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std::string layerName, bool dump_raw) { - std::cout << "Dump " << (dump_raw ? 
"raw " : "") << layerName << std::endl; - GPU_DEBUG_GET_INSTANCE(debug_config); - std::string filename = debug_config->get_name_for_dump(layerName); - filename = debug_config->dump_layers_path + filename + ".txt"; +std::string get_name_for_dump(const std::string& file_name) { + std::string filename = file_name; + std::replace(filename.begin(), filename.end(), '\\', '_'); + std::replace(filename.begin(), filename.end(), '/', '_'); + std::replace(filename.begin(), filename.end(), ' ', '_'); + std::replace(filename.begin(), filename.end(), ':', '_'); + return filename; +} + +void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std::string filename, bool dump_raw) { std::ofstream file_stream(filename); if (!mem) { file_stream << "Empty" << std::endl; @@ -195,9 +201,7 @@ void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std std::cout << "Dump for this data type is not supported: " << dt_to_str(mem_dt) << std::endl; } -} // namespace - -static std::string get_file_path_for_binary_dump(cldnn::layout layout, std::string name) { +std::string get_file_path_for_binary_dump(cldnn::layout layout, const std::string& name, const std::string& dump_layers_path) { std::string filename; std::string data_type = ov::element::Type(layout.data_type).get_type_name(); std::string format = layout.format.to_string(); @@ -207,29 +211,128 @@ static std::string get_file_path_for_binary_dump(cldnn::layout layout, std::stri tensor += ("_" + to_string(dims[r])); } -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_GET_INSTANCE(debug_config); - std::string layer_name = debug_config->get_name_for_dump(name); - filename = debug_config->dump_layers_path + layer_name - + "__" + data_type + "_" + tensor + "__" + format + ".bin"; -#endif + std::string layer_name = get_name_for_dump(name); + filename = dump_layers_path + layer_name + "__" + data_type + "_" + tensor + "__" + format + ".bin"; return filename; } +bool is_target_iteration(int64_t iteration, const std::set dump_iteration) { + if (iteration < 0) + return true; + + if (dump_iteration.empty()) + return true; + + if (dump_iteration.find(iteration) == std::end(dump_iteration)) + return false; + + return true; +} + +std::string get_matched_from_filelist(const std::vector& file_names, std::string pattern) { + for (const auto& file : file_names) { + auto found = file.find(pattern); + if (found != std::string::npos) { + return file; + } + } + + return std::string(); +} + +bool is_layer_name_matched(const std::string& layer_name, const std::string& pattern) { + auto upper_layer_name = std::string(layer_name.length(), '\0'); + std::transform(layer_name.begin(), layer_name.end(), upper_layer_name.begin(), ::toupper); + auto upper_pattern = std::string(pattern.length(), '\0'); + std::transform(pattern.begin(), pattern.end(), upper_pattern.begin(), ::toupper); + + // Check pattern from exec_graph + size_t pos = upper_layer_name.find(':'); + auto upper_exec_graph_name = upper_layer_name.substr(pos + 1, upper_layer_name.size()); + if (upper_exec_graph_name.compare(upper_pattern) == 0) { + return true; + } + + // Check pattern with regular expression + std::regex re(upper_pattern); + return std::regex_match(upper_layer_name, re); +} + +bool is_layer_for_dumping(const ExecutionConfig& config, const std::string& layer_name, bool is_output = false, bool is_input = false) { + bool dump_out = config.get_dump_tensors() == ov::intel_gpu::DumpTensors::all || config.get_dump_tensors() == ov::intel_gpu::DumpTensors::out; + bool dump_in = 
config.get_dump_tensors() == ov::intel_gpu::DumpTensors::all || config.get_dump_tensors() == ov::intel_gpu::DumpTensors::in; + // Dump result layer + if (is_output == true && dump_out && (layer_name.find("constant:") == std::string::npos)) + return true; + + // Dump all layers + if (config.get_dump_layer_names().empty() && !dump_out && !dump_in) + return true; + + // Dump input layers + size_t pos = layer_name.find(':'); + auto type = layer_name.substr(0, pos); + if (is_input == true && type == "parameter" && dump_in) + return true; + + auto dump_layers = config.get_dump_layer_names(); + + auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){ + return is_layer_name_matched(layer_name, dl); + }); + return (iter != dump_layers.end()); +} + +std::vector get_filenames_for_matched_layer_loading_binaries(const ExecutionConfig& config, const std::string& id) { + std::vector file_names; + if (config.get_load_dump_raw_binary().empty()) + return file_names; + + for (const auto& load_layer : config.get_load_dump_raw_binary()) { + size_t file = load_layer.rfind(":"); + if (file != std::string::npos) { + if (id == load_layer.substr(0, file)) { + auto file_name_str = load_layer.substr(file + 1); + size_t head = 0; + size_t found = 0; + do { + found = file_name_str.find(",", head); + if (found != std::string::npos) + file_names.push_back(file_name_str.substr(head, (found - head))); + else + file_names.push_back(file_name_str.substr(head)); + + head = found+1; + GPU_DEBUG_LOG << " Layer name loading raw dump : " << load_layer.substr(0, file) << " / the dump file : " + << file_names.back() << std::endl; + } while (found != std::string::npos); + + return file_names; + } + } + } + + return file_names; +} + + +} // namespace + NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) : m_inst(inst) , m_stream(inst.get_network().get_stream()) , m_network(inst.get_network()) , m_program(inst.get_network().get_program().get()) , m_iter(m_network.iteration) { + const auto& config = m_network.get_config(); // Load binary dump for input layers - if (!debug_config->load_layers_raw_dump.empty()) { + if (!config.get_load_dump_raw_binary().empty()) { const std::string layer_name = m_inst.id(); - auto files = debug_config->get_filenames_for_matched_layer_loading_binaries(layer_name); + auto files = get_filenames_for_matched_layer_loading_binaries(config, layer_name); if (!files.empty()) { if (m_inst.is_input()) { // Loading binary dumps for output tensors of input-layers : only one output exists or index(dstN) exists - auto dump_file = debug_config->get_matched_from_filelist(files, "_dst0__"); + auto dump_file = get_matched_from_filelist(files, "_dst0__"); OPENVINO_ASSERT((files.size() == 1 || dump_file.length() != 0), "Unexpected binary dump for input layer"); OPENVINO_ASSERT(files.size() == m_inst.outputs_memory_count(), "Mis-match dump file count"); @@ -238,7 +341,7 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) auto dump_file = files[0]; if (files.size() > 1 || m_inst.outputs_memory_count() != 1) { std::string pattern = "_dst" + std::to_string(i) + "__"; - dump_file = debug_config->get_matched_from_filelist(files, pattern); + dump_file = get_matched_from_filelist(files, pattern); } OPENVINO_ASSERT((dump_file.length() > 0), "Could not find expected pattern '_dst[N]__' for binary dump"); GPU_DEBUG_COUT << " Load binary dump : " << dump_file << " for " << layer_name << std::endl; @@ -253,18 +356,18 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) 
output_mem->copy_from(m_stream, static_cast(&bin[0]), true); } } else { - auto check_dst = debug_config->get_matched_from_filelist(files, "_dst0__"); + auto check_dst = get_matched_from_filelist(files, "_dst0__"); OPENVINO_ASSERT(check_dst.length() == 0, "Expected to load binaries for inputs of " + layer_name); // Loading input tensors for any layer - auto dump_file = debug_config->get_matched_from_filelist(files, "_src0__"); + auto dump_file = get_matched_from_filelist(files, "_src0__"); OPENVINO_ASSERT(dump_file.length() != 0, "Could not find expected pattern '_src[N]__' for binary dump input : " + layer_name); for (size_t i = 0; i < m_inst.dependencies().size(); i++) { auto dump_file = files[0]; if (files.size() > 1 || m_inst.dependencies().size() != 1) { std::string pattern = "_src" + std::to_string(i) + "__"; - dump_file = debug_config->get_matched_from_filelist(files, pattern); + dump_file = get_matched_from_filelist(files, pattern); } if (dump_file.length() == 0) { GPU_DEBUG_COUT << " Skip loading for input(" << i << ") of " << layer_name << std::endl; @@ -290,11 +393,11 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) } // Dump input buffers of 'inst' - if (debug_config->dump_layers_path.length() > 0) { - const std::string layer_name = inst.id(); + if (config.get_dump_tensors_path().length() > 0) { + const std::string& layer_name = inst.id(); - if (debug_config->is_target_iteration(m_iter) && - debug_config->dump_layers_dst_only == 0 && debug_config->is_layer_for_dumping(layer_name)) { + if (is_target_iteration(m_iter, config.get_dump_iterations()) && + config.get_dump_tensors() != ov::intel_gpu::DumpTensors::out && is_layer_for_dumping(config, layer_name)) { std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + layer_name + ":"; for (size_t i = 0; i < m_inst.dependencies().size(); i++) { std::string name = get_file_prefix() + "_src" + std::to_string(i); @@ -306,24 +409,27 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) auto dep = m_inst.dependencies().at(i); auto input_layout = dep.first->get_output_layout(dep.second); - GPU_DEBUG_IF(debug_config->dump_layers_binary) { + if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary) { // Binary dump : raw - auto filename = get_file_path_for_binary_dump(input_layout, name); + auto filename = get_file_path_for_binary_dump(input_layout, name, config.get_dump_tensors_path()); mem_lock lock(input_mem, m_stream); ov::util::save_binary(filename, lock.data(), input_mem->size()); - GPU_DEBUG_COUT << " Dump layer src : " << layer_name << " to " << filename << std::endl; + GPU_DEBUG_COUT << " Dump layer src : " << layer_name << " to " << filename << std::endl; debug_str_for_bin_load += (filename + ","); } else { + const bool dump_raw = config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::text_raw; + GPU_DEBUG_COUT << " Dump " << (dump_raw ? 
"raw " : "") << name << std::endl; + auto filename = config.get_dump_tensors_path() + get_name_for_dump(name) + ".txt"; log_memory_to_file(input_mem, input_layout, m_stream, name, - debug_config->dump_layers_raw); + dump_raw); } } - if (debug_config->dump_layers_binary && !inst.is_input()) { + if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary && !inst.is_input()) { debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"'; GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl; } @@ -333,13 +439,14 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) NodeDebugHelper::~NodeDebugHelper() { + const auto& config = m_network.get_config(); // Dump output buffers of 'inst' - if (debug_config->dump_layers_path.length() > 0) { + if (config.get_dump_tensors_path().length() > 0) { m_stream.finish(); const std::string layer_name = m_inst.id(); - GPU_DEBUG_IF(debug_config->is_target_iteration(m_iter) && - debug_config->is_layer_for_dumping(layer_name, m_inst.is_output(), m_inst.is_input())) { + if (is_target_iteration(m_iter, config.get_dump_iterations()) && + is_layer_for_dumping(config, layer_name, m_inst.is_output(), m_inst.is_input())) { std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + layer_name + ":"; for (size_t i = 0; i < m_inst.outputs_memory_count(); i++) { @@ -350,22 +457,29 @@ NodeDebugHelper::~NodeDebugHelper() { continue; } - GPU_DEBUG_IF(debug_config->dump_layers_binary) { + if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary) { // Binary dump : raw auto output_layout = m_inst.get_output_layout(i); - auto filename = get_file_path_for_binary_dump(output_layout, name); + auto filename = get_file_path_for_binary_dump(output_layout, name, config.get_dump_tensors_path()); mem_lock lock(output_mem, m_stream); ov::util::save_binary(filename, lock.data(), output_mem->size()); GPU_DEBUG_COUT << " Dump layer dst : " << layer_name << " to " << filename << std::endl; debug_str_for_bin_load += (filename + ","); } else { + const bool dump_raw = config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::text_raw; + GPU_DEBUG_COUT << " Dump " << (dump_raw ? 
"raw " : "") << name << std::endl; + auto filename = config.get_dump_tensors_path() + get_name_for_dump(name) + ".txt"; // Text dump - log_memory_to_file(output_mem, m_inst.get_output_layout(i), m_stream, name, debug_config->dump_layers_raw); + log_memory_to_file(output_mem, + m_inst.get_output_layout(i), + m_stream, + name, + dump_raw); } } - GPU_DEBUG_IF(debug_config->dump_layers_binary && m_inst.is_input()) { + if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary && m_inst.is_input()) { debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"'; GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl;; } @@ -377,13 +491,14 @@ NetworkDebugHelper::NetworkDebugHelper(const network& net) : m_network(net) , m_iter(net.iteration) { auto net_id = m_network.get_id(); - GPU_DEBUG_IF(debug_config->dump_memory_pool > 0) { - auto& iters = debug_config->dump_memory_pool_iters; + const auto& config = m_network.get_config(); + if (config.get_dump_memory_pool()) { + auto& iters = config.get_dump_iterations(); if (iters.empty() || iters.find(m_iter) != iters.end()) { GPU_DEBUG_COUT << "============================================================================" << std::endl; GPU_DEBUG_COUT << "Start network execution (net_id : " << net_id << ", iter :" << m_iter << ")" << std::endl; if (m_iter == 0 && net_id > 0) { - dump_memory_pool(debug_config->dump_memory_pool_path, m_iter); + dump_memory_pool(config.get_dump_memory_pool_path(), m_iter); GPU_DEBUG_COUT << "============================================================================" << std::endl; } } @@ -391,38 +506,14 @@ NetworkDebugHelper::NetworkDebugHelper(const network& net) GPU_DEBUG_TRACE << "============================================================================" << std::endl; GPU_DEBUG_TRACE << "Start network execution (net_id : " << net_id << ", iter :" << m_iter << ")" << std::endl; } - - if (debug_config->list_layers == 1) { - for (auto& inst : m_network._exec_order) { - GPU_DEBUG_COUT << inst->id() << std::endl; - if (inst->get_node().is_type()) { - auto& loop_node = inst->get_node().as(); - for (auto& prim : loop_node.get_body_program()->get_processing_order()) { - GPU_DEBUG_COUT << "\t" << prim->id() << std::endl; - } - } else if (inst->get_node().is_type()) { - auto& cond_node = inst->get_node().as(); - GPU_DEBUG_COUT << "* Branch_True" << std::endl; - for (auto& prim : cond_node.get_branch_true().inner_program->get_processing_order()) { - GPU_DEBUG_COUT << "\t" << prim->id() << std::endl; - } - GPU_DEBUG_COUT << "* Branch_False" << std::endl; - for (auto& prim : cond_node.get_branch_false().inner_program->get_processing_order()) { - GPU_DEBUG_COUT << "\t" << prim->id() << std::endl; - } - } - } - - if (!m_network.is_internal()) - exit(0); - } } NetworkDebugHelper::~NetworkDebugHelper() { auto prog = m_network.get_program().get(); auto net_id = m_network.get_id(); + const auto& config = prog->get_config(); // print '-data_shape' option for benchmark_app - if (debug_config->print_input_data_shapes == 1) { + if (config.get_verbose() >= 4) { std::stringstream data_shape_str; auto add_string = [&data_shape_str](std::string str) { data_shape_str << ((data_shape_str.rdbuf()->in_avail() == 0) ? 
" -data_shape " : ",") << str; @@ -443,7 +534,7 @@ NetworkDebugHelper::~NetworkDebugHelper() { << data_shape_str.str() << std::endl; } - if (!debug_config->dump_graphs.empty() && debug_config->is_target_iteration(m_iter)) { + if (!config.get_dump_graphs_path().empty() && is_target_iteration(m_iter, config.get_dump_iterations())) { auto get_fixed_str = [](int value, int length = 2) -> std::string { std::ostringstream ss; ss << std::setw(length) << std::setfill('0') << std::to_string(value); @@ -459,10 +550,10 @@ NetworkDebugHelper::~NetworkDebugHelper() { } } - if (debug_config->dump_memory_pool > 0) { - auto& iters = debug_config->dump_memory_pool_iters; + if (config.get_dump_memory_pool()) { + auto& iters = config.get_dump_iterations(); if (iters.empty() || iters.find(m_iter) != iters.end()) { - dump_memory_pool(debug_config->dump_memory_pool_path, m_iter); + dump_memory_pool(config.get_dump_memory_pool_path(), m_iter); GPU_DEBUG_COUT << "============================================================================" << std::endl; } } diff --git a/src/plugins/intel_gpu/src/graph/debug_helper.hpp b/src/plugins/intel_gpu/src/graph/debug_helper.hpp index 61572433cea494..e2137733cf73fc 100644 --- a/src/plugins/intel_gpu/src/graph/debug_helper.hpp +++ b/src/plugins/intel_gpu/src/graph/debug_helper.hpp @@ -39,8 +39,6 @@ class NodeDebugHelper { const network& m_network; const program* m_program; const size_t m_iter; - - const debug_configuration* debug_config = cldnn ::debug_configuration ::get_instance(); }; class NetworkDebugHelper { @@ -52,8 +50,6 @@ class NetworkDebugHelper { void dump_memory_pool(std::string dump_path, int64_t curr_iter) const; const network& m_network; const size_t m_iter; - - const debug_configuration* debug_config = cldnn ::debug_configuration ::get_instance(); }; #define NETWORK_DEBUG(net) NetworkDebugHelper __network_debug_helper(net) diff --git a/src/plugins/intel_gpu/src/graph/eltwise.cpp b/src/plugins/intel_gpu/src/graph/eltwise.cpp index 7805cb575aed9a..581f3f08dc120d 100644 --- a/src/plugins/intel_gpu/src/graph/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/eltwise.cpp @@ -393,7 +393,7 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) : ""); } } else { - bool use_new_shape_infer = network.get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + bool use_new_shape_infer = network.get_config().get_allow_new_shape_infer(); auto input0_pshape = node.get_input_pshape(0); for (size_t i = 1; i < inputs_count; ++i) { diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index 0c03916d45efcb..cc3956393c4103 100644 --- a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -250,8 +250,7 @@ kernel_impl_params fully_connected_inst::get_fake_aligned_params(kernel_impl_par } } - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_fake_alignment) { + GPU_DEBUG_IF(orig_impl_param.get_program().get_config().get_disable_fake_alignment()) { can_apply_fake_alignment = false; } diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp index 2020f10015f916..7d503faf8a2255 100644 --- a/src/plugins/intel_gpu/src/graph/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/gather.cpp @@ -150,7 +150,7 @@ void gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. 
In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index 333afe18775e0b..cffb4457000380 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -161,7 +161,7 @@ bool add_required_reorders::test_format(cldnn::program_node& node, format reques } void add_required_reorders::run(program& p) { - bool optimize_data = p.get_config().get_property(ov::intel_gpu::optimize_data); + bool optimize_data = p.get_config().get_optimize_data(); auto usr_itr = p.get_processing_order().begin(); while (usr_itr != p.get_processing_order().end()) { auto& usr = *usr_itr++; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp index 4c1b1008434144..ef4300c33bfea1 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/build_implementations.cpp @@ -11,7 +11,7 @@ using namespace cldnn; void build_implementations::run(program& p) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "pass::build_implementations"); - if (p.get_config().get_property(ov::intel_gpu::partial_build_program)) { + if (p.get_config().get_partial_build_program()) { return; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp index eec55260e2ea4b..6c38bce8dd9e31 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp @@ -17,7 +17,7 @@ using namespace cldnn; namespace cldnn { void graph_initializations::set_outputs(program& p) { - auto custom_outputs = p.get_config().get_property(ov::intel_gpu::custom_outputs); + auto custom_outputs = p.get_config().get_custom_outputs(); if (!custom_outputs.empty()) { for (auto const& output : custom_outputs) { OPENVINO_ASSERT(p.has_node(output), "not found custom output node in current cldnn::program: ", output); @@ -37,7 +37,7 @@ void graph_initializations::set_outputs(program& p) { void graph_initializations::run(program& p) { set_outputs(p); - auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations); + auto forcing_map = p.get_config().get_force_implementations(); for (auto& kv : forcing_map) { if (p.has_node(kv.first)) { p.get_node(kv.first).set_forced_impl_type(kv.second.impl_type); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp index 4d21869dfa3953..2786a9e8e85b99 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp @@ -43,10 +43,6 @@ void post_optimize_weights::optimize_weights(T& node, 
program& p) { return; if (impl->is_dynamic()) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_build_time_weight_reorder_for_dynamic_nodes) { - return; - } // TODO: To relax current limitation w.r.t the future optimization of weight reorder process // In dynamic shape, selected weight format can change in runtime. However reordering blocked format to blocked format is not fully verified yet. // So we need to enable other primitives such as convolution with verifying reorder b/w the possible layouts diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 03e4af4d16359b..1c79ab27101808 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -80,8 +80,7 @@ bool concat_in_place_optimization::match(const program_node& concat_node, if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph()) return false; bool do_runtime_buffer_fusing = true; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(concat_node.get_config().get_disable_runtime_buffer_fusing()) { do_runtime_buffer_fusing = false; } @@ -522,8 +521,7 @@ bool crop_in_place_optimization::match(const program_node& node, return false; if (node.get_users().size() > 0) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing && node.is_dynamic()) { + GPU_DEBUG_IF(node.get_config().get_disable_runtime_buffer_fusing() && node.is_dynamic()) { return false; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index ce5333f95a1b59..622b7cff4101ad 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -56,6 +56,9 @@ using namespace cldnn; void prepare_primitive_fusing::run(program& p) { + GPU_DEBUG_IF(p.get_config().get_disable_post_ops_fusions()) + return; + fuse_reorders(p); remove_redundant_reshape(p); fuse_swiglu(p); @@ -165,10 +168,7 @@ void prepare_primitive_fusing::fuse_reorders(program &p) { } void prepare_primitive_fusing::fuse_swiglu(program &p) { - GPU_DEBUG_GET_INSTANCE(debug_config); - bool disable_fc_swiglu_fusion = false; - GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) - disable_fc_swiglu_fusion = true; + bool disable_fc_swiglu_fusion = GPU_DEBUG_VALUE_OR(p.get_config().get_disable_fc_swiglu_fusion(), false); // Apply only for high performant GPU if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128) return; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index f63f1bf4efbe21..33320126a9d910 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -16,6 +16,9 @@ using namespace cldnn; void prepare_primitive_fusing_through::run(program& p) { + GPU_DEBUG_IF(p.get_config().get_disable_post_ops_fusions()) + return; + auto try_fuse_through = [&](program_node& node) -> std::vector { 
// This function tries to fuse peer_node to first non reorder or reshape previous primitive. // It returns chain of primitives (reshapes and reorders) including potential fused_node (e.g. Conv, FC, etc) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index 34fa9647ec99c3..95c746d5d59791 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "pass_manager.h" #include "program_node.h" #include "intel_gpu/runtime/engine.hpp" @@ -142,9 +143,10 @@ propagate_constants::calculate(engine& engine, if (!has_non_trivial_constants) return {}; - ExecutionConfig cf_config = config; + ExecutionConfig cf_config = config.clone(); cf_config.set_property(ov::intel_gpu::optimize_data(false)); cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); + cf_config.finalize(engine); network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); std::map, std::shared_ptr>> weightless_cache_map; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index 7b3cbdabe639a0..088afd84f5ff6a 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -679,8 +679,6 @@ void insert_reorders(program& p, const std::map& fm } // namespace void reorder_inputs::run(program& p, reorder_factory& rf) { - GPU_DEBUG_GET_INSTANCE(debug_config); - auto& lo = p.get_layout_optimizer(); auto fmt_map = get_preferred_formats(p, lo); @@ -704,7 +702,7 @@ void reorder_inputs::run(program& p, reorder_factory& rf) { GPU_DEBUG_LOG_PASS << " " << node_ptr->id() << " " << fmt_to_str(fmt) << std::endl; } - GPU_DEBUG_IF(debug_config->verbose >= 2) { + GPU_DEBUG_IF(p.get_config().get_verbose() >= 2) { reorder_cnt total_reorder_count = std::accumulate(p.get_processing_order().begin(), p.get_processing_order().end(), diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index a4e6d989543837..8b3a73c74f3aa6 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -74,7 +74,7 @@ void select_preferred_formats::run(program& p) { } #endif // ENABLE_ONEDNN_FOR_GPU - auto forcing_map = p.get_config().get_property(ov::intel_gpu::force_implementations); + auto forcing_map = p.get_config().get_force_implementations(); for (auto n : p.get_processing_order()) { n->recalc_output_layout(); @@ -121,7 +121,7 @@ void select_preferred_formats::run(program& p) { optimize_conv_permute(*n); } } catch (std::exception& exception) { - GPU_DEBUG_INFO << "WARNING(select_preferred_formats): " << exception.what() << std::endl; + GPU_DEBUG_LOG << "WARNING(select_preferred_formats): " << exception.what() << std::endl; } print_selected_formats(*n); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index d7813c03d80f99..b33a391dadea4c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp 
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -203,7 +203,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { params.quantization = kernel_selector::QuantizationType::NONE; } - params.dynamic_quantization_group_size = impl_param.get_program().get_config().get_property(ov::hint::dynamic_quantization_group_size); + params.dynamic_quantization_group_size = impl_param.get_program().get_config().get_dynamic_quantization_group_size(); return params; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 624db86e38342c..9b73d9711a5a32 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -1204,13 +1204,13 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p params.engineInfo.ip_version = device_info.ip_version; params.engineInfo.arch = kernel_selector::gpu_arch(static_cast::type>(device_info.arch)); - auto impl_forcing = config.get_property(ov::intel_gpu::force_implementations); + auto impl_forcing = config.get_force_implementations(); if (impl_forcing.count(param_info.desc->id) != 0) { params.forceImplementation = impl_forcing.at(param_info.desc->id).kernel_name; } - params.allowStaticInputReordering = config.get_property(ov::intel_gpu::optimize_data) || config.get_property(ov::intel_gpu::allow_static_input_reorder); + params.allowStaticInputReordering = config.get_optimize_data() || config.get_allow_static_input_reorder(); params.allowInputReordering = false; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 8d305a592e081b..c02ad09044ae32 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -111,7 +111,7 @@ namespace cldnn { std::mutex kernels_cache::_mutex; std::string kernels_cache::get_cache_path() const { - auto path = _config.get_property(ov::cache_dir); + auto path = _config.get_cache_dir(); if (path.empty()) { return {}; } @@ -123,20 +123,12 @@ std::string kernels_cache::get_cache_path() const { } bool kernels_cache::is_cache_enabled() const { - if (!_config.get_property(ov::intel_gpu::allow_new_shape_infer) && - (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SPEED)) { + if (!_config.get_allow_new_shape_infer() && + (_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SPEED)) { return false; } - return !_config.get_property(ov::cache_dir).empty(); -} - -size_t kernels_cache::get_max_kernels_per_batch() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->max_kernels_per_batch >= 1) { - return static_cast(debug_config->max_kernels_per_batch); - } - return _config.get_property(ov::intel_gpu::max_kernels_per_batch); + return !_config.get_cache_dir().empty(); } void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::vector* all_batches) const { @@ -205,7 +197,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, // Create new kernels batch when the limit is reached // and current kernel's entry_point is duplicated in this kernels batch - if (current_bucket.back().kernels_counter >= get_max_kernels_per_batch() + if (current_bucket.back().kernels_counter >= _config.get_max_kernels_per_batch() || 
current_bucket.back().entry_point_to_id.find(entry_point) != current_bucket.back().entry_point_to_id.end() || need_separate_batch(entry_point)) { const auto& batch_id = static_cast(current_bucket.size()); @@ -246,11 +238,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, b.hash_value = std::hash()(full_code); - std::string dump_sources_dir = ""; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { - dump_sources_dir = debug_config->dump_sources; - } + std::string dump_sources_dir = GPU_DEBUG_VALUE_OR(_config.get_dump_sources_path(), ""); // Add -g -s to build options to allow IGC assembly dumper to associate assembler sources with corresponding OpenCL kernel code lines // Should be used with the IGC_ShaderDump option @@ -306,11 +294,9 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co auto& cl_build_device = dynamic_cast(*_device); bool dump_sources = batch.dump_custom_program; - std::string dump_sources_dir = ""; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_sources.empty()) { + std::string dump_sources_dir = GPU_DEBUG_VALUE_OR(_config.get_dump_sources_path(), ""); + GPU_DEBUG_IF(!dump_sources_dir.empty()) { dump_sources = true; - dump_sources_dir = debug_config->dump_sources; } std::string err_log; // accumulated build log from all program's parts (only contains messages from parts which @@ -385,7 +371,7 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co if (is_cache_enabled()) { // If kernels caching is enabled, then we save compiled bucket to binary file with name ${code_hash_value}.cl_cache // Note: Bin file contains full bucket, not separate kernels, so kernels reuse across different models is quite limited - // Bucket size can be changed in get_max_kernels_per_batch() method, but forcing it to 1 will lead to much longer + // Bucket size can be changed by max_kernels_per_batch config option, but forcing it to 1 will lead to much longer // compile time. 
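For reference, the dump-sources hunk above replaces the global debug_config singleton with a GPU_DEBUG_VALUE_OR macro fed from the execution config. The macro's definition is not part of this hunk, so the sketch below only illustrates the assumed intent (debug-only values collapse to a fallback when debug caps are not compiled in); the GPU_DEBUG_CONFIG guard and the DummyConfig type are placeholders, not the real OpenVINO definitions.

```cpp
#include <iostream>
#include <string>

// Assumed shape of a GPU_DEBUG_VALUE_OR-style helper: debug-only config values are
// visible only in builds with debug capabilities, otherwise the fallback is used.
#ifdef GPU_DEBUG_CONFIG
#define GPU_DEBUG_VALUE_OR(debug_value, fallback) (debug_value)
#else
#define GPU_DEBUG_VALUE_OR(debug_value, fallback) (fallback)
#endif

struct DummyConfig {  // placeholder for the plugin's ExecutionConfig
    std::string get_dump_sources_path() const { return "/tmp/cl_dumps"; }
};

int main() {
    DummyConfig config;
    // Mirrors the kernels_cache usage: an empty path disables source dumping in release builds.
    const std::string dump_sources_dir = GPU_DEBUG_VALUE_OR(config.get_dump_sources_path(), "");
    std::cout << "dump_sources_dir='" << dump_sources_dir << "'\n";
    return 0;
}
```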
std::lock_guard lock(cacheAccessMutex); ov::intel_gpu::save_binary(cached_bin_name, getProgramBinaries(program)); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp index 56459b93d2481c..b45226f44bd3e9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp @@ -113,7 +113,6 @@ class kernels_cache { std::string get_cache_path() const; bool is_cache_enabled() const; - size_t get_max_kernels_per_batch() const; bool _reuse_kernels = false; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp index 9e0a3fa5cfb390..64b92a15d1f4ba 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp @@ -18,8 +18,9 @@ struct ConcatenationImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; static const std::vector supported_types = { ov::element::f16, ov::element::u8, ov::element::i8 }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp index 6f8502423047b6..0f8d271bc3a6cd 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -157,7 +157,6 @@ struct convolution_onednn : typed_primitive_onednn_impl { dnnl::memory::desc desc = onednn::layout_to_memory_desc(a_zp->get_layout(), dnnl::memory::format_tag::a, true); args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC, a_zp->get_onednn_memory(desc)}); - GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_TRACE_DETAIL << instance.id() << " activations_zero_points: " << " " << a_zp->get_layout().to_short_string() << std::endl; } @@ -167,7 +166,6 @@ struct convolution_onednn : typed_primitive_onednn_impl { dnnl::memory::desc desc = onednn::layout_to_memory_desc(w_zp->get_layout(), dnnl::memory::format_tag::a, true); args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS, w_zp->get_onednn_memory(desc)}); - GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_TRACE_DETAIL << instance.id() << " weights_zero_points: " << " " << w_zp->get_layout().to_short_string() << std::endl; } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp index c3f599fc5db9f6..430c42dee57f75 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp @@ -23,8 +23,9 @@ struct ConvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || 
!config.get_use_onednn()) return false; const auto& conv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp index 039cf36261caa0..238214f82dc6fb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp @@ -19,8 +19,9 @@ struct DeconvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& deconv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp index c4dc5f7faa6531..731a83372a9dfc 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp @@ -21,8 +21,9 @@ struct FullyConnectedImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& fc_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp index 6c576d177043ee..3d64d2009490c0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp @@ -18,8 +18,9 @@ struct GemmImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& gemm_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp index 6fd16a4dd04acf..4b2615c62e2747 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp @@ -22,10 +22,10 @@ struct LSTMSeqImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (info.arch == gpu_arch::unknown) + if (info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; - const auto& lstm_seq_node = node.as(); const auto& in_layout = lstm_seq_node.get_input_layout(0); const auto& out_layout = lstm_seq_node.get_output_layout(0); diff --git 
a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp index 4710b0c77b83c7..ced0316e13a08f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp @@ -19,8 +19,9 @@ struct PoolingImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; const auto& in_layout = node.get_input_layout(0); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 05a3dc5b2a9e4b..fe5920355e29c7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -47,12 +47,11 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _attrs(attrs), _pd(pd) { - _enable_profiling = config.get_property(ov::enable_profiling); + _enable_profiling = config.get_enable_profiling(); _scratchpad_md = _pd.scratchpad_desc(); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 4) { + GPU_DEBUG_IF(config.get_verbose() >= 4) { if (_scratchpad_md.get_size() > 0) { static std::atomic_llong total{0}; int64_t size = _scratchpad_md.get_size() / 1048576; @@ -70,9 +69,8 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _engine(&engine), _pd(), _prim() { - _enable_profiling = config.get_property(ov::enable_profiling); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + _enable_profiling = config.get_enable_profiling(); + GPU_DEBUG_IF(!config.get_dump_profiling_data_path().empty()) { _enable_profiling = true; } } @@ -318,7 +316,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { private: std::string get_cache_directory(const ExecutionConfig& config) const { - auto path = config.get_property(ov::cache_dir); + auto path = config.get_cache_dir(); if (path.empty()) { return {}; } @@ -343,7 +341,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { void build_primitive(const ExecutionConfig& config) { auto cache_outpath = get_cache_directory(config); - if (!config.get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!config.get_allow_new_shape_infer()) { cache_outpath = ""; } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp index 68d963fd9e369f..4a4a4c60df032d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp @@ -48,8 +48,9 @@ struct ReduceImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; 
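The oneDNN implementation managers in these hunks all gain the same extra guard, so whether oneDNN kernels are considered at all is now driven by the finalized execution config instead of a debug setting. Below is a minimal, self-contained sketch of that shared precondition; device_info, gpu_arch and execution_config are simplified stand-ins for the real cldnn types, and some managers (e.g. the LSTM sequence one) skip the supports_immad part of the check.

```cpp
#include <iostream>

enum class gpu_arch { unknown, xe_hpg };

struct device_info {          // stand-in for cldnn::device_info
    bool supports_immad = true;
    gpu_arch arch = gpu_arch::xe_hpg;
};

struct execution_config {     // stand-in for the plugin's ExecutionConfig
    bool use_onednn = true;
    bool get_use_onednn() const { return use_onednn; }
};

// Common precondition applied before any per-primitive validation: the device must
// support systolic (immad) execution, its architecture must be known, and oneDNN
// must not be disabled in the config.
bool onednn_impl_allowed(const device_info& info, const execution_config& config) {
    return info.supports_immad && info.arch != gpu_arch::unknown && config.get_use_onednn();
}

int main() {
    device_info info;
    execution_config config;
    config.use_onednn = false;  // e.g. turned off for this device during config finalization
    std::cout << std::boolalpha << onednn_impl_allowed(info, config) << std::endl;  // false
    return 0;
}
```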
const auto& reduce_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp index c4117058da88e9..824069f56b9583 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp @@ -56,8 +56,9 @@ struct ReorderImplementationManager : public ImplementationManager { if (output_fmt == format::custom) return true; + const auto& config = node.get_program().get_config(); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad || info.arch == gpu_arch::unknown) + if (!info.supports_immad || info.arch == gpu_arch::unknown || !config.get_use_onednn()) return false; if (!one_of(input_fmt.value, supported_formats) || !one_of(output_fmt.value, supported_formats)) diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp index fdb2f151de8986..0ce180380f14b5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/implementation_manager.cpp @@ -58,7 +58,7 @@ std::unique_ptr ImplementationManager::create(const program_node if (auto impl = create_impl(node, params)) { update_impl(*impl, params); impl->set_node_params(node); - impl->can_share_kernels = node.get_program().get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse); + impl->can_share_kernels = node.get_program().get_config().get_enable_kernels_reuse(); return impl; } diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp index d974b78f8e6d14..ce461632631d15 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/non_max_suppression_impls.cpp @@ -59,7 +59,7 @@ const std::vector>& Registry(scores_layout.get_partial_shape()[0].get_length()); const size_t kClassNum = static_cast(scores_layout.get_partial_shape()[1].get_length()); const size_t kNStreams = - static_cast(node.get_program().get_config().get_property(ov::streams::num)); + static_cast(node.get_program().get_config().get_num_streams()); const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; return kKeyValue > 64; } diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 01286a1c6d04bc..619693f3b1a6fc 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -199,6 +199,8 @@ class primitive_inst { program_node const& get_node() const { return *_node; } network& get_network() const { return _network; } uint32_t get_network_id() const; + const ExecutionConfig& get_config() const { return get_network().get_config(); } + virtual event::ptr set_output_memory(memory::ptr mem, bool check = true, size_t idx = 0); void check_memory_to_set(const memory& mem, const layout& layout) const; const std::list& get_users() const { return _node->get_users(); } diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 269a3c30fd293c..229dec6a80c77e 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ 
b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -158,6 +158,7 @@ struct program_node { program& get_program() { return myprog; } program& get_program() const { return myprog; } + const ExecutionConfig& get_config() const { return myprog.get_config(); } primitive_impl* get_selected_impl() const { return selected_impl.get(); } void set_selected_impl(std::unique_ptr impl); diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 167b254a247637..dfd65cd9b58067 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -435,15 +435,10 @@ bool should_use_winograd_2x3_s1(const convolution_node& node, layout const& input_layout, layout const& weights_layout, bool output_size_handling_enabled) { - bool disable_winograd_conv = node.get_program().get_config().get_property(ov::intel_gpu::disable_winograd_convolution); + bool disable_winograd_conv = node.get_program().get_config().get_disable_winograd_convolution(); if (disable_winograd_conv) return false; - // cases when NOT to use winograd - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_winograd_conv == 1) - return false; - auto prim = node.get_primitive(); if (input_layout.data_type != data_types::f16 || (input_layout.is_static() && input_layout.feature() % 64 != 0) // current algorithm is effective for ifm to be multiply of 64 @@ -1134,73 +1129,12 @@ bool layout_optimizer::is_primitive_implemented_for_onednn(program_node& node) { return false; } -impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node) { -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->forced_impl_types.empty()) { - // Forcing impl type of one primitive - for (const auto& forced_impl_type : debug_config->forced_impl_types) { - if (node.is_type()) { - if (forced_impl_type == "fc:ocl") - return impl_types::ocl; - else if (forced_impl_type == "fc:onednn") - return impl_types::onednn; - } else if (node.is_type()) { - if (forced_impl_type == "gemm:ocl") - return impl_types::ocl; - else if (forced_impl_type == "gemm:onednn") - return impl_types::onednn; - } else if (node.is_type()) { - if (forced_impl_type == "do:cpu") - return impl_types::cpu; - else if (forced_impl_type == "do:ocl") - return impl_types::ocl; - } else if (node.is_type()) { - if (forced_impl_type == "reduce:ocl") - return impl_types::ocl; - else if (forced_impl_type == "reduce:onednn") - return impl_types::onednn; - } else if (node.is_type()) { - if (forced_impl_type == "concat:ocl") - return impl_types::ocl; - else if (forced_impl_type == "concat:onednn") - return impl_types::onednn; - } - - // Forcing one layer - size_t found_type = forced_impl_type.rfind(":"); - if (found_type != std::string::npos) { - impl_types preferred_type = impl_types::any; - auto impl_type = forced_impl_type.substr(found_type + 1); - if (impl_type == "ocl") - preferred_type = impl_types::ocl; - else if (impl_type == "onednn") - preferred_type = impl_types::onednn; - else if (impl_type == "cpu") - preferred_type = impl_types::cpu; - - if (node.id() == forced_impl_type.substr(0, found_type)) { - GPU_DEBUG_LOG << " Forced implementation type : " << forced_impl_type.substr(0, found_type) << " : " - << forced_impl_type.substr(found_type + 1) << std::endl; - return preferred_type; - } - } - } - } -#endif - - return impl_types::any; -} - impl_types layout_optimizer::get_preferred_impl_type(program_node& 
node, format preferred_format) { if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) { auto forced_impl = _forcing_map.at(node.id()).second; if (forced_impl != impl_types::any) return forced_impl; } - auto forced_impl = get_forced_impl_type_by_config(node); - if (forced_impl != impl_types::any) - return forced_impl; auto shape_type = shape_types::any; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index aa7c9a55775e6d..1b310fd4542f86 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -60,7 +60,7 @@ namespace cldnn { namespace { #ifdef GPU_DEBUG_CONFIG -void dump_perf_data_raw(std::string dump_path, const std::list>& exec_order) { +void dump_perf_data_raw(std::string dump_path, bool per_iter_mode, const std::list>& exec_order) { auto layouts_to_str = [](const std::vector& layouts) -> std::string { std::stringstream s; for (size_t i = 0; i < layouts.size(); i++) { @@ -71,7 +71,6 @@ void dump_perf_data_raw(std::string dump_path, const std::listdump_profiling_data_per_iter != 0; const std::string perf_raw_csv_header = per_iter_mode ? "prim_id,prim_type,stage,net_in_shapes,in_shapes,out_shapes,impl,iter,time_usec\n" : "prim_id,prim_type,stage,net_in_shapes,in_shapes,out_shapes,impl,iters,time_usec\n"; std::ofstream of(dump_path); @@ -139,13 +138,12 @@ void dump_perf_data_raw(std::string dump_path, const std::list& pids) { bool need_to_wait; do { need_to_wait = false; struct stat buffer; - for (auto pid : debug_config->after_proc) { + for (auto pid : pids) { auto path = "/proc/" + pid; std::cout << "check " + path << std::endl; if (stat(path.c_str(), &buffer) == 0) { @@ -158,8 +156,7 @@ void wait_for_the_turn() { } #else -void dump_perf_data_raw(std::string, const std::list>&) {} -void wait_for_the_turn() {} +void dump_perf_data_raw(std::string, bool per_iter_mode, const std::list>&) {} #endif } // namespace @@ -174,33 +171,22 @@ opt pass). 
*/ network::network(program::ptr program, stream::ptr stream, bool is_internal, bool is_primary_stream) : _program(program) - , _config(program->get_config()) , _engine(program->get_engine()) , _stream(stream) - , _memory_pool(new memory_pool(program->get_engine())) + , _memory_pool(new memory_pool(program->get_engine(), program->get_config())) , _internal(is_internal) , _is_primary_stream(is_primary_stream) - , _enable_profiling(program->get_config().get_property(ov::enable_profiling)) + , _enable_profiling(program->get_config().get_enable_profiling()) , _reset_arguments(true) - , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) { + , _shape_predictor(new ShapePredictor(&program->get_engine(), program->get_config().get_shape_predictor_settings())) { if (!_internal) { net_id = get_unique_net_id(); } - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->after_proc.size() != 0) { - wait_for_the_turn(); - } - - GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { - auto& mem_preallocation_params = debug_config->mem_preallocation_params; - _shape_predictor.reset(new ShapePredictor(&program->get_engine(), - mem_preallocation_params.next_iters_preallocation_count, - mem_preallocation_params.max_per_iter_size, - mem_preallocation_params.max_per_dim_diff, - mem_preallocation_params.buffers_preallocation_ratio)); - } - + GPU_DEBUG_CODE( + if (get_config().get_start_after_processes().size() != 0) { + wait_for_the_turn(get_config().get_start_after_processes()); + }); calculate_weights_cache_capacity(); allocate_primitives(); configure_primitives_second_output(); @@ -238,9 +224,9 @@ network::~network() { if (_program != nullptr) _program->cancel_compilation_context(); _memory_pool->clear_pool_for_network(net_id); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - dump_perf_data_raw(debug_config->dump_profiling_data + "/perf_raw" + std::to_string(net_id) + ".csv", _exec_order); + std::string dump_path = GPU_DEBUG_VALUE_OR(get_config().get_dump_profiling_data_path(), ""); + GPU_DEBUG_IF(!dump_path.empty()) { + dump_perf_data_raw(dump_path + "/perf_raw" + std::to_string(net_id) + ".csv", false, _exec_order); } } @@ -398,7 +384,7 @@ void network::calculate_weights_cache_capacity() { } // Sum all weights constants for each stream - required_mem_size += weights_const_size * _config.get_property(ov::streams::num); + required_mem_size += weights_const_size * get_config().get_num_streams(); // Add all other constants (shared between streams) required_mem_size += total_const_size - weights_const_size; diff --git a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp index 2732476a063f8f..df29ed36a7fd12 100644 --- a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp @@ -157,7 +157,7 @@ void non_max_suppression_gather_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[i]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[i].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[i] = {_network.get_engine().reinterpret_buffer(input_memory(i), _impl_params->get_output_layout(i))}; diff --git a/src/plugins/intel_gpu/src/graph/permute.cpp b/src/plugins/intel_gpu/src/graph/permute.cpp index bf87e78e4bbbc5..c4fddfde978c2d 100644 --- a/src/plugins/intel_gpu/src/graph/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/permute.cpp @@ -146,7 +146,7 @@ void permute_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index bcf468cea97d03..9bb8f96fd25c65 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -163,7 +163,7 @@ static memory::ptr get_memory_from_pool(engine& _engine, OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout without upper bound"); // Use layout with max tensor for dynamic shape with upper bound - if (_node.get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + if (_node.get_program().get_config().get_enable_memory_pool()) { if (curr_memory != nullptr) pool.release_memory(curr_memory, _node.get_unique_id(), _node.id(), net_id); return pool.get_memory(layout, @@ -557,7 +557,6 @@ void primitive_inst::clear_output_memory() { void primitive_inst::realloc_if_needed(bool prev_execution_skipped) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("realloc_if_needed: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::memory_allocation); const auto& users = get_user_insts(); @@ -836,11 +835,6 @@ void primitive_inst::realloc_if_needed(bool prev_execution_skipped) { } int32_t tmp_prealloc_count = get_prealloc_iter_num(); - GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { - // If debug config is set, repsect the config most - tmp_prealloc_count = -1; - } - // If we allocated too large memory, reclaim the memory. 
for (size_t i = 0; i < updated_layouts.size(); ++i) { bool reclaim = 0; @@ -1082,8 +1076,7 @@ void primitive_inst::realloc_if_needed(bool prev_execution_skipped) { } bool primitive_inst::use_async_compilation() { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_async_compilation) { + GPU_DEBUG_IF(get_config().get_disable_async_compilation()) { return false; } @@ -1275,8 +1268,7 @@ void primitive_inst::update_paddings() { void primitive_inst::do_runtime_skip_reorder() { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_skip_reorder: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_skip_reorder) { + GPU_DEBUG_IF(get_config().get_disable_runtime_skip_reorder()) { return; } if (can_be_optimized()) @@ -1581,8 +1573,7 @@ void primitive_inst::do_runtime_in_place_concat() { return false; }; OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_concat: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) { return; } if (update_shape_done_by_other) { @@ -1691,8 +1682,7 @@ void primitive_inst::do_runtime_skip_scatter_update() { void primitive_inst::do_runtime_in_place_crop() { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_crop: " + id())); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { + GPU_DEBUG_IF(get_config().get_disable_runtime_buffer_fusing()) { return; } @@ -1985,8 +1975,7 @@ void primitive_inst::execute() { set_out_event(_impl->execute(_impl_params->dep_events, *this)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!get_config().get_dump_profiling_data_path().empty()) { auto ev = _impl_params->out_event; get_network().get_stream().wait_for_events({ev}); @@ -2042,7 +2031,7 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool : _network(network) , _node(&node) , _node_output_layout(node.get_output_layout()) - , _use_shared_kernels(node.get_program().get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)) + , _use_shared_kernels(node.get_program().get_config().get_enable_kernels_reuse()) , _impl_params(node.get_kernel_impl_params()) , _impl(node.get_selected_impl() ? 
node.get_selected_impl()->clone() : nullptr) , _runtime_memory_dependencies(node.get_memory_dependencies()) @@ -2323,8 +2312,7 @@ void primitive_inst::update_weights() { reorder_impl->set_arguments(*reorder_inst, args); add_dep_event(reorder_impl->execute({}, *reorder_inst)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { + GPU_DEBUG_IF(!get_config().get_dump_profiling_data_path().empty()) { stream.wait_for_events(_impl_params->dep_events); } @@ -2596,8 +2584,8 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() { ExecutionConfig subgraph_config{ ov::intel_gpu::allow_static_input_reorder(true), ov::intel_gpu::allow_new_shape_infer(true), - ov::enable_profiling(get_network().get_config().get_property(ov::enable_profiling)), - ov::intel_gpu::use_onednn(get_network().get_config().get_property(ov::intel_gpu::use_onednn)) + ov::enable_profiling(get_network().get_config().get_enable_profiling()), + ov::intel_gpu::use_onednn(get_network().get_config().get_use_onednn()) }; auto prog = program::build_program(get_network().get_engine(), t, @@ -2753,42 +2741,31 @@ bool primitive_inst::is_valid_fusion() const { } void primitive_inst::add_profiling_data(instrumentation::pipeline_stage stage, bool cache_hit, std::string memalloc_info, int64_t time, bool per_iter_mode) { - GPU_DEBUG_GET_INSTANCE(debug_config); -#ifdef GPU_DEBUG_CONFIG - int64_t curr_iter = -1; - GPU_DEBUG_IF(debug_config->dump_prof_data_iter_params.is_enabled) { - curr_iter = get_network().get_current_iteration_num(); - } - GPU_DEBUG_IF(curr_iter < 0 || debug_config->is_target_dump_prof_data_iteration(curr_iter)) { -#else - { -#endif - instrumentation::perf_counter_key key { - _network.get_input_layouts(), - _impl_params->input_layouts, - _impl_params->output_layouts, - get_implementation_name(), - stage, + instrumentation::perf_counter_key key { + _network.get_input_layouts(), + _impl_params->input_layouts, + _impl_params->output_layouts, + get_implementation_name(), + stage, #ifdef GPU_DEBUG_CONFIG - per_iter_mode ? get_network().get_current_iteration_num() : 0, + per_iter_mode ? 
get_network().get_current_iteration_num() : 0, #else - 0, + 0, #endif - cache_hit, - memalloc_info - }; - - auto hash = instrumentation::perf_counter_hash()(key); - auto& d = _profiling_data[hash]; - if (_profiling_info.find(hash) == _profiling_info.end()) { - _profiling_info.emplace(hash, key); - } + cache_hit, + memalloc_info + }; - auto& total_time = std::get<0>(d); - auto& total_iter = std::get<1>(d); - total_time += time; - total_iter++; + auto hash = instrumentation::perf_counter_hash()(key); + auto& d = _profiling_data[hash]; + if (_profiling_info.find(hash) == _profiling_info.end()) { + _profiling_info.emplace(hash, key); } + + auto& total_time = std::get<0>(d); + auto& total_iter = std::get<1>(d); + total_time += time; + total_iter++; } std::string primitive_inst::get_implementation_name() const { diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index c3de17f8a196d3..6bbb0f2512905d 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -3,6 +3,7 @@ // #include "impls/registry/implementation_manager.hpp" +#include "intel_gpu/plugin/remote_context.hpp" #include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/core/type.hpp" #include "openvino/runtime/system_conf.hpp" @@ -107,8 +108,8 @@ using namespace cldnn; using namespace ov::intel_gpu; static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) { - int streams = (num_streams > 0) ? num_streams : config.get_property(ov::compilation_num_threads); - auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority); + int streams = (num_streams > 0) ? num_streams : config.get_compilation_num_threads(); + auto priority = config.get_host_task_priority(); auto core_type = ov::hint::SchedulingCoreType::ANY_CORE; switch (priority) { case ov::hint::Priority::LOW: core_type = ov::hint::SchedulingCoreType::ECORE_ONLY; break; @@ -116,7 +117,7 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E case ov::hint::Priority::HIGH: core_type = ov::hint::SchedulingCoreType::PCORE_ONLY; break; default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority); } - bool enable_cpu_pinning = config.get_property(ov::hint::enable_cpu_pinning); + bool enable_cpu_pinning = config.get_enable_cpu_pinning(); ov::threading::IStreamsExecutor::Config task_executor_config(tags, streams, @@ -162,7 +163,7 @@ program::program(engine& engine_ref, program_node::reset_unique_id(); if (no_optimizations) { init_graph(); - _config.apply_user_properties(_engine.get_device_info()); + _config.finalize(_engine); } else { build_program(is_internal); if (_is_body_program) { @@ -198,7 +199,7 @@ program::program(engine& engine_ref, _task_executor(std::move(task_executor)), processing_order(), is_internal(is_internal) { - _config.apply_user_properties(_engine.get_device_info()); + _config.finalize(_engine); init_primitives(); init_program(); prepare_nodes(nodes); @@ -211,8 +212,8 @@ program::program(engine& engine, const ExecutionConfig& config) _config(config), processing_order() { init_primitives(); - _config.apply_user_properties(_engine.get_device_info()); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); + _config.finalize(_engine); + new_shape_infer = _config.get_allow_new_shape_infer(); _layout_optimizer = std::make_unique(); } @@ -220,11 +221,10 @@ 
program::~program() { } void program::init_program() { - GPU_DEBUG_GET_INSTANCE(debug_config); set_options(); pm = std::unique_ptr(new pass_manager(*this)); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); + new_shape_infer = _config.get_allow_new_shape_infer(); if (_task_executor == nullptr) _task_executor = program::make_task_executor(_config); @@ -232,19 +232,14 @@ void program::init_program() { kernel_selector::KernelBase::get_db().get_batch_headers(), kernel_selector::KernelBase::get_db().get_cm_batch_headers())); - _kernels_cache->set_kernels_reuse(get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)); + _kernels_cache->set_kernels_reuse(_config.get_enable_kernels_reuse()); if (!_compilation_context) _compilation_context = program::make_compilation_context(_config); _layout_optimizer = std::make_unique(); - size_t impls_cache_capacity = _impls_cache_capacity; - GPU_DEBUG_IF(debug_config->impls_cache_capacity >= 0) { - impls_cache_capacity = debug_config->impls_cache_capacity; - } - - _impls_cache = std::make_unique(impls_cache_capacity); + _impls_cache = std::make_unique(get_config().get_impls_cache_capacity()); // Remove items of compilation context's internal queue when some impl is popped in kernels_cache // compilation context's queue check duplication of inserted task _impls_cache->set_remove_item_callback([this](ImplementationsCache::ItemType& item) { @@ -486,26 +481,17 @@ void program::set_options() { static std::atomic id_gen{0}; prog_id = ++id_gen; assert(prog_id != 0); - if (!_config.get_property(ov::intel_gpu::force_implementations).empty()) { - _config.set_property(ov::intel_gpu::optimize_data(true)); - } - - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - _config.set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); - } } void program::build_program(bool is_internal) { init_graph(); - _config.apply_user_properties(_engine.get_device_info()); + _config.finalize(_engine); { pre_optimize_graph(is_internal); } run_graph_compilation(); { post_optimize_graph(is_internal); } - GPU_DEBUG_GET_INSTANCE(debug_config); #ifdef GPU_DEBUG_CONFIG - if (debug_config->dry_run_path.empty() || is_internal) { + if (get_config().get_dry_run_path().empty() || is_internal) { #else { #endif @@ -528,10 +514,6 @@ void program::init_graph() { for (auto& node : processing_order) { if (!node->is_type()) node->get_output_layouts(); - if (node->is_type()) { - _config.set_property(ov::intel_gpu::use_onednn(true)); - _config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } } // Perform initial shape_of subgraphs markup apply_opt_pass(); @@ -549,7 +531,7 @@ void program::pre_optimize_graph(bool is_internal) { bool output_size_handling_enabled = analyze_output_size_handling_need(); - bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = _config.get_optimize_data(); if (optimize_data) { apply_opt_pass(); } @@ -559,26 +541,13 @@ void program::pre_optimize_graph(bool is_internal) { reorder_factory rf; if (optimize_data) { - GPU_DEBUG_GET_INSTANCE(debug_config); -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { -#else - { -#endif - apply_opt_pass(); - } + apply_opt_pass(); apply_opt_pass(); apply_opt_pass(); -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { -#else - { -#endif - apply_opt_pass(); - } + apply_opt_pass(); apply_opt_pass(); @@ -626,7 +595,7 @@ void 
program::post_optimize_graph(bool is_internal) { reorder_factory rf; - bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = _config.get_optimize_data(); if (!is_internal) { apply_opt_pass(rf); @@ -634,10 +603,9 @@ void program::post_optimize_graph(bool is_internal) { apply_opt_pass(false, true); // TODO: do we need it at this place also? - auto partial_build = _config.get_property(ov::intel_gpu::partial_build_program); + auto partial_build = _config.get_partial_build_program(); #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_GET_INSTANCE(debug_config); - if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) { + if (!is_internal && (!partial_build || !_config.get_dry_run_path().empty())) { #else if (!is_internal && !partial_build) { #endif @@ -653,7 +621,7 @@ void program::post_optimize_graph(bool is_internal) { // Recalculate processing order after all graph transformation to keep optimal primitives ordering // for OOO queue - if (_config.get_property(ov::intel_gpu::queue_type) == QueueTypes::out_of_order) + if (_config.get_queue_type() == QueueTypes::out_of_order) get_processing_order().calculate_BFS_processing_order(); apply_opt_pass(); @@ -777,7 +745,7 @@ const std::vector& program::get_allocating_order(bool forced_updat } void program::prepare_memory_dependencies() { - if (!_config.get_property(ov::intel_gpu::enable_memory_pool)) + if (!_config.get_enable_memory_pool()) return; for (auto& node : get_processing_order()) { node->add_memory_dependency(node->get_unique_id()); @@ -1408,8 +1376,7 @@ program::primitives_info program::get_current_stage_info() const { } void program::save_pass_info(std::string pass_name) { - // TODO: Directory path here can be probably changed to some bool flag - if (!_config.get_property(ov::intel_gpu::dump_graphs).empty()) + GPU_DEBUG_IF(!_config.get_dump_graphs_path().empty()) optimizer_passes_info.emplace_back(pass_name, get_current_stage_info()); } @@ -1437,7 +1404,7 @@ const program::primitives_info& program::get_primitives_info() const { return pr void program::apply_opt_pass(base_pass& pass) { pm->run(*this, pass); } void program::set_layout_optimizer_attributes(layout_optimizer& lo) { - lo.set_implementation_forcing(_config.get_property(ov::intel_gpu::force_implementations)); + lo.set_implementation_forcing(_config.get_force_implementations()); // first pass to set layout optimization_attributes for topology @@ -1663,15 +1630,15 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1); #ifdef ENABLE_ONEDNN_FOR_GPU - bool enable_onednn_for_tests = get_config().get_property(ov::intel_gpu::optimize_data) || is_internal_program(); + bool enable_onednn_for_tests = get_config().get_optimize_data() || is_internal_program(); auto& engine = get_engine(); if (engine.get_device_info().vendor_id == INTEL_VENDOR_ID && - get_config().get_property(ov::intel_gpu::queue_type) == QueueTypes::in_order && + get_config().get_queue_type() == QueueTypes::in_order && enable_onednn_for_tests) { if (engine.get_device_info().supports_immad) { lo.add_all_onednn_impls_optimization_attribute(); } else { - if (get_config().get_property(ov::intel_gpu::use_onednn)) { + if (get_config().get_use_onednn()) { lo.enable_onednn_for(); } } @@ -1681,7 +1648,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { std::pair program::get_estimated_device_mem_usage() { auto max_alloc_size = 
get_engine().get_device_info().max_alloc_mem_size; - memory_pool pool(get_engine()); + memory_pool pool(get_engine(), get_config()); int64_t const_sum = 0; #ifdef __unix__ @@ -1885,8 +1852,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) { init_program(); std::shared_ptr mapped_memory = nullptr; - std::string weights_path = _config.get_property(ov::weights_path); - if (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && + std::string weights_path = _config.get_weights_path(); + if (_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE && ov::util::validate_weights_path(weights_path)) { mapped_memory = ov::load_mmap_object(weights_path); } diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index 6a09fcd10eb513..eb5c152a361a16 100644 --- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -3,6 +3,7 @@ // #include "program_dump_graph.h" +#include "intel_gpu/runtime/debug_configuration.hpp" #include "to_string_utils.h" #include "data_inst.h" #include "condition_inst.h" @@ -139,7 +140,6 @@ void close_stream(std::ofstream& graph) { graph.close(); } std::string get_node_id(const program_node* ptr) { return "node_" + std::to_string(reinterpret_cast(ptr)); } void dump_full_node(std::ofstream& out, const program_node* node) { - GPU_DEBUG_GET_INSTANCE(debug_config); try { out << node->type()->to_string(*node); } catch(const std::exception& e) { @@ -157,7 +157,7 @@ void dump_full_node(std::ofstream& out, const program_node* node) { } // namespace std::string get_dir_path(const ExecutionConfig& config) { - auto path = config.get_property(ov::intel_gpu::dump_graphs); + std::string path = GPU_DEBUG_VALUE_OR(config.get_dump_graphs_path(), ""); if (path.empty()) { return {}; } diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index b7767c14f9abff..65df228d6c733f 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -1851,8 +1851,7 @@ void program_node::create_onednn_primitive_attributes( // Trying to combine multiplications and additions which are placed one after another. // We do it in the cycle because some optimization cases can be simplified again from time to time do { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_onednn_opt_post_ops) + GPU_DEBUG_IF(get_config().get_disable_onednn_post_ops_opt()) break; optimized_post_ops = try_optimize_post_ops(fused_ops, optimized_post_ops, attrs, optimization_is_finished); } while (!optimization_is_finished); diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 76b15ca54bcff8..75a4011eeefc25 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -287,7 +287,7 @@ void reorder_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
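The update_output_memory() hunks in this and the following files all switch the memory-pool check to the typed get_enable_memory_pool() getter, and memory_pool itself is now constructed with the execution config. The self-contained sketch below restates that release-only-if-pooling-is-enabled pattern with simplified mock types; it is an illustration under those assumptions, not the actual cldnn implementation.

```cpp
#include <iostream>
#include <memory>
#include <vector>

struct execution_config {     // stand-in for the plugin's ExecutionConfig
    bool enable_memory_pool = true;
    bool get_enable_memory_pool() const { return enable_memory_pool; }
};

struct memory {};             // stand-in for cldnn::memory

struct memory_pool {          // the real pool is now built as memory_pool(engine, config)
    explicit memory_pool(const execution_config& config) : m_config(config) {}
    void release_memory(memory* mem) { std::cout << "released " << mem << std::endl; }
    const execution_config& m_config;
};

// Optimized-out primitives reuse their input buffer as output; before rebinding, the
// previously pooled output allocation is handed back, but only when pooling is enabled.
void update_output_memory(std::vector<std::shared_ptr<memory>>& outputs,
                          memory_pool& pool,
                          const execution_config& config) {
    // outputs is expected to hold at least one (possibly null) entry, as in primitive_inst.
    if (outputs[0] && config.get_enable_memory_pool())
        pool.release_memory(outputs[0].get());
    outputs[0] = std::make_shared<memory>();  // stands in for reinterpret_buffer(input, layout)
}

int main() {
    execution_config config;
    memory_pool pool(config);
    std::vector<std::shared_ptr<memory>> outputs{std::make_shared<memory>()};
    update_output_memory(outputs, pool, config);
    return 0;
}
```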
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index fc874e29f70ac4..b6e5b23a0f6476 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -320,7 +320,7 @@ void reshape_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp index 316acef0e492e8..a651baa50002fa 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp @@ -75,7 +75,7 @@ void scatter_elements_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp index 2f109f83df428f..3dbec05dbbe3b3 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp @@ -86,7 +86,7 @@ void scatter_nd_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_update.cpp index 4291ee67caa3ef..947507533796e0 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_update.cpp @@ -66,7 +66,7 @@ void scatter_update_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. 
if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index 5435de5598bea0..a3b6ad9166c964 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -95,7 +95,7 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p 3, ""); - bool allow_new_shape_infer = network.get_program()->get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + bool allow_new_shape_infer = network.get_program()->get_config().get_allow_new_shape_infer(); // Broadcast check is performed in ngraph shape infer of select when allow_new_shape_infer=true if (!allow_new_shape_infer) { if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) { diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index 007553b8a9d192..674e7649bc9820 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -208,7 +208,7 @@ void strided_slice_inst::update_output_memory() { // Can_be_optimized nodes are allocating from memory_pool too. In this case, // we need release the legacy output memory from memory pool explicitly. if (static_cast(_outputs[0]) && - _node->get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) { + _node->get_program().get_config().get_enable_memory_pool()) { _network.get_memory_pool().release_memory(_outputs[0].get(), _node->get_unique_id(), _node->id(), _network.get_id()); } _outputs[0] = input_memory_ptr(); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp index a103a159faaf5d..72f3cc9120b9f1 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp @@ -70,7 +70,6 @@ JitConstants DynamicQuantizeKernelRef::GetJitConstants(const dynamic_quantize_pa } CommonDispatchData DynamicQuantizeKernelRef::SetDefault(const dynamic_quantize_params& params) const { - GPU_DEBUG_GET_INSTANCE(debug_config); CommonDispatchData dispatchData; OPENVINO_ASSERT(params.outputs[0].GetLayout() == DataLayout::bfyx, "It supports only 4d tensor"); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 25558be18c481a..a6b798bde97b9e 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -89,26 +89,6 @@ static bool is_per_token_dynamic_quantize(const fully_connected_params& params) static size_t get_dynamic_quantize_group_size(const fully_connected_params& params) { auto 
dynamic_quantization_group_size = params.dynamic_quantization_group_size; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; - - // Specify which Fully-connected layer would be dynamic-quantized - GPU_DEBUG_IF(!debug_config->dynamic_quantize_layers_without_onednn.empty()) { - auto layers = debug_config->dynamic_quantize_layers_without_onednn; - auto iter = std::find_if(layers.begin(), layers.end(), [&](const std::string& pattern){ - return debug_config->is_layer_name_matched(params.layerID, pattern); - }); - - if (iter != layers.end()) { - dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; - GPU_DEBUG_COUT << "Found specified Fully-connected layer [" << params.layerID << "]. Enable Dynamic-quantize." << std::endl; - } else { - dynamic_quantization_group_size = 0; - } - } - } - size_t scale_group_size = get_scale_group_size(params); size_t zp_group_num = params.decompression_zero_point.Feature().v; size_t zp_group_size = 0; diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index d273ba2b1df6a6..5b8edb9e7d08c9 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -5,6 +5,7 @@ #include "openvino/runtime/iplugin.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/util/weights_path.hpp" #include "intel_gpu/graph/serialization/binary_buffer.hpp" @@ -20,17 +21,17 @@ namespace ov::intel_gpu { namespace { std::shared_ptr create_task_executor(const std::shared_ptr& plugin, const ExecutionConfig& config) { - if (config.get_property(ov::internal::exclusive_async_requests)) { + if (config.get_exclusive_async_requests()) { // exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with // the CPU behavior return plugin->get_executor_manager()->get_executor("GPU"); - } else if (config.get_property(ov::hint::enable_cpu_pinning) || - config.get_property(ov::hint::enable_cpu_reservation)) { - bool enable_cpu_pinning = config.get_property(ov::hint::enable_cpu_pinning); - bool enable_cpu_reservation = config.get_property(ov::hint::enable_cpu_reservation); + } else if (config.get_enable_cpu_pinning() || + config.get_enable_cpu_reservation()) { + bool enable_cpu_pinning = config.get_enable_cpu_pinning(); + bool enable_cpu_reservation = config.get_enable_cpu_reservation(); return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.get_property(ov::num_streams), + config.get_num_streams(), 1, ov::hint::SchedulingCoreType::PCORE_ONLY, enable_cpu_reservation, @@ -38,7 +39,7 @@ std::shared_ptr create_task_executor(const std::sh } else { return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", - config.get_property(ov::num_streams), + config.get_num_streams(), 0, ov::hint::SchedulingCoreType::ANY_CORE, false, @@ -62,7 +63,7 @@ CompiledModel::CompiledModel(std::shared_ptr model, m_outputs(ov::ICompiledModel::outputs()), m_loaded_from_cache(false) { auto graph_base = std::make_shared(model, m_context, m_config, 0); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + for (uint16_t n = 0; n < 
m_config.get_num_streams(); n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -157,7 +158,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, } auto graph_base = std::make_shared(ib, context, m_config, 0); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + for (uint16_t n = 0; n < m_config.get_num_streams(); n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } @@ -179,8 +180,8 @@ std::shared_ptr CompiledModel::create_infer_request() co void CompiledModel::export_model(std::ostream& model) const { // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching // which requires the weights_path. - ov::CacheMode cache_mode = m_config.get_property(ov::cache_mode); - std::string weights_path = m_config.get_property(ov::weights_path); + ov::CacheMode cache_mode = m_config.get_cache_mode(); + std::string weights_path = m_config.get_weights_path(); if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) return; @@ -188,7 +189,7 @@ void CompiledModel::export_model(std::ostream& model) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); - const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks); + const ov::EncryptionCallbacks encryption_callbacks = m_config.get_cache_encryption_callbacks(); // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty. const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; @@ -290,15 +291,15 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } else if (name == ov::loaded_from_cache) { return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache}; } else if (name == ov::optimal_number_of_infer_requests) { - unsigned int nr = m_config.get_property(ov::num_streams); - if (m_config.get_property(ov::hint::performance_mode) != ov::hint::PerformanceMode::LATENCY) + unsigned int nr = m_config.get_num_streams(); + if (m_config.get_performance_mode() != ov::hint::PerformanceMode::LATENCY) nr *= 2; return decltype(ov::optimal_number_of_infer_requests)::value_type {nr}; } else if (name == ov::execution_devices) { return decltype(ov::execution_devices)::value_type{m_context->get_device_name()}; } - return m_config.get_property(name); + return m_config.get_property(name, OptionVisibility::RELEASE); } std::shared_ptr CompiledModel::create_sync_infer_request() const { diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 6859728076fb6a..cba0d6aab9276d 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -2,7 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/graph/serialization/helpers.hpp" #include "intel_gpu/runtime/layout.hpp" +#include "openvino/core/any.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/threading/executor_manager.hpp" #include "openvino/runtime/exec_model_info.hpp" #include "openvino/pass/serialize.hpp" @@ -38,7 +41,7 @@ Graph::Graph(std::shared_ptr model, const RemoteContextImpl::Ptr& con : m_context(context) , m_config(config) , m_stream_id(stream_id) { - auto program_builder = std::make_shared(model, 
get_engine(), config, false); + auto program_builder = std::make_shared(model, get_engine(), config); m_config = program_builder->get_config(); build(program_builder->get_compiled_program()); @@ -85,15 +88,11 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context ib >> perfEntry.parentPrimitive; } } - { - bool bool_prop_value; - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::partial_build_program(bool_prop_value)); - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::optimize_data(bool_prop_value)); - ib >> bool_prop_value; - m_config.set_property(ov::intel_gpu::allow_new_shape_infer(bool_prop_value)); - } + + IstreamAttributeVisitor visitor(ib); + m_config.visit_attributes(visitor); + m_config.set_user_property(config.get_user_properties()); // Copy user properties if those were modified on import call + m_config.finalize(context.get(), nullptr); auto imported_prog = std::make_shared(get_engine(), m_config); imported_prog->load(ib); @@ -114,9 +113,8 @@ Graph::Graph(std::shared_ptr graph, uint16_t stream_id) } Graph::~Graph() { - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) { - const auto log_level = cldnn::debug_configuration::get_instance()->host_time_profiling; - + auto log_level = GPU_DEBUG_VALUE_OR(m_config.get_host_time_profiling(), 0); + GPU_DEBUG_IF(log_level) { auto get_time_str = [](int64_t time_mcs, int64_t iters_num = 1) { double time = static_cast(time_mcs); time /= iters_num; @@ -177,25 +175,26 @@ void Graph::build(std::shared_ptr program) { auto external_queue = m_context->get_external_queue(); if (external_queue) { - OPENVINO_ASSERT(m_config.get_property(ov::num_streams) == 1, "[GPU] Throughput streams can't be used with shared queue!"); + OPENVINO_ASSERT(m_config.get_num_streams() == 1, "[GPU] Throughput streams can't be used with shared queue!"); const auto &engine = program->get_engine(); m_network = std::make_shared(program, engine.create_stream(m_config, external_queue), m_stream_id); } else { m_network = std::make_shared(program, m_stream_id); } - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dry_run_path.empty()) { - ov::pass::Serialize(debug_config->dry_run_path, "").run_on_model(get_runtime_model()); + std::string dry_run_path = GPU_DEBUG_VALUE_OR(m_config.get_dry_run_path(), ""); + std::string dump_graphs_path = GPU_DEBUG_VALUE_OR(m_config.get_dump_graphs_path(), ""); + GPU_DEBUG_IF(!dry_run_path.empty()) { + ov::pass::Serialize(dry_run_path, "").run_on_model(get_runtime_model()); exit(0); } - GPU_DEBUG_IF(!debug_config->dump_graphs.empty() && m_stream_id == 0) { + GPU_DEBUG_IF(!dump_graphs_path.empty() && m_stream_id == 0) { static int net_id = 0; auto steps_info = get_network()->get_optimizer_passes_info(); size_t step_idx = 0; for (auto& step : steps_info) { - auto xml_path = debug_config->dump_graphs + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; + auto xml_path = dump_graphs_path + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; ov::pass::Serialize(xml_path, "").run_on_model(get_runtime_model(step.second, true)); step_idx++; } @@ -209,7 +208,7 @@ bool Graph::use_external_queue() const { std::shared_ptr Graph::get_runtime_model(std::vector& primitives_info, bool filter_const_primitives) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::get_runtime_model"); - if (m_config.get_property(ov::enable_profiling)) { + if (m_config.get_enable_profiling()) { try { 
// Update may throw an exception for step-by-step runtime graph dump, // since network->get_executed_primitives() method can't be called before network execution @@ -520,11 +519,8 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) { ob << perf_item.second.second.parentPrimitive; } } - { - ob << m_config.get_property(ov::intel_gpu::partial_build_program); - ob << m_config.get_property(ov::intel_gpu::optimize_data); - ob << m_config.get_property(ov::intel_gpu::allow_new_shape_infer); - } + OstreamAttributeVisitor visitor(ob); + m_config.visit_attributes(visitor); ob.set_stream(m_network->get_stream_ptr().get()); m_network->get_program()->save(ob); diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 5c797b622aa28b..da080544363d00 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/op/if.hpp" #include "intel_gpu/plugin/program_builder.hpp" #include "intel_gpu/primitives/condition.hpp" @@ -20,17 +21,12 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ << internal_body->get_friendly_name() << ", num inputs: " << op->get_input_size() << std::endl; - auto config = p.get_config(); - { - auto custom_outputs = config.get_property(ov::intel_gpu::custom_outputs); - if (!custom_outputs.empty()) { - config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); - } - } - config.set_property(ov::intel_gpu::max_dynamic_batch(1)); + auto config = p.get_config().clone(); + config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); + config.finalize(p.get_engine()); - ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), p.get_compilation_context(), true); + ProgramBuilder prog(internal_body, p.get_engine(), config, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); auto& input_map = branch.input_map; diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 3e052c134390ae..556738c5df52ea 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -297,13 +297,12 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr #include #include -#include #include #include #include @@ -24,22 +23,19 @@ #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/device_query.hpp" #include "intel_gpu/runtime/execution_config.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" #include "intel_gpu/runtime/itt.hpp" +#include "openvino/core/any.hpp" #include "openvino/core/deprecated.hpp" -#include "openvino/op/gather.hpp" -#include "openvino/op/concat.hpp" -#include "openvino/op/paged_attention.hpp" #include "openvino/pass/manager.hpp" -#include "openvino/pass/pattern/op/wrap_type.hpp" -#include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/visualize_tree.hpp" #include "openvino/runtime/device_id_parser.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/make_tensor.hpp" #include 
"openvino/runtime/performance_heuristics.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/properties.hpp" -#include "openvino/util/common_util.hpp" #include "openvino/util/weights_path.hpp" #include "transformations/common_optimizations/dimension_tracking.hpp" #include "transformations/init_node_info.hpp" @@ -66,33 +62,6 @@ namespace ov::intel_gpu { #include "intel_gpu/plugin/primitives_list.hpp" #undef REGISTER_FACTORY -const auto is_llm = [](const std::shared_ptr& model) -> bool { - using namespace ov::pass::pattern; - - auto past = wrap_type(); - auto convert_past = wrap_type({past}); - auto gather_input = std::make_shared(OutputVector{past, convert_past}); - auto beam_idx = wrap_type(); - auto gather_past = wrap_type({gather_input, beam_idx, wrap_type()}); - auto gather_convert = wrap_type({gather_past}); - auto concat_past_input = std::make_shared(OutputVector{past, convert_past, gather_past, gather_convert}); - auto concat = wrap_type({concat_past_input, any_input()}); - auto convert_present = wrap_type({concat}); - auto present_input = std::make_shared(OutputVector{concat, convert_present}); - auto present = wrap_type({present_input}); - - auto kvcache_matcher = std::make_shared(present, "KVCacheMatcher"); - - for (auto& op : model->get_ordered_ops()) { - if (kvcache_matcher->match(op) || - ov::is_type(op)) { - return true; - } - } - - return false; -}; - void Plugin::register_primitives() const { #define REGISTER_FACTORY(op_version, op_name) FACTORY_CALL(op_version, op_name) #include "intel_gpu/plugin/primitives_list.hpp" @@ -128,18 +97,30 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p const ExecutionConfig& config, const std::shared_ptr& context) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::clone_and_transform_model"); - GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_DEFINE_MEM_LOGGER("Plugin::clone_and_transform_model"); auto cloned_model = model->clone(); OPENVINO_ASSERT(cloned_model != nullptr, "[GPU] Failed to clone model!"); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name(); + // Here we create a copy of the config to finalize it and ensure that transformation pipe can use correct options values + // This is manily needed to correctly update lower level properties when higher level option is set by user + // For example, transformation use inference_precision hint which may be updated by execution_mode property. + // Update itself will happen on finalization stage, so we must call it to have correct passes flow. + // The reason why we can't do finalization once and then just run all graph transformations is that + // part of the tranformations may actually impact some properties. For example, LSTMSequence op presense + // impacts value of use_onednn property. But in order to understand if there's an op of this type we have to run + // common optimizations which may do subgraph fusion to LSTMSequence op. So basically, final value of use_onednn + // property can be computed for transformed model only. 
+ auto config_copy = config.clone(); + config_copy.finalize(context.get(), model.get()); + + std::string dump_path = GPU_DEBUG_VALUE_OR(config_copy.get_dump_graphs_path(), ""); + GPU_DEBUG_IF(!dump_path.empty()) { + auto path_base = dump_path + "/" + cloned_model->get_name(); ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } - transform_model(cloned_model, config, context); + transform_model(cloned_model, config_copy, context); // Transformations for some reason may drop output tensor names, so here we copy those from the original model auto new_results = cloned_model->get_results(); @@ -154,8 +135,8 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p new_res->set_friendly_name(old_res->get_friendly_name()); } - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name() + "_" + "transformed_func"; + GPU_DEBUG_IF(!dump_path.empty()) { + auto path_base = dump_path + "/" + cloned_model->get_name() + "_" + "transformed_func"; ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } return cloned_model; @@ -194,22 +175,6 @@ Plugin::Plugin() { m_compiled_model_runtime_properties["OV_VERSION"] = ov_version.buildNumber; } -void Plugin::set_cache_info(const std::shared_ptr& model, ExecutionConfig& config) const { - // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with - // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not - // using that mechanism. - if (config.get_property(ov::cache_mode) != ov::CacheMode::OPTIMIZE_SIZE) { - return; - } - - const auto& rt_info = model->get_rt_info(); - auto weights_path = rt_info.find("__weights_path"); - if (weights_path != rt_info.end()) { - ov::AnyMap weights_path_property{{"WEIGHTS_PATH", weights_path->second}}; - config.set_property(weights_path_property); - } -} - std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model"); std::string device_id = get_device_id(orig_config); @@ -219,14 +184,11 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(context->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); - config.apply_user_properties(context->get_engine().get_device_info()); - - set_cache_info(model, config); + config.set_user_property(orig_config, OptionVisibility::RELEASE); auto transformed_model = clone_and_transform_model(model, config, context); + + config.finalize(context.get(), transformed_model.get()); { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model::CreateCompiledModel"); return std::make_shared(transformed_model, shared_from_this(), context, config); @@ -242,14 +204,12 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - 
config.apply_rt_info(context_impl->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); - config.apply_user_properties(context_impl->get_engine().get_device_info()); - - set_cache_info(model, config); + config.set_user_property(orig_config, OptionVisibility::RELEASE); auto transformed_model = clone_and_transform_model(model, config, context_impl); + + config.finalize(context_impl.get(), transformed_model.get()); + return std::make_shared(transformed_model, shared_from_this(), context_impl, config); } @@ -277,7 +237,7 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params) void Plugin::set_property(const ov::AnyMap &config) { auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) { - config.set_user_property(user_config); + config.set_user_property(user_config, OptionVisibility::RELEASE); // Check that custom layers config can be loaded if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { CustomLayerMap custom_layers; @@ -312,14 +272,12 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& auto ctx = get_default_context(device_id); ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(orig_config); - if (model->has_rt_info("runtime_options")) - config.apply_rt_info(ctx->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); - config.apply_user_properties(ctx->get_engine().get_device_info()); + config.set_user_property(orig_config, OptionVisibility::RELEASE); + config.finalize(ctx.get(), model.get()); ProgramBuilder prog(ctx->get_engine(), config); - float query_model_ratio = config.get_property(ov::internal::query_model_ratio.name()).as(); + float query_model_ratio = config.get_query_model_ratio(); auto supported = ov::get_supported_nodes(model, [&config,&ctx,this](std::shared_ptr& model) { @@ -369,11 +327,10 @@ std::shared_ptr Plugin::import_model(std::istream& model, } ExecutionConfig config = m_configs_map.at(device_id); - config.set_user_property(_orig_config); - config.apply_user_properties(context_impl->get_engine().get_device_info()); + config.set_user_property(_orig_config, OptionVisibility::RELEASE); - ov::CacheMode cache_mode = config.get_property(ov::cache_mode); - ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks); + ov::CacheMode cache_mode = config.get_cache_mode(); + ov::EncryptionCallbacks encryption_callbacks = config.get_cache_encryption_callbacks(); const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE; std::unique_ptr ib_ptr = @@ -390,9 +347,8 @@ std::shared_ptr Plugin::import_model(std::istream& model, return nullptr; } - std::string weights_path = config.get_property(ov::weights_path); - if (config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && - !ov::util::validate_weights_path(weights_path)) { + std::string weights_path = config.get_weights_path(); + if (config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE && !ov::util::validate_weights_path(weights_path)) { return nullptr; } @@ -478,7 +434,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] get_property: Couldn't find config for GPU with id ", device_id); const auto& c = m_configs_map.at(device_id); - return c.get_property(name); + return c.get_property(name, OptionVisibility::RELEASE); } auto StringRightTrim = [](std::string 
string, std::string substring, bool case_sensitive = true) { @@ -512,8 +468,6 @@ bool Plugin::is_metric(const std::string& name) const { ov::Any Plugin::get_metric(const std::string& name, const ov::AnyMap& options) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::get_metric"); - GPU_DEBUG_GET_INSTANCE(debug_config); - auto device_id = get_property(ov::device::id.name(), options).as(); auto iter = m_device_map.find(std::to_string(cldnn::device_query::device_id)); @@ -687,12 +641,12 @@ std::vector Plugin::get_device_capabilities(const cldnn::device_inf } uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { - GPU_DEBUG_GET_INSTANCE(debug_config); auto device_id = get_property(ov::device::id.name(), options).as(); auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); - const auto& config = m_configs_map.at(device_id); - uint32_t n_streams = static_cast(config.get_property(ov::num_streams)); + auto config = m_configs_map.at(device_id); + config.set_property(ov::intel_gpu::partial_build_program(true)); + uint32_t n_streams = static_cast(config.get_num_streams()); uint64_t occupied_device_mem = 0; auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>(); auto occupied_usm_dev = statistic_result.find("usm_device_current"); @@ -744,17 +698,14 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { OPENVINO_THROW("[GPU_MAX_BATCH_SIZE] ov::hint::model should be std::shared_ptr type"); } + config.finalize(context.get(), model.get()); + size_t base_batch_size = 16; // empirically decided for DG1 auto& engine = get_default_context(device_id)->get_engine(); std::shared_ptr program; - GPU_DEBUG_IF(debug_config->base_batch_for_memory_estimation > 0) { - size_t user_specified_base_batch_size = debug_config->base_batch_for_memory_estimation; - base_batch_size = (user_specified_base_batch_size != base_batch_size) ? 
user_specified_base_batch_size : base_batch_size; - } - auto cloned_model = model->clone(); try { @@ -809,7 +760,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { TransformationsPipeline transformations(config, context); transformations.apply(cloned_model); - program = std::make_shared(cloned_model, engine, config, true); + program = std::make_shared(cloned_model, engine, config); std::pair device_memory_usage = program->get_compiled_program()->get_estimated_device_mem_usage(); if (device_memory_usage.first == static_cast(-1L) && device_memory_usage.second == static_cast(-1L)) { return static_cast(max_batch_size); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 2abc8bb65df6ac..5d9871d1fff29a 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" #include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/split.hpp" @@ -11,7 +12,8 @@ #include "openvino/op/loop.hpp" #include "openvino/op/search_sorted.hpp" #include "openvino/op/stft.hpp" -#include "ov_ops/dynamic_quantize.hpp" +#include "openvino/runtime/properties.hpp" + #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/plugin/program_builder.hpp" @@ -61,7 +63,6 @@ std::string layer_type_name_ID(const std::shared_ptr& op) { } ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, - bool partial_build, std::shared_ptr task_executor, std::shared_ptr compilation_context, bool is_inner_program) @@ -105,20 +106,11 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml"; CustomLayer::LoadFromFile(config_path, m_custom_layers, true); - auto custom_layers_config = m_config.get_property(ov::intel_gpu::config_file); + auto custom_layers_config = m_config.get_config_file(); CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); auto ops = model->get_ordered_ops(); - // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, - // smaller # of kernels are built compared to static models. - // So having smaller batch size is even better for dynamic model as we can do more parallel build. - if (model->is_dynamic()) { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(4)); - } else { - m_config.set_property(ov::intel_gpu::max_kernels_per_batch(8)); - } - - m_program = build(ops, partial_build, is_inner_program); + m_program = build(ops, is_inner_program); } ProgramBuilder::ProgramBuilder(cldnn::engine& engine, const ExecutionConfig& config) @@ -148,24 +140,8 @@ void ProgramBuilder::cleanup_build() { #endif } -std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool partial_build, bool is_inner_program) { +std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool is_inner_program) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::build"); - // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. 
- // So, do not check allow_new_shape_infer for inner program build - for (const auto& op : ops) { - if (requires_new_shape_infer(op)) { - allow_new_shape_infer = true; - break; - } - } - - if (is_inner_program) { - allow_new_shape_infer = (m_config.get_property(ov::intel_gpu::allow_new_shape_infer) || allow_new_shape_infer); - } - - m_config.set_property(ov::intel_gpu::partial_build_program(partial_build)); - m_config.set_property(ov::intel_gpu::optimize_data(true)); - m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer)); prepare_build(); { @@ -211,7 +187,6 @@ bool ProgramBuilder::is_op_supported(const std::shared_ptr& op) { if (!data_types_are_supported(op.get())) return false; - allow_new_shape_infer = requires_new_shape_infer(op); CreateSingleLayerPrimitive(op); cleanup_build(); DisableQueryMode(); @@ -268,7 +243,7 @@ std::vector ProgramBuilder::GetInputInfo(const std::shared_pt // Note: Currently Split/Variadic Split are divided to multiple crops // LSTMCell contains its own body network, and each output has a unique pid // But there is no need to maintain output port index for the next node e.g. Result - bool is_legacy_multiple_outputs = !allow_new_shape_infer + bool is_legacy_multiple_outputs = !use_new_shape_infer() || ov::is_type(prevOp) || ov::is_type(prevOp) || ov::is_type(prevOp); @@ -309,7 +284,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrorigin_op_name = op.get_friendly_name(); prim->origin_op_type_name = op.get_type_name(); - if (this->m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + if (this->m_config.get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { if (auto data_prim = dynamic_cast(prim.get())) { auto rt_info = op.get_rt_info(); @@ -340,7 +315,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrorigin_op_type_name = prim->type_string(); } - if (this->m_config.get_property(ov::enable_profiling) && should_profile) { + if (this->m_config.get_enable_profiling() && should_profile) { profiling_ids.push_back(prim_id); init_profile_info(*prim); } @@ -352,51 +327,6 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptradd_primitive(prim); } -bool ProgramBuilder::requires_new_shape_infer(const std::shared_ptr& op) const { - if (op->is_dynamic()) { - return true; - } - - // HACK: SearchSorted has specific shape requirements. - // E.g. static input shapes: sorted:[8], values:[2,3,4] are prefectly fine, - // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid. - // Similar case for STFT. - if (ov::is_type(op) || ov::is_type(op)) - return true; - - if (ov::is_type(op)) - return true; - - if (ov::is_type(op)) { - const auto body_function = std::static_pointer_cast(op)->get_function(); - if (body_function->is_dynamic()) - return true; - } - - if (ov::is_type(op) || ov::is_type(op)) { - return true; - } - // When input node has dynamic shape with 4 dimension, this function return false - // because op.is_dynamic() which only checks input shapes return false. - // So, in the case of input data, we need to check output shape. 
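For reference, the per-op heuristic removed around this point condenses to the sketch below; ProgramBuilder now relies on use_new_shape_infer(), with the flag presumably resolved during config finalization. The full list of special-cased ops is in the removed lines.

// Condensed form of the deleted ProgramBuilder::requires_new_shape_infer()
bool requires_new_shape_infer(const std::shared_ptr<ov::Node>& op) {
    if (op->is_dynamic())
        return true;
    // ...plus special cases for ops with strict rank rules (SearchSorted, STFT),
    //    Loop/If with dynamic bodies, and a few others listed in the removed code
    for (size_t i = 0; i < op->get_output_size(); i++)
        if (op->get_output_partial_shape(i).is_dynamic() || op->get_output_partial_shape(i).size() > 6)
            return true;
    for (size_t i = 0; i < op->get_input_size(); i++)
        if (op->get_input_partial_shape(i).size() > 6)
            return true;
    return false;
}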
- for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).is_dynamic()) - return true; - } - - for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).size() > 6) - return true; - } - - for (size_t i = 0; i < op->get_input_size(); i++) { - if (op->get_input_partial_shape(i).size() > 6) - return true; - } - - return false; -} - int64_t ProgramBuilder::get_parameter_index(const std::shared_ptr& parameter) const { return m_model->get_parameter_index(parameter); } diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 41b358b6d7faad..1633bb022480a5 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -33,9 +33,8 @@ namespace { inline bool can_use_usm_host(const cldnn::engine& engine, const uint64_t total_output_bytes) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->use_usm_host == 1) { return true; } - GPU_DEBUG_IF(debug_config->use_usm_host == 2) { return false; } + GPU_DEBUG_IF(ov::intel_gpu::ExecutionConfig::get_usm_policy() == 1) { return true; } + GPU_DEBUG_IF(ov::intel_gpu::ExecutionConfig::get_usm_policy() == 2) { return false; } auto can_use_usm = engine.use_unified_shared_memory(); // When output size is large, it is better not to write to usm_host directly @@ -113,20 +112,9 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c : ov::ISyncInferRequest(compiled_model) , m_graph(compiled_model->get_graph(0)) , m_context(std::static_pointer_cast(compiled_model->get_context_impl())) - , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) - , m_enable_profiling(m_graph->get_config().get_property(ov::enable_profiling)) + , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_shape_predictor_settings())) + , m_enable_profiling(m_graph->get_config().get_enable_profiling()) , m_use_external_queue(m_graph->use_external_queue()) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { - auto& mem_preallocation_params = debug_config->mem_preallocation_params; - m_shape_predictor.reset( - new cldnn::ShapePredictor(&m_graph->get_engine(), - mem_preallocation_params.next_iters_preallocation_count, - mem_preallocation_params.max_per_iter_size, - mem_preallocation_params.max_per_dim_diff, - mem_preallocation_params.buffers_preallocation_ratio)); - } - init_mappings(); allocate_inputs(); allocate_outputs(); @@ -317,15 +305,16 @@ void SyncInferRequest::enqueue() { m_internal_outputs = network->execute(dependencies); auto network_enqueue_end = std::chrono::high_resolution_clock::now(); + [[maybe_unused]] const auto& config = network->get_config(); + // If dump layers path is set, only runs first inference. - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0 && debug_config->dump_iteration.empty()) { + GPU_DEBUG_IF(!config.get_dump_tensors_path().empty() && config.get_dump_iterations().empty()) { GPU_DEBUG_INFO << "Only run first inference to dump layers." 
<< std::endl; exit(0); } auto enqueue_end = std::chrono::high_resolution_clock::now(); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) { + GPU_DEBUG_IF(config.get_host_time_profiling()) { network_enqueue_time = std::chrono::duration_cast(network_enqueue_end - network_enqueue_start).count(); const uint64_t total_time = std::chrono::duration_cast(enqueue_end - enqueue_start).count(); @@ -422,7 +411,7 @@ void SyncInferRequest::wait() { auto mem_shape = output_layout.get_shape(); // In case of old shape infer we need to shrink out tensor shape to avoid redudnant dimensions that occur due to rank extension // For new shape infer this shouldn't happen, thus remove that WA once we migrate to ngraph-based shape infer for all cases - if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!m_graph->get_config().get_allow_new_shape_infer()) { OPENVINO_ASSERT(port.get_partial_shape().is_static(), "[GPU] Unexpected dynamic shape for legacy shape inference"); OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor"); mem_shape = port.get_shape(); @@ -503,7 +492,7 @@ void SyncInferRequest::wait() { } auto wait_end = std::chrono::high_resolution_clock::now(); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->host_time_profiling) { + GPU_DEBUG_IF(m_graph->get_config().get_host_time_profiling()) { auto& exec_time_info = m_graph->host_exec_times.back(); const uint64_t total_time = std::chrono::duration_cast(wait_end - wait_start).count(); @@ -895,7 +884,7 @@ std::vector SyncInferRequest::prepare_input(const std::string auto memory = device_tensor->get_memory(); // WA to extend shape to ranks expected by legacy shape infer. Remove after full migration to new shape infer - if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + if (!m_graph->get_config().get_allow_new_shape_infer()) { auto new_layout = memory->get_layout(); new_layout.set_partial_shape(m_graph->get_input_layouts().at(input_idx).get_shape()); memory = engine.reinterpret_buffer(*memory, new_layout); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp index 6c0d50be96e7ae..66fe9d9c9e0fc6 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp @@ -16,9 +16,8 @@ namespace ov::intel_gpu { -DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size) +DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric) : ov::pass::MatcherPass() { - GPU_DEBUG_GET_INSTANCE(debug_config); using namespace ov::pass::pattern; using QuantizationType = ov::op::internal::DynamicQuantize::QuantizationType; @@ -55,9 +54,7 @@ DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size config.scale_dt = element::f16; config.group_sizes = shape_group_size; - // AZP does not support grouped size dyn-quan - // XXX: This is currently wrapped as GPU_DEBUG_IF as dynamic_quantize_asym is not exposed through public API. 
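The matching registration site in transformations_pipeline.cpp (further below) now passes the asymmetric flag into the pass explicitly instead of the pass consulting the global debug config. Roughly as sketched here, with the register_pass template argument reconstructed for readability and therefore to be treated as illustrative:

bool asymmetric_dyn_quant = GPU_DEBUG_VALUE_OR(config.get_asym_dynamic_quantization(), false);
auto dynamic_quantization_group_size = config.get_dynamic_quantization_group_size();

// asymmetric (AZP) mode remains limited to per-token quantization, i.e. group_size == UINT64_MAX
manager.register_pass<ov::intel_gpu::DynamicQuantizeFullyConnected>(dynamic_quantization_group_size,
                                                                    asymmetric_dyn_quant);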
- GPU_DEBUG_IF(debug_config->dynamic_quantize_asym && group_size == UINT64_MAX) { + if (asymmetric && group_size == UINT64_MAX) { config.quantization_type = QuantizationType::Asymmetric; config.quantization_dt = element::u8; config.zp_dt = element::u8; // it supports u8 only now diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp index 85d32fbfdcea84..f8b13685389f1d 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.hpp @@ -11,7 +11,7 @@ namespace ov::intel_gpu { class DynamicQuantizeFullyConnected: public ov::pass::MatcherPass { public: OPENVINO_MATCHER_PASS_RTTI("DynamicQuantizeFullyConnected"); - DynamicQuantizeFullyConnected(uint64_t group_size); + DynamicQuantizeFullyConnected(uint64_t group_size, bool asymmetric = false); }; } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp index e2090a4d2b5eb8..29e82e4acac904 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp @@ -20,7 +20,6 @@ namespace ov::intel_gpu { FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion(bool fuse_mlp_swiglu) { using namespace ov::pass::pattern; - GPU_DEBUG_GET_INSTANCE(debug_config); // Three FCs connected to the same input size_t min_num_fcs_to_fuse = 3; // Note: diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index f036afc0cd59ad..0d6d83f2f2982e 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -289,7 +289,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const ov::element::TypeVector supported_woq_types = {ov::element::u8, ov::element::i8, ov::element::u4, ov::element::i4}; bool enableInt8; ov::element::Type infer_precision = ov::element::undefined; - bool unroll_loop = config.get_property(ov::intel_gpu::enable_loop_unrolling); + bool unroll_loop = config.get_enable_loop_unrolling(); { ov::pass::Manager manager("Plugin:GPU"); auto pass_config = manager.get_pass_config(); @@ -302,7 +302,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); - enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && is_model_quantized; + enableInt8 = config.get_enable_lp_transformations() && is_model_quantized; manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }, @@ -335,7 +335,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { }; // Add conversion from FP data types to infer precision if it's specified - infer_precision = config.get_property(ov::hint::inference_precision); + infer_precision = config.get_inference_precision(); if (infer_precision != ov::element::undefined) { if (!fp_precision_supported(infer_precision)) infer_precision = fallback_precision; @@ -412,11 +412,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); pass_config->set_callback([&](const std::shared_ptr 
node){ - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->enable_sdpa != -1) { - GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1); - } - - if (!config.get_property(ov::intel_gpu::hint::enable_sdpa_optimization)) + if (!config.get_enable_sdpa_optimization()) return false; auto sdpa = ov::as_type_ptr(node); @@ -926,7 +922,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { pass_config->disable(); pass_config->disable(); - float activations_scale_factor = config.get_property(ov::hint::activations_scale_factor); + float activations_scale_factor = config.get_activations_scale_factor(); if (activations_scale_factor > 0.f && infer_precision == ov::element::f16 && !enableInt8) { using namespace ov::pass::low_precision; @@ -988,13 +984,9 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - bool disable_horizontal_fc_fusion = false; - bool disable_fc_swiglu_fusion = false; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_horizontal_fc_fusion == 1) - disable_horizontal_fc_fusion = true; - GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) - disable_fc_swiglu_fusion = true; + bool disable_horizontal_fc_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_horizontal_fc_fusion(), false); + bool disable_fc_swiglu_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_fc_swiglu_fusion(), false); + // mlp fusion is only supported for cldnn on high performant GPUis bool fuse_mlp_swiglu = !device_info.supports_immad && device_info.execution_units_count >= 128 && @@ -1032,7 +1024,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); - auto kv_cache_compression_dt = config.get_property(ov::hint::kv_cache_precision); + auto kv_cache_compression_dt = config.get_kv_cache_precision(); manager.register_pass(kv_cache_compression_dt, device_info.supports_immad); manager.register_pass(); @@ -1052,7 +1044,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); if (device_info.supports_immad) { - auto dynamic_quantization_group_size = config.get_property(ov::hint::dynamic_quantization_group_size); + bool asymmetric_dyn_quant = GPU_DEBUG_VALUE_OR(config.get_asym_dynamic_quantization(), false); + auto dynamic_quantization_group_size = config.get_dynamic_quantization_group_size(); pass_config->set_callback([=](const_node_ptr& root) -> bool { for (size_t i = 0 ; i < root->get_input_node_shared_ptr(0)->get_output_size(); ++i) { if (root->get_input_node_shared_ptr(0)->get_output_element_type(i) == ov::element::Type_t::f32) { @@ -1070,14 +1063,14 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // AZP does not support 8bit weight // XXX: This is currently wrapped as GPU_DEBUG_IF as dynamic_quantize_asym is not exposed through public API. 
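GPU_DEBUG_VALUE_OR appears throughout these hunks (and in graph.cpp above); its definition is not shown in this diff. Given the ENABLE_DEBUG_CAPS compile definitions added in the CMake changes, it presumably evaluates to the configured value in debug-caps builds and to the supplied fallback otherwise. A hypothetical definition, for illustration only:

#ifdef ENABLE_DEBUG_CAPS
#    define GPU_DEBUG_VALUE_OR(debug_value, default_value) (debug_value)
#else
#    define GPU_DEBUG_VALUE_OR(debug_value, default_value) (default_value)
#endif

// so in a build without debug caps the fusions cannot be disabled through these options:
bool disable_horizontal_fc_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_horizontal_fc_fusion(), false);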
- GPU_DEBUG_IF(debug_config->dynamic_quantize_asym + GPU_DEBUG_IF(asymmetric_dyn_quant && (root->get_input_element_type(1) == ov::element::i8 || root->get_input_element_type(1) == ov::element::u8)) { GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: asym quantization does not support 8bit weight" << std::endl; return true; } // AZP does not support grouped size dyn-quan - GPU_DEBUG_IF(debug_config->dynamic_quantize_asym && (dynamic_quantization_group_size != UINT64_MAX)) { + GPU_DEBUG_IF(asymmetric_dyn_quant && (dynamic_quantization_group_size != UINT64_MAX)) { GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: asym quantization does not support grouped quantization" << " ('DynamicQuantizeAsym' is enabled with grouped size dyn-quan)" << std::endl; return true; @@ -1094,7 +1087,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { return false; }); - manager.register_pass(dynamic_quantization_group_size); + manager.register_pass(dynamic_quantization_group_size, asymmetric_dyn_quant); } // Remove Pad in front of MaxPool if both the pads_begin and pads_end are zero. @@ -1103,7 +1096,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // This is supposed to be the last pass to ensure that we don't have name collisions until // GPU plugin stops using friendly names for program creation manager.register_pass(true); - GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->verbose >= 1) { + GPU_DEBUG_IF(config.get_verbose() >= 1) { manager.register_pass(); } manager.run_passes(func); diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index 550d740f772a16..cb36a8e0349457 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -3,604 +3,23 @@ // #include "intel_gpu/runtime/debug_configuration.hpp" -#include -#include +#include "intel_gpu/runtime/execution_config.hpp" #include -#include -#include -#include -#include #include -namespace cldnn { -const char *debug_configuration::prefix = "GPU_Debug: "; -std::ostream* debug_configuration::verbose_stream; - -// Default policy is that dump_configuration will override other configuration from IE. 
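The block removed below is the per-field environment variable machinery of the old debug_configuration; the same knobs are expected to reach the plugin through the ExecutionConfig options introduced by this PR. For reference, the lookup behavior of the deleted get_*_debug_env_var helpers condenses to roughly the following, with the helper name and candidate list spelled out here only for illustration:

#include <cstdlib>
#include <sstream>
#include <string>
#include <vector>

// Each option was probed under several spellings, ordered from lowest to highest priority,
// e.g. {"OV_Verbose", "OV_VERBOSE", "OV_GPU_Verbose", "OV_GPU_VERBOSE"}; a later match
// overrides an earlier one, so OV_GPU_Verbose=2 beats OV_Verbose=1.
template <typename T>
void read_env_option(const std::vector<std::string>& candidate_names, T& value) {
    for (const auto& name : candidate_names) {
        if (const char* env = std::getenv(name.c_str())) {
            std::istringstream ss(env);
            ss >> value;   // last (highest-priority) match wins
        }
    }
}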
- -#ifdef GPU_DEBUG_CONFIG - -#define GPU_DEBUG_COUT_ std::cout << cldnn::debug_configuration::prefix - -template -void print_option(std::string option_name, T option_value) { - GPU_DEBUG_COUT_ << "Config " << option_name << " = " << option_value << std::endl; -} - -static std::string to_upper_case(const std::string& var) { - std::stringstream s; - - for (size_t i = 0; i < var.size(); i++) { - if (std::isupper(var[i])) { - if (i != 0) { - s << "_"; - } - s << var[i]; - } else { - s << static_cast(std::toupper(var[i])); - } - } - - return s.str(); -} - -static std::vector get_possible_option_names(const std::string& var, std::vector allowed_option_prefixes) { - std::vector result; - - for (auto& prefix : allowed_option_prefixes) { - result.push_back(prefix + var); - result.push_back(prefix + to_upper_case(var)); - } - - return result; -} - -template -T convert_to(const std::string &str) { - std::istringstream ss(str); - T res; - ss >> res; - return res; -} - -template <> -std::string convert_to(const std::string &str) { - return str; -} - -static std::set parse_int_set(std::string& str) { - std::set int_array; - // eliminate '"' from string to avoid parsing error - str.erase(std::remove_if(str.begin(), str.end(), [](char c) { - return c == '\"'; }), str.end()); - if (str.size() > 0) { - str = " " + str + " "; - std::istringstream ss(str); - std::string token; - while (ss >> token) { - try { - int_array.insert(static_cast(std::stol(token))); - } catch(const std::exception &) { - int_array.clear(); - GPU_DEBUG_COUT << "Argument was ignored. It cannot be parsed to integer array: " << str << std::endl; - break; - } - } - } - return int_array; -} - -template -void get_debug_env_var(const std::string &var, T &val, std::vector allowed_option_prefixes) { - bool found = false; - for (auto o : get_possible_option_names(var, allowed_option_prefixes)) { - if (const auto env_var = std::getenv(o.c_str())) { - val = convert_to(env_var); - found = true; - } - } - - if (found) { - print_option(var, val); - } -} - -template -void get_gpu_debug_env_var(const std::string &var, T &val) { - return get_debug_env_var(var, val, {"OV_GPU_"}); -} - -template -void get_common_debug_env_var(const std::string &var, T &val) { - // The list below should be prioritized from lowest to highest prefix priority - // If an option is set several times with different prefixes, version with the highest priority will be actually used. 
- // This may allow to enable global option with some value and override this value for GPU plugin - // For example: OV_GPU_Verbose=2 OV_Verbose=1 ./my_app => this->verbose == 2 - // In that case we enable Verbose (with level = 1) for all OV components that support this option, but for GPU plugin we increase verbose level to 2 - std::vector allowed_option_prefixes = { - "OV_", - "OV_GPU_" - }; - - return get_debug_env_var(var, val, allowed_option_prefixes); -} - -static void print_help_messages() { - std::vector> message_list; - message_list.emplace_back("OV_GPU_Help", "Print help messages"); - message_list.emplace_back("OV_GPU_Verbose", "Verbose execution"); - message_list.emplace_back("OV_GPU_VerboseColor", "Print verbose color"); - message_list.emplace_back("OV_GPU_VerboseFile", "Filename to dump verbose log"); - message_list.emplace_back("OV_GPU_ListLayers", "Print layers names"); - message_list.emplace_back("OV_GPU_PrintMultiKernelPerf", "Print execution time of each kernel in multi-kernel primitimive"); - message_list.emplace_back("OV_GPU_PrintInputDataShapes", "Print data_shapes of input layers for benchmark_app."); - message_list.emplace_back("OV_GPU_DisableUsm", "Disable usm usage"); - message_list.emplace_back("OV_GPU_DisableOnednn", "Disable onednn for discrete GPU (no effect for integrated GPU)"); - message_list.emplace_back("OV_GPU_DisableOnednnOptPostOps", "Disable onednn optimize post operators"); - message_list.emplace_back("OV_GPU_DumpProfilingData", "Enables dump of extended profiling information to specified directory." - " Please use OV_GPU_DumpProfilingDataPerIter=1 env variable to collect performance per iteration." - " Note: Performance impact may be significant as this option enforces host side sync after each primitive"); - message_list.emplace_back("OV_GPU_DumpProfilingDataIteration", "Enable collecting profiling data only at iterations with requested range. " - "For example for dump profiling data only when iteration is from 10 to 20, you can use " - "OV_GPU_DumpProfilingDataIteration='10..20'. Additionally, you can dump profiling data only " - "from one specific iteration by giving the same values for the start and end, and the open " - "ended range is also available by range from given start to the last iteration as -1. e.g. " - "OV_GPU_DumpProfilingDataIteration='10..-1'"); - message_list.emplace_back("OV_GPU_HostTimeProfiling", "Enable collecting of model enqueue time spent on the host"); - message_list.emplace_back("OV_GPU_DumpGraphs", "1) dump ngraph before and after transformation. 2) dump graph in model compiling." - "3) dump graph in execution."); - message_list.emplace_back("OV_GPU_DumpSources", "Dump opencl sources"); - message_list.emplace_back("OV_GPU_DumpLayersPath", "Enable dumping intermediate buffers and set the dest path"); - message_list.emplace_back("OV_GPU_DumpLayers", "Dump intermediate buffers of specified layers only, separated by space." - " Support case-insensitive and regular expression. 
For example .*conv.*"); - message_list.emplace_back("OV_GPU_DumpLayersResult", "Dump output buffers of result layers only"); - message_list.emplace_back("OV_GPU_DumpLayersInput", "Dump intermediate buffers of input layers only"); - message_list.emplace_back("OV_GPU_DumpLayersDstOnly", "Dump only output of layers"); - message_list.emplace_back("OV_GPU_DumpLayersLimitBatch", "Limit the size of batch to dump"); - message_list.emplace_back("OV_GPU_DumpLayersRaw", "If true, dump data is stored in raw memory format."); - message_list.emplace_back("OV_GPU_DumpLayersRawBinary", "If true, dump data is stored in binary format."); - message_list.emplace_back("OV_GPU_DryRunPath", "Dry run and serialize execution graph into the specified path"); - message_list.emplace_back("OV_GPU_BaseBatchForMemEstimation", "Base batch size to be used in memory estimation"); - message_list.emplace_back("OV_GPU_AfterProc", "Run inference after the specified process PIDs are finished, separated by space." - " Supported on only on linux."); - message_list.emplace_back("OV_GPU_SerialCompile", "Serialize creating primitives and compiling kernels"); - message_list.emplace_back("OV_GPU_ForceImplTypes", "Force implementation type of a target primitive or layer. [primitive or layer_name]:[impl_type]" - " For example fc:onednn gemm:onednn reduce:ocl do:cpu" - " For primitives fc, gemm, do, reduce, concat are supported. Separated by space."); - message_list.emplace_back("OV_GPU_MaxKernelsPerBatch", "Maximum number of kernels in a batch during compiling kernels"); - message_list.emplace_back("OV_GPU_ImplsCacheCapacity", "The maximum number of entries in the kernel impl cache"); - message_list.emplace_back("OV_GPU_DisableAsyncCompilation", "Disable async compilation"); - message_list.emplace_back("OV_GPU_DisableWinogradConv", "Disable Winograd convolution"); - message_list.emplace_back("OV_GPU_DisableDynamicImpl", "Disable dynamic implementation"); - message_list.emplace_back("OV_GPU_DisableRuntimeBufferFusing", "Disable runtime buffer fusing"); - message_list.emplace_back("OV_GPU_DisableMemoryReuse", "Disable memory reuse"); - message_list.emplace_back("OV_GPU_EnableSDPA", "This allows the enforcement of SDPA decomposition logic: 0 completely disables SDPA kernel usage, " - "and 1 enables it for all the cases."); - message_list.emplace_back("OV_GPU_DumpMemoryPool", "Dump memory pool contents of each iteration"); - message_list.emplace_back("OV_GPU_DumpMemoryPoolIters", "List of iterations to dump memory pool status, separated by space."); - message_list.emplace_back("OV_GPU_DumpMemoryPoolPath", "Enable dumping memory pool status to csv file and set the dest path"); - message_list.emplace_back("OV_GPU_DisableBuildTimeWeightReorderForDynamicNodes", "Disable build time weight reorder for dynmaic nodes."); - message_list.emplace_back("OV_GPU_DisableRuntimeSkipReorder", "Disable runtime skip reorder."); - message_list.emplace_back("OV_GPU_DisablePrimitiveFusing", "Disable primitive fusing"); - message_list.emplace_back("OV_GPU_DisableFakeAlignment", "Disable fake alignment"); - message_list.emplace_back("OV_GPU_UseUsmHost", "Set explicit policy for usm host usage for network input/output. " - "0: default, 1: use usm_host, 2: do not use usm_host"); - message_list.emplace_back("OV_GPU_KVCacheCompression", "Enable/Disable KV-cache compression"); - message_list.emplace_back("OV_GPU_DynamicQuantizeLayersWithoutOnednn", "Enable Dynamic quantization for specified Fully connected layers only, " - "separated by space. 
Support case-insensitive and regular expression. For example .*fully_connected.*"); - message_list.emplace_back("OV_GPU_DynamicQuantizeGroupSize", "Specify a group size of dynamic quantization to enable " - "dynamic quantization for Fully-connected primitive."); - message_list.emplace_back("OV_GPU_DynamicQuantizeAsym", "Enable asymmetric dynamic quantization when set as 1."); - message_list.emplace_back("OV_GPU_DisableHorizontalFCFusion", "Disable horizontal fc fusion"); - message_list.emplace_back("OV_GPU_DisableFCSwigluFusion", "Disable fc + swiglu fusion"); - message_list.emplace_back("OV_GPU_DumpIteration", "Dump n-th execution of network, separated by space."); - message_list.emplace_back("OV_GPU_MemPreallocationOptions", "Controls buffer pre-allocation feature. Expects 4 values separated by space in " - "the following order: number of iterations for pre-allocation(int), max size of single iteration in bytes(int), " - "max per-dim allowed diff(int), unconditional buffers preallocation ratio(float). For example for disabling memory " - "preallocation at all, you can use OV_GPU_MemPreallocationOptions='0 0 0 1.0'"); - message_list.emplace_back("OV_GPU_LoadDumpRawBinary", - "Specified layers which are loading dumped binary files generated by OV_GPU_DumpLayersRawBinary debug-config." - " Currently, other layers except input-layer('parameter' type) are loading binaries for only input." - " Different input or output tensors are seperated by ','. Different layers are separated by space. For example, " - " \"[input_layer_name1]:[binary_dumped_file1],[binary_dump_file2] [input_layer_name2]:[binary_dump_1],[binary_dump_2]\""); - - auto max_name_length_item = std::max_element(message_list.begin(), message_list.end(), - [](std::pair& a, std::pair& b){ - return a.first.size() < b.first.size(); - }); - int name_width = static_cast(max_name_length_item->first.size()) + 2; - - GPU_DEBUG_COUT_ << "Supported environment variables for debugging" << std::endl; - for (auto& p : message_list) { - GPU_DEBUG_COUT_ << " - " << std::left << std::setw(name_width) << p.first + " " << p.second << std::endl; - } -} - -#endif - -debug_configuration::debug_configuration() - : help(0) - , verbose(0) - , verbose_color(0) - , verbose_file() - , list_layers(0) - , print_multi_kernel_perf(0) - , print_input_data_shapes(0) - , disable_usm(0) - , disable_onednn(0) - , disable_onednn_opt_post_ops(0) - , dump_profiling_data(std::string("")) - , dump_profiling_data_per_iter(0) - , host_time_profiling(0) - , dump_graphs(std::string()) - , dump_sources(std::string()) - , dump_layers_path(std::string()) - , dry_run_path(std::string()) - , dump_layers_dst_only(0) - , dump_layers_result(0) - , dump_layers_input(0) - , dump_layers_limit_batch(std::numeric_limits::max()) - , dump_layers_raw(0) - , dump_layers_binary(0) - , dump_memory_pool(0) - , dump_memory_pool_path(std::string()) - , base_batch_for_memory_estimation(-1) - , serialize_compile(0) - , max_kernels_per_batch(0) - , impls_cache_capacity(-1) - , enable_sdpa(-1) - , disable_async_compilation(0) - , disable_winograd_conv(0) - , disable_dynamic_impl(0) - , disable_runtime_buffer_fusing(0) - , disable_memory_reuse(0) - , disable_build_time_weight_reorder_for_dynamic_nodes(0) - , disable_runtime_skip_reorder(0) - , disable_primitive_fusing(0) - , disable_fake_alignment(0) - , use_usm_host(0) - , use_kv_cache_compression(-1) - , dynamic_quantize_group_size(DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) - , dynamic_quantize_asym(0) - , disable_horizontal_fc_fusion(0) - , 
disable_fc_swiglu_fusion(0) { +namespace ov::intel_gpu { +std::ostream& get_verbose_stream() { #ifdef GPU_DEBUG_CONFIG - get_gpu_debug_env_var("Help", help); - get_common_debug_env_var("Verbose", verbose); - get_gpu_debug_env_var("VerboseColor", verbose_color); - get_gpu_debug_env_var("VerboseFile", verbose_file); - get_gpu_debug_env_var("ListLayers", list_layers); - get_gpu_debug_env_var("PrintMultiKernelPerf", print_multi_kernel_perf); - get_gpu_debug_env_var("PrintInputDataShapes", print_input_data_shapes); - get_gpu_debug_env_var("DisableUsm", disable_usm); - get_gpu_debug_env_var("DumpGraphs", dump_graphs); - get_gpu_debug_env_var("DumpSources", dump_sources); - get_gpu_debug_env_var("DumpLayersPath", dump_layers_path); - get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch); - get_gpu_debug_env_var("DumpLayersRaw", dump_layers_raw); - get_gpu_debug_env_var("DumpLayersRawBinary", dump_layers_binary); - get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only); - get_gpu_debug_env_var("DumpLayersResult", dump_layers_result); - get_gpu_debug_env_var("DumpLayersInput", dump_layers_input); - get_gpu_debug_env_var("DisableOnednn", disable_onednn); - get_gpu_debug_env_var("DisableOnednnOptPostOps", disable_onednn_opt_post_ops); - get_gpu_debug_env_var("DumpProfilingData", dump_profiling_data); - get_gpu_debug_env_var("DumpProfilingDataPerIter", dump_profiling_data_per_iter); - get_gpu_debug_env_var("HostTimeProfiling", host_time_profiling); - std::string dump_prof_data_iter_str; - get_gpu_debug_env_var("DumpProfilingDataIteration", dump_prof_data_iter_str); - get_gpu_debug_env_var("DryRunPath", dry_run_path); - get_gpu_debug_env_var("DumpMemoryPool", dump_memory_pool); - std::string dump_runtime_memory_pool_iters_str; - get_gpu_debug_env_var("DumpMemoryPoolIters", dump_runtime_memory_pool_iters_str); - get_gpu_debug_env_var("DumpMemoryPoolPath", dump_memory_pool_path); - get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation); - std::string dump_layers_str; - get_gpu_debug_env_var("DumpLayers", dump_layers_str); - std::string after_proc_str; - get_gpu_debug_env_var("AfterProc", after_proc_str); - get_gpu_debug_env_var("SerialCompile", serialize_compile); - std::string forced_impl_types_str; - get_gpu_debug_env_var("ForceImplTypes", forced_impl_types_str); - get_gpu_debug_env_var("MaxKernelsPerBatch", max_kernels_per_batch); - get_gpu_debug_env_var("ImplsCacheCapacity", impls_cache_capacity); - get_gpu_debug_env_var("EnableSDPA", enable_sdpa); - get_gpu_debug_env_var("DisableAsyncCompilation", disable_async_compilation); - get_gpu_debug_env_var("DisableWinogradConv", disable_winograd_conv); - get_gpu_debug_env_var("DisableDynamicImpl", disable_dynamic_impl); - get_gpu_debug_env_var("DisableRuntimeBufferFusing", disable_runtime_buffer_fusing); - get_gpu_debug_env_var("DisableMemoryReuse", disable_memory_reuse); - get_gpu_debug_env_var("DisableBuildTimeWeightReorderForDynamicNodes", disable_build_time_weight_reorder_for_dynamic_nodes); - get_gpu_debug_env_var("DisableRuntimeSkipReorder", disable_runtime_skip_reorder); - get_gpu_debug_env_var("DisablePrimitiveFusing", disable_primitive_fusing); - get_gpu_debug_env_var("DisableFakeAlignment", disable_fake_alignment); - get_gpu_debug_env_var("UseUsmHost", use_usm_host); - get_gpu_debug_env_var("KVCacheCompression", use_kv_cache_compression); - get_gpu_debug_env_var("DynamicQuantizeGroupSize", dynamic_quantize_group_size); - get_gpu_debug_env_var("DynamicQuantizeAsym", dynamic_quantize_asym); - 
get_gpu_debug_env_var("DisableHorizontalFCFusion", disable_horizontal_fc_fusion); - get_gpu_debug_env_var("DisableFCSwigluFusion", disable_fc_swiglu_fusion); - std::string dump_iteration_str; - get_gpu_debug_env_var("DumpIteration", dump_iteration_str); - std::string mem_preallocation_params_str; - get_gpu_debug_env_var("MemPreallocationOptions", mem_preallocation_params_str); - std::string load_dump_raw_bin_str; - get_gpu_debug_env_var("LoadDumpRawBinary", load_dump_raw_bin_str); - std::string dynamic_quantize_layers_without_onednn_str; - get_gpu_debug_env_var("DynamicQuantizeLayersWithoutOnednn", dynamic_quantize_layers_without_onednn_str); - - if (help > 0) { - print_help_messages(); - exit(0); - } - - if (verbose_file.length() > 0) { + if (ExecutionConfig::get_log_to_file().length() > 0) { static std::ofstream fout; - fout.open(verbose_file); - verbose_stream = &fout; + if (!fout.is_open()) + fout.open(ExecutionConfig::get_log_to_file()); + return fout; } else { - verbose_stream = &std::cout; - } - - if (dump_prof_data_iter_str.length() > 0) { - dump_prof_data_iter_str = " " + dump_prof_data_iter_str + " "; - std::istringstream iss(dump_prof_data_iter_str); - char dot; - int64_t start, end; - bool is_valid_range = false; - if (iss >> start >> dot >> dot >> end) { - if (start <= end || end == -1) { - try { - is_valid_range = true; - dump_prof_data_iter_params.start = start; - dump_prof_data_iter_params.end = end; - } catch(const std::exception &) { - is_valid_range = false; - } - } - } - if (!is_valid_range) - std::cout << "OV_GPU_DumpProfilingDataIteration was ignored. It cannot be parsed to valid iteration range." << std::endl; - dump_prof_data_iter_params.is_enabled = is_valid_range; - } - - if (dump_layers_str.length() > 0) { - // Insert delimiter for easier parsing when used - dump_layers_str = " " + dump_layers_str + " "; - std::stringstream ss(dump_layers_str); - std::string layer; - while (ss >> layer) { - dump_layers.push_back(layer); - } - } - - if (dynamic_quantize_layers_without_onednn_str.length() > 0) { - // Insert delimiter for easier parsing when used - dynamic_quantize_layers_without_onednn_str = " " + dynamic_quantize_layers_without_onednn_str + " "; - std::stringstream ss(dynamic_quantize_layers_without_onednn_str); - std::string layer; - while (ss >> layer) { - dynamic_quantize_layers_without_onednn.push_back(layer); - } - } - - if (forced_impl_types_str.length() > 0) { - forced_impl_types_str = " " + forced_impl_types_str + " "; - std::stringstream ss(forced_impl_types_str); - std::string type; - while (ss >> type) { - forced_impl_types.push_back(type); - } - } - - // Parsing for loading binary files - if (load_dump_raw_bin_str.length() > 0) { - load_dump_raw_bin_str = " " + load_dump_raw_bin_str + " "; - std::stringstream ss(load_dump_raw_bin_str); - std::string type; - while (ss >> type) { - load_layers_raw_dump.push_back(type); - } - } - - if (dump_iteration_str.size() > 0) { - dump_iteration = parse_int_set(dump_iteration_str); - } - - if (dump_runtime_memory_pool_iters_str.size() > 0) { - dump_memory_pool_iters = parse_int_set(dump_runtime_memory_pool_iters_str); - } - - if (mem_preallocation_params_str.size() > 0) { - mem_preallocation_params_str = " " + mem_preallocation_params_str + " "; - std::istringstream ss(mem_preallocation_params_str); - std::vector params; - std::string param; - while (ss >> param) - params.push_back(param); - - bool correct_params = params.size() == 4; - if (correct_params) { - try { - 
mem_preallocation_params.next_iters_preallocation_count = std::stol(params[0]); - mem_preallocation_params.max_per_iter_size = std::stol(params[1]); - mem_preallocation_params.max_per_dim_diff = std::stol(params[2]); - mem_preallocation_params.buffers_preallocation_ratio = std::stof(params[3]); - } catch(const std::exception &) { - correct_params = false; - } - } - - if (!correct_params) - GPU_DEBUG_COUT_ << "OV_GPU_MemPreallocationOptions were ignored, because they cannot be parsed.\n"; - - mem_preallocation_params.is_initialized = correct_params; - } - - if (after_proc_str.length() > 0) { -#ifdef _WIN32 - GPU_DEBUG_COUT_ << "Warning: OV_GPU_AfterProc is supported only on linux" << std::endl; -#else - after_proc_str = " " + after_proc_str + " "; // Insert delimiter for easier parsing when used - std::stringstream ss(after_proc_str); - std::string pid; - while (ss >> pid) { - after_proc.push_back(pid); - } -#endif - } -#endif -} - -const debug_configuration *debug_configuration::get_instance() { - static std::unique_ptr instance(nullptr); -#ifdef GPU_DEBUG_CONFIG - static std::mutex _m; - std::lock_guard lock(_m); - if (nullptr == instance) - instance.reset(new debug_configuration()); - return instance.get(); -#else - return nullptr; -#endif -} - -bool debug_configuration::is_target_dump_prof_data_iteration(int64_t iteration) const { -#ifdef GPU_DEBUG_CONFIG - if (iteration < 0) - return true; - - if (dump_prof_data_iter_params.start > iteration) - return false; - - if (dump_prof_data_iter_params.start <= dump_prof_data_iter_params.end && - dump_prof_data_iter_params.end < iteration) - return false; - - return true; -#else - return false; -#endif -} - -std::vector debug_configuration::get_filenames_for_matched_layer_loading_binaries(const std::string& id) const { - std::vector file_names; -#ifdef GPU_DEBUG_CONFIG - if (load_layers_raw_dump.empty()) - return file_names; - - for (const auto& load_layer : load_layers_raw_dump) { - size_t file = load_layer.rfind(":"); - if (file != std::string::npos) { - if (id == load_layer.substr(0, file)) { - auto file_name_str = load_layer.substr(file + 1); - size_t head = 0; - size_t found = 0; - do { - found = file_name_str.find(",", head); - if (found != std::string::npos) - file_names.push_back(file_name_str.substr(head, (found - head))); - else - file_names.push_back(file_name_str.substr(head)); - - head = found+1; - GPU_DEBUG_LOG << " Layer name loading raw dump : " << load_layer.substr(0, file) << " / the dump file : " - << file_names.back() << std::endl; - } while (found != std::string::npos); - - return file_names; - } - } - } -#endif - - return file_names; -} - -std::string debug_configuration::get_matched_from_filelist(const std::vector& file_names, std::string pattern) const { -#ifdef GPU_DEBUG_CONFIG - for (const auto& file : file_names) { - auto found = file.find(pattern); - if (found != std::string::npos) { - return file; - } + return std::cout; } -#endif - return std::string(); -} - -std::string debug_configuration::get_name_for_dump(const std::string& file_name) const { - std::string filename = file_name; -#ifdef GPU_DEBUG_CONFIG - std::replace(filename.begin(), filename.end(), '\\', '_'); - std::replace(filename.begin(), filename.end(), '/', '_'); - std::replace(filename.begin(), filename.end(), ' ', '_'); - std::replace(filename.begin(), filename.end(), ':', '_'); -#endif - return filename; -} - -bool debug_configuration::is_layer_name_matched(const std::string& layer_name, const std::string& pattern) const { -#ifdef GPU_DEBUG_CONFIG - 
auto upper_layer_name = std::string(layer_name.length(), '\0'); - std::transform(layer_name.begin(), layer_name.end(), upper_layer_name.begin(), ::toupper); - auto upper_pattern = std::string(pattern.length(), '\0'); - std::transform(pattern.begin(), pattern.end(), upper_pattern.begin(), ::toupper); - - // Check pattern from exec_graph - size_t pos = upper_layer_name.find(':'); - auto upper_exec_graph_name = upper_layer_name.substr(pos + 1, upper_layer_name.size()); - if (upper_exec_graph_name.compare(upper_pattern) == 0) { - return true; - } - - // Check pattern with regular expression - std::regex re(upper_pattern); - return std::regex_match(upper_layer_name, re); -#else - return false; -#endif -} - -bool debug_configuration::is_layer_for_dumping(const std::string& layer_name, bool is_output, bool is_input) const { -#ifdef GPU_DEBUG_CONFIG - // Dump result layer - if (is_output == true && dump_layers_result == 1 && - (layer_name.find("constant:") == std::string::npos)) - return true; - // Dump all layers - if (dump_layers.empty() && dump_layers_result == 0 && dump_layers_input == 0) - return true; - - // Dump input layers - size_t pos = layer_name.find(':'); - auto type = layer_name.substr(0, pos); - if (is_input == true && type == "parameter" && dump_layers_input == 1) - return true; - - auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){ - return is_layer_name_matched(layer_name, dl); - }); - return (iter != dump_layers.end()); -#else - return false; -#endif -} - -bool debug_configuration::is_target_iteration(int64_t iteration) const { -#ifdef GPU_DEBUG_CONFIG - if (iteration < 0) - return true; - - if (dump_iteration.empty()) - return true; - - if (dump_iteration.find(iteration) == std::end(dump_iteration)) - return false; - - return true; #else - return false; + return std::cout; #endif } -} // namespace cldnn +} // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/device.cpp b/src/plugins/intel_gpu/src/runtime/device.cpp index fa027ebe9e2e33..428d18f6c51775 100644 --- a/src/plugins/intel_gpu/src/runtime/device.cpp +++ b/src/plugins/intel_gpu/src/runtime/device.cpp @@ -65,8 +65,7 @@ float device::get_gops(cldnn::data_types dt) const { } bool device::use_unified_shared_memory() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_usm) { + GPU_DEBUG_IF(ExecutionConfig::get_disable_usm()) { return false; } if (get_mem_caps().supports_usm()) { diff --git a/src/plugins/intel_gpu/src/runtime/engine.cpp b/src/plugins/intel_gpu/src/runtime/engine.cpp index b5ec7da3fab705..73bceb8bea8659 100644 --- a/src/plugins/intel_gpu/src/runtime/engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/engine.cpp @@ -71,8 +71,7 @@ const device::ptr engine::get_device() const { } bool engine::use_unified_shared_memory() const { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_usm) { + GPU_DEBUG_IF(ExecutionConfig::get_disable_usm()) { return false; } if (_device->get_mem_caps().supports_usm()) { diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 7d2a9d5f90fc8b..5641aeb96abd84 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -1,308 +1,331 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "intel_gpu/runtime/execution_config.hpp" -#include 
"intel_gpu/runtime/debug_configuration.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/core/any.hpp" +#include "openvino/core/model.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/loop.hpp" +#include "openvino/op/lstm_sequence.hpp" +#include "openvino/op/paged_attention.hpp" +#include "openvino/op/search_sorted.hpp" +#include "openvino/op/stft.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/label.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "ov_ops/dynamic_quantize.hpp" #include "openvino/runtime/internal_properties.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" +#include "openvino/runtime/plugin_config.hpp" #include "openvino/runtime/properties.hpp" -#include namespace ov::intel_gpu { -ExecutionConfig::ExecutionConfig() { - set_default(); -} +namespace { -class InferencePrecisionValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - auto precision = v.as(); - return precision == ov::element::f16 || precision == ov::element::f32 || precision == ov::element::undefined; - } -}; - -class PerformanceModeValidator : public BaseValidator { -public: - bool is_valid(const ov::Any& v) const override { - auto mode = v.as(); - return mode == ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT || - mode == ov::hint::PerformanceMode::THROUGHPUT || - mode == ov::hint::PerformanceMode::LATENCY; +ov::RTMap get_rt_info(const ov::Model& model) { + ov::RTMap rt_info; + if (model.has_rt_info("runtime_options")) + rt_info = model.get_rt_info("runtime_options"); + + if (model.has_rt_info("__weights_path")) { + rt_info[ov::weights_path.name()] = model.get_rt_info("__weights_path"); } -}; - -void ExecutionConfig::set_default() { - register_property( - std::make_tuple(ov::device::id, "0"), - std::make_tuple(ov::enable_profiling, false), - std::make_tuple(ov::cache_dir, ""), - std::make_tuple(ov::num_streams, 1), - std::make_tuple(ov::compilation_num_threads, std::max(1, static_cast(std::thread::hardware_concurrency()))), - std::make_tuple(ov::hint::inference_precision, ov::element::f16, InferencePrecisionValidator()), - std::make_tuple(ov::hint::model_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY, PerformanceModeValidator()), - std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::PERFORMANCE), - std::make_tuple(ov::hint::num_requests, 0), - std::make_tuple(ov::hint::enable_cpu_pinning, false), - std::make_tuple(ov::hint::enable_cpu_reservation, false), - - std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::queue_priority, ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::enable_sdpa_optimization, true), - std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true), - std::make_tuple(ov::intel_gpu::disable_winograd_convolution, false), - std::make_tuple(ov::internal::exclusive_async_requests, false), - std::make_tuple(ov::internal::query_model_ratio, 1.0f), - std::make_tuple(ov::cache_mode, ov::CacheMode::OPTIMIZE_SPEED), - std::make_tuple(ov::cache_encryption_callbacks, EncryptionCallbacks{}), - std::make_tuple(ov::hint::dynamic_quantization_group_size, 0), - 
std::make_tuple(ov::hint::kv_cache_precision, ov::element::f16), - std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false), - std::make_tuple(ov::weights_path, ""), - std::make_tuple(ov::hint::activations_scale_factor, -1.f), - - // Legacy API properties - std::make_tuple(ov::intel_gpu::nv12_two_inputs, false), - std::make_tuple(ov::intel_gpu::config_file, ""), - std::make_tuple(ov::intel_gpu::enable_lp_transformations, false)); - - register_property( - std::make_tuple(ov::intel_gpu::max_dynamic_batch, 1), - std::make_tuple(ov::intel_gpu::queue_type, QueueTypes::out_of_order), - std::make_tuple(ov::intel_gpu::optimize_data, false), - std::make_tuple(ov::intel_gpu::enable_memory_pool, true), - std::make_tuple(ov::intel_gpu::allow_static_input_reorder, false), - std::make_tuple(ov::intel_gpu::custom_outputs, std::vector{}), - std::make_tuple(ov::intel_gpu::dump_graphs, ""), - std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}), - std::make_tuple(ov::intel_gpu::partial_build_program, false), - std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false), - std::make_tuple(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape, false), - std::make_tuple(ov::intel_gpu::buffers_preallocation_ratio, 1.1f), - std::make_tuple(ov::intel_gpu::max_kernels_per_batch, 8), - std::make_tuple(ov::intel_gpu::use_onednn, false)); + return rt_info; } -void ExecutionConfig::register_property_impl(const std::pair& property, PropertyVisibility visibility, BaseValidator::Ptr validator) { - property_validators[property.first] = validator; - supported_properties[property.first] = visibility; - internal_properties[property.first] = property.second; -} -void ExecutionConfig::set_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; - OPENVINO_ASSERT(is_supported(kv.first), "[GPU] Attempt to set property ", name, " (", val.as(), ") which was not registered!\n"); - OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": ", val.as()); - internal_properties[name] = val; +bool requires_new_shape_infer(const std::shared_ptr& op) { + if (op->is_dynamic()) { + return true; } -} -bool ExecutionConfig::is_supported(const std::string& name) const { - bool supported = supported_properties.find(name) != supported_properties.end(); - bool has_validator = property_validators.find(name) != property_validators.end(); + // HACK: SearchSorted has specific shape requirements. + // E.g. static input shapes: sorted:[8], values:[2,3,4] are prefectly fine, + // but sorted:[8,1,1,1], values:[2,3,4,1] is not valid. + // Similar case for STFT. 
+ if (ov::is_type(op) || ov::is_type(op)) + return true; - return supported && has_validator; -} + if (ov::is_type(op)) + return true; -bool ExecutionConfig::is_set_by_user(const std::string& name) const { - return user_properties.find(name) != user_properties.end(); -} + if (ov::is_type(op)) { + const auto body_function = std::static_pointer_cast(op)->get_function(); + if (body_function->is_dynamic()) + return true; + } -void ExecutionConfig::set_user_property(const AnyMap& config) { - for (auto& kv : config) { - auto& name = kv.first; - auto& val = kv.second; - bool supported = is_supported(name) && supported_properties.at(name) == PropertyVisibility::PUBLIC; - OPENVINO_ASSERT(supported, "[GPU] Attempt to set user property ", name, " (", val.as(), ") which was not registered or internal!\n"); - OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": `", val.as(), "`"); + if (ov::is_type(op) || ov::is_type(op)) { + return true; + } + // When input node has dynamic shape with 4 dimension, this function return false + // because op.is_dynamic() which only checks input shapes return false. + // So, in the case of input data, we need to check output shape. + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).is_dynamic()) + return true; + } - user_properties[kv.first] = kv.second; + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).size() > 6) + return true; } -} -Any ExecutionConfig::get_property(const std::string& name) const { - if (user_properties.find(name) != user_properties.end()) { - return user_properties.at(name); + for (size_t i = 0; i < op->get_input_size(); i++) { + if (op->get_input_partial_shape(i).size() > 6) + return true; } - OPENVINO_ASSERT(internal_properties.find(name) != internal_properties.end(), "[GPU] Can't get internal property with name ", name); - return internal_properties.at(name); + return false; } -void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::execution_mode)) { - const auto mode = get_property(ov::hint::execution_mode); - if (!is_set_by_user(ov::hint::inference_precision)) { - if (mode == ov::hint::ExecutionMode::ACCURACY) { - set_property(ov::hint::inference_precision(ov::element::undefined)); - } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) { - if (info.supports_fp16) - set_property(ov::hint::inference_precision(ov::element::f16)); - else - set_property(ov::hint::inference_precision(ov::element::f32)); - } +bool is_llm(const ov::Model& model) { + using namespace ov::pass::pattern; + + auto past = wrap_type(); + auto convert_past = wrap_type({past}); + auto gather_input = std::make_shared(OutputVector{past, convert_past}); + auto beam_idx = wrap_type(); + auto gather_past = wrap_type({gather_input, beam_idx, wrap_type()}); + auto gather_convert = wrap_type({gather_past}); + auto concat_past_input = std::make_shared(OutputVector{past, convert_past, gather_past, gather_convert}); + auto concat = wrap_type({concat_past_input, any_input()}); + auto convert_present = wrap_type({concat}); + auto present_input = std::make_shared(OutputVector{concat, convert_present}); + auto present = wrap_type({present_input}); + + auto kvcache_matcher = std::make_shared(present, "KVCacheMatcher"); + + for (auto& op : model.get_ordered_ops()) { + if (kvcache_matcher->match(op) || ov::is_type(op)) { + return true; } } + + return false; } -void 
ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::performance_mode)) { - const auto mode = get_property(ov::hint::performance_mode); - if (!is_set_by_user(ov::num_streams)) { - if (mode == ov::hint::PerformanceMode::LATENCY) { - set_property(ov::num_streams(1)); - } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { - set_property(ov::num_streams(ov::streams::AUTO)); - } - } +} // namespace + +#define OV_CONFIG_LOCAL_OPTION(...) +#define OV_CONFIG_GLOBAL_OPTION(PropertyNamespace, PropertyVar, Visibility, ...) \ + ConfigOption ExecutionConfig::m_ ## PropertyVar{GET_EXCEPT_LAST(__VA_ARGS__)}; + +#include "intel_gpu/runtime/options.inl" + +#undef OV_CONFIG_LOCAL_OPTION +#undef OV_CONFIG_GLOBAL_OPTION + +ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_MAPPING(__VA_ARGS__)) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION +} + +ExecutionConfig::ExecutionConfig(const ExecutionConfig& other) : ExecutionConfig() { + m_user_properties = other.m_user_properties; + m_is_finalized = other.m_is_finalized; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); } +} - if (get_property(ov::num_streams) == ov::streams::AUTO) { - int32_t n_streams = std::max(info.num_ccs, 2); - set_property(ov::num_streams(n_streams)); +ExecutionConfig& ExecutionConfig::operator=(const ExecutionConfig& other) { + m_user_properties = other.m_user_properties; + m_is_finalized = other.m_is_finalized; + for (const auto& kv : other.m_options_map) { + m_options_map.at(kv.first)->set_any(kv.second->get_any()); } + return *this; +} - if (get_property(ov::internal::exclusive_async_requests)) { - set_property(ov::num_streams(1)); +ExecutionConfig ExecutionConfig::clone() const { + ExecutionConfig new_config = *this; + new_config.m_is_finalized = false; + return new_config; +} + +void ExecutionConfig::finalize(cldnn::engine& engine) { + auto ctx = std::make_shared("GPU", std::vector{engine.get_device()}); + PluginConfig::finalize(ctx.get(), nullptr); +} + +void ExecutionConfig::apply_rt_info(const IRemoteContext* context, const ov::RTMap& rt_info, bool is_llm) { + const auto& info = dynamic_cast(context)->get_engine().get_device_info(); + if (!info.supports_immad) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); } + if (!is_llm) + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - // Allow kernels reuse only for single-stream scenarios - if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) { - if (get_property(ov::num_streams) != 1) { - set_property(ov::intel_gpu::hint::enable_kernels_reuse(false)); - } + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); + + // WEIGHTS_PATH is used for the weightless cache mechanism which is used only with + // ov::CacheMode::OPTIMIZE_SIZE setting. Not setting WEIGHTS_PATH will result in not + // using that mechanism. 
+ if (get_cache_mode() == ov::CacheMode::OPTIMIZE_SIZE) { + apply_rt_info_property(ov::weights_path, rt_info); } } -void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { - if (is_set_by_user(ov::hint::model_priority)) { - const auto priority = get_property(ov::hint::model_priority); - if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { - set_property(ov::intel_gpu::hint::queue_priority(priority)); +void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) { + apply_rt_info(context, get_rt_info(model), is_llm(model)); + + const auto& ops = model.get_ops(); + + std::function)> process_op = [&, this](std::shared_ptr op) { + if (requires_new_shape_infer(op)) { + m_allow_new_shape_infer = true; + } + // In the case of dynamic models, because most of the layers are mapped to shape agnostic kernels, + // smaller # of kernels are built compared to static models. + // So having smaller batch size is even better for dynamic model as we can do more parallel build. + if (op->is_dynamic()) { + m_max_kernels_per_batch = 4; + } + + // Allow using onednn for models with LSTMSequence op as it's much more performant than existing ocl impl + if (ov::is_type(op)) { + m_use_onednn = true; + } + + if (auto multi_subgraph_op = ov::as_type_ptr(op)) { + for (const auto& sub_graph : multi_subgraph_op->get_functions()) { + for (auto& sub_op : sub_graph->get_ops()) { + process_op(sub_op); + } + } } + }; + + for (const auto& op : ops) { + process_op(op); } + + m_optimize_data = true; } -void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs)); +void ExecutionConfig::finalize_impl(const IRemoteContext* context) { + GPU_DEBUG_IF(get_help()) { + print_help(); + exit(-1); } - GPU_DEBUG_IF(debug_config->serialize_compile == 1) { - set_property(ov::compilation_num_threads(1)); + const auto& info = dynamic_cast(context)->get_engine().get_device_info(); + apply_hints(info); + if (!is_set_by_user(ov::internal::enable_lp_transformations)) { + m_enable_lp_transformations = info.supports_imad || info.supports_immad; } - - GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { - GPU_DEBUG_COUT << "[WARNING] ov::enable_profiling property was forced because of enabled OV_GPU_DumpProfilingData debug option\n"; - set_property(ov::enable_profiling(true)); + if (!is_set_by_user(ov::intel_gpu::use_onednn) && info.supports_immad) { + m_use_onednn = true; + } + if (get_use_onednn()) { + m_queue_type = QueueTypes::in_order; } - GPU_DEBUG_IF(debug_config->disable_dynamic_impl == 1) { - set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); + if (!is_set_by_user(ov::hint::kv_cache_precision) || get_kv_cache_precision() == ov::element::undefined) { + if (info.supports_immad) { // MFDNN-11755 + m_kv_cache_precision = get_inference_precision(); + } else { + // Enable KV-cache compression by default for non-systolic platforms only + m_kv_cache_precision = ov::element::i8; + } } - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { - if (debug_config->dynamic_quantize_group_size == -1) - set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); - else - set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size)); + // Enable dynamic quantization by default 
for non-systolic platforms + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && get_dynamic_quantization_group_size() == 0 && !info.supports_immad) { + m_dynamic_quantization_group_size = 32; } - GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) { - GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } else { - set_property(ov::hint::kv_cache_precision(ov::element::undefined)); - } + if (!get_force_implementations().empty()) { + m_optimize_data = true; } + +#ifdef ENABLE_DEBUG_CAPS + // For now we apply env/config only for build with debug caps, but it can be updated in the future to allow + // reading release options for any build type + apply_config_options(context->get_device_name(), get_debug_config()); +#endif // ENABLE_DEBUG_CAPS } void ExecutionConfig::apply_hints(const cldnn::device_info& info) { apply_execution_hints(info); apply_performance_hints(info); apply_priority_hints(info); - apply_debug_options(info); } -void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { - if (finalized) - return; - - // Copy internal properties before applying hints to ensure that - // a property set by hint won't be overriden by a value in user config. - // E.g num_streams=AUTO && hint=THROUGHPUT - // If we apply hints first and then copy all values from user config to internal one, - // then we'll get num_streams=AUTO in final config while some integer number is expected. - for (auto& kv : user_properties) { - internal_properties[kv.first] = kv.second; - } - apply_hints(info); - if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) { - set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad)); - } - if (info.supports_immad) { - set_property(ov::intel_gpu::use_onednn(true)); - } - if (get_property(ov::intel_gpu::use_onednn)) { - set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } - if (!is_set_by_user(ov::hint::enable_cpu_reservation)) { - if (get_property(ov::hint::enable_cpu_pinning)) { - set_property(ov::hint::enable_cpu_reservation(true)); - } - } - if (get_property(ov::hint::enable_cpu_reservation)) { - if (!is_set_by_user(ov::hint::enable_cpu_pinning)) { - set_property(ov::hint::enable_cpu_pinning(true)); +void ExecutionConfig::apply_execution_hints(const cldnn::device_info& info) { + if (is_set_by_user(ov::hint::execution_mode)) { + const auto mode = get_execution_mode(); + if (!is_set_by_user(ov::hint::inference_precision)) { + if (mode == ov::hint::ExecutionMode::ACCURACY) { + m_inference_precision = ov::element::undefined; + } else if (mode == ov::hint::ExecutionMode::PERFORMANCE) { + if (info.supports_fp16) + m_inference_precision = ov::element::f16; + else + m_inference_precision = ov::element::f32; + } } } +} - if (!is_set_by_user(ov::hint::kv_cache_precision) || get_property(ov::hint::kv_cache_precision) == ov::element::undefined) { - if (info.supports_immad) { // MFDNN-11755 - set_property(ov::hint::kv_cache_precision(get_property(ov::hint::inference_precision))); - } else { - // Enable KV-cache compression by default for non-systolic platforms only - set_property(ov::hint::kv_cache_precision(ov::element::i8)); +void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) { + if (is_set_by_user(ov::hint::performance_mode)) { + const auto mode = get_performance_mode(); + if (!is_set_by_user(ov::num_streams)) { + if (mode == ov::hint::PerformanceMode::LATENCY) { + m_num_streams = 1; + } 
else if (mode == ov::hint::PerformanceMode::THROUGHPUT) { + m_num_streams = ov::streams::AUTO; + } } } - // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && - get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); + if (get_num_streams() == ov::streams::AUTO) { + int32_t n_streams = std::max(info.num_ccs, 2); + m_num_streams = n_streams; } - finalized = true; + if (get_exclusive_async_requests()) { + m_num_streams = 1; + } - user_properties.clear(); + // Allow kernels reuse only for single-stream scenarios + if (get_enable_kernels_reuse()) { + if (get_num_streams() != 1) { + m_enable_kernels_reuse = false; + } + } } -void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info, const bool is_llm) { - if (!info.supports_immad) { - apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); +void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) { + if (is_set_by_user(ov::hint::model_priority)) { + const auto priority = get_model_priority(); + if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) { + m_queue_priority = priority; + } } - if (!is_llm) - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); - apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); } -std::string ExecutionConfig::to_string() const { - std::stringstream s; - s << "internal properties:\n"; - for (auto& kv : internal_properties) { - s << "\t" << kv.first << ": " << kv.second.as() << std::endl; - } - s << "user properties:\n"; - for (auto& kv : user_properties) { - s << "\t" << kv.first << ": " << kv.second.as() << std::endl; - } - return s.str(); +const ov::PluginConfig::OptionsDesc& ExecutionConfig::get_options_desc() const { + static ov::PluginConfig::OptionsDesc help_map { + #define OV_CONFIG_LOCAL_OPTION(...) EXPAND(OV_CONFIG_OPTION_HELP(__VA_ARGS__)) + #define OV_CONFIG_GLOBAL_OPTION(...) 
EXPAND(OV_CONFIG_OPTION_HELP(__VA_ARGS__)) + #include "intel_gpu/runtime/options.inl" + #undef OV_CONFIG_LOCAL_OPTION + #undef OV_CONFIG_GLOBAL_OPTION + }; + return help_map; } } // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp index 1417680c985632..a899f78eb565d3 100644 --- a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp +++ b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp @@ -52,7 +52,6 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive auto type = mem->get_allocation_type(); const auto _layout_bytes_count = _layout.bytes_count(); - GPU_DEBUG_GET_INSTANCE(debug_config); { auto it = _non_padded_pool.lower_bound(_layout_bytes_count); @@ -67,7 +66,7 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive } if (it->second._users.empty()) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = it->first; total_mem_size_non_padded_pool -= released_mem_size; if (type == allocation_type::usm_host) @@ -104,7 +103,7 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive } if (list_itr->_users.empty()) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = mem->size(); total_mem_size_padded_pool -= released_mem_size; if (type == allocation_type::usm_host) @@ -128,14 +127,14 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive } } #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto iter = std::find_if(_no_reusable_mems.begin(), _no_reusable_mems.end(), [&](const cldnn::memory_record& r) { return (network_id == r._network_id && type == r._type && mem->get_internal_params().mem == r._memory->get_internal_params().mem); }); if (iter != _no_reusable_mems.end()) { - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = iter->_users.begin()->_mem_size; total_mem_size_no_reusable -= released_mem_size; if (type == allocation_type::usm_host) @@ -183,8 +182,7 @@ memory::ptr memory_pool::get_from_non_padded_pool(const layout& layout, memory_record({{MEM_USER(unique_id, network_id, prim_id, layout_bytes_count)}}, mem, network_id, type)); #ifdef GPU_DEBUG_CONFIG { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { total_mem_size_non_padded_pool += layout_bytes_count; if (type == allocation_type::usm_host) mem_size_non_padded_pool_host += layout_bytes_count; @@ -225,8 +223,7 @@ memory::ptr memory_pool::get_from_padded_pool(const layout& layout, memory_record({{MEM_USER(unique_id, network_id, prim_id, mem->size())}}, mem, network_id, type)); #ifdef GPU_DEBUG_CONFIG { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { const auto allocated_mem_size = mem->size(); total_mem_size_padded_pool += allocated_mem_size; if (type == allocation_type::usm_host) @@ -242,8 +239,7 @@ memory::ptr memory_pool::get_from_padded_pool(const layout& layout, _padded_pool.emplace(layout, std::move(list)); #ifdef GPU_DEBUG_CONFIG { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + 
GPU_DEBUG_IF(_config.get_dump_memory_pool()) { const auto allocated_mem_size = mem->size(); total_mem_size_padded_pool += allocated_mem_size; if (type == allocation_type::usm_host) @@ -300,8 +296,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, bool reset, bool is_dynamic) { bool do_reuse = reusable_across_network; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_memory_reuse) { + GPU_DEBUG_IF(_config.get_disable_memory_reuse()) { do_reuse = false; } if (do_reuse) { @@ -316,7 +311,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, // images (reuse not yet implemented) auto mem = alloc_memory(layout, type, reset); #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto allocated_mem_size = mem->size(); _no_reusable_mems.push_back( memory_record({{MEM_USER(unique_id, network_id, prim_id, allocated_mem_size)}}, mem, network_id, type)); @@ -330,7 +325,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, } else { auto mem = alloc_memory(layout, type, reset); #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto allocated_mem_size = mem->size(); _no_reusable_mems.push_back( memory_record({{MEM_USER(unique_id, network_id, prim_id, allocated_mem_size)}}, mem, network_id, type)); @@ -344,7 +339,6 @@ memory::ptr memory_pool::get_memory(const layout& layout, } void memory_pool::clear_pool_for_network(uint32_t network_id) { - GPU_DEBUG_GET_INSTANCE(debug_config); // free up _non_padded_pool for this network { auto itr = _non_padded_pool.begin(); @@ -354,7 +348,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { if (record._network_id == network_id) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = itr->first; total_mem_size_non_padded_pool -= released_mem_size; if (record._type == allocation_type::usm_host) @@ -388,7 +382,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { if (list.empty()) { #ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = itr->first.bytes_count(); total_mem_size_padded_pool -= released_mem_size; if (type == allocation_type::usm_host) @@ -404,12 +398,12 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { #ifdef GPU_DEBUG_CONFIG // free up _no_reusable_mems for this network - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto itr = _no_reusable_mems.begin(); while (itr != _no_reusable_mems.end()) { auto& record = *itr; if (itr->_network_id == network_id) { - GPU_DEBUG_IF(debug_config->dump_memory_pool) { + GPU_DEBUG_IF(_config.get_dump_memory_pool()) { auto released_mem_size = itr->_users.begin()->_mem_size; total_mem_size_no_reusable -= released_mem_size; if (record._type == allocation_type::usm_host) @@ -439,7 +433,9 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { } } -memory_pool::memory_pool(engine& engine) : _engine(&engine) { } +memory_pool::memory_pool(engine& engine, const ExecutionConfig& config) : _engine(&engine), _config(config) { + (void)(_config); // Silence unused warning +} #ifdef GPU_DEBUG_CONFIG inline std::string get_mb_size(size_t size) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp 
b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp index 441494b41c4f8f..520cb9bd22e073 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp @@ -295,9 +295,6 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex GPU_DEBUG_INFO << "GPU version: " << static_cast(info.gfx_ver.major) << "." << static_cast(info.gfx_ver.minor) << "." << static_cast(info.gfx_ver.revision) << (info.has_separate_cache ? " with separate cache" : "") << std::endl; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->disable_onednn) - info.supports_immad = false; } else if (nv_device_attr_supported) { info.gfx_ver = {static_cast(device.getInfo()), static_cast(device.getInfo()), diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index 11fab0106bff83..857283b9558647 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -64,7 +64,7 @@ void ocl_engine::create_onednn_engine(const ExecutionConfig& config) { auto casted = std::dynamic_pointer_cast(_device); OPENVINO_ASSERT(casted, "[GPU] Invalid device type stored in ocl_engine"); - std::string cache_dir = config.get_property(ov::cache_dir); + std::string cache_dir = config.get_cache_dir(); if (cache_dir.empty()) { _onednn_engine = std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get())); } else { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp index dbf2a01aa4eadf..d722b5f3bc1bfd 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp @@ -219,16 +219,6 @@ bool ocl_events::get_profiling_info_impl(std::listprint_multi_kernel_perf) { - if (period.stage == instrumentation::profiling_stage::executing) { - GPU_DEBUG_TRACE << "Multi-kernel time: "; - for (auto& duration : all_durations[period.stage]) - GPU_DEBUG_TRACE << " " << (duration.second - duration.first) / 1000; - GPU_DEBUG_TRACE << " Total " << sum / 1000 << std::endl; - } - } - info.push_back(get_profiling_interval(period.stage, 0, sum)); } diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp index e227c94c7dc06d..bc01a8174292e4 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp @@ -189,22 +189,22 @@ void set_arguments_impl(ocl_kernel_type& kernel, } // namespace ocl_stream::ocl_stream(const ocl_engine &engine, const ExecutionConfig& config) - : stream(config.get_property(ov::intel_gpu::queue_type), stream::get_expected_sync_method(config)) + : stream(config.get_queue_type(), stream::get_expected_sync_method(config)) , _engine(engine) { auto context = engine.get_cl_context(); auto device = engine.get_cl_device(); ocl::command_queues_builder queue_builder; - queue_builder.set_profiling(config.get_property(ov::enable_profiling)); + queue_builder.set_profiling(config.get_enable_profiling()); queue_builder.set_out_of_order(m_queue_type == QueueTypes::out_of_order); OPENVINO_ASSERT(m_sync_method != SyncMethods::none || m_queue_type == QueueTypes::in_order, "[GPU] Unexpected sync method (none) is specified for out_of_order queue"); bool priorty_extensions = engine.extension_supported("cl_khr_priority_hints") && 
engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_priority_mode(config.get_property(ov::intel_gpu::hint::queue_priority), priorty_extensions); + queue_builder.set_priority_mode(config.get_queue_priority(), priorty_extensions); bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue"); - queue_builder.set_throttle_mode(config.get_property(ov::intel_gpu::hint::queue_throttle), throttle_extensions); + queue_builder.set_throttle_mode(config.get_queue_throttle(), throttle_extensions); bool queue_families_extension = engine.get_device_info().supports_queue_families; queue_builder.set_supports_queue_families(queue_families_extension); diff --git a/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp b/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp index 3b925f07361fff..3a028218a3f62f 100644 --- a/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp +++ b/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp @@ -67,7 +67,7 @@ std::pair ShapePredictor::predict_preallocation_shape(const std int32_t custom_prealloc_dim) { size_t next_iters_prealloc_count = custom_next_iters_prealloc_count > 0 ? static_cast(custom_next_iters_prealloc_count) - : _next_iters_preallocation_count; + : _settings.next_iters_preallocation_count; const auto& current_shape = layout.get_shape(); auto dt_bitwidth = ov::element::Type(layout.data_type).bitwidth(); @@ -122,7 +122,7 @@ std::pair ShapePredictor::predict_preallocation_shape(const std // to avoid huge unexpected memory preallocations if (can_use_iterations_preallocation) { for (size_t i = 0; i < diffs[0].size(); ++i) { - if (diffs[0][i] > _max_per_dim_diff) { + if (diffs[0][i] > _settings.max_per_dim_diff) { can_use_iterations_preallocation = false; break; } @@ -132,7 +132,7 @@ std::pair ShapePredictor::predict_preallocation_shape(const std for (size_t i = 0; i < current_shape.size(); ++i) single_iter_shape.push_back(diffs[0][i] == 0 ? 
current_shape[i] : 1); - if (ceil_div(ov::shape_size(single_iter_shape) * dt_bitwidth, 8) > _max_per_iter_size) + if (ceil_div(ov::shape_size(single_iter_shape) * dt_bitwidth, 8) > _settings.max_per_iter_size) can_use_iterations_preallocation = false; } @@ -142,13 +142,13 @@ std::pair ShapePredictor::predict_preallocation_shape(const std auto preallocation_shape = diffs[0] * mul_shape; auto new_shape = current_shape + preallocation_shape; return {true, new_shape}; - } else if (_buffers_preallocation_ratio > 1.0f) { + } else if (_settings.buffers_preallocation_ratio > 1.0f) { if (format::is_blocked(layout.format)) return {false, {}}; // Apply percentage buffer preallocation auto current_shape_size = ov::shape_size(current_shape); ov::Shape new_shape_size(current_shape.size(), 1); - new_shape_size[0] = static_cast(current_shape_size * _buffers_preallocation_ratio); + new_shape_size[0] = static_cast(current_shape_size * _settings.buffers_preallocation_ratio); return {true, new_shape_size}; } } diff --git a/src/plugins/intel_gpu/src/runtime/stream.cpp b/src/plugins/intel_gpu/src/runtime/stream.cpp index 0516a551f19177..913d84d8f476f5 100644 --- a/src/plugins/intel_gpu/src/runtime/stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/stream.cpp @@ -20,8 +20,8 @@ QueueTypes stream::detect_queue_type(engine_types engine_type, void* queue_handl } SyncMethods stream::get_expected_sync_method(const ExecutionConfig& config) { - auto profiling = config.get_property(ov::enable_profiling); - auto queue_type = config.get_property(ov::intel_gpu::queue_type); + auto profiling = config.get_enable_profiling(); + auto queue_type = config.get_queue_type(); return profiling ? SyncMethods::events : queue_type == QueueTypes::out_of_order ? SyncMethods::barriers : SyncMethods::none; } diff --git a/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp b/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp index fb9711e7605859..c78b472b9c54cb 100644 --- a/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp +++ b/src/plugins/intel_gpu/tests/functional/concurrency/gpu_reservation_test.cpp @@ -28,6 +28,10 @@ TEST_F(GpuReservationTest, Mutiple_CompiledModel_Reservation) { models.emplace_back(ov::test::utils::make_multi_single_conv()); auto core = ov::test::utils::PluginCache::get().core(); + + auto available_devices = core->get_available_devices(); + if (std::find(available_devices.begin(), available_devices.end(), ov::test::utils::DEVICE_CPU) == available_devices.end()) + GTEST_SKIP(); core->set_property(target_devices[1], config); ov::AnyMap property_config = {{ov::num_streams.name(), 1}, @@ -54,4 +58,4 @@ TEST_F(GpuReservationTest, Mutiple_CompiledModel_Reservation) { if (thread.joinable()) thread.join(); } -} \ No newline at end of file +} diff --git a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp index 6d8f231b93576a..6fb20181eb1936 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp @@ -318,7 +318,7 @@ class WeightsPrimitiveFusingTestOneDNN : public BaseFusingTest result; for (auto& shape : in_shapes) @@ -74,7 +76,9 @@ TEST_P(shape_predictor_tests_b_fs_yx_fsv16, prediction) { auto& expected_predicted_shape = p.expected_predicted_shape; auto& engine = get_test_engine(); - ShapePredictor sp(&engine, p.buffers_preallocation_ratio); + 
ShapePredictor::Settings settings; + settings.buffers_preallocation_ratio = p.buffers_preallocation_ratio; + ShapePredictor sp(&engine, settings); std::pair result; for (auto& shape : in_shapes) @@ -121,8 +125,10 @@ INSTANTIATE_TEST_SUITE_P(smoke, shape_predictor_tests_b_fs_yx_fsv16, TEST(shape_predictor_tests, check_max_buffer_size) { auto& engine = get_test_engine(); - const auto& buffers_preallocation_ratio = 1.1; - ShapePredictor sp(&engine, buffers_preallocation_ratio); + const auto& buffers_preallocation_ratio = 1.1f; + ShapePredictor::Settings settings; + settings.buffers_preallocation_ratio = buffers_preallocation_ratio; + ShapePredictor sp(&engine, settings); const auto max_alloc_mem_size = engine.get_device_info().max_alloc_mem_size; auto layout = cldnn::layout({static_cast(max_alloc_mem_size)}, ov::element::u8, format::bfyx); diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp index 3f3b6019611e3a..031b417f4f5356 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp @@ -527,7 +527,7 @@ TEST(prepare_primitive_fusing, fuse_constant_transposes_removal_check) { auto prog = program::build_program(engine, topology, config, false, true); - prog->get_layout_optimizer().set_implementation_forcing(config.get_property(ov::intel_gpu::force_implementations)); + prog->get_layout_optimizer().set_implementation_forcing(config.get_force_implementations()); program_wrapper::apply_opt_pass(*prog); ASSERT_TRUE(!has_node(*prog, "permute")); diff --git a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp index 1fbeab7e67ac2d..b46033f15d77db 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp @@ -136,7 +136,7 @@ TEST(remove_redundant_reorders, skip_reorder_fusing_when_sibling_not_support_pad auto prog = program::build_program(engine, topology, config, false, true); config.set_property(ov::intel_gpu::optimize_data(true)); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -165,7 +165,7 @@ TEST(remove_redundant_reorders, not_to_fuse_reshape_with_fused_prims) { auto prog = program::build_program(engine, topology, config, false, true); program_wrapper::apply_opt_pass(*prog); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool optimize_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog, optimize_data); ASSERT_NE(prog, nullptr); @@ -204,7 +204,7 @@ TEST(remove_redundant_reorders, not_to_fuse_permute) { auto prog = program::build_program(engine, topology, config, false, true); ASSERT_NE(prog, nullptr); - bool opt_data = config.get_property(ov::intel_gpu::optimize_data); + bool opt_data = config.get_optimize_data(); program_wrapper::apply_opt_pass(*prog); program_wrapper::apply_opt_pass(*prog, opt_data); @@ -266,7 +266,7 @@ TEST(remove_redundant_reorders, remove_fused) { auto prog = program::build_program(engine, topology, config, false, true); program_wrapper::apply_opt_pass(*prog); - bool optimize_data = config.get_property(ov::intel_gpu::optimize_data); + bool 
diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp
index 3f3b6019611e3a..031b417f4f5356 100644
--- a/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp
@@ -527,7 +527,7 @@ TEST(prepare_primitive_fusing, fuse_constant_transposes_removal_check) {
     auto prog = program::build_program(engine, topology, config, false, true);
-    prog->get_layout_optimizer().set_implementation_forcing(config.get_property(ov::intel_gpu::force_implementations));
+    prog->get_layout_optimizer().set_implementation_forcing(config.get_force_implementations());
     program_wrapper::apply_opt_pass(*prog);
     ASSERT_TRUE(!has_node(*prog, "permute"));
diff --git a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp
index 1fbeab7e67ac2d..b46033f15d77db 100644
--- a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp
+++ b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp
@@ -136,7 +136,7 @@ TEST(remove_redundant_reorders, skip_reorder_fusing_when_sibling_not_support_pad
     auto prog = program::build_program(engine, topology, config, false, true);
     config.set_property(ov::intel_gpu::optimize_data(true));
-    bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);
+    bool optimize_data = config.get_optimize_data();
     program_wrapper::apply_opt_pass(*prog, optimize_data);
     ASSERT_NE(prog, nullptr);
@@ -165,7 +165,7 @@ TEST(remove_redundant_reorders, not_to_fuse_reshape_with_fused_prims) {
     auto prog = program::build_program(engine, topology, config, false, true);
     program_wrapper::apply_opt_pass(*prog);
-    bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);
+    bool optimize_data = config.get_optimize_data();
     program_wrapper::apply_opt_pass(*prog, optimize_data);
     ASSERT_NE(prog, nullptr);
@@ -204,7 +204,7 @@ TEST(remove_redundant_reorders, not_to_fuse_permute) {
     auto prog = program::build_program(engine, topology, config, false, true);
     ASSERT_NE(prog, nullptr);
-    bool opt_data = config.get_property(ov::intel_gpu::optimize_data);
+    bool opt_data = config.get_optimize_data();
     program_wrapper::apply_opt_pass(*prog);
     program_wrapper::apply_opt_pass(*prog, opt_data);
@@ -266,7 +266,7 @@ TEST(remove_redundant_reorders, remove_fused) {
     auto prog = program::build_program(engine, topology, config, false, true);
     program_wrapper::apply_opt_pass(*prog);
-    bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);
+    bool optimize_data = config.get_optimize_data();
     program_wrapper::apply_opt_pass(*prog, optimize_data);
     ASSERT_NE(prog, nullptr);
@@ -293,7 +293,7 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_mvn_dyn) {
     config.set_property(ov::intel_gpu::optimize_data(true));
     auto prog = program::build_program(engine, topology, config, false, true);
-    bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);
+    bool optimize_data = config.get_optimize_data();
     program_wrapper::apply_opt_pass(*prog, optimize_data);
     ASSERT_NE(prog, nullptr);
@@ -336,7 +336,7 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_concat_dyn) {
     config.set_property(ov::intel_gpu::optimize_data(true));
     auto prog = program::build_program(engine, topology, config, false, true);
-    bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);
+    bool optimize_data = config.get_optimize_data();
     program_wrapper::apply_opt_pass(*prog, optimize_data);
     ASSERT_NE(prog, nullptr);
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp
index b145fa603063bc..1b2d0be5907293 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp
@@ -1422,7 +1422,7 @@ struct concat_gpu_4d_implicit : public concat_gpu {
         }
         auto outputs = concat_network->execute();
-        bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
+        bool concat_opt_enabled = config.get_optimize_data();
         bool concat_opt_result = std::static_pointer_cast(concat_network->get_primitive("concat"))->can_be_optimized();
         EXPECT_EQ(concat_opt_enabled, concat_opt_result);
@@ -1642,7 +1642,7 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu {
         }
         auto outputs = concat_network.execute();
-        bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
+        bool concat_opt_enabled = config.get_optimize_data();
         bool concat_opt_result = std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized();
         EXPECT_EQ(concat_opt_enabled, concat_opt_result);
@@ -1805,7 +1805,7 @@ struct concat_gpu_4d_explicit : public concat_gpu {
         }
         auto outputs = concat_network.execute();
-        bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
+        bool concat_opt_enabled = config.get_optimize_data();
         bool concat_opt_result = std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized();
         // If sibling is using onednn impl and batch > 1, the onednn impl cannot process the implicit concat'ed buffer.
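Most hunks above and below repeat one mechanical change: the generic `config.get_property(ov::intel_gpu::optimize_data)` lookup becomes a dedicated `config.get_optimize_data()` getter. A toy, self-contained analogue of that pattern (this is not the plugin's `ExecutionConfig`; the class, key string, and storage are invented purely to illustrate why a typed getter is preferable to a keyed any-lookup):

```cpp
#include <any>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>

// Toy config: a string-keyed bag of std::any values, like a generic property map.
class ToyConfig {
public:
    void set_property(const std::string& key, std::any value) { m_props[key] = std::move(value); }

    // Old-style access: caller must know both the key string and the stored type.
    template <typename T>
    T get_property(const std::string& key) const { return std::any_cast<T>(m_props.at(key)); }

    // New-style access: a dedicated getter fixes the key and the type in one place.
    bool get_optimize_data() const { return get_property<bool>("optimize_data"); }

private:
    std::unordered_map<std::string, std::any> m_props;
};

int main() {
    ToyConfig config;
    config.set_property("optimize_data", true);

    bool via_generic = config.get_property<bool>("optimize_data");  // easy to typo the key or type
    bool via_typed = config.get_optimize_data();                    // fixed at compile time

    std::cout << std::boolalpha << via_generic << " " << via_typed << "\n";
    return 0;
}
```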
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp
index 9e92f2ebbfd293..0ff21d1ed5f3dc 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp
@@ -1317,7 +1317,6 @@ TEST_P(crop_gpu_dynamic, i32_in2x3x2x2_crop_offsets) {
             }
         }
     }
-    config2.set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true));
     network network2(engine, topology, config2); // run with static kernel
     network2.set_input_data("input", input);
     auto outputs2 = network2.execute();
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp
index 6a96b694eea1c5..7c08fe85c07eeb 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/debug_config_gpu_test.cpp
@@ -10,7 +10,7 @@ using namespace ::tests;
 TEST(debug_config_test, check_debug_config_off_on_release) {
 #ifdef NDEBUG
-    GPU_DEBUG_GET_INSTANCE(debug_config);
+    auto config = get_test_default_config(get_test_engine());
     GPU_DEBUG_IF(1) {
         GTEST_FAIL(); /* This should be disabled in case of release build */
     }
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp
index 87d4c4ed7f0a2d..52209883247f99 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp
@@ -4210,7 +4210,7 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_sta
     this->test_compressed_int4_scale_dyn_quan_weight_i4(false, 320, 1024, 1024, 32, 32, true);
 }
-// Test weight zp for INT8 ASYM 
+// Test weight zp for INT8 ASYM
 TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_large) {
     this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 320, 4096, 4096, 128, 128, true);
 }
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp
index 4939630fab3c57..fc20272b2dec6b 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp
@@ -55,7 +55,6 @@ static program::ptr build_program(engine& engine,
     ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::optimize_data(true));
     config.set_property(ov::intel_gpu::custom_outputs(output_names_vec));
-    config.set_property(ov::intel_gpu::max_dynamic_batch(1));
     config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer));
     return program::build_program(engine, body_topology, config, false, false, true);
@@ -837,7 +836,7 @@ static void test_loop_gpu_multiple_shapes(ov::PartialShape body_input_layout,
         permute("permute1", input_info("input_origin"), {0, 1, 2, 3}),
         concatenation("input1", {input_info("permute1"), input_info("input_origin")}, 0),
         loop("loop",
-             {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input1"), input_info("input2")}, 
+             {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input1"), input_info("input2")},
             body_program, trip_count_id, initial_condition_id, actual_iteration_count_id,
             input_primitive_maps, output_primitive_maps, back_edges, num_iterations,
            body_current_iteration_id, body_execution_condition_id, 2),
@@ -1105,7 +1104,6 @@ static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape bod
     auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true);
     auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx});
-
     std::vector body_input_layouts;
     for (size_t i = 0; i < body_input_layout.size(); i++) {
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp
index 8caad2b576d734..8fd32877291d44 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp
@@ -1913,7 +1913,7 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant)
     auto outputs = net.execute();
     auto executed_primitives = net.get_executed_primitives();
-    if (config.get_property(ov::intel_gpu::queue_type) != QueueTypes::out_of_order)
+    if (config.get_queue_type() != QueueTypes::out_of_order)
         GTEST_SKIP();
     ASSERT_TRUE(executed_primitives.count("in") == 1);
diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h b/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h
index 77b519d8e3cf5f..09c081abbb6fe6 100644
--- a/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h
+++ b/src/plugins/intel_gpu/tests/unit/test_utils/program_wrapper.h
@@ -38,7 +38,9 @@ namespace cldnn
             p.prepare_memory_dependencies();
         }
         static void update_configs_properties(program& p, const ov::AnyMap& properties) {
-            p._config.set_property(properties);
+            auto config_copy = p._config.clone();
+            config_copy.set_property(properties);
+            p._config = config_copy;
         }
     };
diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp
index 6af8572d323907..23641dbdba7a63 100644
--- a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp
@@ -54,14 +54,14 @@ void generic_test::run_single_test(bool is_caching_test) {
             }
         }
         std::string input_name = "input" + std::to_string(i);
-        if ((i == 0) && generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) {
+        if ((i == 0) && generic_params->network_config.get_optimize_data()) {
            // Add reorder after the first input in case of optimize data flag since it might change the input layout.
            input_name = "input0_init";
        }
        // First input is provided to the network as input_layout.
        // Other inputs are provided as input_layout if optimize data flag is off. Otherwise they are provided as data.
-        if ((i == 0) || !generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) {
+        if ((i == 0) || !generic_params->network_config.get_optimize_data()) {
             topology.add(input_layout(input_name, input_mems[i]->get_layout()));
             input_layouts_names.push_back(input_name);
         } else {
@@ -74,7 +74,7 @@ void generic_test::run_single_test(bool is_caching_test) {
         }
     }
-    if (generic_params->network_config.get_property(ov::intel_gpu::optimize_data)) {
+    if (generic_params->network_config.get_optimize_data()) {
         // Add reorder after the first input in case of optimize data flag since it might change the input layout.
         topology.add(reorder("input0", input_info("input0_init"), input_mems[0]->get_layout()));
     }
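The program_wrapper hunk above switches from mutating `p._config` in place to modifying a clone and assigning it back. A small, self-contained sketch of that copy-modify-swap idiom (toy `Config`/`Program` types invented for illustration, not cldnn's `program` or `ExecutionConfig`):

```cpp
#include <iostream>
#include <map>
#include <string>

// Toy stand-ins for the config and program types touched in program_wrapper.h.
struct Config {
    std::map<std::string, std::string> props;
    Config clone() const { return *this; }  // explicit copy, analogous to the clone() call in the diff
    void set_property(const std::string& key, const std::string& value) { props[key] = value; }
};

struct Program {
    Config config;
};

// Mirrors the new update_configs_properties(): mutate a clone, then replace the stored config wholesale.
void update_config_properties(Program& p, const std::string& key, const std::string& value) {
    auto config_copy = p.config.clone();
    config_copy.set_property(key, value);
    p.config = config_copy;
}

int main() {
    Program p;
    update_config_properties(p, "optimize_data", "true");
    std::cout << p.config.props.at("optimize_data") << "\n";
    return 0;
}
```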