From 1234d07c80c0baa1475d8ad384a8b19ebaef9433 Mon Sep 17 00:00:00 2001
From: zhaohb
Date: Tue, 4 Jun 2024 19:40:15 +0800
Subject: [PATCH 1/4] [CPU] Optimize the unique operator

---
 src/plugins/intel_cpu/src/nodes/unique.cpp | 42 +++++++++-------------
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/unique.cpp b/src/plugins/intel_cpu/src/nodes/unique.cpp
index ad322756ab28e3..130213dfcb8703 100644
--- a/src/plugins/intel_cpu/src/nodes/unique.cpp
+++ b/src/plugins/intel_cpu/src/nodes/unique.cpp
@@ -225,41 +225,31 @@ void Unique::flattenTensorExec() {
             }
         }
     } else {
-        uniDataTmpPtr[0] = srcDataPtr[0];
-        if (definedOutputs[FIRST_UNIQUE_IDX]) {
-            firstTmpPtr[0] = 0;
-        }
-        if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
-            inToOutTmpPtr[0] = 0;
-        }
+        std::unordered_map uniq;
+        uniq.reserve(inputLen);
+
         if (definedOutputs[OCCURRENCES_NUM]) {
             std::fill(occurTmpPtr, occurTmpPtr + inputLen, 1);
         }
-        uniqueLen = 1;
-
-        for (size_t i = 1; i < inputLen; i++) {
-            bool found = false;
-            size_t j = 0;
-            for (; j < uniqueLen; j++) {
-                if (uniDataTmpPtr[j] == srcDataPtr[i]) {
-                    found = true;
-                    break;
-                }
-            }
-            if (!found) {
-                uniDataTmpPtr[uniqueLen] = srcDataPtr[i];
+
+        for (size_t i = 0, j = 0; i < inputLen; ++i) {
+            auto it = uniq.emplace(srcDataPtr[i], j);
+            inToOutTmpPtr[i] = it.first->second;
+            if (it.second) {
                 if (definedOutputs[FIRST_UNIQUE_IDX]) {
-                    firstTmpPtr[uniqueLen] = i;
+                    firstTmpPtr[j] = i;
                 }
-                uniqueLen++;
+                ++j;
             } else {
                 if (definedOutputs[OCCURRENCES_NUM]) {
-                    occurTmpPtr[j]++;
+                    occurTmpPtr[inToOutTmpPtr[i]]++;
                 }
            }
-            if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
-                inToOutTmpPtr[i] = j;
-            }
+        }
+
+        uniqueLen = static_cast(uniq.size());
+        for (const auto& it : uniq) {
+            uniDataTmpPtr[it.second] = it.first;
         }
     }
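For context, PATCH 1 replaces a quadratic search (every new element scanned against all unique values found so far) with a single pass over a hash map. The standalone sketch below mirrors that idea; the names (UniqueResult, compute_unique) and the int32_t element type are illustrative assumptions, not the plugin's actual types or buffers.

// Standalone illustration of the single-pass, hash-map based "unique" computation.
// Names and types here are hypothetical and not part of the plugin code.
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

struct UniqueResult {
    std::vector<int32_t> values;       // unique values, in order of first appearance
    std::vector<int32_t> first_index;  // index of the first occurrence of each unique value
    std::vector<int32_t> in_to_out;    // maps every input element to its unique-value slot
    std::vector<int32_t> counts;       // number of occurrences of each unique value
};

UniqueResult compute_unique(const std::vector<int32_t>& src) {
    UniqueResult res;
    res.in_to_out.resize(src.size());
    std::unordered_map<int32_t, int32_t> slot_of;  // value -> slot in the unique output
    slot_of.reserve(src.size());

    for (int32_t i = 0; i < static_cast<int32_t>(src.size()); ++i) {
        // emplace() is a no-op if the value is already known; `second` tells us which case we hit
        auto ins = slot_of.emplace(src[i], static_cast<int32_t>(res.values.size()));
        res.in_to_out[i] = ins.first->second;
        if (ins.second) {  // first time this value is seen
            res.values.push_back(src[i]);
            res.first_index.push_back(i);
            res.counts.push_back(1);
        } else {           // repeated value: just bump its count
            res.counts[ins.first->second]++;
        }
    }
    return res;
}

int main() {
    const std::vector<int32_t> data{5, 3, 5, 7, 3, 5};
    const UniqueResult r = compute_unique(data);
    for (size_t k = 0; k < r.values.size(); ++k) {
        std::cout << r.values[k] << " first at " << r.first_index[k]
                  << ", count " << r.counts[k] << "\n";
    }
    // Expected: 5 first at 0, count 3 / 3 first at 1, count 2 / 7 first at 3, count 1
}

The trade-off is the extra memory and hashing cost of the map (hence the reserve(inputLen) in the patch) in exchange for dropping the inner scan that made the worst case O(n^2).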
From 75fbd4b0b176752244d25718489c53d6676c862f Mon Sep 17 00:00:00 2001
From: zhaohb
Date: Fri, 27 Dec 2024 00:11:34 +0800
Subject: [PATCH 2/4] hunyuan-3b model support kvcache and gqa fusion

---
 .../src/plugin/transformations/kv_cache_fusion.cpp   | 9 +++++----
 .../unsqueeze_broadcast_reshape_sdpa_fusion.cpp      | 7 ++++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp
index f22b32b23ea407..b384fec890c7d4 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp
@@ -42,7 +42,8 @@ KVCacheFusionMatcher::KVCacheFusionMatcher() {
     auto gather_input = std::make_shared(OutputVector{past, convert_past});
     auto beam_idx = wrap_type();
     auto gather_past = wrap_type({gather_input, beam_idx, wrap_type()});
-    auto concat_past_input = std::make_shared(OutputVector{past, convert_past, gather_past});
+    auto gather_convert = wrap_type({gather_past});
+    auto concat_past_input = std::make_shared(OutputVector{past, convert_past, gather_past, gather_convert});
     auto concat = wrap_type({concat_past_input, any_input()});
     auto convert_present = wrap_type({concat});
     auto present_input = std::make_shared(OutputVector{concat, convert_present});
@@ -62,9 +63,9 @@ KVCacheFusionMatcher::KVCacheFusionMatcher() {
         if (past_node->get_variable_id() != present_node->get_variable_id())
             return false;
 
-        // TODO: Support conversion internally
-        if (!concat_node || concat_node->get_output_element_type(0) != past_node->get_output_element_type(0))
-            return false;
+        // // TODO: Support conversion internally
+        // if (!concat_node || concat_node->get_output_element_type(0) != past_node->get_output_element_type(0))
+        //     return false;
 
         auto variable = past_node->get_variable();
         auto concat_axis = concat_node->get_axis();
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/unsqueeze_broadcast_reshape_sdpa_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/unsqueeze_broadcast_reshape_sdpa_fusion.cpp
index 2b0d2ed5eaf145..3f4480eaef0cbb 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/unsqueeze_broadcast_reshape_sdpa_fusion.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/unsqueeze_broadcast_reshape_sdpa_fusion.cpp
@@ -52,8 +52,13 @@ UnsqueezeBroadcastReshapeSDPAFusion::UnsqueezeBroadcastReshapeSDPAFusion() {
     auto reshape_b_m = wrap_type({broadcast_b_m, any_input()}, reshape_predicate);
     auto reshape_c_m = wrap_type({broadcast_c_m, any_input()}, reshape_predicate);
 
+    auto convert_reshape_b_m = wrap_type({reshape_b_m});
+    auto reshape_b_m_input = std::make_shared(OutputVector{reshape_b_m, convert_reshape_b_m});
+    auto convert_reshape_c_m = wrap_type({reshape_c_m});
+    auto reshape_c_m_input = std::make_shared(OutputVector{reshape_c_m, convert_reshape_c_m});
+
     auto sdpa_without_attn_mask_m = wrap_type({ input_a_m, reshape_b_m, reshape_c_m });
-    auto sdpa_with_attn_mask_m = wrap_type({ input_a_m, reshape_b_m, reshape_c_m, input_attn_mask });
+    auto sdpa_with_attn_mask_m = wrap_type({ input_a_m, reshape_b_m_input, reshape_c_m_input, input_attn_mask });
     auto sdpa_with_attn_mask_and_scale_m = wrap_type({ input_a_m, reshape_b_m, reshape_c_m, input_attn_mask, input_scale });
     auto sdpa_m = std::make_shared(OutputVector{sdpa_without_attn_mask_m, sdpa_with_attn_mask_m, sdpa_with_attn_mask_and_scale_m});
 

From 157cc5382c78acc00e0196e0cc04690074fdca86 Mon Sep 17 00:00:00 2001
From: zhaohb
Date: Fri, 3 Jan 2025 18:28:53 +0800
Subject: [PATCH 3/4] Keep the original verification code

---
 .../src/plugin/transformations/kv_cache_fusion.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp
index b384fec890c7d4..eb876650b08903 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp
@@ -24,6 +24,7 @@
 #include "openvino/pass/pattern/op/or.hpp"
 #include "openvino/pass/visualize_tree.hpp"
 #include "transformations/utils/utils.hpp"
+#include "openvino/opsets/opset8.hpp"
 
 namespace ov {
 namespace intel_gpu {
@@ -64,8 +65,10 @@ KVCacheFusionMatcher::KVCacheFusionMatcher() {
             return false;
 
         // // TODO: Support conversion internally
-        // if (!concat_node || concat_node->get_output_element_type(0) != past_node->get_output_element_type(0))
-        //     return false;
+        if (ov::is_type(concat_past_input)) {
+            if (!concat_node || concat_node->get_output_element_type(0) != past_node->get_output_element_type(0))
+                return false;
+        }
 
         auto variable = past_node->get_variable();
         auto concat_axis = concat_node->get_axis();
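PATCH 2 and PATCH 3 rely on the same pattern-matching idiom: where the matched subgraph may or may not contain a Convert (for example a KV cache kept in a different precision than the SDPA inputs), the pattern lists both alternatives through a pattern::op::Or node, and the matcher callback can later test which branch actually matched. The sketch below reduces this to a toy Concat-over-ReadValue matcher; the class name, header paths, and opset versions (v6::ReadValue, v0::Convert, v0::Concat) are assumptions for illustration, since the original template arguments are not shown in the diff above.

// Sketch only: a MatcherPass whose "past" input may optionally pass through a Convert.
// Class name is hypothetical; opset versions and header choices are assumptions.
#include <memory>

#include "openvino/op/concat.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/read_value.hpp"
#include "openvino/pass/graph_rewrite.hpp"          // assumed header for ov::pass::MatcherPass
#include "openvino/pass/pattern/op/or.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"

class OptionalConvertConcatMatcher : public ov::pass::MatcherPass {
public:
    OptionalConvertConcatMatcher() {
        using namespace ov::pass::pattern;

        auto past = wrap_type<ov::op::v6::ReadValue>();
        auto convert_past = wrap_type<ov::op::v0::Convert>({past});
        // Accept either the raw ReadValue output or its converted form as the first Concat input.
        auto past_input = std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{past, convert_past});
        auto concat = wrap_type<ov::op::v0::Concat>({past_input, any_input()});

        ov::matcher_pass_callback callback = [=](Matcher& m) {
            const auto& pattern_map = m.get_pattern_value_map();
            // The Convert branch appears in the map only if it took part in the match,
            // so stricter checks (e.g. element-type comparison) can be kept for one branch only.
            const bool went_through_convert = pattern_map.count(convert_past) > 0;
            (void)went_through_convert;
            return false;  // sketch: no graph rewrite is performed here
        };
        register_matcher(std::make_shared<Matcher>(concat, "OptionalConvertConcatMatcher"), callback);
    }
};

Because the Or node records which alternative matched, the callback can keep the original element-type verification (which PATCH 3 restores) for the case it can still reason about, instead of dropping it entirely as PATCH 2 did.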
From ae39bf4c0372dc76075d81fdc3235183df92c9b8 Mon Sep 17 00:00:00 2001
From: zhaohb
Date: Tue, 7 Jan 2025 03:22:50 +0800
Subject: [PATCH 4/4] update

---
 .../src/plugin/transformations/kv_cache_fusion.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp
index eb876650b08903..7661673e764949 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp
@@ -64,11 +64,11 @@ KVCacheFusionMatcher::KVCacheFusionMatcher() {
         if (past_node->get_variable_id() != present_node->get_variable_id())
             return false;
 
-        // // TODO: Support conversion internally
-        if (ov::is_type(concat_past_input)) {
+        // TODO: Support conversion internally
+        if (ov::is_type(concat_past_input)) {
             if (!concat_node || concat_node->get_output_element_type(0) != past_node->get_output_element_type(0))
                 return false;
-        }
+        }
 
         auto variable = past_node->get_variable();
         auto concat_axis = concat_node->get_axis();