From 0f5ea872a968f54b00be8ffa075f1c4b9fbf6106 Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Thu, 23 Jan 2025 13:00:51 +0100 Subject: [PATCH] [Snippets][CPU] Disable VNNI requirement for i8 brgemm --- .../snippets/x64/op/brgemm_utils.cpp | 18 +++++++++++------- .../transformation_pipeline.cpp | 5 +++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.cpp index f360437d59da6b..53805973d4ae2d 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.cpp @@ -29,9 +29,10 @@ cpu_isa_t get_primitive_isa(const ov::element::Type& dt_in0, bool is_with_amx) { } else { \ Y \ } -#define SUPPORT_ONE(X, MESSAGE) SUPPORT(X, OV_CPU_JIT_EMITTER_THROW(MESSAGE);) -#define SUPPORT_TWO(X, Y, MESSAGE) SUPPORT(X, SUPPORT_ONE(Y, MESSAGE)) -#define SUPPORT_THREE(X, Y, Z, MESSAGE) SUPPORT(X, SUPPORT_TWO(Y, Z, MESSAGE)) +#define SUPPORT_ONE(X, MESSAGE) SUPPORT(X, OV_CPU_JIT_EMITTER_THROW(MESSAGE);) +#define SUPPORT_TWO(X, Y, MESSAGE) SUPPORT(X, SUPPORT_ONE(Y, MESSAGE)) +#define SUPPORT_THREE(X, Y, Z, MESSAGE) SUPPORT(X, SUPPORT_TWO(Y, Z, MESSAGE)) +#define SUPPORT_FOUR(A, B, C, D, MESSAGE) SUPPORT(A, SUPPORT_THREE(B, C, D, MESSAGE)) // Note: AMX might be not used even if it's supported by the hardware, check the BrgemmToBrgemmCPU pass for details if (is_with_amx) { @@ -44,16 +45,19 @@ cpu_isa_t get_primitive_isa(const ov::element::Type& dt_in0, bool is_with_amx) { } else if (dt_in0 == ov::element::bf16) { SUPPORT_ONE(avx512_core_bf16, "Unsupported hardware configuration: bf16 is supported only on avx512 platforms") } else if (one_of(dt_in0, ov::element::u8, ov::element::i8)) { - SUPPORT_THREE(avx512_core_vnni, - avx2_vnni_2, - avx2_vnni, - "Unsupported hardware configuration: int8 is supported only on vnni platforms") 
+ SUPPORT_FOUR(avx512_core_vnni, + avx2_vnni_2, + avx2_vnni, + avx512_core, + "Unsupported hardware configuration: int8 is supported only on vnni or avx512 platforms") } else { SUPPORT_TWO(avx512_core, cpu::x64::avx2, "Unsupported hardware configuration: brgemm requires at least avx2 isa") } return isa; +#undef SUPPORT_FOUR +#undef SUPPORT_THREE #undef SUPPORT_TWO #undef SUPPORT_ONE #undef SUPPORT diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index da61917a146db0..8352905e051828 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -1063,7 +1063,7 @@ void Transformations::MainSnippets(void) { const auto is_bf16 = (in_type0 == ov::element::bf16 && in_type1 == ov::element::bf16) || ((in_type0 == element::f32 && in_type1 == ov::element::f32 && config.inferencePrecision == ov::element::bf16)); - const auto is_int8 = in_type0 == ov::element::i8; + const auto is_int8 = (in_type0 == element::i8 || in_type0 == element::u8) && (in_type1 == element::i8); if (matmul->get_transpose_a()) return false; if (is_fp32) @@ -1071,13 +1071,14 @@ void Transformations::MainSnippets(void) { if (is_int8) return dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx) || dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_vnni) || + dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) || dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni); if (is_bf16) return dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx) || dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16); if (is_fp16) return dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx_fp16); - return true; + return false; }; auto is_unsupported_parallel_work_amount = [&](const std::shared_ptr& n, const ov::PartialShape& shape) {