From 71d6b0b0c1e5e7f34ccb710470cb90a9a51005c8 Mon Sep 17 00:00:00 2001
From: David Green
Date: Tue, 31 Dec 2024 19:08:05 +0000
Subject: [PATCH] [AArch64][GlobalISel] Lower shuffle vector with scalar
 destinations. (#121384)

I believe these are usually canonicalized to vector extracts, but under -O0
we might trigger failures in the widening code if we do not handle scalar
destinations correctly. The simplest solution is to lower the shuffle to an
extract.

Fixes #121365.
---
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |  7 +--
 .../AArch64/GlobalISel/legalize-shuffle-1x.ll | 43 +++++++++++++++++++
 .../GlobalISel/legalize-shuffle-vector.mir    | 29 +++++++++++++
 3 files changed, 76 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-1x.ll

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4b7d4158faf069..7de066e09ed2f3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1062,10 +1062,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
         return llvm::is_contained(
             {v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
       })
-      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
-      // just want those lowered into G_BUILD_VECTOR
+      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
+      // destinations, we just want those lowered into G_BUILD_VECTOR or
+      // G_EXTRACT_VECTOR_ELT.
       .lowerIf([=](const LegalityQuery &Query) {
-        return !Query.Types[1].isVector();
+        return !Query.Types[0].isVector() || !Query.Types[1].isVector();
       })
       .moreElementsIf(
           [](const LegalityQuery &Query) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-1x.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-1x.ll
new file mode 100644
index 00000000000000..b52957767de4db
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-1x.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple aarch64 -O0 -global-isel -o - %s | FileCheck %s
+
+define <1 x i1> @shuffle_extract_4(<8 x i1> %a, <8 x i1> %b) {
+; CHECK-LABEL: shuffle_extract_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    umov w8, v0.h[4]
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 4>
+  ret <1 x i1> %extractvec60
+}
+
+define <1 x i1> @shuffle_extract_12(<8 x i1> %a, <8 x i1> %b) {
+; CHECK-LABEL: shuffle_extract_12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.8h, v1.8b, #0
+; CHECK-NEXT:    umov w8, v0.h[4]
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 12>
+  ret <1 x i1> %extractvec60
+}
+
+define <1 x i1> @shuffle_extract_p(<8 x i1> %a, <8 x i1> %b) {
+; CHECK-LABEL: shuffle_extract_p:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // implicit-def: $w8
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 poison>
+  ret <1 x i1> %extractvec60
+}
+
+define <1 x i32> @shufflevector_v1i32(<1 x i32> %a, <1 x i32> %b) {
+; CHECK-LABEL: shufflevector_v1i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    ret
+  %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 1>
+  ret <1 x i32> %c
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index 2464026aa125b5..af03a21806982e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -618,3 +618,32 @@ body: |
     RET_ReallyLR implicit $q0
 
 ...
+---
+name:            shuffle_v8i1_v1i8
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: shuffle_v8i1_v1i8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[COPY]](<8 x s8>)
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[ANYEXT]](<8 x s16>), [[C]](s64)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
+    ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %2:_(<8 x s8>) = COPY $d0
+    %0:_(<8 x s1>) = G_TRUNC %2:_(<8 x s8>)
+    %3:_(<8 x s8>) = COPY $d1
+    %1:_(<8 x s1>) = G_TRUNC %3:_(<8 x s8>)
+    %4:_(s1) = G_SHUFFLE_VECTOR %0:_(<8 x s1>), %1:_, shufflemask(12)
+    %5:_(s8) = G_ZEXT %4:_(s1)
+    %6:_(s32) = G_ANYEXT %5:_(s8)
+    $w0 = COPY %6:_(s32)
+    RET_ReallyLR implicit $w0
+...
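
For context on the lowering the patch relies on, here is a minimal sketch in plain LLVM IR; it is an illustration and not part of the patch, and the function names @shuffle_form and @extract_form are invented for it. A shufflevector whose result has a single element only selects one lane of the (conceptually concatenated) inputs, so it can be rewritten as an extractelement of the chosen source plus a re-wrap into the one-element vector; lane 12 of the 16-lane concatenation of %a and %b is element 4 of %b, mirroring the shuffle_extract_12 test above.

; Illustrative sketch only: the two functions below compute the same value,
; which is why lowering a scalar-destination shuffle to an extract is sound.
define <1 x i1> @shuffle_form(<8 x i1> %a, <8 x i1> %b) {
  ; Mask lane 12 selects element 4 of the second operand %b.
  %r = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 12>
  ret <1 x i1> %r
}

define <1 x i1> @extract_form(<8 x i1> %a, <8 x i1> %b) {
  ; Same result, written as an explicit extract of %b[4].
  %e = extractelement <8 x i1> %b, i64 4
  %r = insertelement <1 x i1> poison, i1 %e, i64 0
  ret <1 x i1> %r
}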