-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][GlobalISel] Lower shuffle vector with scalar destinations. #121384
Conversation
I believe these are usually canonicalized to vector extracts in most situations, but under -O0 we might trigger failures in the widening code if we do not handle scalar destinations correctly. The simplest solution should be to lower the shuffle to an extract.
@llvm/pr-subscribers-llvm-globalisel Author: David Green (davemgreen) Changes: I believe these are usually canonicalized to vector extracts in most situations, but under -O0 we might trigger failures in the widening code if we do not handle scalar destinations correctly. The simplest solution should be to lower the shuffle to an extract. Fixes #121365. Full diff: https://github.com/llvm/llvm-project/pull/121384.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4b7d4158faf069..7de066e09ed2f3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1062,10 +1062,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return llvm::is_contained(
{v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
})
- // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
- // just want those lowered into G_BUILD_VECTOR
+ // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
+ // destinations, we just want those lowered into G_BUILD_VECTOR or
+ // G_EXTRACT_ELEMENT.
.lowerIf([=](const LegalityQuery &Query) {
- return !Query.Types[1].isVector();
+ return !Query.Types[0].isVector() || !Query.Types[1].isVector();
})
.moreElementsIf(
[](const LegalityQuery &Query) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-1x.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-1x.ll
new file mode 100644
index 00000000000000..b52957767de4db
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-1x.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple aarch64 -O0 -global-isel -o - %s | FileCheck %s
+
+define <1 x i1> @shuffle_extract_4(<8 x i1> %a, <8 x i1> %b) {
+; CHECK-LABEL: shuffle_extract_4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: umov w8, v0.h[4]
+; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: ret
+ %extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 4>
+ ret <1 x i1> %extractvec60
+}
+
+define <1 x i1> @shuffle_extract_12(<8 x i1> %a, <8 x i1> %b) {
+; CHECK-LABEL: shuffle_extract_12:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.8h, v1.8b, #0
+; CHECK-NEXT: umov w8, v0.h[4]
+; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: ret
+ %extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 12>
+ ret <1 x i1> %extractvec60
+}
+
+define <1 x i1> @shuffle_extract_p(<8 x i1> %a, <8 x i1> %b) {
+; CHECK-LABEL: shuffle_extract_p:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // implicit-def: $w8
+; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: ret
+ %extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 poison>
+ ret <1 x i1> %extractvec60
+}
+
+define <1 x i32> @shufflevector_v1i32(<1 x i32> %a, <1 x i32> %b) {
+; CHECK-LABEL: shufflevector_v1i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
+ %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 1>
+ ret <1 x i32> %c
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index 2464026aa125b5..af03a21806982e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -618,3 +618,32 @@ body: |
RET_ReallyLR implicit $q0
...
+---
+name: shuffle_v8i1_v1i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $d1
+ ; CHECK-LABEL: name: shuffle_v8i1_v1i8
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[COPY]](<8 x s8>)
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[ANYEXT]](<8 x s16>), [[C]](s64)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(<8 x s8>) = COPY $d0
+ %0:_(<8 x s1>) = G_TRUNC %2:_(<8 x s8>)
+ %3:_(<8 x s8>) = COPY $d1
+ %1:_(<8 x s1>) = G_TRUNC %3:_(<8 x s8>)
+ %4:_(s1) = G_SHUFFLE_VECTOR %0:_(<8 x s1>), %1:_, shufflemask(12)
+ %5:_(s8) = G_ZEXT %4:_(s1)
+ %6:_(s32) = G_ANYEXT %5:_(s8)
+ $w0 = COPY %6:_(s32)
+ RET_ReallyLR implicit $w0
+...
|
@llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) Changes: I believe these are usually canonicalized to vector extracts in most situations, but under -O0 we might trigger failures in the widening code if we do not handle scalar destinations correctly. The simplest solution should be to lower the shuffle to an extract. Fixes #121365. Full diff: https://github.com/llvm/llvm-project/pull/121384.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4b7d4158faf069..7de066e09ed2f3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1062,10 +1062,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return llvm::is_contained(
{v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
})
- // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
- // just want those lowered into G_BUILD_VECTOR
+ // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
+ // destinations, we just want those lowered into G_BUILD_VECTOR or
+ // G_EXTRACT_ELEMENT.
.lowerIf([=](const LegalityQuery &Query) {
- return !Query.Types[1].isVector();
+ return !Query.Types[0].isVector() || !Query.Types[1].isVector();
})
.moreElementsIf(
[](const LegalityQuery &Query) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-1x.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-1x.ll
new file mode 100644
index 00000000000000..b52957767de4db
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-1x.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple aarch64 -O0 -global-isel -o - %s | FileCheck %s
+
+define <1 x i1> @shuffle_extract_4(<8 x i1> %a, <8 x i1> %b) {
+; CHECK-LABEL: shuffle_extract_4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: umov w8, v0.h[4]
+; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: ret
+ %extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 4>
+ ret <1 x i1> %extractvec60
+}
+
+define <1 x i1> @shuffle_extract_12(<8 x i1> %a, <8 x i1> %b) {
+; CHECK-LABEL: shuffle_extract_12:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.8h, v1.8b, #0
+; CHECK-NEXT: umov w8, v0.h[4]
+; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: ret
+ %extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 12>
+ ret <1 x i1> %extractvec60
+}
+
+define <1 x i1> @shuffle_extract_p(<8 x i1> %a, <8 x i1> %b) {
+; CHECK-LABEL: shuffle_extract_p:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // implicit-def: $w8
+; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: ret
+ %extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 poison>
+ ret <1 x i1> %extractvec60
+}
+
+define <1 x i32> @shufflevector_v1i32(<1 x i32> %a, <1 x i32> %b) {
+; CHECK-LABEL: shufflevector_v1i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
+ %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 1>
+ ret <1 x i32> %c
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index 2464026aa125b5..af03a21806982e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -618,3 +618,32 @@ body: |
RET_ReallyLR implicit $q0
...
+---
+name: shuffle_v8i1_v1i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $d1
+ ; CHECK-LABEL: name: shuffle_v8i1_v1i8
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[COPY]](<8 x s8>)
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[ANYEXT]](<8 x s16>), [[C]](s64)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(<8 x s8>) = COPY $d0
+ %0:_(<8 x s1>) = G_TRUNC %2:_(<8 x s8>)
+ %3:_(<8 x s8>) = COPY $d1
+ %1:_(<8 x s1>) = G_TRUNC %3:_(<8 x s8>)
+ %4:_(s1) = G_SHUFFLE_VECTOR %0:_(<8 x s1>), %1:_, shufflemask(12)
+ %5:_(s8) = G_ZEXT %4:_(s1)
+ %6:_(s32) = G_ANYEXT %5:_(s8)
+ $w0 = COPY %6:_(s32)
+ RET_ReallyLR implicit $w0
+...
|
I believe these are usually canonicalized to vector extracts in most situations, but under -O0 we might trigger failures in the widening code if we do not handle scalar destinations correctly. The simplest solution should be to lower the shuffle to an extract.
Fixes #121365.