Skip to content

Commit 50e345e

Browse files
[AArch64] Use correct regclass for spills of ZPR2/ZPR4 (#148806)
Commit a629322 forced the ZPR[24]StridedOrContiguous register class to be used for spills/fills of ZPR2 and ZPR4. This may cause issues when the register class for the fill is a plain ZPR2/ZPR4, because that allows the register allocator to pick a tuple such as `z1_z2`, which is not a supported register for ZPR2StridedOrContiguous. That class only supports tuples of the strided form (`z0_z8`, `z1_z9`) or of the contiguous form starting at a multiple of 2 (`z0_z1`, `z2_z3`). For spills we could add a new register class that supports any of the tuple forms, but I've decided to use two pseudos, similar to the fills, for consistency. Fixes #148655.
1 parent bda5602 commit 50e345e

File tree

4 files changed

+73
-38
lines changed

4 files changed

+73
-38
lines changed

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1591,18 +1591,22 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
15911591
"Non-writeback variants of STGloop / STZGloop should not "
15921592
"survive past PrologEpilogInserter.");
15931593
case AArch64::STR_ZZZZXI:
1594+
case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
15941595
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
15951596
case AArch64::STR_ZZZXI:
15961597
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
15971598
case AArch64::STR_ZZXI:
1599+
case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
15981600
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
15991601
case AArch64::STR_PPXI:
16001602
return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
16011603
case AArch64::LDR_ZZZZXI:
1604+
case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
16021605
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
16031606
case AArch64::LDR_ZZZXI:
16041607
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
16051608
case AArch64::LDR_ZZXI:
1609+
case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
16061610
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
16071611
case AArch64::LDR_PPXI:
16081612
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2482,8 +2482,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
24822482
case AArch64::LDR_PXI:
24832483
case AArch64::LDR_ZXI:
24842484
case AArch64::LDR_ZZXI:
2485+
case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
24852486
case AArch64::LDR_ZZZXI:
24862487
case AArch64::LDR_ZZZZXI:
2488+
case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
24872489
case AArch64::LDRBBui:
24882490
case AArch64::LDRBui:
24892491
case AArch64::LDRDui:
@@ -2525,8 +2527,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
25252527
case AArch64::STR_PXI:
25262528
case AArch64::STR_ZXI:
25272529
case AArch64::STR_ZZXI:
2530+
case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
25282531
case AArch64::STR_ZZZXI:
25292532
case AArch64::STR_ZZZZXI:
2533+
case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
25302534
case AArch64::STRBBui:
25312535
case AArch64::STRBui:
25322536
case AArch64::STRDui:
@@ -4318,7 +4322,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
43184322
break;
43194323
// SVE
43204324
case AArch64::STR_ZZZZXI:
4325+
case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
43214326
case AArch64::LDR_ZZZZXI:
4327+
case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
43224328
Scale = TypeSize::getScalable(16);
43234329
Width = TypeSize::getScalable(16 * 4);
43244330
MinOffset = -256;
@@ -4332,7 +4338,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
43324338
MaxOffset = 253;
43334339
break;
43344340
case AArch64::STR_ZZXI:
4341+
case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
43354342
case AArch64::LDR_ZZXI:
4343+
case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
43364344
Scale = TypeSize::getScalable(16);
43374345
Width = TypeSize::getScalable(16 * 2);
43384346
MinOffset = -256;
@@ -5559,8 +5567,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
55595567
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
55605568
Opc = AArch64::ST1Twov2d;
55615569
Offset = false;
5562-
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5563-
AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5570+
} else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5571+
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5572+
"Unexpected register store without SVE store instructions");
5573+
Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
5574+
StackID = TargetStackID::ScalableVector;
5575+
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
55645576
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
55655577
"Unexpected register store without SVE store instructions");
55665578
Opc = AArch64::STR_ZZXI;
@@ -5584,8 +5596,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
55845596
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
55855597
Opc = AArch64::ST1Fourv2d;
55865598
Offset = false;
5587-
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5588-
AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5599+
} else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5600+
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5601+
"Unexpected register store without SVE store instructions");
5602+
Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
5603+
StackID = TargetStackID::ScalableVector;
5604+
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
55895605
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
55905606
"Unexpected register store without SVE store instructions");
55915607
Opc = AArch64::STR_ZZZZXI;
@@ -5736,8 +5752,12 @@ void AArch64InstrInfo::loadRegFromStackSlot(
57365752
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
57375753
Opc = AArch64::LD1Twov2d;
57385754
Offset = false;
5739-
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5740-
AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5755+
} else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5756+
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5757+
"Unexpected register load without SVE load instructions");
5758+
Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
5759+
StackID = TargetStackID::ScalableVector;
5760+
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
57415761
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
57425762
"Unexpected register load without SVE load instructions");
57435763
Opc = AArch64::LDR_ZZXI;
@@ -5761,8 +5781,12 @@ void AArch64InstrInfo::loadRegFromStackSlot(
57615781
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
57625782
Opc = AArch64::LD1Fourv2d;
57635783
Offset = false;
5764-
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5765-
AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5784+
} else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5785+
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5786+
"Unexpected register load without SVE load instructions");
5787+
Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
5788+
StackID = TargetStackID::ScalableVector;
5789+
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
57665790
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
57675791
"Unexpected register load without SVE load instructions");
57685792
Opc = AArch64::LDR_ZZZZXI;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2625,16 +2625,22 @@ let Predicates = [HasSVE_or_SME] in {
26252625
// These get expanded to individual LDR_ZXI/STR_ZXI instructions in
26262626
// AArch64ExpandPseudoInsts.
26272627
let mayLoad = 1, hasSideEffects = 0 in {
2628-
def LDR_ZZXI : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2628+
def LDR_ZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2629+
def LDR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2630+
2631+
def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
26292632
def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2630-
def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2631-
def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2633+
def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2634+
def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
26322635
}
26332636
let mayStore = 1, hasSideEffects = 0 in {
2634-
def STR_ZZXI : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2637+
def STR_ZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2638+
def STR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2639+
2640+
def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
26352641
def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2636-
def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2637-
def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2642+
def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2643+
def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
26382644
}
26392645

26402646
let AddedComplexity = 1 in {

llvm/test/CodeGen/AArch64/spillfill-sve.mir

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy %s -o - | FileCheck %s
2-
# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND
1+
# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s
2+
# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s --check-prefix=EXPAND
33
--- |
44
; ModuleID = '<stdin>'
55
source_filename = "<stdin>"
@@ -14,13 +14,14 @@
1414
define aarch64_sve_vector_pcs void @spills_fills_stack_id_virtreg_ppr_to_pnr() #1 { entry: unreachable }
1515
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
1616
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable }
17-
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #0 { entry: unreachable }
17+
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #2 { entry: unreachable }
1818
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable }
1919
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable }
20-
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #0 { entry: unreachable }
20+
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #2 { entry: unreachable }
2121

2222
attributes #0 = { nounwind "target-features"="+sve" }
2323
attributes #1 = { nounwind "target-features"="+sve2p1" }
24+
attributes #2 = { nounwind "target-features"="+sve,+sme2" "aarch64_pstate_sm_enabled" }
2425

2526
...
2627
---
@@ -318,23 +319,23 @@ registers:
318319
- { id: 0, class: zpr2 }
319320
stack:
320321
liveins:
321-
- { reg: '$z0_z1', virtual-reg: '%0' }
322+
- { reg: '$z1_z2', virtual-reg: '%0' }
322323
body: |
323324
bb.0.entry:
324-
liveins: $z0_z1
325+
liveins: $z1_z2
325326
326327
; CHECK-LABEL: name: spills_fills_stack_id_zpr2
327328
; CHECK: stack:
328329
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 32, alignment: 16
329330
; CHECK-NEXT: stack-id: scalable-vector
330331
331332
; EXPAND-LABEL: name: spills_fills_stack_id_zpr2
332-
; EXPAND: STR_ZXI $z0, $sp, 0
333-
; EXPAND: STR_ZXI $z1, $sp, 1
334-
; EXPAND: $z0 = LDR_ZXI $sp, 0
335-
; EXPAND: $z1 = LDR_ZXI $sp, 1
333+
; EXPAND: STR_ZXI $z1, $sp, 0
334+
; EXPAND: STR_ZXI $z2, $sp, 1
335+
; EXPAND: $z1 = LDR_ZXI $sp, 0
336+
; EXPAND: $z2 = LDR_ZXI $sp, 1
336337
337-
%0:zpr2 = COPY $z0_z1
338+
%0:zpr2 = COPY $z1_z2
338339
339340
$z0_z1_z2_z3 = IMPLICIT_DEF
340341
$z4_z5_z6_z7 = IMPLICIT_DEF
@@ -345,7 +346,7 @@ body: |
345346
$z24_z25_z26_z27 = IMPLICIT_DEF
346347
$z28_z29_z30_z31 = IMPLICIT_DEF
347348
348-
$z0_z1 = COPY %0
349+
$z1_z2 = COPY %0
349350
RET_ReallyLR
350351
...
351352
---
@@ -439,27 +440,27 @@ registers:
439440
- { id: 0, class: zpr4 }
440441
stack:
441442
liveins:
442-
- { reg: '$z0_z1_z2_z3', virtual-reg: '%0' }
443+
- { reg: '$z1_z2_z3_z4', virtual-reg: '%0' }
443444
body: |
444445
bb.0.entry:
445-
liveins: $z0_z1_z2_z3
446+
liveins: $z1_z2_z3_z4
446447
447448
; CHECK-LABEL: name: spills_fills_stack_id_zpr4
448449
; CHECK: stack:
449450
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 16
450451
; CHECK-NEXT: stack-id: scalable-vector
451452
452453
; EXPAND-LABEL: name: spills_fills_stack_id_zpr4
453-
; EXPAND: STR_ZXI $z0, $sp, 0
454-
; EXPAND: STR_ZXI $z1, $sp, 1
455-
; EXPAND: STR_ZXI $z2, $sp, 2
456-
; EXPAND: STR_ZXI $z3, $sp, 3
457-
; EXPAND: $z0 = LDR_ZXI $sp, 0
458-
; EXPAND: $z1 = LDR_ZXI $sp, 1
459-
; EXPAND: $z2 = LDR_ZXI $sp, 2
460-
; EXPAND: $z3 = LDR_ZXI $sp, 3
454+
; EXPAND: STR_ZXI $z1, $sp, 0
455+
; EXPAND: STR_ZXI $z2, $sp, 1
456+
; EXPAND: STR_ZXI $z3, $sp, 2
457+
; EXPAND: STR_ZXI $z4, $sp, 3
458+
; EXPAND: $z1 = LDR_ZXI $sp, 0
459+
; EXPAND: $z2 = LDR_ZXI $sp, 1
460+
; EXPAND: $z3 = LDR_ZXI $sp, 2
461+
; EXPAND: $z4 = LDR_ZXI $sp, 3
461462
462-
%0:zpr4 = COPY $z0_z1_z2_z3
463+
%0:zpr4 = COPY $z1_z2_z3_z4
463464
464465
$z0_z1_z2_z3 = IMPLICIT_DEF
465466
$z4_z5_z6_z7 = IMPLICIT_DEF
@@ -470,7 +471,7 @@ body: |
470471
$z24_z25_z26_z27 = IMPLICIT_DEF
471472
$z28_z29_z30_z31 = IMPLICIT_DEF
472473
473-
$z0_z1_z2_z3 = COPY %0
474+
$z1_z2_z3_z4 = COPY %0
474475
RET_ReallyLR
475476
...
476477
---

0 commit comments

Comments
 (0)