[AArch64] Use correct regclass for spills of ZPR2/ZPR4 #148806


Merged
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1591,18 +1591,22 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
"Non-writeback variants of STGloop / STZGloop should not "
"survive past PrologEpilogInserter.");
case AArch64::STR_ZZZZXI:
case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
case AArch64::STR_ZZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
case AArch64::STR_ZZXI:
case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
case AArch64::STR_PPXI:
return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
case AArch64::LDR_ZZZZXI:
case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
case AArch64::LDR_ZZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
case AArch64::LDR_ZZXI:
case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
case AArch64::LDR_PPXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
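For context on how these pseudo opcodes are consumed: expandSVESpillFill turns each ZPR2/ZPR4 spill or fill pseudo into one unit-width STR_ZXI/LDR_ZXI per Z register of the tuple, at consecutive VL-scaled offsets. The sketch below is only an approximation of that expansion based on the operand layout of the pseudos (tuple, base, simm4s1 offset); the helper name and the omission of kill/def flags are simplifications, not the in-tree code.

```cpp
// Rough sketch of the expansion for STR_ZZXI[_STRIDED_CONTIGUOUS],
// STR_ZZZZXI[_STRIDED_CONTIGUOUS] and their LDR counterparts: N single-vector
// accesses at consecutive VL-scaled offsets. Not the in-tree implementation.
static void expandZPRTupleSpillFill(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    const TargetInstrInfo &TII,
                                    const TargetRegisterInfo &TRI,
                                    unsigned UnitOpc, unsigned N) {
  MachineInstr &MI = *MBBI;
  Register Tuple = MI.getOperand(0).getReg(); // ZZ/ZZZZ tuple (def or source)
  Register Base = MI.getOperand(1).getReg();  // GPR64sp base register
  int64_t Imm = MI.getOperand(2).getImm();    // simm4s1 offset, in VL units
  static const unsigned ZSubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                      AArch64::zsub2, AArch64::zsub3};
  for (unsigned I = 0; I != N; ++I)
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII.get(UnitOpc))
        .addReg(TRI.getSubReg(Tuple, ZSubRegs[I])) // Z register I of the tuple
        .addReg(Base)
        .addImm(Imm + I);
  MI.eraseFromParent();
}
```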
40 changes: 32 additions & 8 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2482,8 +2482,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::LDR_PXI:
case AArch64::LDR_ZXI:
case AArch64::LDR_ZZXI:
case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
case AArch64::LDR_ZZZXI:
case AArch64::LDR_ZZZZXI:
case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
case AArch64::LDRBBui:
case AArch64::LDRBui:
case AArch64::LDRDui:
@@ -2525,8 +2527,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::STR_PXI:
case AArch64::STR_ZXI:
case AArch64::STR_ZZXI:
case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
case AArch64::STR_ZZZXI:
case AArch64::STR_ZZZZXI:
case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
case AArch64::STRBBui:
case AArch64::STRBui:
case AArch64::STRDui:
@@ -4318,7 +4322,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
break;
// SVE
case AArch64::STR_ZZZZXI:
case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
case AArch64::LDR_ZZZZXI:
case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
Scale = TypeSize::getScalable(16);
Width = TypeSize::getScalable(16 * 4);
MinOffset = -256;
@@ -4332,7 +4338,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MaxOffset = 253;
break;
case AArch64::STR_ZZXI:
case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
case AArch64::LDR_ZZXI:
case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
Scale = TypeSize::getScalable(16);
Width = TypeSize::getScalable(16 * 2);
MinOffset = -256;
@@ -5559,8 +5567,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d;
Offset = false;
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
} else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
StackID = TargetStackID::ScalableVector;
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZXI;
@@ -5584,8 +5596,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d;
Offset = false;
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
} else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
StackID = TargetStackID::ScalableVector;
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZZZXI;
@@ -5736,8 +5752,12 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d;
Offset = false;
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
} else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
StackID = TargetStackID::ScalableVector;
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZXI;
@@ -5761,8 +5781,12 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d;
Offset = false;
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
} else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
StackID = TargetStackID::ScalableVector;
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZZZXI;
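Downstream of the regclass checks above, storeRegToStackSlot builds a single pseudo against the spill slot's frame index and marks the slot as scalable; loadRegFromStackSlot mirrors this with the LDR_* pseudos and the tuple as a def. The snippet below is a minimal sketch of that emission pattern, assuming the usual AArch64 backend headers; the helper name and parameter list are invented for illustration and are not the in-tree code.

```cpp
// Sketch only: emit the chosen spill pseudo (e.g.
// AArch64::STR_ZZXI_STRIDED_CONTIGUOUS for a ZPR2StridedOrContiguous tuple)
// against a scalable spill slot.
static void emitZPRTupleSpill(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI,
                              const TargetInstrInfo &TII, Register SrcReg,
                              bool IsKill, int FrameIdx,
                              MachineMemOperand *MMO, unsigned Opc,
                              MachineFrameInfo &MFI) {
  BuildMI(MBB, MBBI, DebugLoc(), TII.get(Opc))
      .addReg(SrcReg, getKillRegState(IsKill)) // the Z-register tuple being spilled
      .addFrameIndex(FrameIdx)                 // spill slot
      .addImm(0)                               // offset from the frame index
      .addMemOperand(MMO);
  // Z-register tuple slots live in the scalable-vector stack region.
  MFI.setStackID(FrameIdx, TargetStackID::ScalableVector);
}
```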
18 changes: 12 additions & 6 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2625,16 +2625,22 @@ let Predicates = [HasSVE_or_SME] in {
// These get expanded to individual LDR_ZXI/STR_ZXI instructions in
// AArch64ExpandPseudoInsts.
let mayLoad = 1, hasSideEffects = 0 in {
def LDR_ZZXI : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def LDR_ZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
@MacDue (Member) commented on Jul 15, 2025:

nit: not all of this is this PR, but it bugs me that there's three different ways to refer to the same concept (which makes future grepping harder):

  • strided_and_contiguous
  • StridedOrContiguous
  • STRIDED_CONTIGUOUS

(my vote is for "strided or contiguous")

Collaborator (Author) replied:

Yeah, we've not been very consistent with the naming, I'll fix that up in a follow-up NFC patch. My vote is also for "strided or contiguous" in that case.

def LDR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;

def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
}
let mayStore = 1, hasSideEffects = 0 in {
def STR_ZZXI : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_ZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;

def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
}

let AddedComplexity = 1 in {
49 changes: 25 additions & 24 deletions llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -1,5 +1,5 @@
# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy %s -o - | FileCheck %s
# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND
# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s
# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s --check-prefix=EXPAND
--- |
; ModuleID = '<stdin>'
source_filename = "<stdin>"
@@ -14,13 +14,14 @@
define aarch64_sve_vector_pcs void @spills_fills_stack_id_virtreg_ppr_to_pnr() #1 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #2 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #2 { entry: unreachable }

attributes #0 = { nounwind "target-features"="+sve" }
attributes #1 = { nounwind "target-features"="+sve2p1" }
attributes #2 = { nounwind "target-features"="+sve,+sme2" "aarch64_pstate_sm_enabled" }

...
---
@@ -318,23 +319,23 @@ registers:
- { id: 0, class: zpr2 }
stack:
liveins:
- { reg: '$z0_z1', virtual-reg: '%0' }
- { reg: '$z1_z2', virtual-reg: '%0' }
body: |
bb.0.entry:
liveins: $z0_z1
liveins: $z1_z2

; CHECK-LABEL: name: spills_fills_stack_id_zpr2
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 32, alignment: 16
; CHECK-NEXT: stack-id: scalable-vector

; EXPAND-LABEL: name: spills_fills_stack_id_zpr2
; EXPAND: STR_ZXI $z0, $sp, 0
; EXPAND: STR_ZXI $z1, $sp, 1
; EXPAND: $z0 = LDR_ZXI $sp, 0
; EXPAND: $z1 = LDR_ZXI $sp, 1
; EXPAND: STR_ZXI $z1, $sp, 0
; EXPAND: STR_ZXI $z2, $sp, 1
; EXPAND: $z1 = LDR_ZXI $sp, 0
; EXPAND: $z2 = LDR_ZXI $sp, 1

%0:zpr2 = COPY $z0_z1
%0:zpr2 = COPY $z1_z2

$z0_z1_z2_z3 = IMPLICIT_DEF
$z4_z5_z6_z7 = IMPLICIT_DEF
@@ -345,7 +346,7 @@ body: |
$z24_z25_z26_z27 = IMPLICIT_DEF
$z28_z29_z30_z31 = IMPLICIT_DEF

$z0_z1 = COPY %0
$z1_z2 = COPY %0
RET_ReallyLR
...
---
@@ -439,27 +440,27 @@ registers:
- { id: 0, class: zpr4 }
stack:
liveins:
- { reg: '$z0_z1_z2_z3', virtual-reg: '%0' }
- { reg: '$z1_z2_z3_z4', virtual-reg: '%0' }
body: |
bb.0.entry:
liveins: $z0_z1_z2_z3
liveins: $z1_z2_z3_z4

; CHECK-LABEL: name: spills_fills_stack_id_zpr4
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 16
; CHECK-NEXT: stack-id: scalable-vector

; EXPAND-LABEL: name: spills_fills_stack_id_zpr4
; EXPAND: STR_ZXI $z0, $sp, 0
; EXPAND: STR_ZXI $z1, $sp, 1
; EXPAND: STR_ZXI $z2, $sp, 2
; EXPAND: STR_ZXI $z3, $sp, 3
; EXPAND: $z0 = LDR_ZXI $sp, 0
; EXPAND: $z1 = LDR_ZXI $sp, 1
; EXPAND: $z2 = LDR_ZXI $sp, 2
; EXPAND: $z3 = LDR_ZXI $sp, 3
; EXPAND: STR_ZXI $z1, $sp, 0
; EXPAND: STR_ZXI $z2, $sp, 1
; EXPAND: STR_ZXI $z3, $sp, 2
; EXPAND: STR_ZXI $z4, $sp, 3
; EXPAND: $z1 = LDR_ZXI $sp, 0
; EXPAND: $z2 = LDR_ZXI $sp, 1
; EXPAND: $z3 = LDR_ZXI $sp, 2
; EXPAND: $z4 = LDR_ZXI $sp, 3

%0:zpr4 = COPY $z0_z1_z2_z3
%0:zpr4 = COPY $z1_z2_z3_z4

$z0_z1_z2_z3 = IMPLICIT_DEF
$z4_z5_z6_z7 = IMPLICIT_DEF
@@ -470,7 +471,7 @@ body: |
$z24_z25_z26_z27 = IMPLICIT_DEF
$z28_z29_z30_z31 = IMPLICIT_DEF

$z0_z1_z2_z3 = COPY %0
$z1_z2_z3_z4 = COPY %0
RET_ReallyLR
...
---