From f802e9214321123d96184f33246c4469c53591c5 Mon Sep 17 00:00:00 2001
From: Richard Patel
Date: Thu, 4 Jul 2024 11:07:32 +0000
Subject: [PATCH] [SBF] Use add32 for sign extension on ALU32

Using "add32 reg, 0" for sign extension is more efficient than using
"lsh reg, 32; arsh reg, 32".
---
 llvm/lib/Target/SBF/SBFISelLowering.cpp       | 13 ++++
 llvm/lib/Target/SBF/SBFInstrInfo.td           |  2 +-
 llvm/lib/Target/SBF/SBFMIPeephole.cpp         | 63 +++++++++++++++++++
 .../CodeGen/SBF/32-bit-subreg-peephole.ll     |  6 +-
 llvm/test/CodeGen/SBF/atomics_sbf.ll          | 20 +++---
 llvm/test/CodeGen/SBF/loop-exit-cond.ll       |  9 ++-
 6 files changed, 91 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/SBF/SBFISelLowering.cpp b/llvm/lib/Target/SBF/SBFISelLowering.cpp
index e744795cf57535..e0603abafa79e7 100644
--- a/llvm/lib/Target/SBF/SBFISelLowering.cpp
+++ b/llvm/lib/Target/SBF/SBFISelLowering.cpp
@@ -1023,6 +1023,19 @@ SBFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
     BuildMI(BB, DL, TII.get(SBF::MOV_32_64), PromotedReg0).addReg(Reg);
     return PromotedReg0;
   }
+
+  if (Subtarget->isSolana()) {
+    const TargetRegisterClass *RC32 = getRegClassFor(MVT::i32);
+    Register Reg0 = RegInfo.createVirtualRegister(RC32);
+    Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
+    BuildMI(BB, DL, TII.get(SBF::ADD_ri_32), Reg0).addReg(Reg).addImm(0);
+    BuildMI(BB, DL, TII.get(SBF::SUBREG_TO_REG), PromotedReg0)
+        .addImm(0)
+        .addReg(Reg0)
+        .addImm(SBF::sub_32);
+    return PromotedReg0;
+  }
+
   Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
   Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
   Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
diff --git a/llvm/lib/Target/SBF/SBFInstrInfo.td b/llvm/lib/Target/SBF/SBFInstrInfo.td
index 4bc620a97e81ba..b0d27364fa5150 100644
--- a/llvm/lib/Target/SBF/SBFInstrInfo.td
+++ b/llvm/lib/Target/SBF/SBFInstrInfo.td
@@ -969,7 +969,7 @@ def : Pat<(SBFWrapper tglobaladdr:$in),
 
 def : Pat<(i64 (sext GPR32:$src)),
-          (SRA_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>;
+          (SUBREG_TO_REG (i64 0), (ADD_ri_32 GPR32:$src, 0), sub_32)>;
 
 def : Pat<(i64 (zext GPR32:$src)),
           (MOV_32_64 GPR32:$src)>;
diff --git a/llvm/lib/Target/SBF/SBFMIPeephole.cpp b/llvm/lib/Target/SBF/SBFMIPeephole.cpp
index e88bad1e46c396..4e81ce4755201d 100644
--- a/llvm/lib/Target/SBF/SBFMIPeephole.cpp
+++ b/llvm/lib/Target/SBF/SBFMIPeephole.cpp
@@ -185,6 +185,69 @@ bool SBFMIPeephole::eliminateZExtSeq() {
       }
 
       // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
+      // (new MOV32/ADD32)
+      //
+      //   w0@1 := SUB_ri w0@0, 1                 # user instruction that sign-extends
+      //   w0@2 := ADD_ri w0@1, 0                 # sign extend inserted by SBFISelLowering
+      //   r0@3 := SUBREG_TO_REG 0, w0@2, sub_32
+      //
+      // to
+      //
+      //   w0@1 := SUB_ri w0@0, 1                 # user instruction that sign-extends
+      //   r0@3 := SUBREG_TO_REG 0, w0@1, sub_32
+      if (MI.getOpcode() == SBF::SUBREG_TO_REG &&
+          MI.getOperand(1).getImm() == 0 &&
+          MI.getOperand(3).getImm() == SBF::sub_32) {
+        Register DstReg = MI.getOperand(0).getReg();
+        Register SextReg = MI.getOperand(2).getReg();
+        MachineInstr *SextMI = MRI->getVRegDef(SextReg);
+
+        LLVM_DEBUG(dbgs() << "Starting SUBREG_TO_REG found:\n");
+        LLVM_DEBUG(MI.dump());
+
+        if (!SextMI || SextMI->isPHI() ||
+            SextMI->getOpcode() != SBF::ADD_ri_32 ||
+            SextMI->getOperand(2).getImm() != 0)
+          continue;
+
+        LLVM_DEBUG(dbgs() << "  ADD32 REG, 0 found:\n");
+        LLVM_DEBUG(SextMI->dump());
+
+        Register UserReg = SextMI->getOperand(1).getReg();
+        MachineInstr *UserMI = MRI->getVRegDef(UserReg);
+        if (!UserMI || UserMI->isPHI())
+          continue;
+
+        // Check if the source register of "ADD32 REG, 0" is already in
+        // sign-extended form.
+        switch (UserMI->getOpcode()) {
+        case SBF::ADD_ri_32:
+        case SBF::SUB_ri_32:
+        case SBF::MUL_ri_32:
+          break;
+        default:
+          continue;
+        }
+
+        LLVM_DEBUG(dbgs() << "  Sign-extending instruction found:\n");
+        LLVM_DEBUG(UserMI->dump());
+
+        BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(SBF::SUBREG_TO_REG), DstReg)
+            .addImm(0)
+            .addReg(UserReg)
+            .addImm(SBF::sub_32);
+        SextMI->eraseFromParent();
+
+        // Can't erase the SUBREG_TO_REG in its own iteration.
+        // Mark it via ToErase, and erase it in the next iteration.
+        ToErase = &MI;
+        ZExtElemNum++;
+        Eliminated = true;
+        break;
+      }
+
+      // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
+      // (old SLL/SRL)
       //
       //   MOV_32_64 rB, wA
       //   SLL_ri    rB, rB, 32
diff --git a/llvm/test/CodeGen/SBF/32-bit-subreg-peephole.ll b/llvm/test/CodeGen/SBF/32-bit-subreg-peephole.ll
index 6ade7e00390d0c..65c7ad18c34e4d 100644
--- a/llvm/test/CodeGen/SBF/32-bit-subreg-peephole.ll
+++ b/llvm/test/CodeGen/SBF/32-bit-subreg-peephole.ll
@@ -73,8 +73,7 @@ define dso_local i64 @select_s(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_add
 entry:
   %cmp = icmp sgt i32 %a, %b
   %c.d = select i1 %cmp, i64 %c, i64 %d
-; CHECK: lsh64 r{{[0-9]+}}, 32
-; CHECK-NEXT: arsh64 r{{[0-9]+}}, 32
+; CHECK: add32 w{{[0-9]+}}, 0
 ; CHECK: {{jslt|jsgt}} r{{[0-9]+}}, r{{[0-9]+}},
   ret i64 %c.d
 }
@@ -117,8 +116,7 @@ entry:
   %cmp = icmp sgt i32 %call, 6
 ; The shifts can't be optimized out because %call comes from function call
 ; return i32 so the high bits might be invalid.
-; CHECK: lsh64 r{{[0-9]+}}, 32
-; CHECK-NEXT: arsh64 r{{[0-9]+}}, 32
+; CHECK: add32 w{{[0-9]+}}, 0
   %cond = zext i1 %cmp to i32
 ; CHECK: {{jslt|jsgt}} r{{[0-9]+}}, {{[0-9]+}},
   ret i32 %cond
diff --git a/llvm/test/CodeGen/SBF/atomics_sbf.ll b/llvm/test/CodeGen/SBF/atomics_sbf.ll
index 88e3e7c80ba9b2..53b55a14e4b06a 100644
--- a/llvm/test/CodeGen/SBF/atomics_sbf.ll
+++ b/llvm/test/CodeGen/SBF/atomics_sbf.ll
@@ -181,12 +181,10 @@ entry:
 ; CHECK-LABEL: test_min_32
 ; CHECK: ldxw w0, [r1 + 0]
-; CHECK: mov64 r4, r0
-; CHECK: lsh64 r4, 32
-; CHECK: arsh64 r4, 32
-; CHECK: mov32 r5, w2
-; CHECK: lsh64 r5, 32
-; CHECK: arsh64 r5, 32
+; CHECK: mov32 w4, w0
+; CHECK-NEXT: add32 w4, 0
+; CHECK: mov32 w5, w2
+; CHECK-NEXT: add32 w5, 0
 ; CHECK: mov32 w3, w0
 ; CHECK: jslt r4, r5, LBB16_2
 ; CHECK: mov32 w3, w2
@@ -211,12 +209,10 @@ entry:
 ; CHECK-LABEL: test_max_32
 ; CHECK: ldxw w0, [r1 + 0]
-; CHECK: mov64 r4, r0
-; CHECK: lsh64 r4, 32
-; CHECK: arsh64 r4, 32
-; CHECK: mov32 r5, w2
-; CHECK: lsh64 r5, 32
-; CHECK: arsh64 r5, 32
+; CHECK: mov32 w4, w0
+; CHECK-NEXT: add32 w4, 0
+; CHECK: mov32 w5, w2
+; CHECK-NEXT: add32 w5, 0
 ; CHECK: mov32 w3, w0
 ; CHECK: jsgt r4, r5, LBB18_2
 ; CHECK: mov32 w3, w2
diff --git a/llvm/test/CodeGen/SBF/loop-exit-cond.ll b/llvm/test/CodeGen/SBF/loop-exit-cond.ll
index 9d422664232b0d..847c9f13232673 100644
--- a/llvm/test/CodeGen/SBF/loop-exit-cond.ll
+++ b/llvm/test/CodeGen/SBF/loop-exit-cond.ll
@@ -49,11 +49,10 @@ for.cond: ; preds = %for.inc, %if.then
   %cmp1 = icmp slt i32 %2, %3
   br i1 %cmp1, label %for.body, label %for.cond.cleanup
 
-; CHECK: mov32 r[[LEN:[0-9]+]], w1
-; CHECK: add32 w[[IDX32:[0-9]+]], 1
-; CHECK: mov64 r[[IDX:[0-9]+]], r[[IDX32:[0-9]+]]
-; CHECK: lsh64 r[[IDX:[0-9]+]], 32
-; CHECK: arsh64 r[[IDX:[0-9]+]], 32
+; CHECK: mov32 w[[LEN32:[0-9]+]], w1
+; CHECK: add32 w[[IDX:[0-9]+]], 1
+; CHECK: mov32 w[[LEN:[0-9]+]], w[[LEN32]]
+; CHECK: add32 w[[LEN]], 0
 ; CHECK-NEXT: jslt r[[IDX]], r[[LEN]],
 
 for.cond.cleanup: ; preds = %for.cond
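
For illustration (not part of the patch to apply): a minimal before/after
sketch of the sign-extension sequences the CHECK-line updates above expect.
The register numbers are assumptions for the example, not actual llc output:

    ; i64 result in r0 from sign-extending a 32-bit value in w1

    ; before this patch: shift-based sign extension
    mov32 r0, w1
    lsh64 r0, 32
    arsh64 r0, 32

    ; after this patch: a single ALU32 add, whose 32-bit result the
    ; target sign-extends into the full 64-bit register
    mov32 w0, w1
    add32 w0, 0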
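Likewise, a sketch of the case the new SBFMIPeephole rule cleans up: when the
extended value was just produced by one of the ALU32 immediate ops the pass
recognizes (ADD_ri_32, SUB_ri_32, MUL_ri_32), its result is already
sign-extended, so the add32 inserted by ISel is redundant. This is a
hypothetical fragment with assumed registers and label:

    ; before the peephole
    sub32 w2, 1        ; user instruction, already sign-extends w2 into r2
    add32 w2, 0        ; redundant sign extension inserted by ISel
    jslt r2, r3, LBB0_2

    ; after the peephole
    sub32 w2, 1
    jslt r2, r3, LBB0_2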