diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index dca55dafcc5e3..8331fe333e637 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4865,31 +4865,29 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // for srcA/srcB? // // vdst, srcA, srcB, srcC - const SIMachineFunctionInfo *Info = MF.getInfo(); OpdsMapping[0] = - Info->mayNeedAGPRs() + !Subtarget.hasGFX90AInsts() ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[4] = - Info->mayNeedAGPRs() + !Subtarget.hasGFX90AInsts() ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); break; } case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4: case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { - const SIMachineFunctionInfo *Info = MF.getInfo(); OpdsMapping[0] = - Info->mayNeedAGPRs() + !Subtarget.hasGFX90AInsts() ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[4] = - Info->mayNeedAGPRs() + !Subtarget.hasGFX90AInsts() ? 
getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 07d79d677104a..11c9adb3371d5 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16076,7 +16076,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, MachineFunction *MF = MI.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - SIMachineFunctionInfo *Info = MF->getInfo(); if (TII->isVOP3(MI.getOpcode())) { // Make sure constant bus requirements are respected. @@ -16087,7 +16086,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // use between vgpr and agpr as agpr tuples tend to be big. if (!MI.getDesc().operands().empty()) { unsigned Opc = MI.getOpcode(); - bool HasAGPRs = Info->mayNeedAGPRs(); const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); int16_t Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); for (auto I : @@ -16095,7 +16093,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), Src2Idx}) { if (I == -1) break; - if ((I == Src2Idx) && (HasAGPRs)) + if (I == Src2Idx) break; MachineOperand &Op = MI.getOperand(I); if (!Op.isReg() || !Op.getReg().isVirtual()) @@ -16129,22 +16127,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, TII->legalizeOpWithMove(MI, Src1Idx); } } - - if (!HasAGPRs) - return; - - // Resolve the rest of AV operands to AGPRs. 
- if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) { - if (Src2->isReg() && Src2->getReg().isVirtual()) { - auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg()); - if (TRI->isVectorSuperClass(RC)) { - auto *NewRC = TRI->getEquivalentAGPRClass(RC); - MRI.setRegClass(Src2->getReg(), NewRC); - if (Src2->isTied()) - MRI.setRegClass(MI.getOperand(0).getReg(), NewRC); - } - } - } } return; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 1673bfa152674..7a279d7bede7d 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -63,12 +63,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, PSInputAddr = AMDGPU::getInitialPSInputAddr(F); } - MayNeedAGPRs = ST.hasMAIInsts(); - if (ST.hasGFX90AInsts() && - ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() && - !mayUseAGPRs(F)) - MayNeedAGPRs = false; // We will select all MAI with VGPR operands. - if (AMDGPU::isChainCC(CC)) { // Chain functions don't receive an SP from their caller, but are free to // set one up. For now, we can use s32 to match what amdgpu_gfx functions diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 0e7635a045588..b9157b9a8c7e6 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -493,8 +493,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, // user arguments. This is an offset from the KernargSegmentPtr. bool ImplicitArgPtr : 1; - bool MayNeedAGPRs : 1; - // The hard-wired high half of the address of the global information table // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since // current hardware only allows a 16 bit value. 
@@ -1165,10 +1163,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, unsigned getMaxMemoryClusterDWords() const { return MaxMemoryClusterDWords; } - bool mayNeedAGPRs() const { - return MayNeedAGPRs; - } - // \returns true if a function has a use of AGPRs via inline asm or // has a call which may use it. bool mayUseAGPRs(const Function &F) const; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index e8db879ca5077..6b6b74234cfef 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -856,17 +856,11 @@ defvar MayNotNeedAGPRs_gisel = [{ return !MF.getInfo()->mayNeedAGPRs(); }]; -class AgprMAIFrag : - MAIFrag { - let GISelPredicateCode = MayNeedAGPRs_gisel; -} +class AgprMAIFrag + : MAIFrag {} -class VgprMAIFrag : - MAIFrag { - let GISelPredicateCode = MayNotNeedAGPRs_gisel; -} +class VgprMAIFrag + : MAIFrag {} let isAsCheapAsAMove = 1, isReMaterializable = 1 in { defm V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>; @@ -917,10 +911,14 @@ multiclass MAIInst), Scaled>, MFMATable<0, "AGPR", NAME # "_e64">; - let OtherPredicates = [isGFX90APlus], Mnemonic = OpName in - def _vgprcd_e64 : MAIInst("VOPProfileMAI_" # P # "_VCD"), - !if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, VgprMAIFrag), Scaled>, - MFMATable<0, "VGPR", NAME # "_vgprcd_e64", NAME # "_e64">; + let OtherPredicates = [isGFX90APlus], Mnemonic = OpName, + AddedComplexity = 10 in def _vgprcd_e64 + : MAIInst("VOPProfileMAI_"#P#"_VCD"), + !if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, + VgprMAIFrag), + Scaled>, + MFMATable<0, "VGPR", NAME#"_vgprcd_e64", NAME#"_e64">; } if NoDstOverlap then { @@ -931,16 +929,22 @@ multiclass MAIInst), Scaled>, MFMATable<1, "AGPR", NAME # "_e64", NAME # "_mac_e64">; - let OtherPredicates = [isGFX90APlus] in - def _mac_vgprcd_e64 : MAIInst("VOPProfileMAI_" # P # "_VCD"), - !if(!eq(node, null_frag), null_frag, 
VgprMAIFrag), Scaled>, - MFMATable<1, "VGPR", NAME # "_vgprcd_e64", NAME # "_mac_e64">; + let OtherPredicates = [isGFX90APlus], + AddedComplexity = 10 in def _mac_vgprcd_e64 + : MAIInst("VOPProfileMAI_"#P#"_VCD"), + !if(!eq(node, null_frag), null_frag, + VgprMAIFrag), + Scaled>, + MFMATable<1, "VGPR", NAME#"_vgprcd_e64", NAME#"_mac_e64">; } } } // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 } -// Provide a wrapper around MAIInst that provides the appended operands from V_MFMA_LD_SCALE_B32 +// Provide a wrapper around MAIInst that provides the appended operands from +// V_MFMA_LD_SCALE_B32. AGPR variants are never selected; VGPR is selected and +// may later be rewritten to AGPR. multiclass ScaledMAIInst_mc { defvar VariantSuffix = !subst(!toupper(OpName), "", NAME); // Drop the main opcode name prefix to get the "_fN_fM" suffix. defvar UnscaledOpName = UnscaledOpName_#VariantSuffix; @@ -949,9 +953,9 @@ multiclass ScaledMAIInst_mc(!cast(UnscaledOpName#"_e64").Pfl).NoDstOverlap; - def _e64 : ScaledMAIInst(UnscaledOpName#"_e64"), !if(NoDstOverlap, null_frag, AgprMAIFrag)>, - MFMATable<0, "AGPR", NAME # "_e64">; + def _e64 + : ScaledMAIInst(UnscaledOpName#"_e64"), null_frag>, + MFMATable<0, "AGPR", NAME#"_e64">; def _vgprcd_e64 : ScaledMAIInst(UnscaledOpName#"_vgprcd_e64"), !if(NoDstOverlap, null_frag, VgprMAIFrag)>, @@ -961,9 +965,10 @@ multiclass ScaledMAIInst_mc(UnscaledOpName # "_mac_e64"), AgprMAIFrag>, - MFMATable<1, "AGPR", NAME # "_e64">; + def _mac_e64 + : ScaledMAIInst(UnscaledOpName#"_mac_e64"), null_frag>, + MFMATable<1, "AGPR", NAME#"_e64">; def _mac_vgprcd_e64 : ScaledMAIInst(UnscaledOpName # "_mac_vgprcd_e64"), VgprMAIFrag>,