[AArch64][PAC] Introduce AArch64::PAC pseudo instruction #146488
Conversation
@llvm/pr-subscribers-backend-aarch64

Author: Anatoly Trosinenko (atrosinenko)

Changes

Introduce a pseudo instruction to be selected instead of a pair of MOVKXi and PAC[DI][AB] carrying address and immediate modifiers as separate operands. The new pseudo instruction is expanded in AsmPrinter, so that MOVKXi is emitted immediately before PAC[DI][AB]. This way, an attacker cannot control the immediate modifier used to sign the value, even if the address modifier can be substituted.

To simplify the instruction selection, select the AArch64::PAC pseudo using a TableGen pattern and post-process its $AddrDisc operand by a custom inserter hook - this eliminates duplication of the logic for DAGISel and GlobalISel. Furthermore, this improves cross-BB analysis in the case of DAGISel.

Full diff: https://github.com/llvm/llvm-project/pull/146488.diff

5 Files Affected:
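To make the threat model concrete, here is an editor's sketch of the kind of IR this patch hardens, with the intended lowering shown in comments; the register choices and the pacda opcode (key 2 = DA) are assumptions based on the description above, not output from the patch:

; A pointer signed with a blended discriminator: the immediate modifier 42
; must end up in bits 48..63 of the discriminator value.
define i64 @sign_blended(i64 %addr, i64 %addrdisc) {
  %disc = call i64 @llvm.ptrauth.blend(i64 %addrdisc, i64 42)
  %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 %disc)
  ret i64 %signed
}
; With the AArch64::PAC pseudo, the MOVK applying the immediate modifier is
; emitted by AsmPrinter immediately before the signing instruction, roughly:
;   mov   x17, x1            ; address part of the discriminator
;   movk  x17, #42, lsl #48  ; immediate modifier, re-applied right here
;   pacda x0, x17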
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index a16c104d8bef5..40890dbe6e532 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -171,6 +171,9 @@ class AArch64AsmPrinter : public AsmPrinter {
// Emit the sequence for AUT or AUTPAC.
void emitPtrauthAuthResign(const MachineInstr *MI);
+ // Emit the sequence for PAC.
+ void emitPtrauthSign(const MachineInstr *MI);
+
// Emit the sequence to compute the discriminator.
//
// ScratchReg should be x16/x17.
@@ -2173,6 +2176,31 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {
OutStreamer->emitLabel(EndSym);
}
+void AArch64AsmPrinter::emitPtrauthSign(const MachineInstr *MI) {
+ Register Val = MI->getOperand(1).getReg();
+ auto Key = (AArch64PACKey::ID)MI->getOperand(2).getImm();
+ uint64_t Disc = MI->getOperand(3).getImm();
+ Register AddrDisc = MI->getOperand(4).getReg();
+ bool AddrDiscKilled = MI->getOperand(4).isKill();
+
+ // Compute pac discriminator into x17
+ assert(isUInt<16>(Disc));
+ Register DiscReg = emitPtrauthDiscriminator(
+ Disc, AddrDisc, AArch64::X17, /*MayUseAddrAsScratch=*/AddrDiscKilled);
+ bool IsZeroDisc = DiscReg == AArch64::XZR;
+ unsigned Opc = getPACOpcodeForKey(Key, IsZeroDisc);
+
+ // paciza x16 ; if IsZeroDisc
+ // pacia x16, x17 ; if !IsZeroDisc
+ MCInst PACInst;
+ PACInst.setOpcode(Opc);
+ PACInst.addOperand(MCOperand::createReg(Val));
+ PACInst.addOperand(MCOperand::createReg(Val));
+ if (!IsZeroDisc)
+ PACInst.addOperand(MCOperand::createReg(DiscReg));
+ EmitToStreamer(*OutStreamer, PACInst);
+}
+
void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) {
bool IsCall = MI->getOpcode() == AArch64::BLRA;
unsigned BrTarget = MI->getOperand(0).getReg();
@@ -2867,6 +2895,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
emitPtrauthAuthResign(MI);
return;
+ case AArch64::PAC:
+ emitPtrauthSign(MI);
+ return;
+
case AArch64::LOADauthptrstatic:
LowerLOADauthptrstatic(*MI);
return;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1f98d69edb473..9e00905b3fec8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3071,6 +3071,75 @@ AArch64TargetLowering::EmitGetSMESaveSize(MachineInstr &MI,
return BB;
}
+// Helper function to find the instruction that defined a virtual register.
+// If unable to find such instruction, returns nullptr.
+static MachineInstr *stripVRegCopies(const MachineRegisterInfo &MRI,
+ Register Reg) {
+ while (Reg.isVirtual()) {
+ MachineInstr *DefMI = MRI.getVRegDef(Reg);
+ assert(DefMI && "Virtual register definition not found");
+ unsigned Opcode = DefMI->getOpcode();
+
+ if (Opcode == AArch64::COPY) {
+ Reg = DefMI->getOperand(1).getReg();
+ // Vreg is defined by copying from physreg.
+ if (Reg.isPhysical())
+ return DefMI;
+ continue;
+ }
+ if (Opcode == AArch64::SUBREG_TO_REG) {
+ Reg = DefMI->getOperand(2).getReg();
+ continue;
+ }
+
+ return DefMI;
+ }
+ return nullptr;
+}
+
+void AArch64TargetLowering::fixupBlendComponents(
+ MachineInstr &MI, MachineBasicBlock *BB, MachineOperand &IntDiscOp,
+ MachineOperand &AddrDiscOp, const TargetRegisterClass *AddrDiscRC) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ Register AddrDisc = AddrDiscOp.getReg();
+ int64_t IntDisc = IntDiscOp.getImm();
+
+ assert(IntDisc == 0 && "Blend components are already expanded");
+
+ MachineInstr *MaybeBlend = stripVRegCopies(MRI, AddrDisc);
+
+ if (MaybeBlend) {
+ // Detect blend(addr, imm) which is lowered as "MOVK addr, #imm, #48".
+ // Alternatively, recognize small immediate modifier passed via VReg.
+ if (MaybeBlend->getOpcode() == AArch64::MOVKXi &&
+ MaybeBlend->getOperand(3).getImm() == 48) {
+ AddrDisc = MaybeBlend->getOperand(1).getReg();
+ IntDisc = MaybeBlend->getOperand(2).getImm();
+ } else if (MaybeBlend->getOpcode() == AArch64::MOVi32imm &&
+ isUInt<16>(MaybeBlend->getOperand(1).getImm())) {
+ AddrDisc = AArch64::XZR;
+ IntDisc = MaybeBlend->getOperand(1).getImm();
+ }
+ }
+
+ // Normalize NoRegister operands emitted by GlobalISel.
+ if (AddrDisc == AArch64::NoRegister)
+ AddrDisc = AArch64::XZR;
+
+ // Make sure AddrDisc operand respects the register class imposed by MI.
+ if (AddrDisc != AArch64::XZR && MRI.getRegClass(AddrDisc) != AddrDiscRC) {
+ Register TmpReg = MRI.createVirtualRegister(AddrDiscRC);
+ BuildMI(*BB, MI, DL, TII->get(AArch64::COPY), TmpReg).addReg(AddrDisc);
+ AddrDisc = TmpReg;
+ }
+
+ AddrDiscOp.setReg(AddrDisc);
+ IntDiscOp.setImm(IntDisc);
+}
+
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
@@ -3169,6 +3238,11 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
case AArch64::MOVT_TIZ_PSEUDO:
return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true);
+
+ case AArch64::PAC:
+ fixupBlendComponents(MI, BB, MI.getOperand(3), MI.getOperand(4),
+ &AArch64::GPR64noipRegClass);
+ return BB;
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 89f90ee2b7707..3f71daa4daa91 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -182,6 +182,13 @@ class AArch64TargetLowering : public TargetLowering {
MachineBasicBlock *EmitGetSMESaveSize(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ /// Replace (0, vreg) discriminator components with the operands of blend
+ /// or with (immediate, XZR) when possible.
+ void fixupBlendComponents(MachineInstr &MI, MachineBasicBlock *BB,
+ MachineOperand &IntDiscOp,
+ MachineOperand &AddrDiscOp,
+ const TargetRegisterClass *AddrDiscRC) const;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0f3f24f0853c9..98ba694551d9f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2025,7 +2025,7 @@ let Predicates = [HasPAuth] in {
def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>;
}
- defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
+ defm PAC : SignAuth<0b000, 0b010, "pac", null_frag>;
defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
def XPACI : ClearAuth<0, "xpaci">;
@@ -2131,6 +2131,25 @@ let Predicates = [HasPAuth] in {
let Uses = [X16];
}
+ // PAC pseudo instruction. In AsmPrinter, it is expanded into an actual PAC*
+ // instruction immediately preceded by the discriminator computation.
+ // This enforces that the expected immediate modifier is used for signing, even
+ // if an attacker is able to substitute AddrDisc.
+ def PAC : Pseudo<(outs GPR64:$SignedVal),
+ (ins GPR64:$Val, i32imm:$Key, i64imm:$Disc, GPR64noip:$AddrDisc),
+ [], "$SignedVal = $Val">, Sched<[WriteI, ReadI]> {
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+ let mayStore = 0;
+ let mayLoad = 0;
+ let Size = 12;
+ let Defs = [X17];
+ let usesCustomInserter = 1;
+ }
+
+ def : Pat<(int_ptrauth_sign GPR64:$Val, timm:$Key, GPR64noip:$AddrDisc),
+ (PAC GPR64:$Val, $Key, 0, GPR64noip:$AddrDisc)>;
+
// AUT and re-PAC a value, using different keys/data.
// This directly manipulates x16/x17, which are the only registers the OS
// guarantees are safe to use for sensitive operations.
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-isel.ll b/llvm/test/CodeGen/AArch64/ptrauth-isel.ll
new file mode 100644
index 0000000000000..89ce439f5b47b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-isel.ll
@@ -0,0 +1,205 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple arm64e-apple-darwin -verify-machineinstrs -stop-after=finalize-isel -global-isel=0 \
+; RUN: | FileCheck %s --check-prefixes=DAGISEL
+; RUN: llc < %s -mtriple arm64e-apple-darwin -verify-machineinstrs -stop-after=finalize-isel -global-isel=1 -global-isel-abort=1 \
+; RUN: | FileCheck %s --check-prefixes=GISEL
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -verify-machineinstrs -stop-after=finalize-isel -global-isel=0 \
+; RUN: | FileCheck %s --check-prefixes=DAGISEL
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -verify-machineinstrs -stop-after=finalize-isel -global-isel=1 -global-isel-abort=1 \
+; RUN: | FileCheck %s --check-prefixes=GISEL
+
+; Check MIR produced by the instruction selector to validate properties that
+; cannot be reliably tested by only inspecting the final asm output.
+
+@discvar = dso_local global i64 0
+
+; Make sure the components of blend(addr, imm) are recognized and passed to
+; PAC pseudo via separate operands to prevent substitution of the immediate
+; modifier.
+;
+; MIR output of the instruction selector is inspected, as it is hard to reliably
+; distinguish MOVKXi immediately followed by a pseudo from a standalone pseudo
+; instruction carrying address and immediate modifiers in its separate operands
+; by only observing the final asm output.
+
+define i64 @small_imm_disc(i64 %addr) {
+ ; DAGISEL-LABEL: name: small_imm_disc
+ ; DAGISEL: bb.0.entry:
+ ; DAGISEL-NEXT: liveins: $x0
+ ; DAGISEL-NEXT: {{ $}}
+ ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; DAGISEL-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 42
+ ; DAGISEL-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64noip = SUBREG_TO_REG 0, killed [[MOVi32imm]], %subreg.sub_32
+ ; DAGISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, killed $xzr, implicit-def dead $x17
+ ; DAGISEL-NEXT: $x0 = COPY [[PAC]]
+ ; DAGISEL-NEXT: RET_ReallyLR implicit $x0
+ ;
+ ; GISEL-LABEL: name: small_imm_disc
+ ; GISEL: bb.1.entry:
+ ; GISEL-NEXT: liveins: $x0
+ ; GISEL-NEXT: {{ $}}
+ ; GISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; GISEL-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 42
+ ; GISEL-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64noip = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+ ; GISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, $xzr, implicit-def dead $x17
+ ; GISEL-NEXT: $x0 = COPY [[PAC]]
+ ; GISEL-NEXT: RET_ReallyLR implicit $x0
+entry:
+ %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 42)
+ ret i64 %signed
+}
+
+define i64 @large_imm_disc_wreg(i64 %addr) {
+ ; DAGISEL-LABEL: name: large_imm_disc_wreg
+ ; DAGISEL: bb.0.entry:
+ ; DAGISEL-NEXT: liveins: $x0
+ ; DAGISEL-NEXT: {{ $}}
+ ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; DAGISEL-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 12345678
+ ; DAGISEL-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64noip = SUBREG_TO_REG 0, killed [[MOVi32imm]], %subreg.sub_32
+ ; DAGISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 0, killed [[SUBREG_TO_REG]], implicit-def dead $x17
+ ; DAGISEL-NEXT: $x0 = COPY [[PAC]]
+ ; DAGISEL-NEXT: RET_ReallyLR implicit $x0
+ ;
+ ; GISEL-LABEL: name: large_imm_disc_wreg
+ ; GISEL: bb.1.entry:
+ ; GISEL-NEXT: liveins: $x0
+ ; GISEL-NEXT: {{ $}}
+ ; GISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; GISEL-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 12345678
+ ; GISEL-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64noip = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+ ; GISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 0, [[SUBREG_TO_REG]], implicit-def dead $x17
+ ; GISEL-NEXT: $x0 = COPY [[PAC]]
+ ; GISEL-NEXT: RET_ReallyLR implicit $x0
+entry:
+ %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 12345678)
+ ret i64 %signed
+}
+
+define i64 @large_imm_disc_xreg(i64 %addr) {
+ ; DAGISEL-LABEL: name: large_imm_disc_xreg
+ ; DAGISEL: bb.0.entry:
+ ; DAGISEL-NEXT: liveins: $x0
+ ; DAGISEL-NEXT: {{ $}}
+ ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; DAGISEL-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64noip = MOVi64imm 123456789012345
+ ; DAGISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 0, killed [[MOVi64imm]], implicit-def dead $x17
+ ; DAGISEL-NEXT: $x0 = COPY [[PAC]]
+ ; DAGISEL-NEXT: RET_ReallyLR implicit $x0
+ ;
+ ; GISEL-LABEL: name: large_imm_disc_xreg
+ ; GISEL: bb.1.entry:
+ ; GISEL-NEXT: liveins: $x0
+ ; GISEL-NEXT: {{ $}}
+ ; GISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; GISEL-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64noip = MOVi64imm 123456789012345
+ ; GISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 0, [[MOVi64imm]], implicit-def dead $x17
+ ; GISEL-NEXT: $x0 = COPY [[PAC]]
+ ; GISEL-NEXT: RET_ReallyLR implicit $x0
+entry:
+ %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 123456789012345)
+ ret i64 %signed
+}
+
+define i64 @blend_and_sign_same_bb(i64 %addr) {
+ ; DAGISEL-LABEL: name: blend_and_sign_same_bb
+ ; DAGISEL: bb.0.entry:
+ ; DAGISEL-NEXT: liveins: $x0
+ ; DAGISEL-NEXT: {{ $}}
+ ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; DAGISEL-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @discvar
+ ; DAGISEL-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @discvar :: (dereferenceable load (s64) from @discvar)
+ ; DAGISEL-NEXT: [[MOVKXi:%[0-9]+]]:gpr64noip = MOVKXi [[LDRXui]], 42, 48
+ ; DAGISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64noip = COPY [[LDRXui]]
+ ; DAGISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, killed [[COPY1]], implicit-def dead $x17
+ ; DAGISEL-NEXT: $x0 = COPY [[PAC]]
+ ; DAGISEL-NEXT: RET_ReallyLR implicit $x0
+ ;
+ ; GISEL-LABEL: name: blend_and_sign_same_bb
+ ; GISEL: bb.1.entry:
+ ; GISEL-NEXT: liveins: $x0
+ ; GISEL-NEXT: {{ $}}
+ ; GISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; GISEL-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @discvar
+ ; GISEL-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @discvar :: (dereferenceable load (s64) from @discvar)
+ ; GISEL-NEXT: [[MOVKXi:%[0-9]+]]:gpr64noip = MOVKXi [[LDRXui]], 42, 48
+ ; GISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64noip = COPY [[LDRXui]]
+ ; GISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, [[COPY1]], implicit-def dead $x17
+ ; GISEL-NEXT: $x0 = COPY [[PAC]]
+ ; GISEL-NEXT: RET_ReallyLR implicit $x0
+entry:
+ %addrdisc = load i64, ptr @discvar
+ %disc = call i64 @llvm.ptrauth.blend(i64 %addrdisc, i64 42)
+ %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 %disc)
+ ret i64 %signed
+}
+
+; In the below test cases both %addrdisc and %disc are computed (i.e. they are
+; neither global addresses, nor function arguments) in a different basic block,
+; making them harder to express via ISD::PtrAuthGlobalAddress.
+
+define i64 @blend_and_sign_different_bbs(i64 %addr, i64 %cond) {
+ ; DAGISEL-LABEL: name: blend_and_sign_different_bbs
+ ; DAGISEL: bb.0.entry:
+ ; DAGISEL-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000)
+ ; DAGISEL-NEXT: liveins: $x0, $x1
+ ; DAGISEL-NEXT: {{ $}}
+ ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
+ ; DAGISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
+ ; DAGISEL-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @discvar
+ ; DAGISEL-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @discvar :: (dereferenceable load (s64) from @discvar)
+ ; DAGISEL-NEXT: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[LDRXui]], 42, 48
+ ; DAGISEL-NEXT: [[COPY2:%[0-9]+]]:gpr64noip = COPY [[MOVKXi]]
+ ; DAGISEL-NEXT: CBZX [[COPY]], %bb.2
+ ; DAGISEL-NEXT: B %bb.1
+ ; DAGISEL-NEXT: {{ $}}
+ ; DAGISEL-NEXT: bb.1.next:
+ ; DAGISEL-NEXT: successors: %bb.2(0x80000000)
+ ; DAGISEL-NEXT: {{ $}}
+ ; DAGISEL-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY [[COPY2]]
+ ; DAGISEL-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 3866633 /* reguse:GPR64common */, [[COPY3]]
+ ; DAGISEL-NEXT: {{ $}}
+ ; DAGISEL-NEXT: bb.2.exit:
+ ; DAGISEL-NEXT: [[COPY4:%[0-9]+]]:gpr64noip = COPY [[LDRXui]]
+ ; DAGISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY1]], 2, 42, [[COPY4]], implicit-def dead $x17
+ ; DAGISEL-NEXT: $x0 = COPY [[PAC]]
+ ; DAGISEL-NEXT: RET_ReallyLR implicit $x0
+ ;
+ ; GISEL-LABEL: name: blend_and_sign_different_bbs
+ ; GISEL: bb.1.entry:
+ ; GISEL-NEXT: successors: %bb.2(0x50000000), %bb.3(0x30000000)
+ ; GISEL-NEXT: liveins: $x0, $x1
+ ; GISEL-NEXT: {{ $}}
+ ; GISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; GISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; GISEL-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @discvar
+ ; GISEL-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @discvar :: (dereferenceable load (s64) from @discvar)
+ ; GISEL-NEXT: [[MOVKXi:%[0-9]+]]:gpr64noip = MOVKXi [[LDRXui]], 42, 48
+ ; GISEL-NEXT: CBZX [[COPY1]], %bb.3
+ ; GISEL-NEXT: B %bb.2
+ ; GISEL-NEXT: {{ $}}
+ ; GISEL-NEXT: bb.2.next:
+ ; GISEL-NEXT: successors: %bb.3(0x80000000)
+ ; GISEL-NEXT: {{ $}}
+ ; GISEL-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY [[MOVKXi]]
+ ; GISEL-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 3866633 /* reguse:GPR64common */, [[COPY2]]
+ ; GISEL-NEXT: {{ $}}
+ ; GISEL-NEXT: bb.3.exit:
+ ; GISEL-NEXT: [[COPY3:%[0-9]+]]:gpr64noip = COPY [[LDRXui]]
+ ; GISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, [[COPY3]], implicit-def dead $x17
+ ; GISEL-NEXT: $x0 = COPY [[PAC]]
+ ; GISEL-NEXT: RET_ReallyLR implicit $x0
+entry:
+ %addrdisc = load i64, ptr @discvar
+ %disc = call i64 @llvm.ptrauth.blend(i64 %addrdisc, i64 42)
+ %cond.b = icmp ne i64 %cond, 0
+ br i1 %cond.b, label %next, label %exit
+
+next:
+ call void asm sideeffect "nop", "r"(i64 %disc)
+ br label %exit
+
+exit:
+ %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 %disc)
+ ret i64 %signed
+}
Ping.
Overall, this looks reasonable, and it parallels the existing approach of combined resigns. Some minor comments below.
/// Replace (0, vreg) discriminator components with the operands of blend
/// or with (immediate, XZR) when possible.
void fixupBlendComponents(MachineInstr &MI, MachineBasicBlock *BB,
I would probably suggest using a bit more specific naming, as "blend" could also refer to a vector blend operation.
Renamed to fixupPtrauthDiscriminator, thanks.
I think this should be opt-in behavior. It might be useful if the goal is to protect the stack, but in cases where the goal is to only protect the heap due to the high cost of protecting the stack, this change would only add overhead. If you are deploying comprehensive stack protection, I think something like Darwin's x16/x17 protection would be necessary, so maybe we can use isX16X17Safer() as the control for whether to turn this on?
@pcc This transformation looks quite light-weight at first. Here is the assembly code that is ultimately produced for the example below.

[assembly listings omitted: the produced code, and a hand-improved variant that would save two instructions]

Aside from subtly affecting register allocation, I expect this patch to add at most two instructions per signing operation:

define void @test(i64 %addrdisc, i64 %a) {
%disc = call i64 @llvm.ptrauth.blend(i64 %addrdisc, i64 42)
%signed0 = call i64 @llvm.ptrauth.sign(i64 %a, i32 0, i64 %disc)
%b = call i64 asm sideeffect "add $0, $1, 123", "=r,r"(i64 %signed0)
%signed1 = call i64 @llvm.ptrauth.sign(i64 %b, i32 1, i64 %disc)
%c = call i64 asm sideeffect "add $0, $1, 123", "=r,r"(i64 %signed1)
%signed2 = call i64 @llvm.ptrauth.sign(i64 %c, i32 2, i64 %disc)
%d = call i64 asm sideeffect "add $0, $1, 123", "=r,r"(i64 %signed2)
%signed3 = call i64 @llvm.ptrauth.sign(i64 %d, i32 3, i64 %disc)
call void asm sideeffect "nop", "r"(i64 %signed3)
ret void
}

which is translated into:

[assembly listing omitted]
Without this patch, something along these lines could be emitted:

[assembly listing omitted]
On the other hand, there is no overhead at all in the most trivial cases. Please note that there are quite a few redundant instructions in the MIR output generated right after instruction selection. By the way, the same approach is already used for AUT and AUTPAC (which are expanded into longer sequences, though, as it may be desirable to explicitly check the authenticated pointer in case the CPU does not implement FEAT_FPAC) - these are updated in #146489. On the other hand, it should be rather straightforward to make this behavior opt-in.
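For reference, a minimal sketch of the per-sign expansion discussed above, assuming the blend's address part lives in x8 and the value being signed in x0 (register choices are assumptions):

mov   x17, x8             ; copy the address part of the discriminator
movk  x17, #42, lsl #48   ; re-apply the immediate modifier next to the PAC
pacia x0, x17             ; likewise pacib/pacda/pacdb for the other keys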
@pcc To clarify things a bit further, I feel like the PAC pseudo is quite different from the AUT and AUTPAC pseudos. When a pseudo instruction is expanded into an authentication (and possibly something else afterwards), a significant overhead is possible due to checking whether the authentication succeeded - in some cases this is important if the CPU does not implement FEAT_FPAC - and furthermore, this check can be inserted automatically. On the other hand, while it is crucial to only provide safe values for the PAC pseudo to sign, there is not much AsmPrinter can do automatically for a standalone PAC.

As far as I can see, it should be rather straightforward to conditionalize this logic as long as there is a condition to check. The problem is that there doesn't seem to be a clear predicate to use: we want to choose whether to protect against the threat X or save Y instructions of overhead depending on the condition P. In case of the AUTx16x17 vs. AUTxMxN choice, X is "a sensitive value can be stored in an unsafe register" and Y is increased register pressure, etc. Then P can obviously be chosen as "x16/x17 are safer than other general-purpose registers", as otherwise all registers are equally safe (or equally unsafe), thus there is no threat X at all, but the cost Y of the mitigation is still non-zero. In case of this PR, on the other hand, Y is known (adding at most two simple instructions per single sign operation).

I think the overhead is quite small, so we could turn this security feature on unconditionally without the risk of accidentally forgetting to opt in. On the other hand, if decreasing the code-size overhead is worth increasing the "implementation overhead", then it should be feasible to expose a predicate to the tablegenerated patterns (for example, "enable the protection as long as the user has not opted out explicitly").
This looks reasonable, thanks! I have no objections to merging this.
A couple of thoughts regarding the discussion about performance overhead. It would be very nice to have no overhead compared to the current implementation, and it looks like this could be achieved using an approach similar to #132857. But I agree with @atrosinenko that in most cases there is no overhead. So, we have the following points:
- this PR is beneficial for security;
- the performance overhead is not observed for the majority of the cases;
- we have this PR included in the 21.x release milestone.
Given that, I think it's worth merging even without fixing the performance overhead. But if we do so, it's definitely worth fixing that in a follow-up patch. My point is that security-affecting updates are better merged sooner rather than later :)
// Helper function to find the instruction that defined a virtual register.
// If unable to find such instruction, returns nullptr.
static MachineInstr *stripVRegCopies(const MachineRegisterInfo &MRI,
Nit
-static MachineInstr *stripVRegCopies(const MachineRegisterInfo &MRI,
+static const MachineInstr *stripVRegCopies(const MachineRegisterInfo &MRI,
Applied, thanks!
        MaybeBlend->getOperand(3).getImm() == 48) {
      AddrDisc = MaybeBlend->getOperand(1).getReg();
      IntDisc = MaybeBlend->getOperand(2).getImm();
    } else if (MaybeBlend->getOpcode() == AArch64::MOVi32imm &&
Do we have a guarantee that only this particular mov opcode is used for small immediate discriminators?
Thank you for pointing this out, I rechecked which instructions can be used to materialize an immediate constant, and it turned out that it is possible for MOVi64imm to be used even for small i64 constants when optimization is disabled (see patterns in AArch64InstrInfo.td). Furthermore, I realized it is necessary to check whether the integer constant is an immediate operand (as opposed to some relocation referring to a global symbol, for example). For that reason, I rewrote the chain of "if"-"else if" into a switch and added more tests in 33b61f5.
It is not so clear cut, and this is not the criterion that I would use when making such decisions. For example, if we increase the amount of security provided by a mitigation (assuming that security can be objectively measured) but make it so expensive as to prevent some users from deploying it at all, the practical effect is reduced security.

That being said: in parallel I was also measuring the actual performance impact of this PR. I patched this PR into my structure protection branch (top two commits) and measured it using the included pfp-bench script on my M2 Ultra Mac Studio. The performance impact of this change was below the noise level on BM_RPC_Fleet/process_time, which is the benchmark most affected by structure protection.

Command run:

[command omitted]
Result after 1400 runs (benchmark time in ns, x is without this PR, + is with):

[benchmark results omitted]
I think it is important to have a convenient and objective way of measuring the performance impact of changes like this. Once structure protection is landed (or even before that, by using my branch), I think we'll have that using something like my script. If something similar can be developed for PAuth ABI, I think that would be great as well.
@atrosinenko Thanks for adding MIR tests for different MOVi{32|64}/MOVKXi patterns in eb14eec! This LGTM and I have no objections to merging this.
I have just spotted one more issue. I'm really sorry for updating an already approved PR, but I hope this change does not affect the codegen significantly, as the register classes of the PAC operands are not changed.
Introduce a pseudo instruction carrying address and immediate modifiers as separate operands, to be selected instead of a pair of `MOVKXi` and `PAC[ID][AB]`. The new pseudo instruction is expanded in AsmPrinter, so that `MOVKXi` is emitted immediately before `PAC[ID][AB]`. This way, an attacker cannot control the immediate modifier used to sign the value, even if the address modifier can be substituted.

To simplify the instruction selection, select the `AArch64::PAC` pseudo using a TableGen pattern and post-process its `$AddrDisc` operand by a custom inserter hook - this eliminates duplication of the logic for DAGISel and GlobalISel. Furthermore, this improves cross-BB analysis in the case of DAGISel.