From fcad2bbcfc1f0acaa91fe03b71ff069975f80c8e Mon Sep 17 00:00:00 2001 From: Patrick O'Neill Date: Thu, 10 Aug 2023 15:58:19 +0100 Subject: [PATCH] [RISC-V] Add proposed mapping for Ztso Currently LLVM emits Ztso code for fences, loads, and stores (behind an experimental flag) [1]. This patch updates the mapping and implements support for LR/SC and AMO ops. This updated mapping is compatible with the RVWMO ABI present in the psABI. Additional context can be found in the psABI pull request [2]. [1] https://reviews.llvm.org/D143076 [2] https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/391 Differential Revision: https://reviews.llvm.org/D155517 --- .../RISCV/RISCVExpandAtomicPseudoInsts.cpp | 89 +- llvm/lib/Target/RISCV/RISCVFeatures.td | 1 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 5 +- llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 18 +- llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll | 2228 +++-- llvm/test/CodeGen/RISCV/atomic-load-store.ll | 14 + llvm/test/CodeGen/RISCV/atomic-rmw.ll | 7168 +++++++++++------ 7 files changed, 6384 insertions(+), 3139 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp index 59f1e8319ae72e..d0d0e32334484b 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -30,6 +30,7 @@ namespace { class RISCVExpandAtomicPseudo : public MachineFunctionPass { public: + const RISCVSubtarget *STI; const RISCVInstrInfo *TII; static char ID; @@ -72,7 +73,8 @@ class RISCVExpandAtomicPseudo : public MachineFunctionPass { char RISCVExpandAtomicPseudo::ID = 0; bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo(); + STI = &MF.getSubtarget<RISCVSubtarget>(); + TII = STI->getInstrInfo(); #ifndef NDEBUG const unsigned OldSize = getInstSizeInBytes(MF); @@ -148,24 +150,30 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, return false; } 
-static unsigned getLRForRMW32(AtomicOrdering Ordering) { +static unsigned getLRForRMW32(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { switch (Ordering) { default: llvm_unreachable("Unexpected AtomicOrdering"); case AtomicOrdering::Monotonic: return RISCV::LR_W; case AtomicOrdering::Acquire: + if (Subtarget->hasStdExtZtso()) + return RISCV::LR_W; return RISCV::LR_W_AQ; case AtomicOrdering::Release: return RISCV::LR_W; case AtomicOrdering::AcquireRelease: + if (Subtarget->hasStdExtZtso()) + return RISCV::LR_W; return RISCV::LR_W_AQ; case AtomicOrdering::SequentiallyConsistent: return RISCV::LR_W_AQ_RL; } } -static unsigned getSCForRMW32(AtomicOrdering Ordering) { +static unsigned getSCForRMW32(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { switch (Ordering) { default: llvm_unreachable("Unexpected AtomicOrdering"); @@ -174,32 +182,42 @@ static unsigned getSCForRMW32(AtomicOrdering Ordering) { case AtomicOrdering::Acquire: return RISCV::SC_W; case AtomicOrdering::Release: + if (Subtarget->hasStdExtZtso()) + return RISCV::SC_W; return RISCV::SC_W_RL; case AtomicOrdering::AcquireRelease: + if (Subtarget->hasStdExtZtso()) + return RISCV::SC_W; return RISCV::SC_W_RL; case AtomicOrdering::SequentiallyConsistent: return RISCV::SC_W_RL; } } -static unsigned getLRForRMW64(AtomicOrdering Ordering) { +static unsigned getLRForRMW64(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { switch (Ordering) { default: llvm_unreachable("Unexpected AtomicOrdering"); case AtomicOrdering::Monotonic: return RISCV::LR_D; case AtomicOrdering::Acquire: + if (Subtarget->hasStdExtZtso()) + return RISCV::LR_D; return RISCV::LR_D_AQ; case AtomicOrdering::Release: return RISCV::LR_D; case AtomicOrdering::AcquireRelease: + if (Subtarget->hasStdExtZtso()) + return RISCV::LR_D; return RISCV::LR_D_AQ; case AtomicOrdering::SequentiallyConsistent: return RISCV::LR_D_AQ_RL; } } -static unsigned getSCForRMW64(AtomicOrdering Ordering) { +static unsigned 
getSCForRMW64(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { switch (Ordering) { default: llvm_unreachable("Unexpected AtomicOrdering"); @@ -208,27 +226,33 @@ static unsigned getSCForRMW64(AtomicOrdering Ordering) { case AtomicOrdering::Acquire: return RISCV::SC_D; case AtomicOrdering::Release: + if (Subtarget->hasStdExtZtso()) + return RISCV::SC_D; return RISCV::SC_D_RL; case AtomicOrdering::AcquireRelease: + if (Subtarget->hasStdExtZtso()) + return RISCV::SC_D; return RISCV::SC_D_RL; case AtomicOrdering::SequentiallyConsistent: return RISCV::SC_D_RL; } } -static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) { +static unsigned getLRForRMW(AtomicOrdering Ordering, int Width, + const RISCVSubtarget *Subtarget) { if (Width == 32) - return getLRForRMW32(Ordering); + return getLRForRMW32(Ordering, Subtarget); if (Width == 64) - return getLRForRMW64(Ordering); + return getLRForRMW64(Ordering, Subtarget); llvm_unreachable("Unexpected LR width\n"); } -static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) { +static unsigned getSCForRMW(AtomicOrdering Ordering, int Width, + const RISCVSubtarget *Subtarget) { if (Width == 32) - return getSCForRMW32(Ordering); + return getSCForRMW32(Ordering, Subtarget); if (Width == 64) - return getSCForRMW64(Ordering); + return getSCForRMW64(Ordering, Subtarget); llvm_unreachable("Unexpected SC width\n"); } @@ -236,7 +260,8 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL, MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB, MachineBasicBlock *DoneMBB, - AtomicRMWInst::BinOp BinOp, int Width) { + AtomicRMWInst::BinOp BinOp, int Width, + const RISCVSubtarget *STI) { Register DestReg = MI.getOperand(0).getReg(); Register ScratchReg = MI.getOperand(1).getReg(); Register AddrReg = MI.getOperand(2).getReg(); @@ -249,7 +274,7 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, // binop scratch, dest, val // sc.[w|d] scratch, 
scratch, (addr) // bnez scratch, loop - BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg) + BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg) .addReg(AddrReg); switch (BinOp) { default: @@ -263,7 +288,7 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, .addImm(-1); break; } - BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg) + BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg) .addReg(AddrReg) .addReg(ScratchReg); BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) @@ -294,10 +319,13 @@ static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL, .addReg(ScratchReg); } -static void doMaskedAtomicBinOpExpansion( - const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL, - MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB, - MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) { +static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII, + MachineInstr &MI, DebugLoc DL, + MachineBasicBlock *ThisMBB, + MachineBasicBlock *LoopMBB, + MachineBasicBlock *DoneMBB, + AtomicRMWInst::BinOp BinOp, int Width, + const RISCVSubtarget *STI) { assert(Width == 32 && "Should never need to expand masked 64-bit operations"); Register DestReg = MI.getOperand(0).getReg(); Register ScratchReg = MI.getOperand(1).getReg(); @@ -315,7 +343,7 @@ static void doMaskedAtomicBinOpExpansion( // xor scratch, destreg, scratch // sc.w scratch, scratch, (alignedaddr) // bnez scratch, loop - BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg) + BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg) .addReg(AddrReg); switch (BinOp) { default: @@ -348,7 +376,7 @@ static void doMaskedAtomicBinOpExpansion( insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg, ScratchReg); - BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg) + BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), 
ScratchReg) .addReg(AddrReg) .addReg(ScratchReg); BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) @@ -380,10 +408,11 @@ bool RISCVExpandAtomicPseudo::expandAtomicBinOp( MBB.addSuccessor(LoopMBB); if (!IsMasked) - doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width); + doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width, + STI); else doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, - Width); + Width, STI); NextMBBI = MBB.end(); MI.eraseFromParent(); @@ -455,7 +484,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp( // mv scratch1, destreg // [sext scratch2 if signed min/max] // ifnochangeneeded scratch2, incr, .looptail - BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg) + BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg) .addReg(AddrReg); BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg) .addReg(DestReg) @@ -507,7 +536,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp( // .looptail: // sc.w scratch1, scratch1, (addr) // bnez scratch1, loop - BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg) + BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg) .addReg(AddrReg) .addReg(Scratch1Reg); BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) @@ -626,7 +655,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( // .loophead: // lr.[w|d] dest, (addr) // bne dest, cmpval, done - BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg) + BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), + DestReg) .addReg(AddrReg); BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE)) .addReg(DestReg) @@ -635,7 +665,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( // .looptail: // sc.[w|d] scratch, newval, (addr) // bnez scratch, loophead - BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg) + BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), + 
ScratchReg) .addReg(AddrReg) .addReg(NewValReg); BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) @@ -648,7 +679,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( // and scratch, dest, mask // bne scratch, cmpval, done Register MaskReg = MI.getOperand(5).getReg(); - BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg) + BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), + DestReg) .addReg(AddrReg); BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg) .addReg(DestReg) @@ -666,7 +698,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( // bnez scratch, loophead insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg, MaskReg, ScratchReg); - BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg) + BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), + ScratchReg) .addReg(AddrReg) .addReg(ScratchReg); BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index e92f9cd0b615e5..8aeaa11bae5b8e 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -542,6 +542,7 @@ def FeatureStdExtZtso def HasStdExtZtso : Predicate<"Subtarget->hasStdExtZtso()">, AssemblerPredicate<(all_of FeatureStdExtZtso), "'Ztso' (Memory Model - Total Store Order)">; +def NotHasStdExtZtso : Predicate<"!Subtarget->hasStdExtZtso()">; def FeatureStdExtZawrs : SubtargetFeature<"zawrs", "HasStdExtZawrs", "true", "'Zawrs' (Wait on Reservation Set)">; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 48268c9e13e67f..1c578481d40712 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16909,8 +16909,11 @@ Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder, Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, 
Instruction *Inst, AtomicOrdering Ord) const { - if (Subtarget.hasStdExtZtso()) + if (Subtarget.hasStdExtZtso()) { + if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) + return Builder.CreateFence(Ord); return nullptr; + } if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) return Builder.CreateFence(AtomicOrdering::Acquire); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 8421109b85147e..b06415b620e04e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -133,11 +133,10 @@ let Predicates = [HasAtomicLdSt, IsRV64] in { def : AtomicStPat; } -let Predicates = [HasStdExtA] in { - /// AMOs multiclass AMOPat { +let Predicates = [HasStdExtA, NotHasStdExtZtso] in { def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"), !cast<RVInst>(BaseInst), vt>; def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"), @@ -149,6 +148,21 @@ multiclass AMOPat { def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"), !cast<RVInst>(BaseInst#"_AQ_RL"), vt>; } +let Predicates = [HasStdExtA, HasStdExtZtso] in { + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"), + !cast<RVInst>(BaseInst), vt>; +} +} + +let Predicates = [HasStdExtA] in { defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">; defm : AMOPat<"atomic_load_add_32", "AMOADD_W">; diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll index d04f85ef30783e..f900b5161f7512 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -2,15 +2,15 @@ ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV32IA %s +; RUN: | FileCheck 
-check-prefixes=RV32IA,RV32IA-WMO %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV32IA %s +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV64IA %s +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV64IA %s +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32I-LABEL: cmpxchg_i8_monotonic_monotonic: @@ -102,28 +102,51 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i8_acquire_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a0, a2, a0 -; RV32IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a2, (a3) -; RV32IA-NEXT: and a5, a2, a4 -; RV32IA-NEXT: bne a5, a1, .LBB1_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 -; RV32IA-NEXT: xor a5, a2, a0 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: sc.w a5, a5, (a3) -; RV32IA-NEXT: bnez a5, .LBB1_1 -; RV32IA-NEXT: .LBB1_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i8_acquire_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; 
RV32IA-WMO-NEXT: andi a2, a2, 255 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-NEXT: and a5, a2, a4 +; RV32IA-WMO-NEXT: bne a5, a1, .LBB1_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a2, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a3) +; RV32IA-WMO-NEXT: bnez a5, .LBB1_1 +; RV32IA-WMO-NEXT: .LBB1_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: andi a2, a2, 255 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a3) +; RV32IA-TSO-NEXT: and a5, a2, a4 +; RV32IA-TSO-NEXT: bne a5, a1, .LBB1_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a2, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-NEXT: bnez a5, .LBB1_1 +; RV32IA-TSO-NEXT: .LBB1_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i8_acquire_monotonic: ; RV64I: # %bb.0: @@ -138,28 +161,51 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i8_acquire_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a0, a2, a0 -; RV64IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a2, (a3) 
-; RV64IA-NEXT: and a5, a2, a4 -; RV64IA-NEXT: bne a5, a1, .LBB1_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 -; RV64IA-NEXT: xor a5, a2, a0 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a2, a5 -; RV64IA-NEXT: sc.w a5, a5, (a3) -; RV64IA-NEXT: bnez a5, .LBB1_1 -; RV64IA-NEXT: .LBB1_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: andi a2, a2, 255 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB1_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a3) +; RV64IA-WMO-NEXT: bnez a5, .LBB1_1 +; RV64IA-WMO-NEXT: .LBB1_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: andi a2, a2, 255 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a3) +; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB1_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: bnez a5, .LBB1_1 +; RV64IA-TSO-NEXT: .LBB1_3: +; RV64IA-TSO-NEXT: ret %res = 
cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic ret void } @@ -178,28 +224,51 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i8_acquire_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a0, a2, a0 -; RV32IA-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a2, (a3) -; RV32IA-NEXT: and a5, a2, a4 -; RV32IA-NEXT: bne a5, a1, .LBB2_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 -; RV32IA-NEXT: xor a5, a2, a0 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: sc.w a5, a5, (a3) -; RV32IA-NEXT: bnez a5, .LBB2_1 -; RV32IA-NEXT: .LBB2_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i8_acquire_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: andi a2, a2, 255 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-NEXT: and a5, a2, a4 +; RV32IA-WMO-NEXT: bne a5, a1, .LBB2_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a2, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a3) +; RV32IA-WMO-NEXT: bnez a5, .LBB2_1 +; RV32IA-WMO-NEXT: .LBB2_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; 
RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: andi a2, a2, 255 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a3) +; RV32IA-TSO-NEXT: and a5, a2, a4 +; RV32IA-TSO-NEXT: bne a5, a1, .LBB2_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a2, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-NEXT: bnez a5, .LBB2_1 +; RV32IA-TSO-NEXT: .LBB2_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i8_acquire_acquire: ; RV64I: # %bb.0: @@ -214,28 +283,51 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i8_acquire_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a0, a2, a0 -; RV64IA-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a2, (a3) -; RV64IA-NEXT: and a5, a2, a4 -; RV64IA-NEXT: bne a5, a1, .LBB2_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 -; RV64IA-NEXT: xor a5, a2, a0 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a2, a5 -; RV64IA-NEXT: sc.w a5, a5, (a3) -; RV64IA-NEXT: bnez a5, .LBB2_1 -; RV64IA-NEXT: .LBB2_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i8_acquire_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: andi a2, a2, 255 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; 
RV64IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB2_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a3) +; RV64IA-WMO-NEXT: bnez a5, .LBB2_1 +; RV64IA-WMO-NEXT: .LBB2_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: andi a2, a2, 255 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a3) +; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB2_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: bnez a5, .LBB2_1 +; RV64IA-TSO-NEXT: .LBB2_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire ret void } @@ -254,28 +346,51 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i8_release_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a0, a2, a0 -; RV32IA-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a2, (a3) -; RV32IA-NEXT: and a5, a2, a4 -; RV32IA-NEXT: bne a5, a1, .LBB3_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 -; RV32IA-NEXT: 
xor a5, a2, a0 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a3) -; RV32IA-NEXT: bnez a5, .LBB3_1 -; RV32IA-NEXT: .LBB3_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i8_release_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: andi a2, a2, 255 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a2, (a3) +; RV32IA-WMO-NEXT: and a5, a2, a4 +; RV32IA-WMO-NEXT: bne a5, a1, .LBB3_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a2, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-NEXT: bnez a5, .LBB3_1 +; RV32IA-WMO-NEXT: .LBB3_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i8_release_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: andi a2, a2, 255 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a3) +; RV32IA-TSO-NEXT: and a5, a2, a4 +; RV32IA-TSO-NEXT: bne a5, a1, .LBB3_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a2, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-NEXT: bnez a5, .LBB3_1 +; RV32IA-TSO-NEXT: .LBB3_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i8_release_monotonic: ; RV64I: # %bb.0: @@ -290,28 +405,51 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 
%val) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i8_release_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a0, a2, a0 -; RV64IA-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a2, (a3) -; RV64IA-NEXT: and a5, a2, a4 -; RV64IA-NEXT: bne a5, a1, .LBB3_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 -; RV64IA-NEXT: xor a5, a2, a0 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a2, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a3) -; RV64IA-NEXT: bnez a5, .LBB3_1 -; RV64IA-NEXT: .LBB3_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i8_release_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: andi a2, a2, 255 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a2, (a3) +; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB3_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-NEXT: bnez a5, .LBB3_1 +; RV64IA-WMO-NEXT: .LBB3_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_release_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: andi a2, a2, 255 +; RV64IA-TSO-NEXT: sllw a0, a2, 
a0 +; RV64IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a3) +; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB3_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: bnez a5, .LBB3_1 +; RV64IA-TSO-NEXT: .LBB3_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic ret void } @@ -330,28 +468,51 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i8_release_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a0, a2, a0 -; RV32IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a2, (a3) -; RV32IA-NEXT: and a5, a2, a4 -; RV32IA-NEXT: bne a5, a1, .LBB4_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -; RV32IA-NEXT: xor a5, a2, a0 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a3) -; RV32IA-NEXT: bnez a5, .LBB4_1 -; RV32IA-NEXT: .LBB4_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i8_release_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: andi a2, a2, 255 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-NEXT: and a5, a2, a4 +; RV32IA-WMO-NEXT: bne a5, a1, .LBB4_3 +; RV32IA-WMO-NEXT: # %bb.2: # in 
Loop: Header=BB4_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a2, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-NEXT: bnez a5, .LBB4_1 +; RV32IA-WMO-NEXT: .LBB4_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i8_release_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: andi a2, a2, 255 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a3) +; RV32IA-TSO-NEXT: and a5, a2, a4 +; RV32IA-TSO-NEXT: bne a5, a1, .LBB4_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a2, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-NEXT: bnez a5, .LBB4_1 +; RV32IA-TSO-NEXT: .LBB4_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i8_release_acquire: ; RV64I: # %bb.0: @@ -366,28 +527,51 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i8_release_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a0, a2, a0 -; RV64IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a2, (a3) -; RV64IA-NEXT: and a5, a2, a4 -; RV64IA-NEXT: bne a5, a1, .LBB4_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -; RV64IA-NEXT: xor a5, a2, a0 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a2, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a3) -; RV64IA-NEXT: bnez a5, .LBB4_1 
-; RV64IA-NEXT: .LBB4_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i8_release_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: andi a2, a2, 255 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB4_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-NEXT: bnez a5, .LBB4_1 +; RV64IA-WMO-NEXT: .LBB4_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_release_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: andi a2, a2, 255 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a3) +; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB4_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: bnez a5, .LBB4_1 +; RV64IA-TSO-NEXT: .LBB4_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release acquire ret void } @@ -406,28 +590,51 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i8_acq_rel_monotonic: -; RV32IA: # %bb.0: -; 
RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a0, a2, a0 -; RV32IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a2, (a3) -; RV32IA-NEXT: and a5, a2, a4 -; RV32IA-NEXT: bne a5, a1, .LBB5_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -; RV32IA-NEXT: xor a5, a2, a0 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a3) -; RV32IA-NEXT: bnez a5, .LBB5_1 -; RV32IA-NEXT: .LBB5_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: andi a2, a2, 255 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-NEXT: and a5, a2, a4 +; RV32IA-WMO-NEXT: bne a5, a1, .LBB5_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a2, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-NEXT: bnez a5, .LBB5_1 +; RV32IA-WMO-NEXT: .LBB5_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: andi a2, a2, 255 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a3) +; RV32IA-TSO-NEXT: and a5, a2, a4 +; 
RV32IA-TSO-NEXT: bne a5, a1, .LBB5_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a2, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-NEXT: bnez a5, .LBB5_1 +; RV32IA-TSO-NEXT: .LBB5_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV64I: # %bb.0: @@ -442,28 +649,51 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i8_acq_rel_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a0, a2, a0 -; RV64IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a2, (a3) -; RV64IA-NEXT: and a5, a2, a4 -; RV64IA-NEXT: bne a5, a1, .LBB5_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -; RV64IA-NEXT: xor a5, a2, a0 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a2, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a3) -; RV64IA-NEXT: bnez a5, .LBB5_1 -; RV64IA-NEXT: .LBB5_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: andi a2, a2, 255 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB5_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; 
RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-NEXT: bnez a5, .LBB5_1 +; RV64IA-WMO-NEXT: .LBB5_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: andi a2, a2, 255 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a3) +; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB5_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: bnez a5, .LBB5_1 +; RV64IA-TSO-NEXT: .LBB5_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic ret void } @@ -482,28 +712,51 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i8_acq_rel_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a0, a2, a0 -; RV32IA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a2, (a3) -; RV32IA-NEXT: and a5, a2, a4 -; RV32IA-NEXT: bne a5, a1, .LBB6_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -; RV32IA-NEXT: xor a5, a2, a0 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a3) -; RV32IA-NEXT: bnez a5, .LBB6_1 -; RV32IA-NEXT: .LBB6_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV32IA-WMO: # %bb.0: +; 
RV32IA-WMO-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: andi a2, a2, 255 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-NEXT: and a5, a2, a4 +; RV32IA-WMO-NEXT: bne a5, a1, .LBB6_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a2, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-NEXT: bnez a5, .LBB6_1 +; RV32IA-WMO-NEXT: .LBB6_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: andi a2, a2, 255 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a3) +; RV32IA-TSO-NEXT: and a5, a2, a4 +; RV32IA-TSO-NEXT: bne a5, a1, .LBB6_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a2, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-NEXT: bnez a5, .LBB6_1 +; RV32IA-TSO-NEXT: .LBB6_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV64I: # %bb.0: @@ -518,28 +771,51 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i8_acq_rel_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: andi 
a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a0, a2, a0 -; RV64IA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a2, (a3) -; RV64IA-NEXT: and a5, a2, a4 -; RV64IA-NEXT: bne a5, a1, .LBB6_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -; RV64IA-NEXT: xor a5, a2, a0 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a2, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a3) -; RV64IA-NEXT: bnez a5, .LBB6_1 -; RV64IA-NEXT: .LBB6_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: andi a2, a2, 255 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB6_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-NEXT: bnez a5, .LBB6_1 +; RV64IA-WMO-NEXT: .LBB6_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: andi a2, a2, 255 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a3) +; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB6_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; 
RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: bnez a5, .LBB6_1 +; RV64IA-TSO-NEXT: .LBB6_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel acquire ret void } @@ -864,29 +1140,53 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i16_acquire_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a5, a4, a0 -; RV32IA-NEXT: and a1, a1, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: sll a0, a2, a0 -; RV32IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a2, (a3) -; RV32IA-NEXT: and a4, a2, a5 -; RV32IA-NEXT: bne a4, a1, .LBB11_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-NEXT: xor a4, a2, a0 -; RV32IA-NEXT: and a4, a4, a5 -; RV32IA-NEXT: xor a4, a2, a4 -; RV32IA-NEXT: sc.w a4, a4, (a3) -; RV32IA-NEXT: bnez a4, .LBB11_1 -; RV32IA-NEXT: .LBB11_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i16_acquire_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a4, 16 +; RV32IA-WMO-NEXT: addi a4, a4, -1 +; RV32IA-WMO-NEXT: sll a5, a4, a0 +; RV32IA-WMO-NEXT: and a1, a1, a4 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: and a2, a2, a4 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-NEXT: and a4, a2, a5 +; RV32IA-WMO-NEXT: bne a4, a1, .LBB11_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32IA-WMO-NEXT: xor a4, a2, a0 +; RV32IA-WMO-NEXT: and a4, a4, a5 +; RV32IA-WMO-NEXT: xor a4, a2, a4 +; RV32IA-WMO-NEXT: sc.w a4, a4, (a3) +; RV32IA-WMO-NEXT: bnez a4, 
.LBB11_1 +; RV32IA-WMO-NEXT: .LBB11_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a4, 16 +; RV32IA-TSO-NEXT: addi a4, a4, -1 +; RV32IA-TSO-NEXT: sll a5, a4, a0 +; RV32IA-TSO-NEXT: and a1, a1, a4 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: and a2, a2, a4 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a3) +; RV32IA-TSO-NEXT: and a4, a2, a5 +; RV32IA-TSO-NEXT: bne a4, a1, .LBB11_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32IA-TSO-NEXT: xor a4, a2, a0 +; RV32IA-TSO-NEXT: and a4, a4, a5 +; RV32IA-TSO-NEXT: xor a4, a2, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-NEXT: bnez a4, .LBB11_1 +; RV32IA-TSO-NEXT: .LBB11_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i16_acquire_monotonic: ; RV64I: # %bb.0: @@ -901,29 +1201,53 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i16_acquire_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a5, a4, a0 -; RV64IA-NEXT: and a1, a1, a4 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: and a2, a2, a4 -; RV64IA-NEXT: sllw a0, a2, a0 -; RV64IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a2, (a3) -; RV64IA-NEXT: and a4, a2, a5 -; RV64IA-NEXT: bne a4, a1, .LBB11_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-NEXT: xor a4, a2, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: xor a4, a2, a4 -; RV64IA-NEXT: sc.w a4, a4, (a3) -; RV64IA-NEXT: bnez a4, .LBB11_1 -; RV64IA-NEXT: .LBB11_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64IA-WMO: # 
%bb.0: +; RV64IA-WMO-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a4, 16 +; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-NEXT: and a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: and a2, a2, a4 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB11_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a2, a4 +; RV64IA-WMO-NEXT: sc.w a4, a4, (a3) +; RV64IA-WMO-NEXT: bnez a4, .LBB11_1 +; RV64IA-WMO-NEXT: .LBB11_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a4, 16 +; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: sllw a5, a4, a0 +; RV64IA-TSO-NEXT: and a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: and a2, a2, a4 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a3) +; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB11_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a2, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-NEXT: bnez a4, .LBB11_1 +; RV64IA-TSO-NEXT: .LBB11_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic ret void } @@ -942,29 +1266,53 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i16_acquire_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; 
RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a5, a4, a0 -; RV32IA-NEXT: and a1, a1, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: sll a0, a2, a0 -; RV32IA-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a2, (a3) -; RV32IA-NEXT: and a4, a2, a5 -; RV32IA-NEXT: bne a4, a1, .LBB12_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV32IA-NEXT: xor a4, a2, a0 -; RV32IA-NEXT: and a4, a4, a5 -; RV32IA-NEXT: xor a4, a2, a4 -; RV32IA-NEXT: sc.w a4, a4, (a3) -; RV32IA-NEXT: bnez a4, .LBB12_1 -; RV32IA-NEXT: .LBB12_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i16_acquire_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a4, 16 +; RV32IA-WMO-NEXT: addi a4, a4, -1 +; RV32IA-WMO-NEXT: sll a5, a4, a0 +; RV32IA-WMO-NEXT: and a1, a1, a4 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: and a2, a2, a4 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-NEXT: and a4, a2, a5 +; RV32IA-WMO-NEXT: bne a4, a1, .LBB12_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32IA-WMO-NEXT: xor a4, a2, a0 +; RV32IA-WMO-NEXT: and a4, a4, a5 +; RV32IA-WMO-NEXT: xor a4, a2, a4 +; RV32IA-WMO-NEXT: sc.w a4, a4, (a3) +; RV32IA-WMO-NEXT: bnez a4, .LBB12_1 +; RV32IA-WMO-NEXT: .LBB12_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a4, 16 +; RV32IA-TSO-NEXT: addi a4, a4, -1 +; RV32IA-TSO-NEXT: sll a5, a4, a0 +; RV32IA-TSO-NEXT: and a1, a1, a4 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: and a2, a2, a4 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; 
RV32IA-TSO-NEXT: lr.w a2, (a3) +; RV32IA-TSO-NEXT: and a4, a2, a5 +; RV32IA-TSO-NEXT: bne a4, a1, .LBB12_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32IA-TSO-NEXT: xor a4, a2, a0 +; RV32IA-TSO-NEXT: and a4, a4, a5 +; RV32IA-TSO-NEXT: xor a4, a2, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-NEXT: bnez a4, .LBB12_1 +; RV32IA-TSO-NEXT: .LBB12_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i16_acquire_acquire: ; RV64I: # %bb.0: @@ -979,29 +1327,53 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i16_acquire_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a5, a4, a0 -; RV64IA-NEXT: and a1, a1, a4 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: and a2, a2, a4 -; RV64IA-NEXT: sllw a0, a2, a0 -; RV64IA-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a2, (a3) -; RV64IA-NEXT: and a4, a2, a5 -; RV64IA-NEXT: bne a4, a1, .LBB12_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV64IA-NEXT: xor a4, a2, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: xor a4, a2, a4 -; RV64IA-NEXT: sc.w a4, a4, (a3) -; RV64IA-NEXT: bnez a4, .LBB12_1 -; RV64IA-NEXT: .LBB12_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i16_acquire_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a4, 16 +; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-NEXT: and a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: and a2, a2, a4 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB12_3 +; RV64IA-WMO-NEXT: # %bb.2: # in 
Loop: Header=BB12_1 Depth=1 +; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a2, a4 +; RV64IA-WMO-NEXT: sc.w a4, a4, (a3) +; RV64IA-WMO-NEXT: bnez a4, .LBB12_1 +; RV64IA-WMO-NEXT: .LBB12_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a4, 16 +; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: sllw a5, a4, a0 +; RV64IA-TSO-NEXT: and a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: and a2, a2, a4 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a3) +; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB12_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a2, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-NEXT: bnez a4, .LBB12_1 +; RV64IA-TSO-NEXT: .LBB12_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire ret void } @@ -1020,29 +1392,53 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i16_release_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a5, a4, a0 -; RV32IA-NEXT: and a1, a1, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: sll a0, a2, a0 -; RV32IA-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a2, (a3) -; RV32IA-NEXT: and a4, a2, a5 -; RV32IA-NEXT: bne a4, a1, .LBB13_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV32IA-NEXT: xor a4, a2, a0 -; RV32IA-NEXT: and a4, a4, a5 -; RV32IA-NEXT: 
xor a4, a2, a4 -; RV32IA-NEXT: sc.w.rl a4, a4, (a3) -; RV32IA-NEXT: bnez a4, .LBB13_1 -; RV32IA-NEXT: .LBB13_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i16_release_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a4, 16 +; RV32IA-WMO-NEXT: addi a4, a4, -1 +; RV32IA-WMO-NEXT: sll a5, a4, a0 +; RV32IA-WMO-NEXT: and a1, a1, a4 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: and a2, a2, a4 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a2, (a3) +; RV32IA-WMO-NEXT: and a4, a2, a5 +; RV32IA-WMO-NEXT: bne a4, a1, .LBB13_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32IA-WMO-NEXT: xor a4, a2, a0 +; RV32IA-WMO-NEXT: and a4, a4, a5 +; RV32IA-WMO-NEXT: xor a4, a2, a4 +; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-NEXT: bnez a4, .LBB13_1 +; RV32IA-WMO-NEXT: .LBB13_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i16_release_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a4, 16 +; RV32IA-TSO-NEXT: addi a4, a4, -1 +; RV32IA-TSO-NEXT: sll a5, a4, a0 +; RV32IA-TSO-NEXT: and a1, a1, a4 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: and a2, a2, a4 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a3) +; RV32IA-TSO-NEXT: and a4, a2, a5 +; RV32IA-TSO-NEXT: bne a4, a1, .LBB13_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32IA-TSO-NEXT: xor a4, a2, a0 +; RV32IA-TSO-NEXT: and a4, a4, a5 +; RV32IA-TSO-NEXT: xor a4, a2, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-NEXT: bnez a4, .LBB13_1 +; RV32IA-TSO-NEXT: .LBB13_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i16_release_monotonic: ; RV64I: # %bb.0: @@ -1057,29 +1453,53 @@ define void @cmpxchg_i16_release_monotonic(ptr 
%ptr, i16 %cmp, i16 %val) nounwin ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i16_release_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a5, a4, a0 -; RV64IA-NEXT: and a1, a1, a4 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: and a2, a2, a4 -; RV64IA-NEXT: sllw a0, a2, a0 -; RV64IA-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a2, (a3) -; RV64IA-NEXT: and a4, a2, a5 -; RV64IA-NEXT: bne a4, a1, .LBB13_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV64IA-NEXT: xor a4, a2, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: xor a4, a2, a4 -; RV64IA-NEXT: sc.w.rl a4, a4, (a3) -; RV64IA-NEXT: bnez a4, .LBB13_1 -; RV64IA-NEXT: .LBB13_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i16_release_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a4, 16 +; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-NEXT: and a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: and a2, a2, a4 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a2, (a3) +; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB13_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a2, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-NEXT: bnez a4, .LBB13_1 +; RV64IA-WMO-NEXT: .LBB13_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_release_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a4, 16 +; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: sllw a5, a4, a0 +; 
RV64IA-TSO-NEXT: and a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: and a2, a2, a4 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a3) +; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB13_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a2, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-NEXT: bnez a4, .LBB13_1 +; RV64IA-TSO-NEXT: .LBB13_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic ret void } @@ -1098,29 +1518,53 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i16_release_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a5, a4, a0 -; RV32IA-NEXT: and a1, a1, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: sll a0, a2, a0 -; RV32IA-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a2, (a3) -; RV32IA-NEXT: and a4, a2, a5 -; RV32IA-NEXT: bne a4, a1, .LBB14_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV32IA-NEXT: xor a4, a2, a0 -; RV32IA-NEXT: and a4, a4, a5 -; RV32IA-NEXT: xor a4, a2, a4 -; RV32IA-NEXT: sc.w.rl a4, a4, (a3) -; RV32IA-NEXT: bnez a4, .LBB14_1 -; RV32IA-NEXT: .LBB14_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i16_release_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a4, 16 +; RV32IA-WMO-NEXT: addi a4, a4, -1 +; RV32IA-WMO-NEXT: sll a5, a4, a0 +; RV32IA-WMO-NEXT: and a1, a1, a4 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: and a2, a2, a4 +; RV32IA-WMO-NEXT: sll a0, a2, a0 
+; RV32IA-WMO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-NEXT: and a4, a2, a5 +; RV32IA-WMO-NEXT: bne a4, a1, .LBB14_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV32IA-WMO-NEXT: xor a4, a2, a0 +; RV32IA-WMO-NEXT: and a4, a4, a5 +; RV32IA-WMO-NEXT: xor a4, a2, a4 +; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-NEXT: bnez a4, .LBB14_1 +; RV32IA-WMO-NEXT: .LBB14_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i16_release_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a4, 16 +; RV32IA-TSO-NEXT: addi a4, a4, -1 +; RV32IA-TSO-NEXT: sll a5, a4, a0 +; RV32IA-TSO-NEXT: and a1, a1, a4 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: and a2, a2, a4 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a3) +; RV32IA-TSO-NEXT: and a4, a2, a5 +; RV32IA-TSO-NEXT: bne a4, a1, .LBB14_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV32IA-TSO-NEXT: xor a4, a2, a0 +; RV32IA-TSO-NEXT: and a4, a4, a5 +; RV32IA-TSO-NEXT: xor a4, a2, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-NEXT: bnez a4, .LBB14_1 +; RV32IA-TSO-NEXT: .LBB14_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i16_release_acquire: ; RV64I: # %bb.0: @@ -1135,29 +1579,53 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i16_release_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a5, a4, a0 -; RV64IA-NEXT: and a1, a1, a4 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: and a2, a2, a4 -; RV64IA-NEXT: sllw a0, a2, a0 -; RV64IA-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a2, 
(a3) -; RV64IA-NEXT: and a4, a2, a5 -; RV64IA-NEXT: bne a4, a1, .LBB14_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV64IA-NEXT: xor a4, a2, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: xor a4, a2, a4 -; RV64IA-NEXT: sc.w.rl a4, a4, (a3) -; RV64IA-NEXT: bnez a4, .LBB14_1 -; RV64IA-NEXT: .LBB14_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i16_release_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a4, 16 +; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-NEXT: and a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: and a2, a2, a4 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB14_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a2, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-NEXT: bnez a4, .LBB14_1 +; RV64IA-WMO-NEXT: .LBB14_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_release_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a4, 16 +; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: sllw a5, a4, a0 +; RV64IA-TSO-NEXT: and a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: and a2, a2, a4 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a3) +; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB14_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a2, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; 
RV64IA-TSO-NEXT: bnez a4, .LBB14_1 +; RV64IA-TSO-NEXT: .LBB14_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release acquire ret void } @@ -1176,29 +1644,53 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i16_acq_rel_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a5, a4, a0 -; RV32IA-NEXT: and a1, a1, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: sll a0, a2, a0 -; RV32IA-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a2, (a3) -; RV32IA-NEXT: and a4, a2, a5 -; RV32IA-NEXT: bne a4, a1, .LBB15_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV32IA-NEXT: xor a4, a2, a0 -; RV32IA-NEXT: and a4, a4, a5 -; RV32IA-NEXT: xor a4, a2, a4 -; RV32IA-NEXT: sc.w.rl a4, a4, (a3) -; RV32IA-NEXT: bnez a4, .LBB15_1 -; RV32IA-NEXT: .LBB15_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a4, 16 +; RV32IA-WMO-NEXT: addi a4, a4, -1 +; RV32IA-WMO-NEXT: sll a5, a4, a0 +; RV32IA-WMO-NEXT: and a1, a1, a4 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: and a2, a2, a4 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-NEXT: and a4, a2, a5 +; RV32IA-WMO-NEXT: bne a4, a1, .LBB15_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV32IA-WMO-NEXT: xor a4, a2, a0 +; RV32IA-WMO-NEXT: and a4, a4, a5 +; RV32IA-WMO-NEXT: xor a4, a2, a4 +; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-NEXT: bnez a4, .LBB15_1 +; RV32IA-WMO-NEXT: .LBB15_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: 
cmpxchg_i16_acq_rel_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a4, 16 +; RV32IA-TSO-NEXT: addi a4, a4, -1 +; RV32IA-TSO-NEXT: sll a5, a4, a0 +; RV32IA-TSO-NEXT: and a1, a1, a4 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: and a2, a2, a4 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a3) +; RV32IA-TSO-NEXT: and a4, a2, a5 +; RV32IA-TSO-NEXT: bne a4, a1, .LBB15_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV32IA-TSO-NEXT: xor a4, a2, a0 +; RV32IA-TSO-NEXT: and a4, a4, a5 +; RV32IA-TSO-NEXT: xor a4, a2, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-NEXT: bnez a4, .LBB15_1 +; RV32IA-TSO-NEXT: .LBB15_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV64I: # %bb.0: @@ -1213,29 +1705,53 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i16_acq_rel_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a5, a4, a0 -; RV64IA-NEXT: and a1, a1, a4 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: and a2, a2, a4 -; RV64IA-NEXT: sllw a0, a2, a0 -; RV64IA-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a2, (a3) -; RV64IA-NEXT: and a4, a2, a5 -; RV64IA-NEXT: bne a4, a1, .LBB15_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV64IA-NEXT: xor a4, a2, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: xor a4, a2, a4 -; RV64IA-NEXT: sc.w.rl a4, a4, (a3) -; RV64IA-NEXT: bnez a4, .LBB15_1 -; RV64IA-NEXT: .LBB15_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; 
RV64IA-WMO-NEXT: lui a4, 16 +; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-NEXT: and a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: and a2, a2, a4 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB15_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a2, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-NEXT: bnez a4, .LBB15_1 +; RV64IA-WMO-NEXT: .LBB15_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a4, 16 +; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: sllw a5, a4, a0 +; RV64IA-TSO-NEXT: and a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: and a2, a2, a4 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a3) +; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB15_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a2, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-NEXT: bnez a4, .LBB15_1 +; RV64IA-TSO-NEXT: .LBB15_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic ret void } @@ -1254,29 +1770,53 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i16_acq_rel_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, 
a4, -1 -; RV32IA-NEXT: sll a5, a4, a0 -; RV32IA-NEXT: and a1, a1, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: sll a0, a2, a0 -; RV32IA-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a2, (a3) -; RV32IA-NEXT: and a4, a2, a5 -; RV32IA-NEXT: bne a4, a1, .LBB16_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV32IA-NEXT: xor a4, a2, a0 -; RV32IA-NEXT: and a4, a4, a5 -; RV32IA-NEXT: xor a4, a2, a4 -; RV32IA-NEXT: sc.w.rl a4, a4, (a3) -; RV32IA-NEXT: bnez a4, .LBB16_1 -; RV32IA-NEXT: .LBB16_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a4, 16 +; RV32IA-WMO-NEXT: addi a4, a4, -1 +; RV32IA-WMO-NEXT: sll a5, a4, a0 +; RV32IA-WMO-NEXT: and a1, a1, a4 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: and a2, a2, a4 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-NEXT: and a4, a2, a5 +; RV32IA-WMO-NEXT: bne a4, a1, .LBB16_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV32IA-WMO-NEXT: xor a4, a2, a0 +; RV32IA-WMO-NEXT: and a4, a4, a5 +; RV32IA-WMO-NEXT: xor a4, a2, a4 +; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-NEXT: bnez a4, .LBB16_1 +; RV32IA-WMO-NEXT: .LBB16_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a4, 16 +; RV32IA-TSO-NEXT: addi a4, a4, -1 +; RV32IA-TSO-NEXT: sll a5, a4, a0 +; RV32IA-TSO-NEXT: and a1, a1, a4 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: and a2, a2, a4 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a3) +; RV32IA-TSO-NEXT: and a4, a2, a5 +; 
RV32IA-TSO-NEXT: bne a4, a1, .LBB16_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV32IA-TSO-NEXT: xor a4, a2, a0 +; RV32IA-TSO-NEXT: and a4, a4, a5 +; RV32IA-TSO-NEXT: xor a4, a2, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-NEXT: bnez a4, .LBB16_1 +; RV32IA-TSO-NEXT: .LBB16_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV64I: # %bb.0: @@ -1291,29 +1831,53 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i16_acq_rel_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a5, a4, a0 -; RV64IA-NEXT: and a1, a1, a4 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: and a2, a2, a4 -; RV64IA-NEXT: sllw a0, a2, a0 -; RV64IA-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a2, (a3) -; RV64IA-NEXT: and a4, a2, a5 -; RV64IA-NEXT: bne a4, a1, .LBB16_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV64IA-NEXT: xor a4, a2, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: xor a4, a2, a4 -; RV64IA-NEXT: sc.w.rl a4, a4, (a3) -; RV64IA-NEXT: bnez a4, .LBB16_1 -; RV64IA-NEXT: .LBB16_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a4, 16 +; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-NEXT: and a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: and a2, a2, a4 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB16_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV64IA-WMO-NEXT: xor a4, a2, a0 
+; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a2, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-NEXT: bnez a4, .LBB16_1 +; RV64IA-WMO-NEXT: .LBB16_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a4, 16 +; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: sllw a5, a4, a0 +; RV64IA-TSO-NEXT: and a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: and a2, a2, a4 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a3) +; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB16_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a2, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-NEXT: bnez a4, .LBB16_1 +; RV64IA-TSO-NEXT: .LBB16_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel acquire ret void } @@ -1619,16 +2183,27 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_acquire_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB21_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -; RV32IA-NEXT: sc.w a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB21_1 -; RV32IA-NEXT: .LBB21_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_acquire_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB21_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a4, a2, (a0) +; 
RV32IA-WMO-NEXT: bnez a4, .LBB21_1 +; RV32IA-WMO-NEXT: .LBB21_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB21_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB21_1 +; RV32IA-TSO-NEXT: .LBB21_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_acquire_monotonic: ; RV64I: # %bb.0: @@ -1643,17 +2218,29 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_acquire_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB21_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -; RV64IA-NEXT: sc.w a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB21_1 -; RV64IA-NEXT: .LBB21_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_acquire_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB21_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB21_1 +; RV64IA-WMO-NEXT: .LBB21_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB21_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB21_1 +; RV64IA-TSO-NEXT: .LBB21_3: +; 
RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic ret void } @@ -1672,16 +2259,27 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_acquire_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB22_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -; RV32IA-NEXT: sc.w a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB22_1 -; RV32IA-NEXT: .LBB22_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_acquire_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB22_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB22_1 +; RV32IA-WMO-NEXT: .LBB22_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB22_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB22_1 +; RV32IA-TSO-NEXT: .LBB22_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_acquire_acquire: ; RV64I: # %bb.0: @@ -1696,17 +2294,29 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_acquire_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB22_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -; RV64IA-NEXT: sc.w a4, a2, (a0) -; RV64IA-NEXT: bnez 
a4, .LBB22_1 -; RV64IA-NEXT: .LBB22_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_acquire_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB22_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB22_1 +; RV64IA-WMO-NEXT: .LBB22_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB22_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB22_1 +; RV64IA-TSO-NEXT: .LBB22_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire ret void } @@ -1725,16 +2335,27 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_release_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB23_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB23_1 -; RV32IA-NEXT: .LBB23_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_release_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB23_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB23_1 +; RV32IA-WMO-NEXT: .LBB23_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: 
cmpxchg_i32_release_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB23_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB23_1 +; RV32IA-TSO-NEXT: .LBB23_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_release_monotonic: ; RV64I: # %bb.0: @@ -1749,17 +2370,29 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_release_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB23_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB23_1 -; RV64IA-NEXT: .LBB23_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_release_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB23_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB23_1 +; RV64IA-WMO-NEXT: .LBB23_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_release_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB23_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB23_1 +; RV64IA-TSO-NEXT: .LBB23_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic ret void } @@ -1778,16 +2411,27 @@ 
define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_release_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB24_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB24_1 -; RV32IA-NEXT: .LBB24_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_release_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB24_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB24_1 +; RV32IA-WMO-NEXT: .LBB24_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_release_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB24_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB24_1 +; RV32IA-TSO-NEXT: .LBB24_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_release_acquire: ; RV64I: # %bb.0: @@ -1802,17 +2446,29 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_release_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB24_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB24_1 -; RV64IA-NEXT: .LBB24_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_release_acquire: +; 
RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB24_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB24_1 +; RV64IA-WMO-NEXT: .LBB24_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_release_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB24_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB24_1 +; RV64IA-TSO-NEXT: .LBB24_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release acquire ret void } @@ -1831,16 +2487,27 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_acq_rel_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB25_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB25_1 -; RV32IA-NEXT: .LBB25_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB25_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB25_1 +; RV32IA-WMO-NEXT: .LBB25_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 
+; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB25_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB25_1 +; RV32IA-TSO-NEXT: .LBB25_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_acq_rel_monotonic: ; RV64I: # %bb.0: @@ -1855,17 +2522,29 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_acq_rel_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB25_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB25_1 -; RV64IA-NEXT: .LBB25_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB25_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB25_1 +; RV64IA-WMO-NEXT: .LBB25_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB25_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB25_1 +; RV64IA-TSO-NEXT: .LBB25_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic ret void } @@ -1884,16 +2563,27 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; 
RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_acq_rel_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB26_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB26_1 -; RV32IA-NEXT: .LBB26_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB26_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB26_1 +; RV32IA-WMO-NEXT: .LBB26_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB26_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB26_1 +; RV32IA-TSO-NEXT: .LBB26_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_acq_rel_acquire: ; RV64I: # %bb.0: @@ -1908,17 +2598,29 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_acq_rel_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB26_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB26_1 -; RV64IA-NEXT: .LBB26_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB26_1: # =>This Inner Loop 
Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB26_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB26_1 +; RV64IA-WMO-NEXT: .LBB26_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB26_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB26_1 +; RV64IA-TSO-NEXT: .LBB26_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel acquire ret void } @@ -2190,16 +2892,27 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_acquire_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aq a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB31_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB31_1 Depth=1 -; RV64IA-NEXT: sc.d a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB31_1 -; RV64IA-NEXT: .LBB31_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB31_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB31_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB31_1 +; RV64IA-WMO-NEXT: .LBB31_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB31_3 +; RV64IA-TSO-NEXT: # %bb.2: # in 
Loop: Header=BB31_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB31_1 +; RV64IA-TSO-NEXT: .LBB31_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic ret void } @@ -2252,16 +2965,27 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_acquire_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aq a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB32_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 -; RV64IA-NEXT: sc.d a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB32_1 -; RV64IA-NEXT: .LBB32_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_acquire_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB32_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB32_1 +; RV64IA-WMO-NEXT: .LBB32_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB32_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB32_1 +; RV64IA-TSO-NEXT: .LBB32_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire ret void } @@ -2314,16 +3038,27 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_release_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB33_3 
-; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 -; RV64IA-NEXT: sc.d.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB33_1 -; RV64IA-NEXT: .LBB33_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_release_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB33_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB33_1 +; RV64IA-WMO-NEXT: .LBB33_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_release_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB33_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB33_1 +; RV64IA-TSO-NEXT: .LBB33_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic ret void } @@ -2376,16 +3111,27 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_release_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aq a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB34_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1 -; RV64IA-NEXT: sc.d.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB34_1 -; RV64IA-NEXT: .LBB34_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_release_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB34_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB34_1 +; RV64IA-WMO-NEXT: .LBB34_3: +; 
RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_release_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB34_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB34_1 +; RV64IA-TSO-NEXT: .LBB34_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release acquire ret void } @@ -2438,16 +3184,27 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_acq_rel_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aq a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB35_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 -; RV64IA-NEXT: sc.d.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB35_1 -; RV64IA-NEXT: .LBB35_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB35_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB35_1 +; RV64IA-WMO-NEXT: .LBB35_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB35_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB35_1 +; RV64IA-TSO-NEXT: .LBB35_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic ret void } @@ -2500,16 +3257,27 @@ define void 
@cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_acq_rel_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aq a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB36_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 -; RV64IA-NEXT: sc.d.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB36_1 -; RV64IA-NEXT: .LBB36_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB36_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB36_1 +; RV64IA-WMO-NEXT: .LBB36_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB36_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB36_1 +; RV64IA-TSO-NEXT: .LBB36_3: +; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel acquire ret void } diff --git a/llvm/test/CodeGen/RISCV/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/atomic-load-store.ll index 1ca234db4c78e6..d3488ebed89f60 100644 --- a/llvm/test/CodeGen/RISCV/atomic-load-store.ll +++ b/llvm/test/CodeGen/RISCV/atomic-load-store.ll @@ -1010,6 +1010,7 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-LABEL: atomic_store_i8_seq_cst: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: sb a1, 0(a0) +; RV32IA-TSO-NEXT: fence rw, rw ; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomic_store_i8_seq_cst: @@ -1031,6 +1032,7 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) 
nounwind { ; RV64IA-TSO-LABEL: atomic_store_i8_seq_cst: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sb a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw ; RV64IA-TSO-NEXT: ret ; ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst: @@ -1043,6 +1045,7 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst: ; RV32IA-TSO-TRAILING-FENCE: # %bb.0: ; RV32IA-TSO-TRAILING-FENCE-NEXT: sb a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw ; RV32IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst: @@ -1055,6 +1058,7 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst: ; RV64IA-TSO-TRAILING-FENCE: # %bb.0: ; RV64IA-TSO-TRAILING-FENCE-NEXT: sb a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret store atomic i8 %b, ptr %a seq_cst, align 1 ret void @@ -1216,6 +1220,7 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-LABEL: atomic_store_i16_seq_cst: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: sh a1, 0(a0) +; RV32IA-TSO-NEXT: fence rw, rw ; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomic_store_i16_seq_cst: @@ -1237,6 +1242,7 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-LABEL: atomic_store_i16_seq_cst: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sh a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw ; RV64IA-TSO-NEXT: ret ; ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst: @@ -1249,6 +1255,7 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst: ; RV32IA-TSO-TRAILING-FENCE: # %bb.0: ; RV32IA-TSO-TRAILING-FENCE-NEXT: sh a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw ; RV32IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst: @@ -1261,6 +1268,7 @@ define void 
@atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst: ; RV64IA-TSO-TRAILING-FENCE: # %bb.0: ; RV64IA-TSO-TRAILING-FENCE-NEXT: sh a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret store atomic i16 %b, ptr %a seq_cst, align 2 ret void @@ -1422,6 +1430,7 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32IA-TSO-LABEL: atomic_store_i32_seq_cst: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: fence rw, rw ; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomic_store_i32_seq_cst: @@ -1443,6 +1452,7 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64IA-TSO-LABEL: atomic_store_i32_seq_cst: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw ; RV64IA-TSO-NEXT: ret ; ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst: @@ -1455,6 +1465,7 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst: ; RV32IA-TSO-TRAILING-FENCE: # %bb.0: ; RV32IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw ; RV32IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst: @@ -1467,6 +1478,7 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst: ; RV64IA-TSO-TRAILING-FENCE: # %bb.0: ; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret store atomic i32 %b, ptr %a seq_cst, align 4 ret void @@ -1646,6 +1658,7 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64IA-TSO-LABEL: atomic_store_i64_seq_cst: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sd a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw ; RV64IA-TSO-NEXT: ret ; ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i64_seq_cst: @@ 
-1658,6 +1671,7 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i64_seq_cst: ; RV64IA-TSO-TRAILING-FENCE: # %bb.0: ; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret store atomic i64 %b, ptr %a seq_cst, align 8 ret void diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 813bbd100f8efa..c47d4484d8c0d8 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -2,15 +2,15 @@ ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV32IA %s +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV32IA %s +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV64IA %s +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV64IA %s +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-LABEL: atomicrmw_xchg_i8_monotonic: @@ -87,25 +87,45 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_xchg_i8_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll 
a1, a1, a0 -; RV32IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a2) -; RV32IA-NEXT: mv a5, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB1_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NEXT: mv a5, a1 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB1_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: mv a5, a1 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB1_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i8_acquire: ; RV64I: # %bb.0: @@ -117,25 +137,45 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_xchg_i8_acquire: -; RV64IA: # %bb.0: -; 
RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a2) -; RV64IA-NEXT: mv a5, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB1_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_xchg_i8_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NEXT: mv a5, a1 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB1_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_xchg_i8_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: mv a5, a1 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB1_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw xchg ptr %a, i8 %b acquire ret i8 %1 } @@ -151,25 +191,45 @@ define i8 
@atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_xchg_i8_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a2) -; RV32IA-NEXT: mv a5, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB2_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a4, (a2) +; RV32IA-WMO-NEXT: mv a5, a1 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB2_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: mv a5, a1 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB2_1 +; 
RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i8_release: ; RV64I: # %bb.0: @@ -181,25 +241,45 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_xchg_i8_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a2) -; RV64IA-NEXT: mv a5, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB2_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_xchg_i8_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a4, (a2) +; RV64IA-WMO-NEXT: mv a5, a1 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB2_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_xchg_i8_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: mv a5, a1 +; 
RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB2_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw xchg ptr %a, i8 %b release ret i8 %1 } @@ -215,25 +295,45 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_xchg_i8_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a2) -; RV32IA-NEXT: mv a5, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB3_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NEXT: mv a5, a1 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB3_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; 
RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: mv a5, a1 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB3_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i8_acq_rel: ; RV64I: # %bb.0: @@ -245,25 +345,45 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_xchg_i8_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a2) -; RV64IA-NEXT: mv a5, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB3_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_xchg_i8_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NEXT: mv a5, a1 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB3_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_xchg_i8_acq_rel: +; RV64IA-TSO: # %bb.0: 
+; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: mv a5, a1 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB3_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw xchg ptr %a, i8 %b acq_rel ret i8 %1 } @@ -880,25 +1000,45 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_add_i8_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a2) -; RV32IA-NEXT: add a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB16_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_add_i8_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NEXT: add a5, a4, a1 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, 
.LBB16_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_add_i8_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: add a5, a4, a1 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB16_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i8_acquire: ; RV64I: # %bb.0: @@ -910,25 +1050,45 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_add_i8_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a2) -; RV64IA-NEXT: add a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB16_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_add_i8_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NEXT: 
add a5, a4, a1 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB16_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_add_i8_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: add a5, a4, a1 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB16_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw add ptr %a, i8 %b acquire ret i8 %1 } @@ -944,25 +1104,45 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_add_i8_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a2) -; RV32IA-NEXT: add a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB17_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_add_i8_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; 
RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a4, (a2) +; RV32IA-WMO-NEXT: add a5, a4, a1 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB17_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_add_i8_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: add a5, a4, a1 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB17_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i8_release: ; RV64I: # %bb.0: @@ -974,25 +1154,45 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_add_i8_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a2) -; RV64IA-NEXT: add a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB17_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: 
atomicrmw_add_i8_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a4, (a2) +; RV64IA-WMO-NEXT: add a5, a4, a1 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB17_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_add_i8_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: add a5, a4, a1 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB17_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw add ptr %a, i8 %b release ret i8 %1 } @@ -1008,25 +1208,45 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_add_i8_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a2) -; RV32IA-NEXT: add a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: 
xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB18_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_add_i8_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NEXT: add a5, a4, a1 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB18_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_add_i8_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: add a5, a4, a1 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB18_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i8_acq_rel: ; RV64I: # %bb.0: @@ -1038,25 +1258,45 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_add_i8_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; 
RV64IA-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a2) -; RV64IA-NEXT: add a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB18_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_add_i8_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NEXT: add a5, a4, a1 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB18_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_add_i8_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: add a5, a4, a1 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB18_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw add ptr %a, i8 %b acq_rel ret i8 %1 } @@ -1200,25 +1440,45 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_sub_i8_acquire: -; RV32IA: # %bb.0: 
-; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a2) -; RV32IA-NEXT: sub a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB21_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_sub_i8_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NEXT: sub a5, a4, a1 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB21_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_sub_i8_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: sub a5, a4, a1 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB21_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i8_acquire: ; RV64I: # %bb.0: @@ -1230,25 
+1490,45 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_sub_i8_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a2) -; RV64IA-NEXT: sub a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB21_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_sub_i8_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NEXT: sub a5, a4, a1 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB21_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_sub_i8_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: sub a5, a4, a1 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; 
RV64IA-TSO-NEXT: bnez a5, .LBB21_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw sub ptr %a, i8 %b acquire ret i8 %1 } @@ -1264,25 +1544,45 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_sub_i8_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a2) -; RV32IA-NEXT: sub a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB22_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_sub_i8_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a4, (a2) +; RV32IA-WMO-NEXT: sub a5, a4, a1 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB22_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_sub_i8_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; 
RV32IA-TSO-NEXT: sub a5, a4, a1 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB22_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i8_release: ; RV64I: # %bb.0: @@ -1294,25 +1594,45 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_sub_i8_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a2) -; RV64IA-NEXT: sub a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB22_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_sub_i8_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a4, (a2) +; RV64IA-WMO-NEXT: sub a5, a4, a1 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB22_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_sub_i8_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: 
sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: sub a5, a4, a1 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB22_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw sub ptr %a, i8 %b release ret i8 %1 } @@ -1328,25 +1648,45 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_sub_i8_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a2) -; RV32IA-NEXT: sub a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB23_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_sub_i8_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NEXT: sub a5, a4, a1 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB23_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; 
RV32IA-TSO-LABEL: atomicrmw_sub_i8_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: sub a5, a4, a1 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB23_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i8_acq_rel: ; RV64I: # %bb.0: @@ -1358,25 +1698,45 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_sub_i8_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a2) -; RV64IA-NEXT: sub a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB23_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_sub_i8_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NEXT: sub a5, a4, a1 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; 
RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB23_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_sub_i8_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: sub a5, a4, a1 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB23_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw sub ptr %a, i8 %b acq_rel ret i8 %1 } @@ -1782,26 +2142,47 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_nand_i8_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a2) -; RV32IA-NEXT: and a5, a4, a1 -; RV32IA-NEXT: not a5, a5 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB31_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_nand_i8_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 
+; RV32IA-WMO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NEXT: and a5, a4, a1 +; RV32IA-WMO-NEXT: not a5, a5 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB31_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_nand_i8_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: and a5, a4, a1 +; RV32IA-TSO-NEXT: not a5, a5 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB31_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i8_acquire: ; RV64I: # %bb.0: @@ -1813,26 +2194,47 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_nand_i8_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a2) -; RV64IA-NEXT: and a5, a4, a1 -; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB31_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret 
+; RV64IA-WMO-LABEL: atomicrmw_nand_i8_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NEXT: and a5, a4, a1 +; RV64IA-WMO-NEXT: not a5, a5 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB31_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_nand_i8_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: and a5, a4, a1 +; RV64IA-TSO-NEXT: not a5, a5 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB31_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw nand ptr %a, i8 %b acquire ret i8 %1 } @@ -1848,26 +2250,47 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_nand_i8_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a2) -; RV32IA-NEXT: and a5, 
a4, a1 -; RV32IA-NEXT: not a5, a5 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB32_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_nand_i8_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a4, (a2) +; RV32IA-WMO-NEXT: and a5, a4, a1 +; RV32IA-WMO-NEXT: not a5, a5 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB32_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_nand_i8_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: and a5, a4, a1 +; RV32IA-TSO-NEXT: not a5, a5 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB32_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i8_release: ; RV64I: # %bb.0: @@ -1879,26 +2302,47 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_nand_i8_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, 
a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a2) -; RV64IA-NEXT: and a5, a4, a1 -; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB32_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_nand_i8_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a4, (a2) +; RV64IA-WMO-NEXT: and a5, a4, a1 +; RV64IA-WMO-NEXT: not a5, a5 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB32_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_nand_i8_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: and a5, a4, a1 +; RV64IA-TSO-NEXT: not a5, a5 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB32_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 
= atomicrmw nand ptr %a, i8 %b release ret i8 %1 } @@ -1914,26 +2358,47 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_nand_i8_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a2) -; RV32IA-NEXT: and a5, a4, a1 -; RV32IA-NEXT: not a5, a5 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB33_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_nand_i8_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NEXT: and a5, a4, a1 +; RV32IA-WMO-NEXT: not a5, a5 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB33_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_nand_i8_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: and a5, a4, a1 +; RV32IA-TSO-NEXT: not a5, 
a5 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB33_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i8_acq_rel: ; RV64I: # %bb.0: @@ -1943,28 +2408,49 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_fetch_nand_1@plt ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret -; -; RV64IA-LABEL: atomicrmw_nand_i8_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a2) -; RV64IA-NEXT: and a5, a4, a1 -; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB33_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomicrmw_nand_i8_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NEXT: and a5, a4, a1 +; RV64IA-WMO-NEXT: not a5, a5 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB33_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: 
atomicrmw_nand_i8_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: and a5, a4, a1 +; RV64IA-TSO-NEXT: not a5, a5 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB33_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw nand ptr %a, i8 %b acq_rel ret i8 %1 } @@ -2664,34 +3150,63 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_max_i8_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: andi a3, a0, 24 -; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: slli a1, a1, 24 -; RV32IA-NEXT: srai a1, a1, 24 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: xori a3, a3, 24 -; RV32IA-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a2) -; RV32IA-NEXT: and a7, a5, a4 -; RV32IA-NEXT: mv a6, a5 -; RV32IA-NEXT: sll a7, a7, a3 -; RV32IA-NEXT: sra a7, a7, a3 -; RV32IA-NEXT: bge a7, a1, .LBB46_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 -; RV32IA-NEXT: xor a6, a5, a1 -; RV32IA-NEXT: and a6, a6, a4 -; RV32IA-NEXT: xor a6, a5, a6 -; RV32IA-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 -; RV32IA-NEXT: sc.w a6, a6, (a2) -; RV32IA-NEXT: bnez a6, .LBB46_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a5, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_max_i8_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: 
andi a3, a0, 24 +; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: slli a1, a1, 24 +; RV32IA-WMO-NEXT: srai a1, a1, 24 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: xori a3, a3, 24 +; RV32IA-WMO-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a5, (a2) +; RV32IA-WMO-NEXT: and a7, a5, a4 +; RV32IA-WMO-NEXT: mv a6, a5 +; RV32IA-WMO-NEXT: sll a7, a7, a3 +; RV32IA-WMO-NEXT: sra a7, a7, a3 +; RV32IA-WMO-NEXT: bge a7, a1, .LBB46_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 +; RV32IA-WMO-NEXT: xor a6, a5, a1 +; RV32IA-WMO-NEXT: and a6, a6, a4 +; RV32IA-WMO-NEXT: xor a6, a5, a6 +; RV32IA-WMO-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a6, a6, (a2) +; RV32IA-WMO-NEXT: bnez a6, .LBB46_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a5, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_max_i8_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: andi a3, a0, 24 +; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: slli a1, a1, 24 +; RV32IA-TSO-NEXT: srai a1, a1, 24 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: xori a3, a3, 24 +; RV32IA-TSO-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a5, (a2) +; RV32IA-TSO-NEXT: and a7, a5, a4 +; RV32IA-TSO-NEXT: mv a6, a5 +; RV32IA-TSO-NEXT: sll a7, a7, a3 +; RV32IA-TSO-NEXT: sra a7, a7, a3 +; RV32IA-TSO-NEXT: bge a7, a1, .LBB46_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 +; RV32IA-TSO-NEXT: xor a6, a5, a1 +; RV32IA-TSO-NEXT: and a6, a6, a4 +; RV32IA-TSO-NEXT: xor a6, a5, a6 +; RV32IA-TSO-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV32IA-TSO-NEXT: bnez a6, .LBB46_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a5, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: 
atomicrmw_max_i8_acquire: ; RV64I: # %bb.0: @@ -2735,34 +3250,63 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_max_i8_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: andi a3, a0, 24 -; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: slli a1, a1, 56 -; RV64IA-NEXT: srai a1, a1, 56 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: xori a3, a3, 56 -; RV64IA-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a2) -; RV64IA-NEXT: and a7, a5, a4 -; RV64IA-NEXT: mv a6, a5 -; RV64IA-NEXT: sll a7, a7, a3 -; RV64IA-NEXT: sra a7, a7, a3 -; RV64IA-NEXT: bge a7, a1, .LBB46_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 -; RV64IA-NEXT: xor a6, a5, a1 -; RV64IA-NEXT: and a6, a6, a4 -; RV64IA-NEXT: xor a6, a5, a6 -; RV64IA-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 -; RV64IA-NEXT: sc.w a6, a6, (a2) -; RV64IA-NEXT: bnez a6, .LBB46_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a5, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_max_i8_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: andi a3, a0, 24 +; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: slli a1, a1, 56 +; RV64IA-WMO-NEXT: srai a1, a1, 56 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: xori a3, a3, 56 +; RV64IA-WMO-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a5, (a2) +; RV64IA-WMO-NEXT: and a7, a5, a4 +; RV64IA-WMO-NEXT: mv a6, a5 +; RV64IA-WMO-NEXT: sll a7, a7, a3 +; RV64IA-WMO-NEXT: sra a7, a7, a3 +; RV64IA-WMO-NEXT: bge a7, a1, .LBB46_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 +; RV64IA-WMO-NEXT: xor a6, a5, a1 +; RV64IA-WMO-NEXT: and a6, a6, a4 +; RV64IA-WMO-NEXT: xor a6, a5, a6 +; RV64IA-WMO-NEXT: .LBB46_3: # 
in Loop: Header=BB46_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a6, a6, (a2) +; RV64IA-WMO-NEXT: bnez a6, .LBB46_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_max_i8_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: andi a3, a0, 24 +; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: slli a1, a1, 56 +; RV64IA-TSO-NEXT: srai a1, a1, 56 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: xori a3, a3, 56 +; RV64IA-TSO-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a5, (a2) +; RV64IA-TSO-NEXT: and a7, a5, a4 +; RV64IA-TSO-NEXT: mv a6, a5 +; RV64IA-TSO-NEXT: sll a7, a7, a3 +; RV64IA-TSO-NEXT: sra a7, a7, a3 +; RV64IA-TSO-NEXT: bge a7, a1, .LBB46_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 +; RV64IA-TSO-NEXT: xor a6, a5, a1 +; RV64IA-TSO-NEXT: and a6, a6, a4 +; RV64IA-TSO-NEXT: xor a6, a5, a6 +; RV64IA-TSO-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV64IA-TSO-NEXT: bnez a6, .LBB46_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw max ptr %a, i8 %b acquire ret i8 %1 } @@ -2810,34 +3354,63 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_max_i8_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: andi a3, a0, 24 -; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: slli a1, a1, 24 -; RV32IA-NEXT: srai a1, a1, 24 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: xori a3, a3, 24 -; RV32IA-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a2) -; RV32IA-NEXT: and a7, a5, a4 -; RV32IA-NEXT: mv a6, a5 -; RV32IA-NEXT: sll a7, a7, a3 -; RV32IA-NEXT: sra a7, a7, 
a3 -; RV32IA-NEXT: bge a7, a1, .LBB47_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 -; RV32IA-NEXT: xor a6, a5, a1 -; RV32IA-NEXT: and a6, a6, a4 -; RV32IA-NEXT: xor a6, a5, a6 -; RV32IA-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a6, a6, (a2) -; RV32IA-NEXT: bnez a6, .LBB47_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a5, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_max_i8_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: andi a3, a0, 24 +; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: slli a1, a1, 24 +; RV32IA-WMO-NEXT: srai a1, a1, 24 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: xori a3, a3, 24 +; RV32IA-WMO-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a5, (a2) +; RV32IA-WMO-NEXT: and a7, a5, a4 +; RV32IA-WMO-NEXT: mv a6, a5 +; RV32IA-WMO-NEXT: sll a7, a7, a3 +; RV32IA-WMO-NEXT: sra a7, a7, a3 +; RV32IA-WMO-NEXT: bge a7, a1, .LBB47_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV32IA-WMO-NEXT: xor a6, a5, a1 +; RV32IA-WMO-NEXT: and a6, a6, a4 +; RV32IA-WMO-NEXT: xor a6, a5, a6 +; RV32IA-WMO-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-WMO-NEXT: bnez a6, .LBB47_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a5, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_max_i8_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: andi a3, a0, 24 +; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: slli a1, a1, 24 +; RV32IA-TSO-NEXT: srai a1, a1, 24 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: xori a3, a3, 24 +; RV32IA-TSO-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a5, (a2) +; RV32IA-TSO-NEXT: and a7, a5, a4 +; 
RV32IA-TSO-NEXT: mv a6, a5 +; RV32IA-TSO-NEXT: sll a7, a7, a3 +; RV32IA-TSO-NEXT: sra a7, a7, a3 +; RV32IA-TSO-NEXT: bge a7, a1, .LBB47_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV32IA-TSO-NEXT: xor a6, a5, a1 +; RV32IA-TSO-NEXT: and a6, a6, a4 +; RV32IA-TSO-NEXT: xor a6, a5, a6 +; RV32IA-TSO-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV32IA-TSO-NEXT: bnez a6, .LBB47_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a5, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_max_i8_release: ; RV64I: # %bb.0: @@ -2881,34 +3454,63 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_max_i8_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: andi a3, a0, 24 -; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: slli a1, a1, 56 -; RV64IA-NEXT: srai a1, a1, 56 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: xori a3, a3, 56 -; RV64IA-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a2) -; RV64IA-NEXT: and a7, a5, a4 -; RV64IA-NEXT: mv a6, a5 -; RV64IA-NEXT: sll a7, a7, a3 -; RV64IA-NEXT: sra a7, a7, a3 -; RV64IA-NEXT: bge a7, a1, .LBB47_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 -; RV64IA-NEXT: xor a6, a5, a1 -; RV64IA-NEXT: and a6, a6, a4 -; RV64IA-NEXT: xor a6, a5, a6 -; RV64IA-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a6, a6, (a2) -; RV64IA-NEXT: bnez a6, .LBB47_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a5, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_max_i8_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: andi a3, a0, 24 +; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: slli a1, a1, 56 +; RV64IA-WMO-NEXT: srai a1, a1, 56 +; 
RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: xori a3, a3, 56 +; RV64IA-WMO-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a5, (a2) +; RV64IA-WMO-NEXT: and a7, a5, a4 +; RV64IA-WMO-NEXT: mv a6, a5 +; RV64IA-WMO-NEXT: sll a7, a7, a3 +; RV64IA-WMO-NEXT: sra a7, a7, a3 +; RV64IA-WMO-NEXT: bge a7, a1, .LBB47_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV64IA-WMO-NEXT: xor a6, a5, a1 +; RV64IA-WMO-NEXT: and a6, a6, a4 +; RV64IA-WMO-NEXT: xor a6, a5, a6 +; RV64IA-WMO-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-WMO-NEXT: bnez a6, .LBB47_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_max_i8_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: andi a3, a0, 24 +; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: slli a1, a1, 56 +; RV64IA-TSO-NEXT: srai a1, a1, 56 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: xori a3, a3, 56 +; RV64IA-TSO-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a5, (a2) +; RV64IA-TSO-NEXT: and a7, a5, a4 +; RV64IA-TSO-NEXT: mv a6, a5 +; RV64IA-TSO-NEXT: sll a7, a7, a3 +; RV64IA-TSO-NEXT: sra a7, a7, a3 +; RV64IA-TSO-NEXT: bge a7, a1, .LBB47_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV64IA-TSO-NEXT: xor a6, a5, a1 +; RV64IA-TSO-NEXT: and a6, a6, a4 +; RV64IA-TSO-NEXT: xor a6, a5, a6 +; RV64IA-TSO-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV64IA-TSO-NEXT: bnez a6, .LBB47_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw max ptr %a, i8 %b release ret i8 %1 } @@ -2956,34 +3558,63 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; 
RV32IA-LABEL: atomicrmw_max_i8_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: andi a3, a0, 24 -; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: slli a1, a1, 24 -; RV32IA-NEXT: srai a1, a1, 24 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: xori a3, a3, 24 -; RV32IA-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a2) -; RV32IA-NEXT: and a7, a5, a4 -; RV32IA-NEXT: mv a6, a5 -; RV32IA-NEXT: sll a7, a7, a3 -; RV32IA-NEXT: sra a7, a7, a3 -; RV32IA-NEXT: bge a7, a1, .LBB48_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 -; RV32IA-NEXT: xor a6, a5, a1 -; RV32IA-NEXT: and a6, a6, a4 -; RV32IA-NEXT: xor a6, a5, a6 -; RV32IA-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a6, a6, (a2) -; RV32IA-NEXT: bnez a6, .LBB48_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a5, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_max_i8_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: andi a3, a0, 24 +; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: slli a1, a1, 24 +; RV32IA-WMO-NEXT: srai a1, a1, 24 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: xori a3, a3, 24 +; RV32IA-WMO-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a5, (a2) +; RV32IA-WMO-NEXT: and a7, a5, a4 +; RV32IA-WMO-NEXT: mv a6, a5 +; RV32IA-WMO-NEXT: sll a7, a7, a3 +; RV32IA-WMO-NEXT: sra a7, a7, a3 +; RV32IA-WMO-NEXT: bge a7, a1, .LBB48_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV32IA-WMO-NEXT: xor a6, a5, a1 +; RV32IA-WMO-NEXT: and a6, a6, a4 +; RV32IA-WMO-NEXT: xor a6, a5, a6 +; RV32IA-WMO-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-WMO-NEXT: bnez a6, .LBB48_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a5, a0 +; 
RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_max_i8_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: andi a3, a0, 24 +; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: slli a1, a1, 24 +; RV32IA-TSO-NEXT: srai a1, a1, 24 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: xori a3, a3, 24 +; RV32IA-TSO-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a5, (a2) +; RV32IA-TSO-NEXT: and a7, a5, a4 +; RV32IA-TSO-NEXT: mv a6, a5 +; RV32IA-TSO-NEXT: sll a7, a7, a3 +; RV32IA-TSO-NEXT: sra a7, a7, a3 +; RV32IA-TSO-NEXT: bge a7, a1, .LBB48_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV32IA-TSO-NEXT: xor a6, a5, a1 +; RV32IA-TSO-NEXT: and a6, a6, a4 +; RV32IA-TSO-NEXT: xor a6, a5, a6 +; RV32IA-TSO-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV32IA-TSO-NEXT: bnez a6, .LBB48_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a5, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_max_i8_acq_rel: ; RV64I: # %bb.0: @@ -3027,34 +3658,63 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_max_i8_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: andi a3, a0, 24 -; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: slli a1, a1, 56 -; RV64IA-NEXT: srai a1, a1, 56 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: xori a3, a3, 56 -; RV64IA-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a2) -; RV64IA-NEXT: and a7, a5, a4 -; RV64IA-NEXT: mv a6, a5 -; RV64IA-NEXT: sll a7, a7, a3 -; RV64IA-NEXT: sra a7, a7, a3 -; RV64IA-NEXT: bge a7, a1, .LBB48_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 -; RV64IA-NEXT: xor a6, a5, a1 -; RV64IA-NEXT: and a6, a6, a4 -; 
RV64IA-NEXT: xor a6, a5, a6 -; RV64IA-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a6, a6, (a2) -; RV64IA-NEXT: bnez a6, .LBB48_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a5, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_max_i8_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: andi a3, a0, 24 +; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: slli a1, a1, 56 +; RV64IA-WMO-NEXT: srai a1, a1, 56 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: xori a3, a3, 56 +; RV64IA-WMO-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a5, (a2) +; RV64IA-WMO-NEXT: and a7, a5, a4 +; RV64IA-WMO-NEXT: mv a6, a5 +; RV64IA-WMO-NEXT: sll a7, a7, a3 +; RV64IA-WMO-NEXT: sra a7, a7, a3 +; RV64IA-WMO-NEXT: bge a7, a1, .LBB48_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV64IA-WMO-NEXT: xor a6, a5, a1 +; RV64IA-WMO-NEXT: and a6, a6, a4 +; RV64IA-WMO-NEXT: xor a6, a5, a6 +; RV64IA-WMO-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-WMO-NEXT: bnez a6, .LBB48_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_max_i8_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: andi a3, a0, 24 +; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: slli a1, a1, 56 +; RV64IA-TSO-NEXT: srai a1, a1, 56 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: xori a3, a3, 56 +; RV64IA-TSO-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a5, (a2) +; RV64IA-TSO-NEXT: and a7, a5, a4 +; RV64IA-TSO-NEXT: mv a6, a5 +; RV64IA-TSO-NEXT: sll a7, a7, a3 +; RV64IA-TSO-NEXT: sra a7, a7, a3 +; RV64IA-TSO-NEXT: bge a7, a1, .LBB48_3 +; RV64IA-TSO-NEXT: # %bb.2: # 
in Loop: Header=BB48_1 Depth=1 +; RV64IA-TSO-NEXT: xor a6, a5, a1 +; RV64IA-TSO-NEXT: and a6, a6, a4 +; RV64IA-TSO-NEXT: xor a6, a5, a6 +; RV64IA-TSO-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV64IA-TSO-NEXT: bnez a6, .LBB48_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw max ptr %a, i8 %b acq_rel ret i8 %1 } @@ -3394,34 +4054,63 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_min_i8_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: andi a3, a0, 24 -; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: slli a1, a1, 24 -; RV32IA-NEXT: srai a1, a1, 24 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: xori a3, a3, 24 -; RV32IA-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a2) -; RV32IA-NEXT: and a7, a5, a4 -; RV32IA-NEXT: mv a6, a5 -; RV32IA-NEXT: sll a7, a7, a3 -; RV32IA-NEXT: sra a7, a7, a3 -; RV32IA-NEXT: bge a1, a7, .LBB51_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 -; RV32IA-NEXT: xor a6, a5, a1 -; RV32IA-NEXT: and a6, a6, a4 -; RV32IA-NEXT: xor a6, a5, a6 -; RV32IA-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 -; RV32IA-NEXT: sc.w a6, a6, (a2) -; RV32IA-NEXT: bnez a6, .LBB51_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a5, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_min_i8_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: andi a3, a0, 24 +; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: slli a1, a1, 24 +; RV32IA-WMO-NEXT: srai a1, a1, 24 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: xori a3, a3, 24 +; RV32IA-WMO-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a5, (a2) +; 
RV32IA-WMO-NEXT: and a7, a5, a4 +; RV32IA-WMO-NEXT: mv a6, a5 +; RV32IA-WMO-NEXT: sll a7, a7, a3 +; RV32IA-WMO-NEXT: sra a7, a7, a3 +; RV32IA-WMO-NEXT: bge a1, a7, .LBB51_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV32IA-WMO-NEXT: xor a6, a5, a1 +; RV32IA-WMO-NEXT: and a6, a6, a4 +; RV32IA-WMO-NEXT: xor a6, a5, a6 +; RV32IA-WMO-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a6, a6, (a2) +; RV32IA-WMO-NEXT: bnez a6, .LBB51_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a5, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_min_i8_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: andi a3, a0, 24 +; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: slli a1, a1, 24 +; RV32IA-TSO-NEXT: srai a1, a1, 24 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: xori a3, a3, 24 +; RV32IA-TSO-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a5, (a2) +; RV32IA-TSO-NEXT: and a7, a5, a4 +; RV32IA-TSO-NEXT: mv a6, a5 +; RV32IA-TSO-NEXT: sll a7, a7, a3 +; RV32IA-TSO-NEXT: sra a7, a7, a3 +; RV32IA-TSO-NEXT: bge a1, a7, .LBB51_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV32IA-TSO-NEXT: xor a6, a5, a1 +; RV32IA-TSO-NEXT: and a6, a6, a4 +; RV32IA-TSO-NEXT: xor a6, a5, a6 +; RV32IA-TSO-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV32IA-TSO-NEXT: bnez a6, .LBB51_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a5, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_min_i8_acquire: ; RV64I: # %bb.0: @@ -3465,34 +4154,63 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_min_i8_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: andi a3, a0, 24 -; RV64IA-NEXT: 
li a4, 255 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: slli a1, a1, 56 -; RV64IA-NEXT: srai a1, a1, 56 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: xori a3, a3, 56 -; RV64IA-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a2) -; RV64IA-NEXT: and a7, a5, a4 -; RV64IA-NEXT: mv a6, a5 -; RV64IA-NEXT: sll a7, a7, a3 -; RV64IA-NEXT: sra a7, a7, a3 -; RV64IA-NEXT: bge a1, a7, .LBB51_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 -; RV64IA-NEXT: xor a6, a5, a1 -; RV64IA-NEXT: and a6, a6, a4 -; RV64IA-NEXT: xor a6, a5, a6 -; RV64IA-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 -; RV64IA-NEXT: sc.w a6, a6, (a2) -; RV64IA-NEXT: bnez a6, .LBB51_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a5, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_min_i8_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: andi a3, a0, 24 +; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: slli a1, a1, 56 +; RV64IA-WMO-NEXT: srai a1, a1, 56 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: xori a3, a3, 56 +; RV64IA-WMO-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a5, (a2) +; RV64IA-WMO-NEXT: and a7, a5, a4 +; RV64IA-WMO-NEXT: mv a6, a5 +; RV64IA-WMO-NEXT: sll a7, a7, a3 +; RV64IA-WMO-NEXT: sra a7, a7, a3 +; RV64IA-WMO-NEXT: bge a1, a7, .LBB51_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV64IA-WMO-NEXT: xor a6, a5, a1 +; RV64IA-WMO-NEXT: and a6, a6, a4 +; RV64IA-WMO-NEXT: xor a6, a5, a6 +; RV64IA-WMO-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a6, a6, (a2) +; RV64IA-WMO-NEXT: bnez a6, .LBB51_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_min_i8_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; 
RV64IA-TSO-NEXT: andi a3, a0, 24 +; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: slli a1, a1, 56 +; RV64IA-TSO-NEXT: srai a1, a1, 56 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: xori a3, a3, 56 +; RV64IA-TSO-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a5, (a2) +; RV64IA-TSO-NEXT: and a7, a5, a4 +; RV64IA-TSO-NEXT: mv a6, a5 +; RV64IA-TSO-NEXT: sll a7, a7, a3 +; RV64IA-TSO-NEXT: sra a7, a7, a3 +; RV64IA-TSO-NEXT: bge a1, a7, .LBB51_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV64IA-TSO-NEXT: xor a6, a5, a1 +; RV64IA-TSO-NEXT: and a6, a6, a4 +; RV64IA-TSO-NEXT: xor a6, a5, a6 +; RV64IA-TSO-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV64IA-TSO-NEXT: bnez a6, .LBB51_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw min ptr %a, i8 %b acquire ret i8 %1 } @@ -3540,34 +4258,63 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_min_i8_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: andi a3, a0, 24 -; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: slli a1, a1, 24 -; RV32IA-NEXT: srai a1, a1, 24 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: xori a3, a3, 24 -; RV32IA-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a2) -; RV32IA-NEXT: and a7, a5, a4 -; RV32IA-NEXT: mv a6, a5 -; RV32IA-NEXT: sll a7, a7, a3 -; RV32IA-NEXT: sra a7, a7, a3 -; RV32IA-NEXT: bge a1, a7, .LBB52_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 -; RV32IA-NEXT: xor a6, a5, a1 -; RV32IA-NEXT: and a6, a6, a4 -; RV32IA-NEXT: xor a6, a5, a6 -; RV32IA-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a6, a6, (a2) -; RV32IA-NEXT: bnez a6, .LBB52_1 -; RV32IA-NEXT: # 
%bb.4: -; RV32IA-NEXT: srl a0, a5, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_min_i8_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: andi a3, a0, 24 +; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: slli a1, a1, 24 +; RV32IA-WMO-NEXT: srai a1, a1, 24 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: xori a3, a3, 24 +; RV32IA-WMO-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a5, (a2) +; RV32IA-WMO-NEXT: and a7, a5, a4 +; RV32IA-WMO-NEXT: mv a6, a5 +; RV32IA-WMO-NEXT: sll a7, a7, a3 +; RV32IA-WMO-NEXT: sra a7, a7, a3 +; RV32IA-WMO-NEXT: bge a1, a7, .LBB52_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV32IA-WMO-NEXT: xor a6, a5, a1 +; RV32IA-WMO-NEXT: and a6, a6, a4 +; RV32IA-WMO-NEXT: xor a6, a5, a6 +; RV32IA-WMO-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-WMO-NEXT: bnez a6, .LBB52_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a5, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_min_i8_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: andi a3, a0, 24 +; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: slli a1, a1, 24 +; RV32IA-TSO-NEXT: srai a1, a1, 24 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: xori a3, a3, 24 +; RV32IA-TSO-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a5, (a2) +; RV32IA-TSO-NEXT: and a7, a5, a4 +; RV32IA-TSO-NEXT: mv a6, a5 +; RV32IA-TSO-NEXT: sll a7, a7, a3 +; RV32IA-TSO-NEXT: sra a7, a7, a3 +; RV32IA-TSO-NEXT: bge a1, a7, .LBB52_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV32IA-TSO-NEXT: xor a6, a5, a1 +; RV32IA-TSO-NEXT: and a6, a6, a4 +; RV32IA-TSO-NEXT: xor a6, a5, a6 +; RV32IA-TSO-NEXT: .LBB52_3: # in Loop: 
Header=BB52_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV32IA-TSO-NEXT: bnez a6, .LBB52_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a5, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_min_i8_release: ; RV64I: # %bb.0: @@ -3611,34 +4358,63 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_min_i8_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: andi a3, a0, 24 -; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: slli a1, a1, 56 -; RV64IA-NEXT: srai a1, a1, 56 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: xori a3, a3, 56 -; RV64IA-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a2) -; RV64IA-NEXT: and a7, a5, a4 -; RV64IA-NEXT: mv a6, a5 -; RV64IA-NEXT: sll a7, a7, a3 -; RV64IA-NEXT: sra a7, a7, a3 -; RV64IA-NEXT: bge a1, a7, .LBB52_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 -; RV64IA-NEXT: xor a6, a5, a1 -; RV64IA-NEXT: and a6, a6, a4 -; RV64IA-NEXT: xor a6, a5, a6 -; RV64IA-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a6, a6, (a2) -; RV64IA-NEXT: bnez a6, .LBB52_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a5, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_min_i8_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: andi a3, a0, 24 +; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: slli a1, a1, 56 +; RV64IA-WMO-NEXT: srai a1, a1, 56 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: xori a3, a3, 56 +; RV64IA-WMO-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a5, (a2) +; RV64IA-WMO-NEXT: and a7, a5, a4 +; RV64IA-WMO-NEXT: mv a6, a5 +; RV64IA-WMO-NEXT: sll a7, a7, a3 +; RV64IA-WMO-NEXT: sra a7, a7, a3 +; RV64IA-WMO-NEXT: bge a1, a7, .LBB52_3 
+; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV64IA-WMO-NEXT: xor a6, a5, a1 +; RV64IA-WMO-NEXT: and a6, a6, a4 +; RV64IA-WMO-NEXT: xor a6, a5, a6 +; RV64IA-WMO-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-WMO-NEXT: bnez a6, .LBB52_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_min_i8_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: andi a3, a0, 24 +; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: slli a1, a1, 56 +; RV64IA-TSO-NEXT: srai a1, a1, 56 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: xori a3, a3, 56 +; RV64IA-TSO-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a5, (a2) +; RV64IA-TSO-NEXT: and a7, a5, a4 +; RV64IA-TSO-NEXT: mv a6, a5 +; RV64IA-TSO-NEXT: sll a7, a7, a3 +; RV64IA-TSO-NEXT: sra a7, a7, a3 +; RV64IA-TSO-NEXT: bge a1, a7, .LBB52_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV64IA-TSO-NEXT: xor a6, a5, a1 +; RV64IA-TSO-NEXT: and a6, a6, a4 +; RV64IA-TSO-NEXT: xor a6, a5, a6 +; RV64IA-TSO-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV64IA-TSO-NEXT: bnez a6, .LBB52_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw min ptr %a, i8 %b release ret i8 %1 } @@ -3667,53 +4443,82 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1@plt ; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB53_4 -; RV32I-NEXT: .LBB53_2: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 24 -; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s2, a0, .LBB53_1 -; RV32I-NEXT: # %bb.3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: 
Header=BB53_2 Depth=1 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: j .LBB53_1 -; RV32I-NEXT: .LBB53_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 -; RV32I-NEXT: ret -; -; RV32IA-LABEL: atomicrmw_min_i8_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: andi a3, a0, 24 -; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: slli a1, a1, 24 -; RV32IA-NEXT: srai a1, a1, 24 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: xori a3, a3, 24 -; RV32IA-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a2) -; RV32IA-NEXT: and a7, a5, a4 -; RV32IA-NEXT: mv a6, a5 -; RV32IA-NEXT: sll a7, a7, a3 -; RV32IA-NEXT: sra a7, a7, a3 -; RV32IA-NEXT: bge a1, a7, .LBB53_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 -; RV32IA-NEXT: xor a6, a5, a1 -; RV32IA-NEXT: and a6, a6, a4 -; RV32IA-NEXT: xor a6, a5, a6 -; RV32IA-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a6, a6, (a2) -; RV32IA-NEXT: bnez a6, .LBB53_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a5, a0 -; RV32IA-NEXT: ret +; RV32I-NEXT: .LBB53_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a0, a3, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bge s2, a0, .LBB53_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB53_2 Depth=1 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: j .LBB53_1 +; RV32I-NEXT: .LBB53_4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; 
RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomicrmw_min_i8_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: andi a3, a0, 24 +; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: slli a1, a1, 24 +; RV32IA-WMO-NEXT: srai a1, a1, 24 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: xori a3, a3, 24 +; RV32IA-WMO-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a5, (a2) +; RV32IA-WMO-NEXT: and a7, a5, a4 +; RV32IA-WMO-NEXT: mv a6, a5 +; RV32IA-WMO-NEXT: sll a7, a7, a3 +; RV32IA-WMO-NEXT: sra a7, a7, a3 +; RV32IA-WMO-NEXT: bge a1, a7, .LBB53_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 +; RV32IA-WMO-NEXT: xor a6, a5, a1 +; RV32IA-WMO-NEXT: and a6, a6, a4 +; RV32IA-WMO-NEXT: xor a6, a5, a6 +; RV32IA-WMO-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-WMO-NEXT: bnez a6, .LBB53_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a5, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_min_i8_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: andi a3, a0, 24 +; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: slli a1, a1, 24 +; RV32IA-TSO-NEXT: srai a1, a1, 24 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: xori a3, a3, 24 +; RV32IA-TSO-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a5, (a2) +; RV32IA-TSO-NEXT: and a7, a5, a4 +; RV32IA-TSO-NEXT: mv a6, a5 +; RV32IA-TSO-NEXT: sll a7, a7, a3 +; RV32IA-TSO-NEXT: sra a7, a7, a3 +; RV32IA-TSO-NEXT: bge a1, a7, .LBB53_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 +; RV32IA-TSO-NEXT: xor a6, a5, a1 +; RV32IA-TSO-NEXT: and a6, a6, a4 +; RV32IA-TSO-NEXT: xor a6, a5, a6 +; RV32IA-TSO-NEXT: .LBB53_3: # in Loop: 
Header=BB53_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV32IA-TSO-NEXT: bnez a6, .LBB53_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a5, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_min_i8_acq_rel: ; RV64I: # %bb.0: @@ -3757,34 +4562,63 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_min_i8_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: andi a3, a0, 24 -; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: slli a1, a1, 56 -; RV64IA-NEXT: srai a1, a1, 56 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: xori a3, a3, 56 -; RV64IA-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a2) -; RV64IA-NEXT: and a7, a5, a4 -; RV64IA-NEXT: mv a6, a5 -; RV64IA-NEXT: sll a7, a7, a3 -; RV64IA-NEXT: sra a7, a7, a3 -; RV64IA-NEXT: bge a1, a7, .LBB53_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 -; RV64IA-NEXT: xor a6, a5, a1 -; RV64IA-NEXT: and a6, a6, a4 -; RV64IA-NEXT: xor a6, a5, a6 -; RV64IA-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a6, a6, (a2) -; RV64IA-NEXT: bnez a6, .LBB53_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a5, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_min_i8_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: andi a3, a0, 24 +; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: slli a1, a1, 56 +; RV64IA-WMO-NEXT: srai a1, a1, 56 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: xori a3, a3, 56 +; RV64IA-WMO-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a5, (a2) +; RV64IA-WMO-NEXT: and a7, a5, a4 +; RV64IA-WMO-NEXT: mv a6, a5 +; RV64IA-WMO-NEXT: sll a7, a7, a3 +; RV64IA-WMO-NEXT: sra a7, a7, a3 +; RV64IA-WMO-NEXT: bge a1, a7, 
.LBB53_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 +; RV64IA-WMO-NEXT: xor a6, a5, a1 +; RV64IA-WMO-NEXT: and a6, a6, a4 +; RV64IA-WMO-NEXT: xor a6, a5, a6 +; RV64IA-WMO-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-WMO-NEXT: bnez a6, .LBB53_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_min_i8_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: andi a3, a0, 24 +; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: slli a1, a1, 56 +; RV64IA-TSO-NEXT: srai a1, a1, 56 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: xori a3, a3, 56 +; RV64IA-TSO-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a5, (a2) +; RV64IA-TSO-NEXT: and a7, a5, a4 +; RV64IA-TSO-NEXT: mv a6, a5 +; RV64IA-TSO-NEXT: sll a7, a7, a3 +; RV64IA-TSO-NEXT: sra a7, a7, a3 +; RV64IA-TSO-NEXT: bge a1, a7, .LBB53_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 +; RV64IA-TSO-NEXT: xor a6, a5, a1 +; RV64IA-TSO-NEXT: and a6, a6, a4 +; RV64IA-TSO-NEXT: xor a6, a5, a6 +; RV64IA-TSO-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV64IA-TSO-NEXT: bnez a6, .LBB53_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw min ptr %a, i8 %b acq_rel ret i8 %1 } @@ -4108,29 +4942,53 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_umax_i8_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, 
(a2) -; RV32IA-NEXT: and a6, a4, a3 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a6, a1, .LBB56_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB56_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_umax_i8_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NEXT: and a6, a4, a3 +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB56_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a4, a1 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB56_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_umax_i8_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: and a6, a4, a3 +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB56_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a4, a1 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; 
RV32IA-TSO-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB56_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i8_acquire: ; RV64I: # %bb.0: @@ -4172,29 +5030,53 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_umax_i8_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a2) -; RV64IA-NEXT: and a6, a4, a3 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a6, a1, .LBB56_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB56_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_umax_i8_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NEXT: and a6, a4, a3 +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: bgeu a6, a1, .LBB56_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a4, a1 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NEXT: 
bnez a5, .LBB56_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_umax_i8_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: and a6, a4, a3 +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: bgeu a6, a1, .LBB56_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a4, a1 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB56_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw umax ptr %a, i8 %b acquire ret i8 %1 } @@ -4240,29 +5122,53 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_umax_i8_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a2) -; RV32IA-NEXT: and a6, a4, a3 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a6, a1, .LBB57_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB57_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: 
atomicrmw_umax_i8_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a4, (a2) +; RV32IA-WMO-NEXT: and a6, a4, a3 +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB57_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a4, a1 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB57_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_umax_i8_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: and a6, a4, a3 +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB57_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a4, a1 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB57_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i8_release: ; RV64I: # %bb.0: @@ -4304,29 +5210,53 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_umax_i8_release: -; 
RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a2) -; RV64IA-NEXT: and a6, a4, a3 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a6, a1, .LBB57_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB57_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_umax_i8_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a4, (a2) +; RV64IA-WMO-NEXT: and a6, a4, a3 +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: bgeu a6, a1, .LBB57_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a4, a1 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB57_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_umax_i8_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 +; 
RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: and a6, a4, a3 +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: bgeu a6, a1, .LBB57_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a4, a1 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB57_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw umax ptr %a, i8 %b release ret i8 %1 } @@ -4372,29 +5302,53 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_umax_i8_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a2) -; RV32IA-NEXT: and a6, a4, a3 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a6, a1, .LBB58_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB58_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_umax_i8_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NEXT: and a6, a4, a3 +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: bgeu a6, 
a1, .LBB58_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a4, a1 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB58_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_umax_i8_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: and a6, a4, a3 +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB58_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a4, a1 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB58_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i8_acq_rel: ; RV64I: # %bb.0: @@ -4436,29 +5390,53 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_umax_i8_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a2) -; RV64IA-NEXT: and a6, a4, a3 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a6, a1, .LBB58_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1 -; 
RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB58_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_umax_i8_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NEXT: and a6, a4, a3 +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: bgeu a6, a1, .LBB58_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a4, a1 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB58_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_umax_i8_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: and a6, a4, a3 +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: bgeu a6, a1, .LBB58_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a4, a1 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB58_1 +; 
RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw umax ptr %a, i8 %b acq_rel ret i8 %1 } @@ -4768,29 +5746,53 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_umin_i8_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a2) -; RV32IA-NEXT: and a6, a4, a3 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a6, .LBB61_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB61_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_umin_i8_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NEXT: and a6, a4, a3 +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB61_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a4, a1 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB61_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: 
atomicrmw_umin_i8_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: and a6, a4, a3 +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB61_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a4, a1 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB61_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i8_acquire: ; RV64I: # %bb.0: @@ -4832,29 +5834,53 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_umin_i8_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a2) -; RV64IA-NEXT: and a6, a4, a3 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a6, .LBB61_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB61_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_umin_i8_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 
+; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NEXT: and a6, a4, a3 +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: bgeu a1, a6, .LBB61_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a4, a1 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB61_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_umin_i8_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: and a6, a4, a3 +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: bgeu a1, a6, .LBB61_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a4, a1 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB61_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw umin ptr %a, i8 %b acquire ret i8 %1 } @@ -4900,29 +5926,53 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_umin_i8_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll 
a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a2) -; RV32IA-NEXT: and a6, a4, a3 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a6, .LBB62_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB62_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_umin_i8_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a4, (a2) +; RV32IA-WMO-NEXT: and a6, a4, a3 +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB62_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a4, a1 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB62_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_umin_i8_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: and a6, a4, a3 +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB62_3 +; 
RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a4, a1 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB62_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i8_release: ; RV64I: # %bb.0: @@ -4964,29 +6014,53 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_umin_i8_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a2) -; RV64IA-NEXT: and a6, a4, a3 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a6, .LBB62_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB62_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_umin_i8_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a4, (a2) +; RV64IA-WMO-NEXT: and a6, a4, a3 +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: bgeu a1, a6, .LBB62_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a4, a1 +; RV64IA-WMO-NEXT: and 
a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB62_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_umin_i8_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: and a6, a4, a3 +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: bgeu a1, a6, .LBB62_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a4, a1 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB62_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw umin ptr %a, i8 %b release ret i8 %1 } @@ -5032,29 +6106,53 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_umin_i8_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a2) -; RV32IA-NEXT: and a6, a4, a3 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a6, .LBB63_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: .LBB63_3: # in Loop: Header=BB63_1 
Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB63_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_umin_i8_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: andi a1, a1, 255 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NEXT: and a6, a4, a3 +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB63_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a4, a1 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB63_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a4, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_umin_i8_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: andi a1, a1, 255 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NEXT: and a6, a4, a3 +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB63_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a4, a1 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB63_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a4, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i8_acq_rel: ; RV64I: # %bb.0: @@ 
-5096,29 +6194,53 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_umin_i8_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a3, a3, a0 -; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a2) -; RV64IA-NEXT: and a6, a4, a3 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a6, .LBB63_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB63_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_umin_i8_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: andi a1, a1, 255 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NEXT: and a6, a4, a3 +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: bgeu a1, a6, .LBB63_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a4, a1 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB63_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a4, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_umin_i8_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: li a3, 
255 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: andi a1, a1, 255 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NEXT: and a6, a4, a3 +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: bgeu a1, a6, .LBB63_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a4, a1 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB63_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a4, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw umin ptr %a, i8 %b acq_rel ret i8 %1 } @@ -5328,30 +6450,51 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: li a2, 2 ; RV32I-NEXT: call __atomic_exchange_2@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV32IA-LABEL: atomicrmw_xchg_i16_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: mv a5, a1 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB66_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 
+; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: mv a5, a1 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB66_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: mv a5, a1 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB66_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_acquire: ; RV64I: # %bb.0: @@ -5363,26 +6506,47 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_xchg_i16_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: mv a5, a1 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w a5, a5, (a2) -; 
RV64IA-NEXT: bnez a5, .LBB66_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_xchg_i16_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: mv a5, a1 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB66_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_xchg_i16_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: mv a5, a1 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB66_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw xchg ptr %a, i16 %b acquire ret i16 %1 } @@ -5398,26 +6562,47 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_xchg_i16_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; 
RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: mv a5, a1 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB67_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a2) +; RV32IA-WMO-NEXT: mv a5, a1 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB67_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: mv a5, a1 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB67_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_release: ; RV64I: # %bb.0: @@ -5429,26 +6614,47 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, 
sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_xchg_i16_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: mv a5, a1 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB67_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_xchg_i16_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a2) +; RV64IA-WMO-NEXT: mv a5, a1 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB67_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_xchg_i16_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: mv a5, a1 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; 
RV64IA-TSO-NEXT: bnez a5, .LBB67_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw xchg ptr %a, i16 %b release ret i16 %1 } @@ -5464,26 +6670,47 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_xchg_i16_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: mv a5, a1 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB68_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: mv a5, a1 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB68_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, 
a0 +; RV32IA-TSO-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: mv a5, a1 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB68_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV64I: # %bb.0: @@ -5495,26 +6722,47 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_xchg_i16_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: mv a5, a1 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB68_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_xchg_i16_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: mv a5, a1 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB68_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: 
atomicrmw_xchg_i16_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: mv a5, a1 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB68_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw xchg ptr %a, i16 %b acq_rel ret i16 %1 } @@ -6165,26 +7413,47 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_add_i16_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: add a5, a3, a1 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB81_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_add_i16_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: add a5, a3, a1 +; 
RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB81_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_add_i16_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: add a5, a3, a1 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB81_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_acquire: ; RV64I: # %bb.0: @@ -6196,26 +7465,47 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_add_i16_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: add a5, a3, a1 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB81_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_add_i16_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: 
lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: add a5, a3, a1 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB81_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_add_i16_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: add a5, a3, a1 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB81_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw add ptr %a, i16 %b acquire ret i16 %1 } @@ -6231,26 +7521,47 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_add_i16_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: add a5, a3, a1 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: 
sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB82_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_add_i16_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a2) +; RV32IA-WMO-NEXT: add a5, a3, a1 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB82_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_add_i16_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: add a5, a3, a1 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB82_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_release: ; RV64I: # %bb.0: @@ -6262,26 +7573,47 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_add_i16_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; 
RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: add a5, a3, a1 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB82_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_add_i16_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a2) +; RV64IA-WMO-NEXT: add a5, a3, a1 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB82_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_add_i16_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: add a5, a3, a1 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB82_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw add ptr %a, i16 %b release ret i16 %1 } @@ -6297,26 +7629,47 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr 
%a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_add_i16_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: add a5, a3, a1 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB83_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_add_i16_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: add a5, a3, a1 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB83_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_add_i16_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: add a5, a3, a1 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, 
a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB83_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_acq_rel: ; RV64I: # %bb.0: @@ -6328,26 +7681,47 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_add_i16_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: add a5, a3, a1 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB83_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_add_i16_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: add a5, a3, a1 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB83_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_add_i16_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; 
RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: add a5, a3, a1 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB83_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw add ptr %a, i16 %b acq_rel ret i16 %1 } @@ -6495,26 +7869,47 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_sub_i16_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: sub a5, a3, a1 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB86_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_sub_i16_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: sub a5, a3, a1 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB86_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, 
a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_sub_i16_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: sub a5, a3, a1 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB86_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_acquire: ; RV64I: # %bb.0: @@ -6526,26 +7921,47 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_sub_i16_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: sub a5, a3, a1 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB86_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_sub_i16_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 +; 
RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: sub a5, a3, a1 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB86_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_sub_i16_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: sub a5, a3, a1 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB86_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw sub ptr %a, i16 %b acquire ret i16 %1 } @@ -6561,57 +7977,99 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_sub_i16_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: sub a5, a3, a1 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB87_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_sub_i16_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi 
a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a2) +; RV32IA-WMO-NEXT: sub a5, a3, a1 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB87_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_sub_i16_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: sub a5, a3, a1 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB87_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_release: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: li a2, 3 -; RV64I-NEXT: call __atomic_fetch_sub_2@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret -; -; RV64IA-LABEL: atomicrmw_sub_i16_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB87_1: # =>This Inner Loop Header: 
Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: sub a5, a3, a1 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB87_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64I-NEXT: li a2, 3 +; RV64I-NEXT: call __atomic_fetch_sub_2@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomicrmw_sub_i16_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a2) +; RV64IA-WMO-NEXT: sub a5, a3, a1 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB87_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_sub_i16_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: sub a5, a3, a1 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB87_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw sub ptr %a, i16 %b release ret i16 %1 } @@ 
-6627,26 +8085,47 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_sub_i16_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: sub a5, a3, a1 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB88_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_sub_i16_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: sub a5, a3, a1 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB88_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_sub_i16_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: sub a5, a3, a1 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; 
RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB88_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_acq_rel: ; RV64I: # %bb.0: @@ -6658,26 +8137,47 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_sub_i16_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: sub a5, a3, a1 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB88_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_sub_i16_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: sub a5, a3, a1 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB88_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_sub_i16_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; 
RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: sub a5, a3, a1 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB88_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw sub ptr %a, i16 %b acq_rel ret i16 %1 } @@ -7097,27 +8597,49 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_nand_i16_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: and a5, a3, a1 -; RV32IA-NEXT: not a5, a5 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB96_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_nand_i16_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: and a5, a3, a1 +; RV32IA-WMO-NEXT: not a5, a5 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, 
a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB96_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_nand_i16_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a5, a3, a1 +; RV32IA-TSO-NEXT: not a5, a5 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB96_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_acquire: ; RV64I: # %bb.0: @@ -7129,27 +8651,49 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_nand_i16_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: and a5, a3, a1 -; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB96_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_nand_i16_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw 
a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: and a5, a3, a1 +; RV64IA-WMO-NEXT: not a5, a5 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB96_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_nand_i16_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a5, a3, a1 +; RV64IA-TSO-NEXT: not a5, a5 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB96_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw nand ptr %a, i16 %b acquire ret i16 %1 } @@ -7165,27 +8709,49 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_nand_i16_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: and a5, a3, a1 -; RV32IA-NEXT: not a5, a5 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, 
a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB97_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_nand_i16_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a2) +; RV32IA-WMO-NEXT: and a5, a3, a1 +; RV32IA-WMO-NEXT: not a5, a5 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB97_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_nand_i16_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a5, a3, a1 +; RV32IA-TSO-NEXT: not a5, a5 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB97_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_release: ; RV64I: # %bb.0: @@ -7197,27 +8763,49 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_nand_i16_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; 
RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: and a5, a3, a1 -; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB97_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_nand_i16_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a2) +; RV64IA-WMO-NEXT: and a5, a3, a1 +; RV64IA-WMO-NEXT: not a5, a5 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB97_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_nand_i16_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a5, a3, a1 +; RV64IA-TSO-NEXT: not a5, a5 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, 
.LBB97_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw nand ptr %a, i16 %b release ret i16 %1 } @@ -7233,27 +8821,49 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_nand_i16_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: and a5, a3, a1 -; RV32IA-NEXT: not a5, a5 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB98_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_nand_i16_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: and a5, a3, a1 +; RV32IA-WMO-NEXT: not a5, a5 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB98_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_nand_i16_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and 
a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a5, a3, a1 +; RV32IA-TSO-NEXT: not a5, a5 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB98_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_acq_rel: ; RV64I: # %bb.0: @@ -7265,27 +8875,49 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_nand_i16_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: and a5, a3, a1 -; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB98_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_nand_i16_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: and a5, a3, a1 +; RV64IA-WMO-NEXT: not a5, a5 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; 
RV64IA-WMO-NEXT: bnez a5, .LBB98_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_nand_i16_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a5, a3, a1 +; RV64IA-TSO-NEXT: not a5, a5 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB98_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw nand ptr %a, i16 %b acq_rel ret i16 %1 } @@ -8011,36 +9643,67 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_max_i16_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: andi a3, a0, 24 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: slli a1, a1, 16 -; RV32IA-NEXT: srai a1, a1, 16 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 16 -; RV32IA-NEXT: sub a5, a5, a3 -; RV32IA-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: and a7, a3, a4 -; RV32IA-NEXT: mv a6, a3 -; RV32IA-NEXT: sll a7, a7, a5 -; RV32IA-NEXT: sra a7, a7, a5 -; RV32IA-NEXT: bge a7, a1, .LBB111_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 -; RV32IA-NEXT: xor a6, a3, a1 -; RV32IA-NEXT: and a6, a6, a4 -; RV32IA-NEXT: xor a6, a3, a6 -; RV32IA-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 -; RV32IA-NEXT: sc.w a6, a6, (a2) -; RV32IA-NEXT: bnez 
a6, .LBB111_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_max_i16_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: andi a3, a0, 24 +; RV32IA-WMO-NEXT: lui a4, 16 +; RV32IA-WMO-NEXT: addi a4, a4, -1 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NEXT: srai a1, a1, 16 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: li a5, 16 +; RV32IA-WMO-NEXT: sub a5, a5, a3 +; RV32IA-WMO-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: and a7, a3, a4 +; RV32IA-WMO-NEXT: mv a6, a3 +; RV32IA-WMO-NEXT: sll a7, a7, a5 +; RV32IA-WMO-NEXT: sra a7, a7, a5 +; RV32IA-WMO-NEXT: bge a7, a1, .LBB111_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 +; RV32IA-WMO-NEXT: xor a6, a3, a1 +; RV32IA-WMO-NEXT: and a6, a6, a4 +; RV32IA-WMO-NEXT: xor a6, a3, a6 +; RV32IA-WMO-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a6, a6, (a2) +; RV32IA-WMO-NEXT: bnez a6, .LBB111_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_max_i16_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: andi a3, a0, 24 +; RV32IA-TSO-NEXT: lui a4, 16 +; RV32IA-TSO-NEXT: addi a4, a4, -1 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NEXT: srai a1, a1, 16 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: li a5, 16 +; RV32IA-TSO-NEXT: sub a5, a5, a3 +; RV32IA-TSO-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a7, a3, a4 +; RV32IA-TSO-NEXT: mv a6, a3 +; RV32IA-TSO-NEXT: sll a7, a7, a5 +; RV32IA-TSO-NEXT: sra a7, a7, a5 +; RV32IA-TSO-NEXT: bge a7, a1, .LBB111_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: 
Header=BB111_1 Depth=1 +; RV32IA-TSO-NEXT: xor a6, a3, a1 +; RV32IA-TSO-NEXT: and a6, a6, a4 +; RV32IA-TSO-NEXT: xor a6, a3, a6 +; RV32IA-TSO-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV32IA-TSO-NEXT: bnez a6, .LBB111_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_max_i16_acquire: ; RV64I: # %bb.0: @@ -8084,36 +9747,67 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_max_i16_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: andi a3, a0, 24 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: slli a1, a1, 48 -; RV64IA-NEXT: srai a1, a1, 48 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 48 -; RV64IA-NEXT: sub a5, a5, a3 -; RV64IA-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: and a7, a3, a4 -; RV64IA-NEXT: mv a6, a3 -; RV64IA-NEXT: sll a7, a7, a5 -; RV64IA-NEXT: sra a7, a7, a5 -; RV64IA-NEXT: bge a7, a1, .LBB111_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 -; RV64IA-NEXT: xor a6, a3, a1 -; RV64IA-NEXT: and a6, a6, a4 -; RV64IA-NEXT: xor a6, a3, a6 -; RV64IA-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 -; RV64IA-NEXT: sc.w a6, a6, (a2) -; RV64IA-NEXT: bnez a6, .LBB111_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_max_i16_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: andi a3, a0, 24 +; RV64IA-WMO-NEXT: lui a4, 16 +; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NEXT: srai a1, a1, 48 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: li a5, 48 +; 
RV64IA-WMO-NEXT: sub a5, a5, a3 +; RV64IA-WMO-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: and a7, a3, a4 +; RV64IA-WMO-NEXT: mv a6, a3 +; RV64IA-WMO-NEXT: sll a7, a7, a5 +; RV64IA-WMO-NEXT: sra a7, a7, a5 +; RV64IA-WMO-NEXT: bge a7, a1, .LBB111_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 +; RV64IA-WMO-NEXT: xor a6, a3, a1 +; RV64IA-WMO-NEXT: and a6, a6, a4 +; RV64IA-WMO-NEXT: xor a6, a3, a6 +; RV64IA-WMO-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a6, a6, (a2) +; RV64IA-WMO-NEXT: bnez a6, .LBB111_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_max_i16_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: andi a3, a0, 24 +; RV64IA-TSO-NEXT: lui a4, 16 +; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NEXT: srai a1, a1, 48 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: li a5, 48 +; RV64IA-TSO-NEXT: sub a5, a5, a3 +; RV64IA-TSO-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a7, a3, a4 +; RV64IA-TSO-NEXT: mv a6, a3 +; RV64IA-TSO-NEXT: sll a7, a7, a5 +; RV64IA-TSO-NEXT: sra a7, a7, a5 +; RV64IA-TSO-NEXT: bge a7, a1, .LBB111_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 +; RV64IA-TSO-NEXT: xor a6, a3, a1 +; RV64IA-TSO-NEXT: and a6, a6, a4 +; RV64IA-TSO-NEXT: xor a6, a3, a6 +; RV64IA-TSO-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV64IA-TSO-NEXT: bnez a6, .LBB111_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw max ptr %a, i16 %b acquire ret i16 %1 } @@ -8161,36 +9855,67 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; 
RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_max_i16_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: andi a3, a0, 24 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: slli a1, a1, 16 -; RV32IA-NEXT: srai a1, a1, 16 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 16 -; RV32IA-NEXT: sub a5, a5, a3 -; RV32IA-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: and a7, a3, a4 -; RV32IA-NEXT: mv a6, a3 -; RV32IA-NEXT: sll a7, a7, a5 -; RV32IA-NEXT: sra a7, a7, a5 -; RV32IA-NEXT: bge a7, a1, .LBB112_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 -; RV32IA-NEXT: xor a6, a3, a1 -; RV32IA-NEXT: and a6, a6, a4 -; RV32IA-NEXT: xor a6, a3, a6 -; RV32IA-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a6, a6, (a2) -; RV32IA-NEXT: bnez a6, .LBB112_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_max_i16_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: andi a3, a0, 24 +; RV32IA-WMO-NEXT: lui a4, 16 +; RV32IA-WMO-NEXT: addi a4, a4, -1 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NEXT: srai a1, a1, 16 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: li a5, 16 +; RV32IA-WMO-NEXT: sub a5, a5, a3 +; RV32IA-WMO-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a2) +; RV32IA-WMO-NEXT: and a7, a3, a4 +; RV32IA-WMO-NEXT: mv a6, a3 +; RV32IA-WMO-NEXT: sll a7, a7, a5 +; RV32IA-WMO-NEXT: sra a7, a7, a5 +; RV32IA-WMO-NEXT: bge a7, a1, .LBB112_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 +; RV32IA-WMO-NEXT: xor a6, a3, a1 +; RV32IA-WMO-NEXT: and a6, a6, a4 +; RV32IA-WMO-NEXT: xor a6, a3, a6 +; RV32IA-WMO-NEXT: .LBB112_3: # in 
Loop: Header=BB112_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-WMO-NEXT: bnez a6, .LBB112_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_max_i16_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: andi a3, a0, 24 +; RV32IA-TSO-NEXT: lui a4, 16 +; RV32IA-TSO-NEXT: addi a4, a4, -1 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NEXT: srai a1, a1, 16 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: li a5, 16 +; RV32IA-TSO-NEXT: sub a5, a5, a3 +; RV32IA-TSO-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a7, a3, a4 +; RV32IA-TSO-NEXT: mv a6, a3 +; RV32IA-TSO-NEXT: sll a7, a7, a5 +; RV32IA-TSO-NEXT: sra a7, a7, a5 +; RV32IA-TSO-NEXT: bge a7, a1, .LBB112_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 +; RV32IA-TSO-NEXT: xor a6, a3, a1 +; RV32IA-TSO-NEXT: and a6, a6, a4 +; RV32IA-TSO-NEXT: xor a6, a3, a6 +; RV32IA-TSO-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV32IA-TSO-NEXT: bnez a6, .LBB112_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_max_i16_release: ; RV64I: # %bb.0: @@ -8214,56 +9939,87 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: bnez a0, .LBB112_4 -; RV64I-NEXT: .LBB112_2: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 -; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a0, .LBB112_1 -; RV64I-NEXT: # %bb.3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB112_2 Depth=1 -; RV64I-NEXT: mv a2, s1 -; RV64I-NEXT: j .LBB112_1 -; RV64I-NEXT: 
.LBB112_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 -; RV64I-NEXT: ret -; -; RV64IA-LABEL: atomicrmw_max_i16_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: andi a3, a0, 24 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: slli a1, a1, 48 -; RV64IA-NEXT: srai a1, a1, 48 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 48 -; RV64IA-NEXT: sub a5, a5, a3 -; RV64IA-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: and a7, a3, a4 -; RV64IA-NEXT: mv a6, a3 -; RV64IA-NEXT: sll a7, a7, a5 -; RV64IA-NEXT: sra a7, a7, a5 -; RV64IA-NEXT: bge a7, a1, .LBB112_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 -; RV64IA-NEXT: xor a6, a3, a1 -; RV64IA-NEXT: and a6, a6, a4 -; RV64IA-NEXT: xor a6, a3, a6 -; RV64IA-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a6, a6, (a2) -; RV64IA-NEXT: bnez a6, .LBB112_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64I-NEXT: bnez a0, .LBB112_4 +; RV64I-NEXT: .LBB112_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a0, a3, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: blt s2, a0, .LBB112_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB112_2 Depth=1 +; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: j .LBB112_1 +; RV64I-NEXT: .LBB112_4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded 
Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomicrmw_max_i16_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: andi a3, a0, 24 +; RV64IA-WMO-NEXT: lui a4, 16 +; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NEXT: srai a1, a1, 48 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: li a5, 48 +; RV64IA-WMO-NEXT: sub a5, a5, a3 +; RV64IA-WMO-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a2) +; RV64IA-WMO-NEXT: and a7, a3, a4 +; RV64IA-WMO-NEXT: mv a6, a3 +; RV64IA-WMO-NEXT: sll a7, a7, a5 +; RV64IA-WMO-NEXT: sra a7, a7, a5 +; RV64IA-WMO-NEXT: bge a7, a1, .LBB112_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 +; RV64IA-WMO-NEXT: xor a6, a3, a1 +; RV64IA-WMO-NEXT: and a6, a6, a4 +; RV64IA-WMO-NEXT: xor a6, a3, a6 +; RV64IA-WMO-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-WMO-NEXT: bnez a6, .LBB112_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_max_i16_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: andi a3, a0, 24 +; RV64IA-TSO-NEXT: lui a4, 16 +; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NEXT: srai a1, a1, 48 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: li a5, 48 +; RV64IA-TSO-NEXT: sub a5, a5, a3 +; RV64IA-TSO-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a7, a3, a4 +; RV64IA-TSO-NEXT: mv a6, a3 +; RV64IA-TSO-NEXT: sll a7, a7, a5 +; RV64IA-TSO-NEXT: sra a7, a7, a5 +; RV64IA-TSO-NEXT: bge a7, a1, .LBB112_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 +; 
RV64IA-TSO-NEXT: xor a6, a3, a1 +; RV64IA-TSO-NEXT: and a6, a6, a4 +; RV64IA-TSO-NEXT: xor a6, a3, a6 +; RV64IA-TSO-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV64IA-TSO-NEXT: bnez a6, .LBB112_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw max ptr %a, i16 %b release ret i16 %1 } @@ -8311,36 +10067,67 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_max_i16_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: andi a3, a0, 24 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: slli a1, a1, 16 -; RV32IA-NEXT: srai a1, a1, 16 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 16 -; RV32IA-NEXT: sub a5, a5, a3 -; RV32IA-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: and a7, a3, a4 -; RV32IA-NEXT: mv a6, a3 -; RV32IA-NEXT: sll a7, a7, a5 -; RV32IA-NEXT: sra a7, a7, a5 -; RV32IA-NEXT: bge a7, a1, .LBB113_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 -; RV32IA-NEXT: xor a6, a3, a1 -; RV32IA-NEXT: and a6, a6, a4 -; RV32IA-NEXT: xor a6, a3, a6 -; RV32IA-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a6, a6, (a2) -; RV32IA-NEXT: bnez a6, .LBB113_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_max_i16_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: andi a3, a0, 24 +; RV32IA-WMO-NEXT: lui a4, 16 +; RV32IA-WMO-NEXT: addi a4, a4, -1 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NEXT: srai a1, a1, 16 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: li a5, 16 +; RV32IA-WMO-NEXT: sub a5, a5, a3 +; 
RV32IA-WMO-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: and a7, a3, a4 +; RV32IA-WMO-NEXT: mv a6, a3 +; RV32IA-WMO-NEXT: sll a7, a7, a5 +; RV32IA-WMO-NEXT: sra a7, a7, a5 +; RV32IA-WMO-NEXT: bge a7, a1, .LBB113_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 +; RV32IA-WMO-NEXT: xor a6, a3, a1 +; RV32IA-WMO-NEXT: and a6, a6, a4 +; RV32IA-WMO-NEXT: xor a6, a3, a6 +; RV32IA-WMO-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-WMO-NEXT: bnez a6, .LBB113_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_max_i16_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: andi a3, a0, 24 +; RV32IA-TSO-NEXT: lui a4, 16 +; RV32IA-TSO-NEXT: addi a4, a4, -1 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NEXT: srai a1, a1, 16 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: li a5, 16 +; RV32IA-TSO-NEXT: sub a5, a5, a3 +; RV32IA-TSO-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a7, a3, a4 +; RV32IA-TSO-NEXT: mv a6, a3 +; RV32IA-TSO-NEXT: sll a7, a7, a5 +; RV32IA-TSO-NEXT: sra a7, a7, a5 +; RV32IA-TSO-NEXT: bge a7, a1, .LBB113_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 +; RV32IA-TSO-NEXT: xor a6, a3, a1 +; RV32IA-TSO-NEXT: and a6, a6, a4 +; RV32IA-TSO-NEXT: xor a6, a3, a6 +; RV32IA-TSO-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV32IA-TSO-NEXT: bnez a6, .LBB113_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_max_i16_acq_rel: ; RV64I: # %bb.0: @@ -8384,36 +10171,67 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; 
RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_max_i16_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: andi a3, a0, 24 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: slli a1, a1, 48 -; RV64IA-NEXT: srai a1, a1, 48 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 48 -; RV64IA-NEXT: sub a5, a5, a3 -; RV64IA-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: and a7, a3, a4 -; RV64IA-NEXT: mv a6, a3 -; RV64IA-NEXT: sll a7, a7, a5 -; RV64IA-NEXT: sra a7, a7, a5 -; RV64IA-NEXT: bge a7, a1, .LBB113_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 -; RV64IA-NEXT: xor a6, a3, a1 -; RV64IA-NEXT: and a6, a6, a4 -; RV64IA-NEXT: xor a6, a3, a6 -; RV64IA-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a6, a6, (a2) -; RV64IA-NEXT: bnez a6, .LBB113_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_max_i16_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: andi a3, a0, 24 +; RV64IA-WMO-NEXT: lui a4, 16 +; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NEXT: srai a1, a1, 48 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: li a5, 48 +; RV64IA-WMO-NEXT: sub a5, a5, a3 +; RV64IA-WMO-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: and a7, a3, a4 +; RV64IA-WMO-NEXT: mv a6, a3 +; RV64IA-WMO-NEXT: sll a7, a7, a5 +; RV64IA-WMO-NEXT: sra a7, a7, a5 +; RV64IA-WMO-NEXT: bge a7, a1, .LBB113_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 +; RV64IA-WMO-NEXT: xor a6, a3, a1 +; RV64IA-WMO-NEXT: and a6, a6, a4 +; RV64IA-WMO-NEXT: xor a6, a3, a6 +; RV64IA-WMO-NEXT: .LBB113_3: # in Loop: Header=BB113_1 
Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-WMO-NEXT: bnez a6, .LBB113_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_max_i16_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: andi a3, a0, 24 +; RV64IA-TSO-NEXT: lui a4, 16 +; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NEXT: srai a1, a1, 48 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: li a5, 48 +; RV64IA-TSO-NEXT: sub a5, a5, a3 +; RV64IA-TSO-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a7, a3, a4 +; RV64IA-TSO-NEXT: mv a6, a3 +; RV64IA-TSO-NEXT: sll a7, a7, a5 +; RV64IA-TSO-NEXT: sra a7, a7, a5 +; RV64IA-TSO-NEXT: bge a7, a1, .LBB113_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 +; RV64IA-TSO-NEXT: xor a6, a3, a1 +; RV64IA-TSO-NEXT: and a6, a6, a4 +; RV64IA-TSO-NEXT: xor a6, a3, a6 +; RV64IA-TSO-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV64IA-TSO-NEXT: bnez a6, .LBB113_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw max ptr %a, i16 %b acq_rel ret i16 %1 } @@ -8761,36 +10579,67 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_min_i16_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: andi a3, a0, 24 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: slli a1, a1, 16 -; RV32IA-NEXT: srai a1, a1, 16 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 16 -; RV32IA-NEXT: sub a5, a5, a3 -; RV32IA-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, 
(a2) -; RV32IA-NEXT: and a7, a3, a4 -; RV32IA-NEXT: mv a6, a3 -; RV32IA-NEXT: sll a7, a7, a5 -; RV32IA-NEXT: sra a7, a7, a5 -; RV32IA-NEXT: bge a1, a7, .LBB116_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 -; RV32IA-NEXT: xor a6, a3, a1 -; RV32IA-NEXT: and a6, a6, a4 -; RV32IA-NEXT: xor a6, a3, a6 -; RV32IA-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 -; RV32IA-NEXT: sc.w a6, a6, (a2) -; RV32IA-NEXT: bnez a6, .LBB116_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_min_i16_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: andi a3, a0, 24 +; RV32IA-WMO-NEXT: lui a4, 16 +; RV32IA-WMO-NEXT: addi a4, a4, -1 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NEXT: srai a1, a1, 16 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: li a5, 16 +; RV32IA-WMO-NEXT: sub a5, a5, a3 +; RV32IA-WMO-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: and a7, a3, a4 +; RV32IA-WMO-NEXT: mv a6, a3 +; RV32IA-WMO-NEXT: sll a7, a7, a5 +; RV32IA-WMO-NEXT: sra a7, a7, a5 +; RV32IA-WMO-NEXT: bge a1, a7, .LBB116_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 +; RV32IA-WMO-NEXT: xor a6, a3, a1 +; RV32IA-WMO-NEXT: and a6, a6, a4 +; RV32IA-WMO-NEXT: xor a6, a3, a6 +; RV32IA-WMO-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a6, a6, (a2) +; RV32IA-WMO-NEXT: bnez a6, .LBB116_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_min_i16_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: andi a3, a0, 24 +; RV32IA-TSO-NEXT: lui a4, 16 +; RV32IA-TSO-NEXT: addi a4, a4, -1 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NEXT: srai 
a1, a1, 16 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: li a5, 16 +; RV32IA-TSO-NEXT: sub a5, a5, a3 +; RV32IA-TSO-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a7, a3, a4 +; RV32IA-TSO-NEXT: mv a6, a3 +; RV32IA-TSO-NEXT: sll a7, a7, a5 +; RV32IA-TSO-NEXT: sra a7, a7, a5 +; RV32IA-TSO-NEXT: bge a1, a7, .LBB116_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 +; RV32IA-TSO-NEXT: xor a6, a3, a1 +; RV32IA-TSO-NEXT: and a6, a6, a4 +; RV32IA-TSO-NEXT: xor a6, a3, a6 +; RV32IA-TSO-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV32IA-TSO-NEXT: bnez a6, .LBB116_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_min_i16_acquire: ; RV64I: # %bb.0: @@ -8834,36 +10683,67 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_min_i16_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: andi a3, a0, 24 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: slli a1, a1, 48 -; RV64IA-NEXT: srai a1, a1, 48 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 48 -; RV64IA-NEXT: sub a5, a5, a3 -; RV64IA-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: and a7, a3, a4 -; RV64IA-NEXT: mv a6, a3 -; RV64IA-NEXT: sll a7, a7, a5 -; RV64IA-NEXT: sra a7, a7, a5 -; RV64IA-NEXT: bge a1, a7, .LBB116_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 -; RV64IA-NEXT: xor a6, a3, a1 -; RV64IA-NEXT: and a6, a6, a4 -; RV64IA-NEXT: xor a6, a3, a6 -; RV64IA-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 -; RV64IA-NEXT: sc.w a6, a6, (a2) -; RV64IA-NEXT: bnez a6, .LBB116_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 -; 
RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_min_i16_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: andi a3, a0, 24 +; RV64IA-WMO-NEXT: lui a4, 16 +; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NEXT: srai a1, a1, 48 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: li a5, 48 +; RV64IA-WMO-NEXT: sub a5, a5, a3 +; RV64IA-WMO-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: and a7, a3, a4 +; RV64IA-WMO-NEXT: mv a6, a3 +; RV64IA-WMO-NEXT: sll a7, a7, a5 +; RV64IA-WMO-NEXT: sra a7, a7, a5 +; RV64IA-WMO-NEXT: bge a1, a7, .LBB116_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 +; RV64IA-WMO-NEXT: xor a6, a3, a1 +; RV64IA-WMO-NEXT: and a6, a6, a4 +; RV64IA-WMO-NEXT: xor a6, a3, a6 +; RV64IA-WMO-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a6, a6, (a2) +; RV64IA-WMO-NEXT: bnez a6, .LBB116_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_min_i16_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: andi a3, a0, 24 +; RV64IA-TSO-NEXT: lui a4, 16 +; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NEXT: srai a1, a1, 48 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: li a5, 48 +; RV64IA-TSO-NEXT: sub a5, a5, a3 +; RV64IA-TSO-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a7, a3, a4 +; RV64IA-TSO-NEXT: mv a6, a3 +; RV64IA-TSO-NEXT: sll a7, a7, a5 +; RV64IA-TSO-NEXT: sra a7, a7, a5 +; RV64IA-TSO-NEXT: bge a1, a7, .LBB116_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 +; RV64IA-TSO-NEXT: xor a6, a3, a1 +; 
RV64IA-TSO-NEXT: and a6, a6, a4 +; RV64IA-TSO-NEXT: xor a6, a3, a6 +; RV64IA-TSO-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV64IA-TSO-NEXT: bnez a6, .LBB116_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw min ptr %a, i16 %b acquire ret i16 %1 } @@ -8911,36 +10791,67 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_min_i16_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: andi a3, a0, 24 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: slli a1, a1, 16 -; RV32IA-NEXT: srai a1, a1, 16 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 16 -; RV32IA-NEXT: sub a5, a5, a3 -; RV32IA-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: and a7, a3, a4 -; RV32IA-NEXT: mv a6, a3 -; RV32IA-NEXT: sll a7, a7, a5 -; RV32IA-NEXT: sra a7, a7, a5 -; RV32IA-NEXT: bge a1, a7, .LBB117_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 -; RV32IA-NEXT: xor a6, a3, a1 -; RV32IA-NEXT: and a6, a6, a4 -; RV32IA-NEXT: xor a6, a3, a6 -; RV32IA-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a6, a6, (a2) -; RV32IA-NEXT: bnez a6, .LBB117_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_min_i16_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: andi a3, a0, 24 +; RV32IA-WMO-NEXT: lui a4, 16 +; RV32IA-WMO-NEXT: addi a4, a4, -1 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NEXT: srai a1, a1, 16 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: li a5, 16 +; RV32IA-WMO-NEXT: sub a5, a5, a3 +; RV32IA-WMO-NEXT: .LBB117_1: # =>This 
Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a2) +; RV32IA-WMO-NEXT: and a7, a3, a4 +; RV32IA-WMO-NEXT: mv a6, a3 +; RV32IA-WMO-NEXT: sll a7, a7, a5 +; RV32IA-WMO-NEXT: sra a7, a7, a5 +; RV32IA-WMO-NEXT: bge a1, a7, .LBB117_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 +; RV32IA-WMO-NEXT: xor a6, a3, a1 +; RV32IA-WMO-NEXT: and a6, a6, a4 +; RV32IA-WMO-NEXT: xor a6, a3, a6 +; RV32IA-WMO-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-WMO-NEXT: bnez a6, .LBB117_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_min_i16_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: andi a3, a0, 24 +; RV32IA-TSO-NEXT: lui a4, 16 +; RV32IA-TSO-NEXT: addi a4, a4, -1 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NEXT: srai a1, a1, 16 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: li a5, 16 +; RV32IA-TSO-NEXT: sub a5, a5, a3 +; RV32IA-TSO-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a7, a3, a4 +; RV32IA-TSO-NEXT: mv a6, a3 +; RV32IA-TSO-NEXT: sll a7, a7, a5 +; RV32IA-TSO-NEXT: sra a7, a7, a5 +; RV32IA-TSO-NEXT: bge a1, a7, .LBB117_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 +; RV32IA-TSO-NEXT: xor a6, a3, a1 +; RV32IA-TSO-NEXT: and a6, a6, a4 +; RV32IA-TSO-NEXT: xor a6, a3, a6 +; RV32IA-TSO-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV32IA-TSO-NEXT: bnez a6, .LBB117_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_min_i16_release: ; RV64I: # %bb.0: @@ -8984,36 +10895,67 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: 
atomicrmw_min_i16_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: andi a3, a0, 24 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: slli a1, a1, 48 -; RV64IA-NEXT: srai a1, a1, 48 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 48 -; RV64IA-NEXT: sub a5, a5, a3 -; RV64IA-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: and a7, a3, a4 -; RV64IA-NEXT: mv a6, a3 -; RV64IA-NEXT: sll a7, a7, a5 -; RV64IA-NEXT: sra a7, a7, a5 -; RV64IA-NEXT: bge a1, a7, .LBB117_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 -; RV64IA-NEXT: xor a6, a3, a1 -; RV64IA-NEXT: and a6, a6, a4 -; RV64IA-NEXT: xor a6, a3, a6 -; RV64IA-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a6, a6, (a2) -; RV64IA-NEXT: bnez a6, .LBB117_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_min_i16_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: andi a3, a0, 24 +; RV64IA-WMO-NEXT: lui a4, 16 +; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NEXT: srai a1, a1, 48 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: li a5, 48 +; RV64IA-WMO-NEXT: sub a5, a5, a3 +; RV64IA-WMO-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a2) +; RV64IA-WMO-NEXT: and a7, a3, a4 +; RV64IA-WMO-NEXT: mv a6, a3 +; RV64IA-WMO-NEXT: sll a7, a7, a5 +; RV64IA-WMO-NEXT: sra a7, a7, a5 +; RV64IA-WMO-NEXT: bge a1, a7, .LBB117_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 +; RV64IA-WMO-NEXT: xor a6, a3, a1 +; RV64IA-WMO-NEXT: and a6, a6, a4 +; RV64IA-WMO-NEXT: xor a6, a3, a6 +; RV64IA-WMO-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a6, 
a6, (a2) +; RV64IA-WMO-NEXT: bnez a6, .LBB117_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_min_i16_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: andi a3, a0, 24 +; RV64IA-TSO-NEXT: lui a4, 16 +; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NEXT: srai a1, a1, 48 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: li a5, 48 +; RV64IA-TSO-NEXT: sub a5, a5, a3 +; RV64IA-TSO-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a7, a3, a4 +; RV64IA-TSO-NEXT: mv a6, a3 +; RV64IA-TSO-NEXT: sll a7, a7, a5 +; RV64IA-TSO-NEXT: sra a7, a7, a5 +; RV64IA-TSO-NEXT: bge a1, a7, .LBB117_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 +; RV64IA-TSO-NEXT: xor a6, a3, a1 +; RV64IA-TSO-NEXT: and a6, a6, a4 +; RV64IA-TSO-NEXT: xor a6, a3, a6 +; RV64IA-TSO-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV64IA-TSO-NEXT: bnez a6, .LBB117_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw min ptr %a, i16 %b release ret i16 %1 } @@ -9061,36 +11003,67 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_min_i16_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: andi a3, a0, 24 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: slli a1, a1, 16 -; RV32IA-NEXT: srai a1, a1, 16 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 16 -; RV32IA-NEXT: sub a5, a5, a3 -; RV32IA-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: and a7, a3, a4 -; 
RV32IA-NEXT: mv a6, a3 -; RV32IA-NEXT: sll a7, a7, a5 -; RV32IA-NEXT: sra a7, a7, a5 -; RV32IA-NEXT: bge a1, a7, .LBB118_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 -; RV32IA-NEXT: xor a6, a3, a1 -; RV32IA-NEXT: and a6, a6, a4 -; RV32IA-NEXT: xor a6, a3, a6 -; RV32IA-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a6, a6, (a2) -; RV32IA-NEXT: bnez a6, .LBB118_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_min_i16_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: andi a3, a0, 24 +; RV32IA-WMO-NEXT: lui a4, 16 +; RV32IA-WMO-NEXT: addi a4, a4, -1 +; RV32IA-WMO-NEXT: sll a4, a4, a0 +; RV32IA-WMO-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NEXT: srai a1, a1, 16 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: li a5, 16 +; RV32IA-WMO-NEXT: sub a5, a5, a3 +; RV32IA-WMO-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: and a7, a3, a4 +; RV32IA-WMO-NEXT: mv a6, a3 +; RV32IA-WMO-NEXT: sll a7, a7, a5 +; RV32IA-WMO-NEXT: sra a7, a7, a5 +; RV32IA-WMO-NEXT: bge a1, a7, .LBB118_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 +; RV32IA-WMO-NEXT: xor a6, a3, a1 +; RV32IA-WMO-NEXT: and a6, a6, a4 +; RV32IA-WMO-NEXT: xor a6, a3, a6 +; RV32IA-WMO-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-WMO-NEXT: bnez a6, .LBB118_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_min_i16_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: andi a3, a0, 24 +; RV32IA-TSO-NEXT: lui a4, 16 +; RV32IA-TSO-NEXT: addi a4, a4, -1 +; RV32IA-TSO-NEXT: sll a4, a4, a0 +; RV32IA-TSO-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NEXT: srai a1, a1, 16 +; RV32IA-TSO-NEXT: sll 
a1, a1, a0 +; RV32IA-TSO-NEXT: li a5, 16 +; RV32IA-TSO-NEXT: sub a5, a5, a3 +; RV32IA-TSO-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a7, a3, a4 +; RV32IA-TSO-NEXT: mv a6, a3 +; RV32IA-TSO-NEXT: sll a7, a7, a5 +; RV32IA-TSO-NEXT: sra a7, a7, a5 +; RV32IA-TSO-NEXT: bge a1, a7, .LBB118_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 +; RV32IA-TSO-NEXT: xor a6, a3, a1 +; RV32IA-TSO-NEXT: and a6, a6, a4 +; RV32IA-TSO-NEXT: xor a6, a3, a6 +; RV32IA-TSO-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV32IA-TSO-NEXT: bnez a6, .LBB118_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_min_i16_acq_rel: ; RV64I: # %bb.0: @@ -9134,36 +11107,67 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_min_i16_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: andi a3, a0, 24 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: slli a1, a1, 48 -; RV64IA-NEXT: srai a1, a1, 48 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 48 -; RV64IA-NEXT: sub a5, a5, a3 -; RV64IA-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: and a7, a3, a4 -; RV64IA-NEXT: mv a6, a3 -; RV64IA-NEXT: sll a7, a7, a5 -; RV64IA-NEXT: sra a7, a7, a5 -; RV64IA-NEXT: bge a1, a7, .LBB118_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 -; RV64IA-NEXT: xor a6, a3, a1 -; RV64IA-NEXT: and a6, a6, a4 -; RV64IA-NEXT: xor a6, a3, a6 -; RV64IA-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a6, a6, (a2) -; RV64IA-NEXT: bnez a6, .LBB118_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: 
atomicrmw_min_i16_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: andi a3, a0, 24 +; RV64IA-WMO-NEXT: lui a4, 16 +; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NEXT: srai a1, a1, 48 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: li a5, 48 +; RV64IA-WMO-NEXT: sub a5, a5, a3 +; RV64IA-WMO-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: and a7, a3, a4 +; RV64IA-WMO-NEXT: mv a6, a3 +; RV64IA-WMO-NEXT: sll a7, a7, a5 +; RV64IA-WMO-NEXT: sra a7, a7, a5 +; RV64IA-WMO-NEXT: bge a1, a7, .LBB118_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 +; RV64IA-WMO-NEXT: xor a6, a3, a1 +; RV64IA-WMO-NEXT: and a6, a6, a4 +; RV64IA-WMO-NEXT: xor a6, a3, a6 +; RV64IA-WMO-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-WMO-NEXT: bnez a6, .LBB118_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_min_i16_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: andi a3, a0, 24 +; RV64IA-TSO-NEXT: lui a4, 16 +; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: sllw a4, a4, a0 +; RV64IA-TSO-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NEXT: srai a1, a1, 48 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: li a5, 48 +; RV64IA-TSO-NEXT: sub a5, a5, a3 +; RV64IA-TSO-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a7, a3, a4 +; RV64IA-TSO-NEXT: mv a6, a3 +; RV64IA-TSO-NEXT: sll a7, a7, a5 +; RV64IA-TSO-NEXT: sra a7, a7, a5 +; RV64IA-TSO-NEXT: bge a1, a7, .LBB118_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 +; RV64IA-TSO-NEXT: xor a6, a3, a1 +; RV64IA-TSO-NEXT: and a6, a6, a4 +; 
RV64IA-TSO-NEXT: xor a6, a3, a6 +; RV64IA-TSO-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a6, a6, (a2) +; RV64IA-TSO-NEXT: bnez a6, .LBB118_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw min ptr %a, i16 %b acq_rel ret i16 %1 } @@ -9505,30 +11509,55 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_umax_i16_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: and a6, a3, a4 -; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a6, a1, .LBB121_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 -; RV32IA-NEXT: xor a5, a3, a1 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB121_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_umax_i16_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: and a6, a3, a4 +; RV32IA-WMO-NEXT: mv a5, a3 +; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB121_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a3, a1 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; 
RV32IA-WMO-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB121_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_umax_i16_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a6, a3, a4 +; RV32IA-TSO-NEXT: mv a5, a3 +; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB121_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a3, a1 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB121_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i16_acquire: ; RV64I: # %bb.0: @@ -9558,46 +11587,71 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: .LBB121_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s3, a0, .LBB121_1 -; RV64I-NEXT: # %bb.3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB121_2 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: j .LBB121_1 -; RV64I-NEXT: .LBB121_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 -; 
RV64I-NEXT: ret -; -; RV64IA-LABEL: atomicrmw_umax_i16_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: and a6, a3, a4 -; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a6, a1, .LBB121_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 -; RV64IA-NEXT: xor a5, a3, a1 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB121_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bltu s3, a0, .LBB121_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB121_2 Depth=1 +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB121_1 +; RV64I-NEXT: .LBB121_4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomicrmw_umax_i16_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: and a6, a3, a4 +; RV64IA-WMO-NEXT: mv a5, a3 +; RV64IA-WMO-NEXT: bgeu a6, a1, .LBB121_3 +; RV64IA-WMO-NEXT: # %bb.2: 
# in Loop: Header=BB121_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a3, a1 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB121_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_umax_i16_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a6, a3, a4 +; RV64IA-TSO-NEXT: mv a5, a3 +; RV64IA-TSO-NEXT: bgeu a6, a1, .LBB121_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a3, a1 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB121_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw umax ptr %a, i16 %b acquire ret i16 %1 } @@ -9647,30 +11701,55 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_umax_i16_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: and a6, a3, a4 -; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a6, a1, .LBB122_3 -; RV32IA-NEXT: # %bb.2: # in 
Loop: Header=BB122_1 Depth=1 -; RV32IA-NEXT: xor a5, a3, a1 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB122_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_umax_i16_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a2) +; RV32IA-WMO-NEXT: and a6, a3, a4 +; RV32IA-WMO-NEXT: mv a5, a3 +; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB122_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a3, a1 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB122_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_umax_i16_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a6, a3, a4 +; RV32IA-TSO-NEXT: mv a5, a3 +; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB122_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a3, a1 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: .LBB122_3: # in Loop: 
Header=BB122_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB122_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i16_release: ; RV64I: # %bb.0: @@ -9716,30 +11795,55 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_umax_i16_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: and a6, a3, a4 -; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a6, a1, .LBB122_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 -; RV64IA-NEXT: xor a5, a3, a1 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB122_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_umax_i16_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a2) +; RV64IA-WMO-NEXT: and a6, a3, a4 +; RV64IA-WMO-NEXT: mv a5, a3 +; RV64IA-WMO-NEXT: bgeu a6, a1, .LBB122_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a3, a1 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 +; 
RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB122_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_umax_i16_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a6, a3, a4 +; RV64IA-TSO-NEXT: mv a5, a3 +; RV64IA-TSO-NEXT: bgeu a6, a1, .LBB122_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a3, a1 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB122_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw umax ptr %a, i16 %b release ret i16 %1 } @@ -9789,30 +11893,55 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_umax_i16_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: and a6, a3, a4 -; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a6, a1, .LBB123_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 -; RV32IA-NEXT: xor a5, a3, a1 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 -; RV32IA-NEXT: 
sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB123_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_umax_i16_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: and a6, a3, a4 +; RV32IA-WMO-NEXT: mv a5, a3 +; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB123_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a3, a1 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB123_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_umax_i16_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a6, a3, a4 +; RV32IA-TSO-NEXT: mv a5, a3 +; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB123_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a3, a1 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB123_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; 
RV64I-LABEL: atomicrmw_umax_i16_acq_rel: ; RV64I: # %bb.0: @@ -9858,30 +11987,55 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_umax_i16_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: and a6, a3, a4 -; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a6, a1, .LBB123_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 -; RV64IA-NEXT: xor a5, a3, a1 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB123_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_umax_i16_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: and a6, a3, a4 +; RV64IA-WMO-NEXT: mv a5, a3 +; RV64IA-WMO-NEXT: bgeu a6, a1, .LBB123_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a3, a1 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB123_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: 
atomicrmw_umax_i16_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a6, a3, a4 +; RV64IA-TSO-NEXT: mv a5, a3 +; RV64IA-TSO-NEXT: bgeu a6, a1, .LBB123_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a3, a1 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB123_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw umax ptr %a, i16 %b acq_rel ret i16 %1 } @@ -10215,30 +12369,55 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_umin_i16_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: and a6, a3, a4 -; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a6, .LBB126_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 -; RV32IA-NEXT: xor a5, a3, a1 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB126_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_umin_i16_acquire: +; RV32IA-WMO: # 
%bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: and a6, a3, a4 +; RV32IA-WMO-NEXT: mv a5, a3 +; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB126_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a3, a1 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB126_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_umin_i16_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a6, a3, a4 +; RV32IA-TSO-NEXT: mv a5, a3 +; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB126_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a3, a1 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB126_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_acquire: ; RV64I: # %bb.0: @@ -10284,30 +12463,55 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; 
RV64IA-LABEL: atomicrmw_umin_i16_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: and a6, a3, a4 -; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a6, .LBB126_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 -; RV64IA-NEXT: xor a5, a3, a1 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB126_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_umin_i16_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: and a6, a3, a4 +; RV64IA-WMO-NEXT: mv a5, a3 +; RV64IA-WMO-NEXT: bgeu a1, a6, .LBB126_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a3, a1 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB126_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_umin_i16_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: 
sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a6, a3, a4 +; RV64IA-TSO-NEXT: mv a5, a3 +; RV64IA-TSO-NEXT: bgeu a1, a6, .LBB126_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a3, a1 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB126_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw umin ptr %a, i16 %b acquire ret i16 %1 } @@ -10357,30 +12561,55 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_umin_i16_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: and a6, a3, a4 -; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a6, .LBB127_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 -; RV32IA-NEXT: xor a5, a3, a1 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB127_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_umin_i16_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, 
a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a2) +; RV32IA-WMO-NEXT: and a6, a3, a4 +; RV32IA-WMO-NEXT: mv a5, a3 +; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB127_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a3, a1 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB127_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_umin_i16_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a6, a3, a4 +; RV32IA-TSO-NEXT: mv a5, a3 +; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB127_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a3, a1 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB127_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_release: ; RV64I: # %bb.0: @@ -10426,30 +12655,55 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_umin_i16_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, 
a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: and a6, a3, a4 -; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a6, .LBB127_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 -; RV64IA-NEXT: xor a5, a3, a1 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB127_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_umin_i16_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a2) +; RV64IA-WMO-NEXT: and a6, a3, a4 +; RV64IA-WMO-NEXT: mv a5, a3 +; RV64IA-WMO-NEXT: bgeu a1, a6, .LBB127_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a3, a1 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB127_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_umin_i16_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a6, 
a3, a4 +; RV64IA-TSO-NEXT: mv a5, a3 +; RV64IA-TSO-NEXT: bgeu a1, a6, .LBB127_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a3, a1 +; RV64IA-TSO-NEXT: and a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB127_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw umin ptr %a, i16 %b release ret i16 %1 } @@ -10499,30 +12753,55 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_umin_i16_acq_rel: -; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a2) -; RV32IA-NEXT: and a6, a3, a4 -; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a6, .LBB128_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 -; RV32IA-NEXT: xor a5, a3, a1 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a2) -; RV32IA-NEXT: bnez a5, .LBB128_1 -; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_umin_i16_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: sll a4, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NEXT: and a6, a3, a4 +; RV32IA-WMO-NEXT: mv a5, a3 +; 
RV32IA-WMO-NEXT: bgeu a1, a6, .LBB128_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a3, a1 +; RV32IA-WMO-NEXT: and a5, a5, a4 +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NEXT: bnez a5, .LBB128_1 +; RV32IA-WMO-NEXT: # %bb.4: +; RV32IA-WMO-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_umin_i16_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: sll a4, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NEXT: and a6, a3, a4 +; RV32IA-TSO-NEXT: mv a5, a3 +; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB128_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a3, a1 +; RV32IA-TSO-NEXT: and a5, a5, a4 +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NEXT: bnez a5, .LBB128_1 +; RV32IA-TSO-NEXT: # %bb.4: +; RV32IA-TSO-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_acq_rel: ; RV64I: # %bb.0: @@ -10568,30 +12847,55 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_umin_i16_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a2) -; RV64IA-NEXT: and a6, a3, a4 -; RV64IA-NEXT: 
mv a5, a3 -; RV64IA-NEXT: bgeu a1, a6, .LBB128_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 -; RV64IA-NEXT: xor a5, a3, a1 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a2) -; RV64IA-NEXT: bnez a5, .LBB128_1 -; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_umin_i16_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: sllw a4, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NEXT: and a6, a3, a4 +; RV64IA-WMO-NEXT: mv a5, a3 +; RV64IA-WMO-NEXT: bgeu a1, a6, .LBB128_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a3, a1 +; RV64IA-WMO-NEXT: and a5, a5, a4 +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NEXT: bnez a5, .LBB128_1 +; RV64IA-WMO-NEXT: # %bb.4: +; RV64IA-WMO-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_umin_i16_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: sllw a4, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NEXT: and a6, a3, a4 +; RV64IA-TSO-NEXT: mv a5, a3 +; RV64IA-TSO-NEXT: bgeu a1, a6, .LBB128_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a3, a1 +; RV64IA-TSO-NEXT: and 
a5, a5, a4 +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NEXT: bnez a5, .LBB128_1 +; RV64IA-TSO-NEXT: # %bb.4: +; RV64IA-TSO-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw umin ptr %a, i16 %b acq_rel ret i16 %1 } @@ -11487,17 +13791,29 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_nand_i32_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a2, (a0) -; RV32IA-NEXT: and a3, a2, a1 -; RV32IA-NEXT: not a3, a3 -; RV32IA-NEXT: sc.w a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB151_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: mv a0, a2 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_nand_i32_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a0) +; RV32IA-WMO-NEXT: and a3, a2, a1 +; RV32IA-WMO-NEXT: not a3, a3 +; RV32IA-WMO-NEXT: sc.w a3, a3, (a0) +; RV32IA-WMO-NEXT: bnez a3, .LBB151_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: mv a0, a2 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_nand_i32_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a0) +; RV32IA-TSO-NEXT: and a3, a2, a1 +; RV32IA-TSO-NEXT: not a3, a3 +; RV32IA-TSO-NEXT: sc.w a3, a3, (a0) +; RV32IA-TSO-NEXT: bnez a3, .LBB151_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: mv a0, a2 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i32_acquire: ; RV64I: # %bb.0: @@ -11509,17 +13825,29 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_nand_i32_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq 
a2, (a0) -; RV64IA-NEXT: and a3, a2, a1 -; RV64IA-NEXT: not a3, a3 -; RV64IA-NEXT: sc.w a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB151_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: mv a0, a2 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_nand_i32_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a0) +; RV64IA-WMO-NEXT: and a3, a2, a1 +; RV64IA-WMO-NEXT: not a3, a3 +; RV64IA-WMO-NEXT: sc.w a3, a3, (a0) +; RV64IA-WMO-NEXT: bnez a3, .LBB151_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: mv a0, a2 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_nand_i32_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a0) +; RV64IA-TSO-NEXT: and a3, a2, a1 +; RV64IA-TSO-NEXT: not a3, a3 +; RV64IA-TSO-NEXT: sc.w a3, a3, (a0) +; RV64IA-TSO-NEXT: bnez a3, .LBB151_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: mv a0, a2 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw nand ptr %a, i32 %b acquire ret i32 %1 } @@ -11535,17 +13863,29 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_nand_i32_release: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a2, (a0) -; RV32IA-NEXT: and a3, a2, a1 -; RV32IA-NEXT: not a3, a3 -; RV32IA-NEXT: sc.w.rl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB152_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: mv a0, a2 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_nand_i32_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a2, (a0) +; RV32IA-WMO-NEXT: and a3, a2, a1 +; RV32IA-WMO-NEXT: not a3, a3 +; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a0) +; RV32IA-WMO-NEXT: bnez a3, .LBB152_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: mv a0, a2 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: 
atomicrmw_nand_i32_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a0) +; RV32IA-TSO-NEXT: and a3, a2, a1 +; RV32IA-TSO-NEXT: not a3, a3 +; RV32IA-TSO-NEXT: sc.w a3, a3, (a0) +; RV32IA-TSO-NEXT: bnez a3, .LBB152_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: mv a0, a2 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i32_release: ; RV64I: # %bb.0: @@ -11557,17 +13897,29 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_nand_i32_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a2, (a0) -; RV64IA-NEXT: and a3, a2, a1 -; RV64IA-NEXT: not a3, a3 -; RV64IA-NEXT: sc.w.rl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB152_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: mv a0, a2 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_nand_i32_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a2, (a0) +; RV64IA-WMO-NEXT: and a3, a2, a1 +; RV64IA-WMO-NEXT: not a3, a3 +; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a0) +; RV64IA-WMO-NEXT: bnez a3, .LBB152_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: mv a0, a2 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_nand_i32_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a0) +; RV64IA-TSO-NEXT: and a3, a2, a1 +; RV64IA-TSO-NEXT: not a3, a3 +; RV64IA-TSO-NEXT: sc.w a3, a3, (a0) +; RV64IA-TSO-NEXT: bnez a3, .LBB152_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: mv a0, a2 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw nand ptr %a, i32 %b release ret i32 %1 } @@ -11583,17 +13935,29 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: atomicrmw_nand_i32_acq_rel: -; 
RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a2, (a0) -; RV32IA-NEXT: and a3, a2, a1 -; RV32IA-NEXT: not a3, a3 -; RV32IA-NEXT: sc.w.rl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB153_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: mv a0, a2 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: atomicrmw_nand_i32_acq_rel: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a0) +; RV32IA-WMO-NEXT: and a3, a2, a1 +; RV32IA-WMO-NEXT: not a3, a3 +; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a0) +; RV32IA-WMO-NEXT: bnez a3, .LBB153_1 +; RV32IA-WMO-NEXT: # %bb.2: +; RV32IA-WMO-NEXT: mv a0, a2 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomicrmw_nand_i32_acq_rel: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a0) +; RV32IA-TSO-NEXT: and a3, a2, a1 +; RV32IA-TSO-NEXT: not a3, a3 +; RV32IA-TSO-NEXT: sc.w a3, a3, (a0) +; RV32IA-TSO-NEXT: bnez a3, .LBB153_1 +; RV32IA-TSO-NEXT: # %bb.2: +; RV32IA-TSO-NEXT: mv a0, a2 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i32_acq_rel: ; RV64I: # %bb.0: @@ -11605,17 +13969,29 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_nand_i32_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a2, (a0) -; RV64IA-NEXT: and a3, a2, a1 -; RV64IA-NEXT: not a3, a3 -; RV64IA-NEXT: sc.w.rl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB153_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: mv a0, a2 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_nand_i32_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a0) +; RV64IA-WMO-NEXT: and a3, a2, a1 +; RV64IA-WMO-NEXT: not a3, a3 +; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a0) +; RV64IA-WMO-NEXT: bnez 
a3, .LBB153_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: mv a0, a2 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_nand_i32_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a0) +; RV64IA-TSO-NEXT: and a3, a2, a1 +; RV64IA-TSO-NEXT: not a3, a3 +; RV64IA-TSO-NEXT: sc.w a3, a3, (a0) +; RV64IA-TSO-NEXT: bnez a3, .LBB153_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: mv a0, a2 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw nand ptr %a, i32 %b acq_rel ret i32 %1 } @@ -14650,17 +17026,29 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_nand_i64_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB206_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aq a2, (a0) -; RV64IA-NEXT: and a3, a2, a1 -; RV64IA-NEXT: not a3, a3 -; RV64IA-NEXT: sc.d a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB206_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: mv a0, a2 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_nand_i64_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB206_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aq a2, (a0) +; RV64IA-WMO-NEXT: and a3, a2, a1 +; RV64IA-WMO-NEXT: not a3, a3 +; RV64IA-WMO-NEXT: sc.d a3, a3, (a0) +; RV64IA-WMO-NEXT: bnez a3, .LBB206_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: mv a0, a2 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_nand_i64_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB206_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a2, (a0) +; RV64IA-TSO-NEXT: and a3, a2, a1 +; RV64IA-TSO-NEXT: not a3, a3 +; RV64IA-TSO-NEXT: sc.d a3, a3, (a0) +; RV64IA-TSO-NEXT: bnez a3, .LBB206_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: mv a0, a2 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw nand ptr %a, i64 %b acquire ret i64 %1 } @@ -14696,17 +17084,29 @@ define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) 
nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_nand_i64_release: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB207_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d a2, (a0) -; RV64IA-NEXT: and a3, a2, a1 -; RV64IA-NEXT: not a3, a3 -; RV64IA-NEXT: sc.d.rl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB207_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: mv a0, a2 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_nand_i64_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB207_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d a2, (a0) +; RV64IA-WMO-NEXT: and a3, a2, a1 +; RV64IA-WMO-NEXT: not a3, a3 +; RV64IA-WMO-NEXT: sc.d.rl a3, a3, (a0) +; RV64IA-WMO-NEXT: bnez a3, .LBB207_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: mv a0, a2 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_nand_i64_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB207_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a2, (a0) +; RV64IA-TSO-NEXT: and a3, a2, a1 +; RV64IA-TSO-NEXT: not a3, a3 +; RV64IA-TSO-NEXT: sc.d a3, a3, (a0) +; RV64IA-TSO-NEXT: bnez a3, .LBB207_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: mv a0, a2 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw nand ptr %a, i64 %b release ret i64 %1 } @@ -14742,17 +17142,29 @@ define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: atomicrmw_nand_i64_acq_rel: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB208_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aq a2, (a0) -; RV64IA-NEXT: and a3, a2, a1 -; RV64IA-NEXT: not a3, a3 -; RV64IA-NEXT: sc.d.rl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB208_1 -; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: mv a0, a2 -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: atomicrmw_nand_i64_acq_rel: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB208_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aq a2, (a0) +; RV64IA-WMO-NEXT: and a3, a2, a1 
+; RV64IA-WMO-NEXT: not a3, a3 +; RV64IA-WMO-NEXT: sc.d.rl a3, a3, (a0) +; RV64IA-WMO-NEXT: bnez a3, .LBB208_1 +; RV64IA-WMO-NEXT: # %bb.2: +; RV64IA-WMO-NEXT: mv a0, a2 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomicrmw_nand_i64_acq_rel: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB208_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a2, (a0) +; RV64IA-TSO-NEXT: and a3, a2, a1 +; RV64IA-TSO-NEXT: not a3, a3 +; RV64IA-TSO-NEXT: sc.d a3, a3, (a0) +; RV64IA-TSO-NEXT: bnez a3, .LBB208_1 +; RV64IA-TSO-NEXT: # %bb.2: +; RV64IA-TSO-NEXT: mv a0, a2 +; RV64IA-TSO-NEXT: ret %1 = atomicrmw nand ptr %a, i64 %b acq_rel ret i64 %1 }