-
Notifications
You must be signed in to change notification settings - Fork 12.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ELF] Implement R_RISCV_TLSDESC for RISC-V #79239
[ELF] Implement R_RISCV_TLSDESC for RISC-V #79239
Conversation
Created using spr 1.3.4
@llvm/pr-subscribers-lld @llvm/pr-subscribers-lld-elf Author: Fangrui Song (MaskRay) ChangesSupport For -no-pie/-pie links, TLSDESC to initial-exec or local-exec
Patch is 23.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79239.diff 5 Files Affected:
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index d7d3d3e4781497..67d7e2562e9b17 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -61,6 +61,7 @@ enum Op {
AUIPC = 0x17,
JALR = 0x67,
LD = 0x3003,
+ LUI = 0x37,
LW = 0x2003,
SRLI = 0x5013,
SUB = 0x40000033,
@@ -73,6 +74,7 @@ enum Reg {
X_T0 = 5,
X_T1 = 6,
X_T2 = 7,
+ X_A0 = 10,
X_T3 = 28,
};
@@ -102,6 +104,26 @@ static uint32_t setLO12_S(uint32_t insn, uint32_t imm) {
(extractBits(imm, 4, 0) << 7);
}
+namespace {
+struct SymbolAnchor {
+ uint64_t offset;
+ Defined *d;
+ bool end; // true for the anchor of st_value+st_size
+};
+} // namespace
+
+struct elf::RISCVRelaxAux {
+ // This records symbol start and end offsets which will be adjusted according
+ // to the nearest relocDeltas element.
+ SmallVector<SymbolAnchor, 0> anchors;
+ // For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] :
+ // 0).
+ std::unique_ptr<uint32_t[]> relocDeltas;
+ // For relocations[i], the actual type is relocTypes[i].
+ std::unique_ptr<RelType[]> relocTypes;
+ SmallVector<uint32_t, 0> writes;
+};
+
RISCV::RISCV() {
copyRel = R_RISCV_COPY;
pltRel = R_RISCV_JUMP_SLOT;
@@ -119,6 +141,7 @@ RISCV::RISCV() {
tlsGotRel = R_RISCV_TLS_TPREL32;
}
gotRel = symbolicRel;
+ tlsDescRel = R_RISCV_TLSDESC;
// .got[0] = _DYNAMIC
gotHeaderEntriesNum = 1;
@@ -187,6 +210,8 @@ int64_t RISCV::getImplicitAddend(const uint8_t *buf, RelType type) const {
case R_RISCV_JUMP_SLOT:
// These relocations are defined as not having an implicit addend.
return 0;
+ case R_RISCV_TLSDESC:
+ return config->is64 ? read64le(buf + 8) : read32le(buf + 4);
}
}
@@ -295,6 +320,12 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
case R_RISCV_PCREL_LO12_I:
case R_RISCV_PCREL_LO12_S:
return R_RISCV_PC_INDIRECT;
+ case R_RISCV_TLSDESC_HI20:
+ case R_RISCV_TLSDESC_LOAD_LO12:
+ case R_RISCV_TLSDESC_ADD_LO12:
+ return R_TLSDESC_PC;
+ case R_RISCV_TLSDESC_CALL:
+ return R_TLSDESC_CALL;
case R_RISCV_TLS_GD_HI20:
return R_TLSGD_PC;
case R_RISCV_TLS_GOT_HI20:
@@ -419,6 +450,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_RISCV_GOT_HI20:
case R_RISCV_PCREL_HI20:
+ case R_RISCV_TLSDESC_HI20:
case R_RISCV_TLS_GD_HI20:
case R_RISCV_TLS_GOT_HI20:
case R_RISCV_TPREL_HI20:
@@ -430,6 +462,8 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
}
case R_RISCV_PCREL_LO12_I:
+ case R_RISCV_TLSDESC_LOAD_LO12:
+ case R_RISCV_TLSDESC_ADD_LO12:
case R_RISCV_TPREL_LO12_I:
case R_RISCV_LO12_I: {
uint64_t hi = (val + 0x800) >> 12;
@@ -513,29 +547,113 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
break;
case R_RISCV_RELAX:
- return; // Ignored (for now)
-
+ return;
+ case R_RISCV_TLSDESC:
+ // The addend is stored in the second word.
+ if (config->is64)
+ write64le(loc + 8, val);
+ else
+ write32le(loc + 4, val);
+ break;
default:
llvm_unreachable("unknown relocation");
}
}
+static void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) {
+ switch (rel.type) {
+ case R_RISCV_TLSDESC_HI20:
+ case R_RISCV_TLSDESC_LOAD_LO12:
+ write32le(loc, 0x00000013); // nop
+ return;
+ case R_RISCV_TLSDESC_ADD_LO12:
+ write32le(loc, utype(AUIPC, X_A0, hi20(val))); // auipc a0,<hi20>
+ return;
+ case R_RISCV_TLSDESC_CALL:
+ if (config->is64)
+ write32le(loc, itype(LD, X_A0, X_A0, lo12(val))); // ld a0,<lo12>(a0)
+ else
+ write32le(loc, itype(LW, X_A0, X_A0, lo12(val))); // lw a0,<lo12>(a0)
+ return;
+ default:
+ llvm_unreachable("unsupported relocation for TLSDESC to IE relaxation");
+ }
+}
+
+static void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
+ switch (rel.type) {
+ case R_RISCV_TLSDESC_HI20:
+ case R_RISCV_TLSDESC_LOAD_LO12:
+ write32le(loc, 0x00000013); // nop
+ return;
+ case R_RISCV_TLSDESC_ADD_LO12:
+ write32le(loc, utype(LUI, X_A0, hi20(val))); // lui a0,<hi20>
+ return;
+ case R_RISCV_TLSDESC_CALL:
+ if (isInt<12>(val))
+ write32le(loc, itype(ADDI, X_A0, 0, val)); // addi a0,zero,<lo12>
+ else
+ write32le(loc, itype(ADDI, X_A0, X_A0, lo12(val))); // addi a0,a0,<lo12>
+ return;
+ default:
+ llvm_unreachable("unsupported relocation for TLSDESC to LE relaxation");
+ }
+}
+
void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
uint64_t secAddr = sec.getOutputSection()->addr;
if (auto *s = dyn_cast<InputSection>(&sec))
secAddr += s->outSecOff;
else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
secAddr += ehIn->getParent()->outSecOff;
- for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) {
- const Relocation &rel = sec.relocs()[i];
+ uint64_t tlsdescVal = 0;
+ bool isToIe = true;
+ const ArrayRef<Relocation> relocs = sec.relocs();
+ for (size_t i = 0, size = relocs.size(); i != size; ++i) {
+ const Relocation &rel = relocs[i];
uint8_t *loc = buf + rel.offset;
- const uint64_t val =
+ uint64_t val =
sec.getRelocTargetVA(sec.file, rel.type, rel.addend,
secAddr + rel.offset, *rel.sym, rel.expr);
switch (rel.expr) {
case R_RELAX_HINT:
+ continue;
+ case R_TLSDESC_PC:
+ // For R_RISCV_TLSDESC_HI20, store &got(sym)-PC to be used by the
+ // following two instructions L[DW] and ADDI.
+ if (rel.type == R_RISCV_TLSDESC_HI20)
+ tlsdescVal = val;
+ else
+ val = tlsdescVal;
break;
+ case R_RELAX_TLS_GD_TO_IE:
+ // Only R_RISCV_TLSDESC_HI20 reaches here. tlsdescVal will be finalized
+ // after we see R_RISCV_TLSDESC_ADD_LO12 in the R_RELAX_TLS_GD_TO_LE case.
+ // The net effect is that tlsdescVal will be smaller than `val` to take
+ // into account of NOP instructions (in the absence of R_RISCV_RELAX)
+ // before AUIPC.
+ tlsdescVal = val + rel.offset;
+ isToIe = true;
+ tlsdescToIe(loc, rel, val);
+ continue;
+ case R_RELAX_TLS_GD_TO_LE:
+ // See the comment in handleTlsRelocation. For TLSDESC=>IE,
+ // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12,CALL} also reach here. If isToIe is
+ // true, this is actually TLSDESC=>IE optimization.
+ if (rel.type == R_RISCV_TLSDESC_HI20) {
+ tlsdescVal = val;
+ isToIe = false;
+ } else {
+ if (isToIe && rel.type == R_RISCV_TLSDESC_ADD_LO12)
+ tlsdescVal -= rel.offset;
+ val = tlsdescVal;
+ }
+ if (isToIe)
+ tlsdescToIe(loc, rel, val);
+ else
+ tlsdescToLe(loc, rel, val);
+ continue;
case R_RISCV_LEB128:
if (i + 1 < size) {
const Relocation &rel1 = sec.relocs()[i + 1];
@@ -554,32 +672,12 @@ void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
": R_RISCV_SET_ULEB128 not paired with R_RISCV_SUB_SET128");
return;
default:
- relocate(loc, rel, val);
break;
}
+ relocate(loc, rel, val);
}
}
-namespace {
-struct SymbolAnchor {
- uint64_t offset;
- Defined *d;
- bool end; // true for the anchor of st_value+st_size
-};
-} // namespace
-
-struct elf::RISCVRelaxAux {
- // This records symbol start and end offsets which will be adjusted according
- // to the nearest relocDeltas element.
- SmallVector<SymbolAnchor, 0> anchors;
- // For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] :
- // 0).
- std::unique_ptr<uint32_t[]> relocDeltas;
- // For relocations[i], the actual type is relocTypes[i].
- std::unique_ptr<RelType[]> relocTypes;
- SmallVector<uint32_t, 0> writes;
-};
-
static void initSymbolAnchors() {
SmallVector<InputSection *, 0> storage;
for (OutputSection *osec : outputSections) {
@@ -762,6 +860,14 @@ static bool relax(InputSection &sec) {
sec.relocs()[i + 1].type == R_RISCV_RELAX)
relaxHi20Lo12(sec, i, loc, r, remove);
break;
+ case R_RISCV_TLSDESC_HI20:
+ case R_RISCV_TLSDESC_LOAD_LO12:
+ // For LE or IE optimization, AUIPC and L[DW] are converted to a removable
+ // NOP.
+ if (r.expr != R_TLSDESC_PC && i + 1 != sec.relocs().size() &&
+ sec.relocs()[i + 1].type == R_RISCV_RELAX)
+ remove = 4;
+ break;
}
// For all anchors whose offsets are <= r.offset, they are preceded by
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index b6a317bc3b6d69..7e0cfc50605198 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1274,26 +1274,31 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
if (config->emachine == EM_MIPS)
return handleMipsTlsRelocation(type, sym, c, offset, addend, expr);
+ bool isRISCV = config->emachine == EM_RISCV;
if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
R_TLSDESC_GOTPLT>(expr) &&
config->shared) {
+ // R_RISCV_TLSDESC_{LOAD_LO12_I,ADD_LO12_I,CALL} reference a label. Do not
+ // set NEEDS_TLSDESC on the label.
if (expr != R_TLSDESC_CALL) {
- sym.setFlags(NEEDS_TLSDESC);
+ if (!isRISCV || type == R_RISCV_TLSDESC_HI20)
+ sym.setFlags(NEEDS_TLSDESC);
c.addReloc({expr, type, offset, addend, &sym});
}
return 1;
}
// ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE
- // relaxation.
+ // optimizations.
+ // RISC-V supports TLSDESC to IE/LE optimizations.
// For PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable
// relaxation as well.
- bool toExecRelax = !config->shared && config->emachine != EM_ARM &&
- config->emachine != EM_HEXAGON &&
- config->emachine != EM_LOONGARCH &&
- config->emachine != EM_RISCV &&
- !c.file->ppc64DisableTLSRelax;
+ bool toExecRelax =
+ !config->shared && config->emachine != EM_ARM &&
+ config->emachine != EM_HEXAGON && config->emachine != EM_LOONGARCH &&
+ !(isRISCV && expr != R_TLSDESC_PC && expr != R_TLSDESC_CALL) &&
+ !c.file->ppc64DisableTLSRelax;
// If we are producing an executable and the symbol is non-preemptable, it
// must be defined and the code sequence can be relaxed to use Local-Exec.
@@ -1347,8 +1352,12 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
return 1;
}
- // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec
+ // Global-Dynamic/TLSDESC can be relaxed to Initial-Exec or Local-Exec
// depending on the symbol being locally defined or not.
+ //
+ // R_RISCV_TLSDESC_{LOAD_LO12_I,ADD_LO12_I,CALL} reference a non-preemptible
+ // label, so the LE transition will be categorized as R_RELAX_TLS_GD_TO_LE.
+ // We fix the categorization in RISCV::relocateAlloc.
if (sym.isPreemptible) {
sym.setFlags(NEEDS_TLSGD_TO_IE);
c.addReloc({target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE), type,
diff --git a/lld/test/ELF/riscv-tlsdesc-gd-mixed.s b/lld/test/ELF/riscv-tlsdesc-gd-mixed.s
new file mode 100644
index 00000000000000..c0e91593ed9686
--- /dev/null
+++ b/lld/test/ELF/riscv-tlsdesc-gd-mixed.s
@@ -0,0 +1,26 @@
+# REQUIRES: riscv
+# RUN: llvm-mc -filetype=obj -triple=riscv64 %s -o %t.o
+# RUN: ld.lld -shared %t.o -o %t.so
+# RUN: llvm-readobj -r %t.so | FileCheck %s --check-prefix=RELA
+
+## Both TLSDESC and DTPMOD64/DTPREL64 should be present.
+# RELA: .rela.dyn {
+# RELA-NEXT: 0x[[#%X,ADDR:]] R_RISCV_TLSDESC a 0x0
+# RELA-NEXT: 0x[[#ADDR+16]] R_RISCV_TLS_DTPMOD64 a 0x0
+# RELA-NEXT: 0x[[#ADDR+24]] R_RISCV_TLS_DTPREL64 a 0x0
+# RELA-NEXT: }
+
+ la.tls.gd a0,a
+ call __tls_get_addr@plt
+
+.Ltlsdesc_hi0:
+ auipc a2, %tlsdesc_hi(a)
+ ld a3, %tlsdesc_load_lo(.Ltlsdesc_hi0)(a2)
+ addi a0, a2, %tlsdesc_add_lo(.Ltlsdesc_hi0)
+ jalr t0, 0(a3), %tlsdesc_call(.Ltlsdesc_hi0)
+
+.section .tbss,"awT",@nobits
+.globl a
+.zero 8
+a:
+.zero 4
diff --git a/lld/test/ELF/riscv-tlsdesc-relax.s b/lld/test/ELF/riscv-tlsdesc-relax.s
new file mode 100644
index 00000000000000..54cfac810990f6
--- /dev/null
+++ b/lld/test/ELF/riscv-tlsdesc-relax.s
@@ -0,0 +1,125 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+c,+relax a.s -o a.64.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+c,+relax c.s -o c.64.o
+# RUN: ld.lld -shared -soname=c.64.so c.64.o -o c.64.so
+
+# RUN: ld.lld -shared -z now a.64.o c.64.o -o a.64.so -z separate-code
+# RUN: llvm-objdump --no-show-raw-insn -M no-aliases -h -d a.64.so | FileCheck %s --check-prefix=GD64
+
+# RUN: ld.lld -e 0 -z now a.64.o c.64.o -o a.64.le -z separate-code
+# RUN: llvm-objdump --no-show-raw-insn -M no-aliases -h -d a.64.le | FileCheck %s --check-prefix=LE64
+
+# RUN: ld.lld -e 0 -z now a.64.o c.64.so -o a.64.ie -z separate-code
+# RUN: llvm-objdump --no-show-raw-insn -M no-aliases -h -d a.64.ie | FileCheck %s --check-prefix=IE64
+
+# GD64: .got 00000018 00000000000020c0
+# GD64-LABEL: <_start>:
+# GD64-NEXT: jal {{.*}} <foo>
+# GD64-LABEL: <foo>:
+## &.got[c]-. = 0x20c0+8 - 0x1004 = 0x10c4
+# GD64: 1004: auipc a2, 0x1
+# GD64-NEXT: ld a3, 0xc4(a2)
+# GD64-NEXT: addi a0, a2, 0xc4
+# GD64-NEXT: jalr t0, 0x0(a3)
+# GD64-NEXT: c.add a0, tp
+# GD64-NEXT: jal {{.*}} <foo>
+# GD64-NEXT: auipc a4, 0x1
+# GD64-NEXT: ld a5, 0xae(a4)
+# GD64-NEXT: addi a0, a4, 0xae
+# GD64-NEXT: jalr t0, 0x0(a5)
+# GD64-NEXT: c.add a0, tp
+
+# LE64-LABEL: <_start>:
+# LE64-NEXT: jal {{.*}} <foo>
+# LE64-LABEL: <foo>:
+# LE64-NEXT: 11004: lui a0, 0x0
+# LE64-NEXT: addi a0, zero, 0xc
+# LE64-NEXT: c.add a0, tp
+# LE64-NEXT: jal {{.*}} <foo>
+# LE64-NEXT: addi zero, zero, 0x0
+# LE64-NEXT: lui a0, 0x0
+# LE64-NEXT: addi a0, zero, 0xc
+# LE64-NEXT: c.add a0, tp
+# LE64-NEXT: addi zero, zero, 0x0
+# LE64-NEXT: lui a0, 0x0
+# LE64-NEXT: addi a0, zero, 0xc
+# LE64-NEXT: c.add a0, tp
+
+# IE64: .got 00000010 00000000000120e0
+# IE64-LABEL: <_start>:
+# IE64-NEXT: jal {{.*}} <foo>
+# IE64-LABEL: <foo>:
+## &.got[c]-. = 0x120e0+8 - 0x11004 = 0x10e4
+# IE64-NEXT: 11004: auipc a0, 0x1
+# IE64-NEXT: ld a0, 0xe4(a0)
+# IE64-NEXT: c.add a0, tp
+# IE64-NEXT: jal {{.*}} <foo>
+# IE64-NEXT: addi zero, zero, 0x0
+## &.got[c]-. = 0x120e0+8 - 0x11016 = 0x10d2
+# IE64-NEXT: 11016: auipc a0, 0x1
+# IE64-NEXT: ld a0, 0xd2(a0)
+# IE64-NEXT: c.add a0, tp
+# IE64-NEXT: addi zero, zero, 0x0
+## &.got[c]-. = 0x120e0+8 - 0x11024 = 0x10c4
+# IE64-NEXT: 11024: auipc a0, 0x1
+# IE64-NEXT: ld a0, 0xc4(a0)
+# IE64-NEXT: c.add a0, tp
+
+#--- a.s
+.globl _start
+_start:
+.balign 16
+ call foo
+
+foo:
+.Ltlsdesc_hi0:
+.option norelax
+## All 4 instructions have an R_RISCV_RELAX.
+ auipc a2, %tlsdesc_hi(c)
+ .reloc .-4, R_RISCV_RELAX, 0
+ ld a3, %tlsdesc_load_lo(.Ltlsdesc_hi0)(a2)
+ .reloc .-4, R_RISCV_RELAX, 0
+ addi a0, a2, %tlsdesc_add_lo(.Ltlsdesc_hi0)
+ .reloc .-4, R_RISCV_RELAX, 0
+ jalr t0, 0(a3), %tlsdesc_call(.Ltlsdesc_hi0)
+ .reloc .-4, R_RISCV_RELAX, 0
+ add a0, a0, tp
+.option relax
+
+ call foo
+
+.Ltlsdesc_hi1:
+.option norelax
+## LD has an R_RISCV_RELAX.
+ auipc a4, %tlsdesc_hi(c)
+ ld a5, %tlsdesc_load_lo(.Ltlsdesc_hi1)(a4)
+ .reloc .-4, R_RISCV_RELAX, 0
+ addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi1)
+ jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi1)
+ add a0, a0, tp
+.option relax
+
+.Ltlsdesc_hi2:
+.option norelax
+## AUIPC has an R_RISCV_RELAX.
+ auipc a6, %tlsdesc_hi(c)
+ .reloc .-4, R_RISCV_RELAX, 0
+ ld a7, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a6)
+ addi a0, a6, %tlsdesc_add_lo(.Ltlsdesc_hi2)
+ jalr t0, 0(a7), %tlsdesc_call(.Ltlsdesc_hi2)
+ add a0, a0, tp
+.option relax
+
+.section .tbss
+.globl a
+.zero 8
+a:
+.zero 3
+b:
+.zero 1
+
+#--- c.s
+.tbss
+.globl c
+c: .zero 4
diff --git a/lld/test/ELF/riscv-tlsdesc.s b/lld/test/ELF/riscv-tlsdesc.s
new file mode 100644
index 00000000000000..1759c6cc25583e
--- /dev/null
+++ b/lld/test/ELF/riscv-tlsdesc.s
@@ -0,0 +1,192 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=riscv64 a.s -o a.64.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 c.s -o c.64.o
+# RUN: ld.lld -shared -soname=c.64.so c.64.o -o c.64.so
+# RUN: llvm-mc -filetype=obj -triple=riscv32 --defsym ELF32=1 a.s -o a.32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv32 --defsym ELF32=1 c.s -o c.32.o
+# RUN: ld.lld -shared -soname=c.32.so c.32.o -o c.32.so
+
+# RUN: ld.lld -shared -z now a.64.o c.64.o -o a.64.so
+# RUN: llvm-readobj -r -x .got a.64.so | FileCheck --check-prefix=GD64-RELA %s
+# RUN: llvm-objdump --no-show-raw-insn -M no-aliases -h -d a.64.so | FileCheck %s --check-prefix=GD64
+
+# RUN: ld.lld -shared -z now a.64.o c.64.o -o rel.64.so -z rel
+# RUN: llvm-readobj -r -x .got rel.64.so | FileCheck --check-prefix=GD64-REL %s
+
+# RUN: ld.lld -e 0 -z now a.64.o c.64.o -o a.64.le
+# RUN: llvm-readelf -r a.64.le | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-objdump --no-show-raw-insn -M no-aliases -h -d a.64.le | FileCheck %s --check-prefix=LE64
+
+# RUN: ld.lld -e 0 -z now a.64.o c.64.so -o a.64.ie
+# RUN: llvm-readobj -r a.64.ie | FileCheck --check-prefix=IE64-RELA %s
+# RUN: llvm-objdump --no-show-raw-insn -M no-aliases -h -d a.64.ie | FileCheck %s --check-prefix=IE64
+
+## 32-bit code is mostly the same. We only test a few variants. The IE optimization uses the LW instruction.
+
+# RUN: ld.lld -shared -z now a.32.o c.32.o -o rel.32.so -z rel
+# RUN: llvm-readobj -r -x .got rel.32.so | FileCheck --check-prefix=GD32-REL %s
+# RUN: ld.lld -e 0 -z now a.32.o c.32.so -o a.32.ie
+# RUN: llvm-objdump --no-show-raw-insn -M no-aliases -h -d a.32.ie | FileCheck %s --check-prefix=IE32
+
+# GD64-RELA: .rela.dyn {
+# GD64-RELA-NEXT: 0x2408 R_RISCV_TLSDESC - 0x7FF
+# GD64-RELA-NEXT: 0x23E8 R_RISCV_TLSDESC a 0x0
+# GD64-RELA-NEXT: 0x23F8 R_RISCV_TLSDESC c 0x0
+# GD64-RELA-NEXT: }
+# GD64-RELA: Hex dump of section '.got':
+# GD64-RELA-NEXT: 0x000023e0 20230000 00000000 00000000 00000000 #
+# GD64-RELA-NEXT: 0x000023f0 00000000 00000000 00000000 00000000 .
+
+# GD64-REL: .rel.dyn {
+# GD64-REL-NEXT: 0x23F0 R_RISCV_TLSDESC -
+# GD64-REL-NEXT: 0x23D0 R_RISCV_TLSDESC a
+# GD64-REL-NEXT: 0x23E0 R_RISCV_TLSDESC c
+# GD64-REL-NEXT: }
+# GD64-REL: Hex dump of section '.got':
+# GD64-REL-NEXT: 0x000023c8 08230000 00000000 00000000 00000000 .
+# GD64-REL-NEXT: 0x000023d8 00000000 00000000 00000000 00000000 .
+# GD64-REL-NEXT: 0x000023e8 00000000 00000000 00000000 00000000 .
+# GD64-REL-NEXT: 0x000023f8 ff070000 00000000 .
+
+# GD32-REL: .rel.dyn {
+# GD32-REL-NEXT: 0x2274 R_RISCV_TLSDESC -
+# GD32-REL-NEXT: 0x2264 R_RISCV_TLSDESC a
+# GD32-REL-NEXT: 0x226C R_RISCV_TLSDESC c
+# GD32-REL-NEXT: }
+# GD32-REL: Hex dump of section '.got':
+# GD32-REL-NEXT: 0x00002260 00220000 00000000 00000000 00000000 .
+# GD32-REL-NEXT: 0x00002270 00000000 00000000 ff070000 .
+
+# GD64: .got 00000038 00000000000023e0
+
+## &.got[a]-. = 0x23e0+8 - 0x12e0 = 0x1108
+# GD64: 12e0: auipc a0, 0x1
+# GD64-NEXT: ld a1, 0x108(a0)
+# GD64-NEXT: addi a0, a0, 0x108
+# GD64-NEXT: jalr t0, 0x0(a1)
+# GD64-NEXT: add a0, a0, tp
+
+## &.got[b]-. = 0x23e0+40 - 0x12f4 = 0x1114
+# GD64-NEXT: 12f4: auipc a2, 0x1
+# GD64-NEXT: ld a3, 0x114(a2)
+# GD64-NEXT: addi a0, a2, 0x114
+# GD64-NEXT: jalr t0, 0x0(a3)
+# GD64-NEXT: add a0, a0, tp
+
+## &.got[c]-. = 0x23e0+24 - 0x1308 = 0x10f0
+# GD64-NEXT: 1308: auipc a4, 0x1
+# GD64-NEXT: ld a5, 0xf0(a4)
+# GD64-NEXT: addi a0, a4, 0xf0
+# GD64-NEXT: jalr t0, 0x0(a5)
+# GD64-NEXT: add a0, a0, tp
+
+# NOREL: no relocations
+
+## st_value(a) = 8
+# LE64-LABEL: <.text>:
+# LE64-NEXT: addi zero, zero, 0x0
+# LE64-NEXT: addi...
[truncated]
|
Runtime tests using musl passed:) https://gist.github.com/MaskRay/7ad19393fdb0834540db6e7b6b02fa56 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the patch. My other version had missed a few important details, especially the TLS optimizations.
I really just have the 2 small questions inline, and otherwise LGTM. Its probably a good idea if one of the RISC-V owners double checks that though.
lld/ELF/Arch/RISCV.cpp
Outdated
// For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] : | ||
// 0). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] : | |
// 0). | |
// For relocations[i], the actual offset is: | |
// r_offset - (i ? relocDeltas[i-1] : 0). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: I know this is just moved from somewhere else, but maybe this is easier to read?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agree. I have made the change locally and will update the PR when a larger update is needed.
lld/test/ELF/riscv-tlsdesc-relax.s
Outdated
auipc a2, %tlsdesc_hi(c) | ||
.reloc .-4, R_RISCV_RELAX, 0 | ||
ld a3, %tlsdesc_load_lo(.Ltlsdesc_hi0)(a2) | ||
.reloc .-4, R_RISCV_RELAX, 0 | ||
addi a0, a2, %tlsdesc_add_lo(.Ltlsdesc_hi0) | ||
.reloc .-4, R_RISCV_RELAX, 0 | ||
jalr t0, 0(a3), %tlsdesc_call(.Ltlsdesc_hi0) | ||
.reloc .-4, R_RISCV_RELAX, 0 | ||
add a0, a0, tp |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to test cases where another instruction is interleaved? The codegen is currently written to allow for that. I think the replacement and relaxations happen in 2 different phases, so my understanding is that should just work, but I'm not completely sure.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for noticing this. i386-tlsdesc-gd.s
has movl %edx, %ebx # GCC -O0 may add an extra insn in between.
that I forgot to copy here. Done.
To support an unrelated instruction, we need more code:
+ // For HI20/LOAD_LO12, disable NOP conversion in the presence of
+ // R_RISCV_RELAX, in case an unrelated instruction follows the current
+ // instruction.
+ if ((rel.type == R_RISCV_TLSDESC_HI20 ||
+ rel.type == R_RISCV_TLSDESC_LOAD_LO12) &&
+ i + 1 != relocs.size() && relocs[i + 1].type == R_RISCV_RELAX)
+ continue;
} | ||
} | ||
|
||
static void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like it handles the long form, but there is a short form that can be used when the offset from TP is w/in 2KB. Is that something we need to handle?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for catching this. Implemented the short form.
Created using spr 1.3.4
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM on my end. Thanks for all the help on this.
Created using spr 1.3.4
lld/ELF/Arch/RISCV.cpp
Outdated
if ((rel.type == R_RISCV_TLSDESC_HI20 || | ||
rel.type == R_RISCV_TLSDESC_LOAD_LO12 || | ||
(rel.type == R_RISCV_TLSDESC_ADD_LO12 && isToLe && !hi20(val))) && | ||
i + 1 != relocs.size() && relocs[i + 1].type == R_RISCV_RELAX) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a pretty complicated condition ... I know its a one off, but do you think it makes sense to use a helper, just for the readability aspect? Maybe canReplaceTlLSDESCWithNop()
or isTLSDESCRelocElegibleForNop()
?
It may make sense to at least use a helper for
!(i + 1 != relocs.size() && relocs[i + 1].type == R_RISCV_RELAX
since its used another time.
I'm fine either way, since this is a style choice(and is really a nit), but I think it would be easier to understand of some of the complexity was abstracted away.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am thinking of a simplification where I only check whether R_RISCV_TLSDESC_HI20 has an associated R_RISCV_RELAX. If yes, apply relaxation whether or not the following 3 instructions has an associated R_RISCV_RELAX.
Then, I just use another variable to hold "whether there is R_RISCV_RELAX" and arguably the straight line code will be more readable than introducing a function call.
On the LLVM side, relaxation can be enabled by adding R_RISCV_RELAX to just the first instruction, decreasing the size bloat (sizeof(Elf64_Rela) = 24). There is not what the spec says, but is likely more practical.
Since lld ignores redundant R_RISCV_RELAX, whether LLVM wants to be more "conformant" would be irrelevant to what lld tries to do here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am thinking of a simplification where I only check whether R_RISCV_TLSDESC_HI20 has an associated R_RISCV_RELAX. If yes, apply relaxation whether or not the following 3 instructions has an associated R_RISCV_RELAX.
Then, I just use another variable to hold "whether there is R_RISCV_RELAX" and arguably the straight line code will be more readable than introducing a function call.
That sounds like a nice approach.
On the LLVM side, relaxation can be enabled by adding R_RISCV_RELAX to just the first instruction, decreasing the size bloat (sizeof(Elf64_Rela) = 24). There is not what the spec says, but is likely more practical. Since lld ignores redundant R_RISCV_RELAX, whether LLVM wants to be more "conformant" would be irrelevant to what lld tries to do here.
Noted. Should we bring this up with the psABI?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That sounds like a nice approach.
Thanks. Adopted this approach:)
Noted. Should we bring this up with the psABI?
…C_HI20 Created using spr 1.3.4
Created using spr 1.3.4
""" Hmm. rebase + edit: merged in 1117fdd |
Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: riscv-non-isa/riscv-elf-psabi-doc#373 Reviewed By: ilovepi Pull Request: #79239
Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: riscv-non-isa/riscv-elf-psabi-doc#373 Reviewed By: ilovepi Pull Request: llvm#79239 (cherry picked from commit 1117fdd)
Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: riscv-non-isa/riscv-elf-psabi-doc#373 Reviewed By: ilovepi Pull Request: llvm#79239 (cherry picked from commit 1117fdd)
Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: riscv-non-isa/riscv-elf-psabi-doc#373 Reviewed By: ilovepi Pull Request: #79239 (cherry picked from commit 1117fdd)
Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: riscv-non-isa/riscv-elf-psabi-doc#373 Reviewed By: ilovepi Pull Request: llvm#79239 (cherry picked from commit 1117fdd)
Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: riscv-non-isa/riscv-elf-psabi-doc#373 Reviewed By: ilovepi Pull Request: llvm#79239 (cherry picked from commit 1117fdd)
Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: riscv-non-isa/riscv-elf-psabi-doc#373 Reviewed By: ilovepi Pull Request: llvm#79239 (cherry picked from commit 1117fdd)
Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: riscv-non-isa/riscv-elf-psabi-doc#373 Reviewed By: ilovepi Pull Request: llvm#79239 (cherry picked from commit 1117fdd)
Created using spr 1.3.5-bogner
See the comment in handleTlsRelocation. For TLSDESC=>IE (the TLS symbol is defined in another DSO), R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} referencing a non-preemptible label uses the `R_RELAX_TLS_GD_TO_LE` code path. If there is no TLS section, `getTlsTpOffset` will be called with null `Out::tlsPhdr`, leading to a null pointer dereference. Since the return value is used by `RISCV::relocateAlloc` and ignored there, just return 0. LoongArch TLSDESC doesn't use STT_NOTYPE labels. The `if (..) return 0;` is a no-op for LoongArch. This patch is a follow-up to #79239 and fixes some comments. Pull Request: #98569
See the comment in handleTlsRelocation. For TLSDESC=>IE (the TLS symbol is defined in another DSO), R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} referencing a non-preemptible label uses the `R_RELAX_TLS_GD_TO_LE` code path. If there is no TLS section, `getTlsTpOffset` will be called with null `Out::tlsPhdr`, leading to a null pointer dereference. Since the return value is used by `RISCV::relocateAlloc` and ignored there, just return 0. LoongArch TLSDESC doesn't use STT_NOTYPE labels. The `if (..) return 0;` is a no-op for LoongArch. This patch is a follow-up to llvm#79239 and fixes some comments. Pull Request: llvm#98569
LoongArch does not yet implement transition from TLSDESC to LE/IE, so TLSDESC dynamic relocation needs to be generated for each desc, which is ultimately handled by the dynamic linker. The test cases reference RISC-V: llvm#79239 Reviewed By: MaskRay, SixWeining Pull Request: llvm#94451
Support
R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL.
LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20
location, which requires special handling. We save the value of HI20 to
be reused. Two interleaved TLSDESC code sequences, which compilers do
not generate, are unsupported.
For -no-pie/-pie links, TLSDESC to initial-exec or local-exec
optimizations are eligible. Implement the relevant hooks
(R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions
are converted to NOP while the latter two are converted to a GOT load or
a lui+addi.
The first two instructions, which would be converted to NOP, are removed
instead in the presence of relaxation. Relaxation is eligible as long as
the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX,
regardless of whether the following instructions have a R_RISCV_RELAX.
In addition, for the TLSDESC to LE optimization (
lui a0,<hi20>; addi a0,a0,<lo12>
),lui
can be removed (i.e. use the short form) if hi20 is 0.The implementation carefully ensures that an instruction unrelated to
the current TLSDESC code sequence, if immediately follows a removable
instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not
converted to NOP.
riscv64-tlsdesc.s
is inspired byi386-tlsdesc-gd.s
(https://reviews.llvm.org/D112582).riscv64-tlsdesc-relax.s
tests linker relaxation.riscv-tlsdesc-gd-mixed.s
is inspired byx86-64-tlsdesc-gd-mixed.s
(https://reviews.llvm.org/D116900).Link: riscv-non-isa/riscv-elf-psabi-doc#373