diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 488f4803b2cb4..dc3cb4ee9d6b7 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -50,6 +50,7 @@ class X86_64 : public TargetInfo {
   bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
                              InputSection *nextIS) const override;
   bool relaxOnce(int pass) const override;
+  void relaxCFIJumpTables() const override;
   void applyBranchToBranchOpt() const override;
 
 private:
@@ -317,6 +318,170 @@ bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,
   return true;
 }
 
+void X86_64::relaxCFIJumpTables() const {
+  // Relax CFI jump tables.
+  // - Split jump table into pieces and place target functions inside the jump
+  //   table if small enough.
+  // - Move jump table before last called function and delete last branch
+  //   instruction.
+  std::map<InputSection *, std::vector<InputSection *>> sectionReplacements;
+  SmallVector<InputSection *, 0> storage;
+  for (OutputSection *osec : ctx.outputSections) {
+    if (!(osec->flags & SHF_EXECINSTR))
+      continue;
+    for (InputSection *sec : getInputSections(*osec, storage)) {
+      if (sec->type != SHT_LLVM_CFI_JUMP_TABLE || sec->entsize == 0 ||
+          sec->size % sec->entsize != 0)
+        continue;
+
+      // We're going to replace the jump table with this list of sections. This
+      // list will be made up of slices of the original section and function
+      // bodies that were moved into the jump table.
+      std::vector<InputSection *> replacements;
+
+      // First, push the original jump table section. This is only so that it
+      // can act as a relocation target. Later on, we will set the size of the
+      // jump table section to 0 so that the slices and moved function bodies
+      // become the actual relocation targets.
+      replacements.push_back(sec);
+
+      // Add the slice [begin, end) of the original section to the replacement
+      // list. [rbegin, rend) is the slice of the relocation list that covers
+      // [begin, end).
+      auto addSectionSlice = [&](size_t begin, size_t end, Relocation *rbegin,
+                                 Relocation *rend) {
+        auto *slice = make<InputSection>(
+            sec->file, sec->name, sec->type, sec->flags, sec->entsize,
+            sec->entsize,
+            sec->contentMaybeDecompress().slice(begin, end - begin));
+        for (const Relocation &r : ArrayRef(rbegin, rend)) {
+          slice->relocations.push_back(
+              Relocation{r.expr, r.type, r.offset - begin, r.addend, r.sym});
+        }
+        replacements.push_back(slice);
+      };
+
+      // r is the only relocation in a jump table entry. Figure out whether it
+      // is a branch pointing to the start of a statically known section that
+      // hasn't already been moved while processing a different jump table
+      // section, and if so return it.
+      auto getMovableSection = [&](Relocation &r) -> InputSection * {
+        if (r.type != R_X86_64_PC32 && r.type != R_X86_64_PLT32)
+          return nullptr;
+        auto *sym = dyn_cast_or_null<Defined>(r.sym);
+        if (!sym || sym->isPreemptible || sym->isGnuIFunc() ||
+            sym->value + r.addend != -4ull)
+          return nullptr;
+        auto *target = dyn_cast_or_null<InputSection>(sym->section);
+        if (!target || target->addralign > sec->entsize ||
+            sectionReplacements.count(target))
+          return nullptr;
+        return target;
+      };
+
+      // Figure out the movable section for the last entry. We do this first
+      // because the last entry controls which output section the jump table is
+      // placed into, which affects move eligibility for other sections.
+      auto *lastSec = [&]() -> InputSection * {
+        Relocation *lastReloc = sec->relocs().end();
+        while (lastReloc != sec->relocs().begin() &&
+               (lastReloc - 1)->offset >= sec->size - sec->entsize)
+          --lastReloc;
+        if (lastReloc + 1 != sec->relocs().end())
+          return nullptr;
+        return getMovableSection(*lastReloc);
+      }();
+      OutputSection *targetOutputSec;
+      if (lastSec) {
+        // We've already decided to move the output section so make sure that we
+        // don't try to move it again.
+        sectionReplacements[lastSec] = replacements;
+        targetOutputSec = lastSec->getParent();
+      } else {
+        targetOutputSec = sec->getParent();
+      }
+
+      // Walk the jump table entries other than the last one looking for
+      // sections that are small enough to be moved into the jump table and in
+      // the same section as the jump table's destination.
+      size_t begin = 0;
+      Relocation *rbegin = sec->relocs().begin();
+      size_t cur = begin;
+      Relocation *rcur = rbegin;
+      while (cur != sec->size - sec->entsize) {
+        size_t next = cur + sec->entsize;
+        Relocation *rnext = rcur;
+        while (rnext != sec->relocs().end() && rnext->offset < next)
+          ++rnext;
+        if (rcur + 1 == rnext) {
+          InputSection *target = getMovableSection(*rcur);
+          if (target && target->size <= sec->entsize &&
+              target->getParent() == targetOutputSec) {
+            // Okay, we found a small enough section. Move it into the jump
+            // table. First add a slice for the unmodified jump table entries
+            // before this one.
+            addSectionSlice(begin, cur, rbegin, rcur);
+            // Add the target to our replacement list, and set the target's
+            // replacement list to the empty list. This removes it from its
+            // original position and adds it here, as well as causing
+            // future getMovableSection() queries to return nullptr.
+            replacements.push_back(target);
+            sectionReplacements[target] = {};
+            begin = next;
+            rbegin = rnext;
+          }
+        }
+        cur = next;
+        rcur = rnext;
+      }
+
+      // Finally, process the last entry. If it is movable, move the entire
+      // jump table behind it and delete the last entry (so that the last
+      // function's body acts as the last jump table entry), otherwise leave the
+      // jump table where it is and keep the last entry.
+      if (lastSec) {
+        addSectionSlice(begin, cur, rbegin, rcur);
+        replacements.push_back(lastSec);
+        sectionReplacements[sec] = {};
+        sectionReplacements[lastSec] = replacements;
+        for (auto *s : replacements)
+          s->parent = lastSec->parent;
+      } else {
+        addSectionSlice(begin, sec->size, rbegin, sec->relocs().end());
+        sectionReplacements[sec] = replacements;
+        for (auto *s : replacements)
+          s->parent = sec->parent;
+      }
+
+      // Everything from the original section has been recreated, so delete the
+      // original contents.
+      sec->relocations.clear();
+      sec->size = 0;
+    }
+  }
+
+  // Now that we have the complete mapping of replacements, go through the input
+  // section lists and apply the replacements.
+  for (OutputSection *osec : ctx.outputSections) {
+    if (!(osec->flags & SHF_EXECINSTR))
+      continue;
+    for (SectionCommand *cmd : osec->commands) {
+      auto *isd = dyn_cast<InputSectionDescription>(cmd);
+      if (!isd)
+        continue;
+      SmallVector<InputSection *, 0> newSections;
+      for (auto *sec : isd->sections) {
+        auto i = sectionReplacements.find(sec);
+        if (i == sectionReplacements.end())
+          newSections.push_back(sec);
+        else
+          newSections.append(i->second.begin(), i->second.end());
+      }
+      isd->sections = std::move(newSections);
+    }
+  }
+}
+
 bool X86_64::relaxOnce(int pass) const {
   uint64_t minVA = UINT64_MAX, maxVA = 0;
   for (OutputSection *osec : ctx.outputSections) {
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 1020dd9f2569e..52110c9452644 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -91,7 +91,8 @@ static bool canMergeToProgbits(Ctx &ctx, unsigned type) {
   return type == SHT_NOBITS || type == SHT_PROGBITS ||
          type == SHT_INIT_ARRAY || type == SHT_PREINIT_ARRAY ||
          type == SHT_FINI_ARRAY || type == SHT_NOTE ||
-         (type == SHT_X86_64_UNWIND && ctx.arg.emachine == EM_X86_64);
+         (type == SHT_X86_64_UNWIND && ctx.arg.emachine == EM_X86_64) ||
+         type == SHT_LLVM_CFI_JUMP_TABLE;
 
 // Record that isec will be placed in the OutputSection. isec does not become
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 4333b032c9d4e..457137540ecc9 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1675,7 +1675,7 @@ void RelocationScanner::scan(Relocs<RelTy> rels) {
   // branch-to-branch optimization.
   if (is_contained({EM_RISCV, EM_LOONGARCH}, ctx.arg.emachine) ||
       (ctx.arg.emachine == EM_PPC64 && sec->name == ".toc") ||
-      ctx.arg.branchToBranch)
+      ctx.arg.branchToBranch || sec->type == SHT_LLVM_CFI_JUMP_TABLE)
     llvm::stable_sort(sec->relocs(),
                       [](const Relocation &lhs, const Relocation &rhs) {
                         return lhs.offset < rhs.offset;
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index 93f15920bfedb..42ad5e9b1cf7e 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -96,6 +96,8 @@ class TargetInfo {
 
   // Do a linker relaxation pass and return true if we changed something.
   virtual bool relaxOnce(int pass) const { return false; }
+  // Relax CFI jump tables if implemented by target.
+  virtual void relaxCFIJumpTables() const {}
   // Do finalize relaxation after collecting relaxation infos.
   virtual void finalizeRelax(int passes) const {}
 
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 15909daf51ab6..abc156cb93bdc 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1528,6 +1528,8 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
   if (ctx.arg.randomizeSectionPadding)
     randomizeSectionPadding(ctx);
 
+  ctx.target->relaxCFIJumpTables();
+
   uint32_t pass = 0, assignPasses = 0;
   for (;;) {
     bool changed = ctx.target->needsThunks
diff --git a/lld/test/ELF/x86_64-relax-jump-tables.s b/lld/test/ELF/x86_64-relax-jump-tables.s
new file mode 100644
index 0000000000000..782d1be655a7d
--- /dev/null
+++ b/lld/test/ELF/x86_64-relax-jump-tables.s
@@ -0,0 +1,181 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+// RUN: ld.lld %t.o -shared -o %t
+// RUN: llvm-objdump -d --show-all-symbols %t | FileCheck %s
+
+// Mostly positive cases, except for f2.
+.section .text.jt1,"ax",@llvm_cfi_jump_table,8
+// Function fits.
+f1:
+jmp f1.cfi
+.balign 8, 0xcc
+
+// Function too large.
+f2:
+jmp f2.cfi
+.balign 8, 0xcc
+
+// Function too large, but may be placed at the end.
+// Because this causes the jump table to move, it is tested below.
+f3:
+jmp f3.cfi
+.balign 8, 0xcc
+
+// Mostly negative cases, except for f4.
+.section .text.jt2,"ax",@llvm_cfi_jump_table,16
+// Function already moved into jt1.
+// CHECK: <f1a>:
+// CHECK-NEXT: jmp {{.*}}
+f1a:
+jmp f1.cfi
+.balign 16, 0xcc
+
+// Function already moved into jt1.
+// CHECK: <f3a>:
+// CHECK-NEXT: jmp {{.*}}
+f3a:
+jmp f3.cfi
+.balign 16, 0xcc
+
+// Function too large for jt1 but small enough for jt2.
+// CHECK: <f4>:
+// CHECK-NEXT: <f4.cfi>:
+// CHECK-NEXT: retq $0x4
+f4:
+jmp f4.cfi
+.balign 16, 0xcc
+
+// Function too large for jt2.
+// CHECK: <f5>:
+// CHECK-NEXT: jmp {{.*}}
+f5:
+jmp f5.cfi
+.balign 16, 0xcc
+
+// Branch target not at start of section.
+// CHECK: <f6>:
+// CHECK-NEXT: jmp {{.*}}
+f6:
+jmp f6.cfi
+.balign 16, 0xcc
+
+// Overaligned section.
+// CHECK: <f7>:
+// CHECK-NEXT: jmp {{.*}}
+f7:
+jmp f7.cfi
+.balign 16, 0xcc
+
+// Branch to IFUNC.
+// CHECK: <f8>:
+// CHECK-NEXT: jmp 0x[[IPLT:[0-9a-f]*]]
+f8:
+jmp f8.cfi
+.balign 16, 0xcc
+
+// Unexpected number of relocations in entry.
+// CHECK: <f9>:
+// CHECK-NEXT: jmp {{.*}}
+// CHECK-NEXT: jmp {{.*}}
+f9:
+jmp f9.cfi
+jmp f9.cfi
+.balign 16, 0xcc
+
+// Branch to different output section.
+f10:
+jmp f10.cfi
+.balign 16, 0xcc
+
+// Branch via PLT to STB_GLOBAL symbol.
+// CHECK: <f11>:
+// CHECK-NEXT: jmp {{.*}}
+f11:
+jmp f11.cfi
+.balign 16, 0xcc
+
+// Invalid jumptable: entsize unset.
+// CHECK: <f12>:
+// CHECK-NEXT: jmp {{.*}}
+.section .text.jt3,"ax",@0x6fff4c0e
+f12:
+jmp f12.cfi
+.balign 8, 0xcc
+
+// Invalid jumptable: size not a multiple of entsize.
+// CHECK: <f13>:
+// CHECK-NEXT: jmp {{.*}}
+.section .text.jt4,"ax",@llvm_cfi_jump_table,8
+f13:
+jmp f13.cfi
+
+// CHECK: <f1>:
+// CHECK-NEXT: <f1.cfi>:
+// CHECK-NEXT: retq $0x1
+.section .text.f1,"ax",@progbits
+f1.cfi:
+ret $1
+
+// CHECK: <f2>:
+// CHECK-NEXT: jmp {{.*}}
+.section .text.f2,"ax",@progbits
+f2.cfi:
+ret $2
+.zero 16
+
+// CHECK: <f3>:
+// CHECK-NEXT: <f3.cfi>:
+// CHECK-NEXT: retq $0x3
+.section .text.f3,"ax",@progbits
+f3.cfi:
+ret $3
+.zero 16
+
+.section .text.f4,"ax",@progbits
+f4.cfi:
+ret $4
+.zero 13
+
+.section .text.f5,"ax",@progbits
+f5.cfi:
+ret $5
+.zero 14
+
+.section .text.f6,"ax",@progbits
+nop
+f6.cfi:
+ret $6
+
+.section .text.f7,"ax",@progbits
+.balign 32
+f7.cfi:
+ret $7
+
+.section .text.f8,"ax",@progbits
+.type f8.cfi,@gnu_indirect_function
+f8.cfi:
+ret $8
+
+.section .text.f9,"ax",@progbits
+f9.cfi:
+ret $9
+
+.section foo,"ax",@progbits
+f10.cfi:
+ret $10
+
+.section .text.f11,"ax",@progbits
+.globl f11.cfi
+f11.cfi:
+ret $11
+
+.section .text.f12,"ax",@progbits
+f12.cfi:
+ret $12
+
+.section .text.f13,"ax",@progbits
+f13.cfi:
+ret $13
+
+// CHECK: <.iplt>:
+// CHECK-NEXT: [[IPLT]]: