Skip to content

Commit abe7943

Browse files
committed
Refactor
1 parent 764f802 commit abe7943

File tree

6 files changed

+141
-144
lines changed

6 files changed

+141
-144
lines changed

elf/arch-ppc32.cc

Lines changed: 66 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,42 @@
1-
// This file supports PowerPC 32-bit ISA. For 64-bit PowerPC, see
1+
// This file implements the PowerPC 32-bit ISA. For 64-bit PowerPC, see
22
// arch-ppc64v1.cpp and arch-ppc64v2.cpp.
33
//
44
// PPC32 is a RISC ISA. It has 32 general-purpose registers (GPRs).
55
// r0, r11 and r12 are reserved for static linkers, so we can use these
6-
// registers in PLTs and range extension thunks.
6+
// registers in PLTs and range extension thunks. In addition to that, it
7+
// has a few special registers. Notable ones are LR which holds a return
8+
// address and CTR which we can use to store a branch target address.
79
//
8-
// Besides GPRs, PowerPC a few special registesr. Notable ones are LR
9-
// which holds a return address and CTR which is used for branching.
10+
// It feels that the PPC32 psABI is unnecessarily complicated at first
11+
// glance, but that mainly stems from the fact that the ISA lacks
12+
// PC-relative load/store instructions. Since machine instructions cannot
13+
// load data relative to its own address, it is not straightforward to
14+
// support position-independent code (PIC) on PPC32.
1015
//
11-
// PowerPC generally lacks PC-relative load/store instructions, so it is
12-
// not straightforward to support position-independent code. A position-
13-
// independent function contains code like this in its prologue to obtain
14-
// its own address
16+
// A position-independent function typically contains the following code
17+
// in its prologue to obtain its own address:
1518
//
16-
// mflr r0 // save the current return address to %r0
17-
// bcl 20, 31, 4 // call the next instruction as if it were a function
18-
// mtlr r12 // save the return address to %r12
19-
// mtlr r0 // restore the original return address
19+
// mflr r0 // save the current return address to %r0
20+
// bcl 20, 31, 4 // call the next instruction as if it were a function
21+
// mflr r12 // save the return address to %r12
22+
// mtlr r0 // restore the original return address
2023
//
21-
// , and then the function computes its .got2 address (or .got2+0x800) to
22-
// %r30. The rule for the %r30 value is complicated, so we essentially
23-
// ignore that part of the ABI. Our PLT and range extension thunks don't
24-
// depend on %r30 value and position-independent by themselves.
24+
// An object file compiled with -fPIC contains a data section named
25+
// `.got2` to store addresses of locally-defined global variables and
26+
// constants. A PIC function usually computes its .got2+0x8000 and sets it
27+
// to %r30. This scheme allows the function to access global objects
28+
// defined in the same input file with a single %r30-relative load/store
29+
// instruction with a 16-bit offset, given that the object file doesn't
30+
// contain more than 16384 global objects (64 KiB of 4-byte .got2 entries).
31+
//
32+
// Since each object file has its own .got2, %r30 refers to different
33+
// places in a merged .got2 for two functions that came from different input
34+
// files. Therefore, %r30 makes sense only within a single function.
35+
//
36+
// Technically, we can reuse a %r30 value in our PLT if we create a PLT
37+
// _for each input file_ (that's what GNU ld seems to be doing), but that
38+
// doesn't seem to be worth the complexity. Our PLT simply doesn't rely on a
39+
// %r30 value.
2540

2641
#include "mold.h"
2742

@@ -42,7 +57,7 @@ static u64 highesta(u64 x) { return (x + 0x8000) >> 48; }
4257
template <>
4358
void write_plt_header(Context<E> &ctx, u8 *buf) {
4459
static const ub32 insn[] = {
45-
// Get the address of this thunk
60+
// Get the address of this PLT section
4661
0x7c08'02a6, // mflr r0
4762
0x429f'0005, // bcl 20, 31, 4
4863
0x7d88'02a6, // 1: mflr r12
@@ -75,25 +90,25 @@ void write_plt_header(Context<E> &ctx, u8 *buf) {
7590
loc[5] |= lo(ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4);
7691
}
7792

93+
static const ub32 plt_entry[] = {
94+
// Get the address of this PLT entry
95+
0x7c08'02a6, // mflr r0
96+
0x429f'0005, // bcl 20, 31, 4
97+
0x7d88'02a6, // mflr r12
98+
0x7c08'03a6, // mtlr r0
99+
100+
// Load an address from the GOT/GOTPLT entry and jump to that address
101+
0x3d6c'0000, // addis r11, r12, OFFSET@higha
102+
0x396b'0000, // addi r11, r11, OFFSET@lo
103+
0x818b'0000, // lwz r12, 0(r11)
104+
0x7d89'03a6, // mtctr r12
105+
0x4e80'0420, // bctr
106+
};
107+
78108
template <>
79109
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
80-
static const ub32 insn[] = {
81-
// Get the address of this PLT entry
82-
0x7c08'02a6, // mflr r0
83-
0x429f'0005, // bcl 20, 31, 4
84-
0x7d88'02a6, // mflr r12
85-
0x7c08'03a6, // mtlr r0
86-
87-
// Load an address from the GOTPLT entry and jump to that address
88-
0x3d6c'0000, // addis r11, r12, OFFSET@higha
89-
0x396b'0000, // addi r11, r11, OFFSET@lo
90-
0x818b'0000, // lwz r12, 0(r11)
91-
0x7d89'03a6, // mtctr r12
92-
0x4e80'0420, // bctr
93-
};
94-
95-
static_assert(E::plt_size == sizeof(insn));
96-
memcpy(buf, insn, sizeof(insn));
110+
static_assert(E::plt_size == sizeof(plt_entry));
111+
memcpy(buf, plt_entry, sizeof(plt_entry));
97112

98113
ub32 *loc = (ub32 *)buf;
99114
i64 offset = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 8;
@@ -103,23 +118,8 @@ void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
103118

104119
template <>
105120
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
106-
static const ub32 insn[] = {
107-
// Get the address of this PLT entry
108-
0x7c08'02a6, // mflr r0
109-
0x429f'0005, // bcl 20, 31, 4
110-
0x7d88'02a6, // mflr r12
111-
0x7c08'03a6, // mtlr r0
112-
113-
// Load an address from the GOT entry and jump to that address
114-
0x3d6c'0000, // addis r11, r12, 0
115-
0x396b'0000, // addi r11, r11, 0
116-
0x818b'0000, // lwz r12, 0(r11)
117-
0x7d89'03a6, // mtctr r12
118-
0x4e80'0420, // bctr
119-
};
120-
121-
static_assert(E::pltgot_size == sizeof(insn));
122-
memcpy(buf, insn, sizeof(insn));
121+
static_assert(E::pltgot_size == sizeof(plt_entry));
122+
memcpy(buf, plt_entry, sizeof(plt_entry));
123123

124124
ub32 *loc = (ub32 *)buf;
125125
i64 offset = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 8;
@@ -133,6 +133,8 @@ void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
133133
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
134134

135135
switch (rel.r_type) {
136+
case R_NONE:
137+
break;
136138
case R_PPC_ADDR32:
137139
*(ub32 *)loc = val;
138140
break;
@@ -153,9 +155,9 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
153155
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
154156
file.reldyn_offset + this->reldyn_offset);
155157

156-
u64 GP = 0;
158+
u64 got2 = 0;
157159
if (file.ppc32_got2)
158-
GP = file.ppc32_got2->output_section->shdr.sh_addr + file.ppc32_got2->offset;
160+
got2 = file.ppc32_got2->output_section->shdr.sh_addr + file.ppc32_got2->offset;
159161

160162
for (i64 i = 0; i < rels.size(); i++) {
161163
const ElfRel<E> &rel = rels[i];
@@ -206,24 +208,24 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
206208
*(ub32 *)loc |= bits(S + A, 31, 2) << 2;
207209
break;
208210
case R_PPC_PLT16_LO:
209-
*(ub16 *)loc = lo(G + GOT - GP - A);
211+
*(ub16 *)loc = lo(G + GOT - got2 - A);
210212
break;
211213
case R_PPC_PLT16_HI:
212-
*(ub16 *)loc = hi(G + GOT - GP - A);
214+
*(ub16 *)loc = hi(G + GOT - got2 - A);
213215
break;
214216
case R_PPC_PLT16_HA:
215-
*(ub16 *)loc = ha(G + GOT - GP - A);
217+
*(ub16 *)loc = ha(G + GOT - got2 - A);
216218
break;
217219
case R_PPC_PLT32:
218-
*(ub32 *)loc = G + GOT - GP - A;
220+
*(ub32 *)loc = G + GOT - got2 - A;
219221
break;
220222
case R_PPC_REL14:
221223
*(ub32 *)loc &= 0b1111'1111'1111'1111'0000'0000'0000'0011;
222224
*(ub32 *)loc |= bits(S + A - P, 15, 2) << 2;
223225
break;
224226
case R_PPC_REL16:
225227
case R_PPC_REL16_LO:
226-
*(ub16 *)loc = S + A - P;
228+
*(ub16 *)loc = lo(S + A - P);
227229
break;
228230
case R_PPC_REL16_HI:
229231
*(ub16 *)loc = hi(S + A - P);
@@ -256,7 +258,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
256258
break;
257259
case R_PPC_GOT16:
258260
case R_PPC_GOT16_LO:
259-
*(ub16 *)loc = G + A;
261+
*(ub16 *)loc = lo(G + A);
260262
break;
261263
case R_PPC_GOT16_HI:
262264
*(ub16 *)loc = hi(G + A);
@@ -440,21 +442,6 @@ template <>
440442
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
441443
u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;
442444

443-
static const ub32 plt_thunk[] = {
444-
// Get this thunk's address
445-
0x7c08'02a6, // mflr r0
446-
0x429f'0005, // bcl 20, 31, 4
447-
0x7d88'02a6, // mflr r12
448-
0x7c08'03a6, // mtlr r0
449-
450-
// Load an address from the GOT/GOTPLT entry and jump to that address
451-
0x3d6c'0000, // addis r11, r12, OFFSET@higha
452-
0x396b'0000, // addi r11, r11, OFFSET@lo
453-
0x818b'0000, // lwz r12, 0(r11)
454-
0x7d89'03a6, // mtctr r12
455-
0x4e80'0420, // bctr
456-
};
457-
458445
static const ub32 local_thunk[] = {
459446
// Get this thunk's address
460447
0x7c08'02a6, // mflr r0
@@ -470,21 +457,17 @@ void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
470457
0x6000'0000, // nop
471458
};
472459

473-
static_assert(E::thunk_size == sizeof(plt_thunk));
460+
static_assert(E::thunk_size == sizeof(plt_entry));
474461
static_assert(E::thunk_size == sizeof(local_thunk));
475462

476463
for (i64 i = 0; i < symbols.size(); i++) {
477464
ub32 *loc = (ub32 *)(buf + i * E::thunk_size);
478465
Symbol<E> &sym = *symbols[i];
479466

480-
if (sym.has_got(ctx)) {
481-
memcpy(loc, plt_thunk, sizeof(plt_thunk));
482-
i64 val = sym.get_got_addr(ctx) - get_addr(i) - 8;
483-
loc[4] |= higha(val);
484-
loc[5] |= lo(val);
485-
} else if (sym.has_plt(ctx)) {
486-
memcpy(loc, plt_thunk, sizeof(plt_thunk));
487-
i64 val = sym.get_gotplt_addr(ctx) - get_addr(i) - 8;
467+
if (sym.has_plt(ctx)) {
468+
memcpy(loc, plt_entry, sizeof(plt_entry));
469+
u64 got = sym.has_got(ctx) ? sym.get_got_addr(ctx) : sym.get_gotplt_addr(ctx);
470+
i64 val = got - get_addr(i) - 8;
488471
loc[4] |= higha(val);
489472
loc[5] |= lo(val);
490473
} else {

0 commit comments

Comments
 (0)