1
- // This file supports PowerPC 32-bit ISA. For 64-bit PowerPC, see
1
+ // This file implements the PowerPC 32-bit ISA. For 64-bit PowerPC, see
2
2
// arch-ppc64v1.cpp and arch-ppc64v2.cpp.
3
3
//
4
4
// PPC32 is a RISC ISA. It has 32 general-purpose registers (GPRs).
5
5
// r0, r11 and r12 are reserved for static linkers, so we can use these
6
- // registers in PLTs and range extension thunks.
6
+ // registers in PLTs and range extension thunks. In addition to that, it
7
+ // has a few special registers. Notable ones are LR which holds a return
8
+ // address and CTR which we can use to store a branch target address.
7
9
//
8
- // Besides GPRs, PowerPC a few special registesr. Notable ones are LR
9
- // which holds a return address and CTR which is used for branching.
10
+ // It feels that the PPC32 psABI is unnecessarily complicated at first
11
+ // glance, but that mainly stems from the fact that the ISA lacks
12
+ // PC-relative load/store instructions. Since machine instructions cannot
13
+ // load data relative to its own address, it is not straightforward to
14
+ // support position-independent code (PIC) on PPC32.
10
15
//
11
- // PowerPC generally lacks PC-relative load/store instructions, so it is
12
- // not straightforward to support position-independent code. A position-
13
- // independent function contains code like this in its prologue to obtain
14
- // its own address
16
+ // A position-independent function typically contains the following code
17
+ // in its prologue to obtain its own address:
15
18
//
16
- // mflr r0 // save the current return address to %r0
17
- // bcl 20, 31, 4 // call the next instruction as if it were a function
18
- // mtlr r12 // save the return address to %r12
19
- // mtlr r0 // restore the original return address
19
+ // mflr r0 // save the current return address to %r0
20
+ // bcl 20, 31, 4 // call the next instruction as if it were a function
21
+ // mflr r12 // save the return address to %r12
22
+ // mtlr r0 // restore the original return address
20
23
//
21
- // , and then the function computes its .got2 address (or .got2+0x800) to
22
- // %r30. The rule for the %r30 value is complicated, so we essentially
23
- // ignore that part of the ABI. Our PLT and range extension thunks don't
24
- // depend on %r30 value and position-independent by themselves.
24
+ // An object file compiled with -fPIC contains a data section named
25
+ // `.got2` to store addresses of locally-defined global variables and
26
+ // constants. A PIC function usually computes its .got2+0x8000 and sets it
27
+ // to %r30. This scheme allows the function to access global objects
28
+ // defined in the same input file with a single %r30-relative load/store
29
+ // instruction with a 16-bit offset, given that the object file doesn't
30
+ // contain more than 16384 (= 64 KiB / 4 bytes) global objects.
31
+ //
32
+ // Since each object file has its own .got2, %r30 refers to different
33
+ // places in a merged .got2 for two functions that came from different input
34
+ // files. Therefore, %r30 makes sense only within a single function.
35
+ //
36
+ // Technically, we can reuse a %r30 value in our PLT if we create a PLT
37
+ // _for each input file_ (that's what GNU ld seems to be doing), but that
38
+ // doesn't seem to be worth the complexity. Our PLT simply doesn't rely on a
39
+ // %r30 value.
25
40
26
41
#include " mold.h"
27
42
@@ -42,7 +57,7 @@ static u64 highesta(u64 x) { return (x + 0x8000) >> 48; }
42
57
template <>
43
58
void write_plt_header (Context<E> &ctx, u8 *buf) {
44
59
static const ub32 insn[] = {
45
- // Get the address of this thunk
60
+ // Get the address of this PLT section
46
61
0x7c08'02a6 , // mflr r0
47
62
0x429f'0005 , // bcl 20, 31, 4
48
63
0x7d88'02a6 , // 1: mflr r12
@@ -75,25 +90,25 @@ void write_plt_header(Context<E> &ctx, u8 *buf) {
75
90
loc[5 ] |= lo (ctx.gotplt ->shdr .sh_addr - ctx.plt ->shdr .sh_addr + 4 );
76
91
}
77
92
93
+ static const ub32 plt_entry[] = {
94
+ // Get the address of this PLT entry. bcl "calls" the instruction that follows it, so the mflr after it reads back that instruction's own address.
95
+ 0x7c08'02a6 , // mflr r0 (save the caller's return address in r0)
96
+ 0x429f'0005 , // bcl 20, 31, 4 (branch-and-link to the next instruction)
97
+ 0x7d88'02a6 , // mflr r12 (r12 = return address set by bcl, i.e. this instruction)
98
+ 0x7c08'03a6 , // mtlr r0 (restore the caller's return address)
99
+
100
+
 // Load an address from the GOT/GOTPLT entry and jump to that address. The OFFSET fields are zero in this template; code that copies it (e.g. RangeExtensionThunk::copy_buf) ORs the real values into loc[4] and loc[5].
101
+ 0x3d6c'0000 , // addis r11, r12, OFFSET@higha (word 4: high part of the offset from r12)
102
+ 0x396b'0000 , // addi r11, r11, OFFSET@lo (word 5: low part of the offset)
103
+ 0x818b'0000 , // lwz r12, 0(r11) (load the target address from the slot)
104
+ 0x7d89'03a6 , // mtctr r12 (put the target address in CTR)
105
+ 0x4e80'0420 , // bctr (jump to the address in CTR)
106
+ };
107
+
78
108
template <>
79
109
void write_plt_entry (Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
80
- static const ub32 insn[] = {
81
- // Get the address of this PLT entry
82
- 0x7c08'02a6 , // mflr r0
83
- 0x429f'0005 , // bcl 20, 31, 4
84
- 0x7d88'02a6 , // mflr r12
85
- 0x7c08'03a6 , // mtlr r0
86
-
87
- // Load an address from the GOTPLT entry and jump to that address
88
- 0x3d6c'0000 , // addis r11, r12, OFFSET@higha
89
- 0x396b'0000 , // addi r11, r11, OFFSET@lo
90
- 0x818b'0000 , // lwz r12, 0(r11)
91
- 0x7d89'03a6 , // mtctr r12
92
- 0x4e80'0420 , // bctr
93
- };
94
-
95
- static_assert (E::plt_size == sizeof (insn));
96
- memcpy (buf, insn, sizeof (insn));
110
+ static_assert (E::plt_size == sizeof (plt_entry));
111
+ memcpy (buf, plt_entry, sizeof (plt_entry));
97
112
98
113
ub32 *loc = (ub32 *)buf;
99
114
i64 offset = sym.get_gotplt_addr (ctx) - sym.get_plt_addr (ctx) - 8 ;
@@ -103,23 +118,8 @@ void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
103
118
104
119
template <>
105
120
void write_pltgot_entry (Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
106
- static const ub32 insn[] = {
107
- // Get the address of this PLT entry
108
- 0x7c08'02a6 , // mflr r0
109
- 0x429f'0005 , // bcl 20, 31, 4
110
- 0x7d88'02a6 , // mflr r12
111
- 0x7c08'03a6 , // mtlr r0
112
-
113
- // Load an address from the GOT entry and jump to that address
114
- 0x3d6c'0000 , // addis r11, r12, 0
115
- 0x396b'0000 , // addi r11, r11, 0
116
- 0x818b'0000 , // lwz r12, 0(r11)
117
- 0x7d89'03a6 , // mtctr r12
118
- 0x4e80'0420 , // bctr
119
- };
120
-
121
- static_assert (E::pltgot_size == sizeof (insn));
122
- memcpy (buf, insn, sizeof (insn));
121
+ static_assert (E::pltgot_size == sizeof (plt_entry));
122
+ memcpy (buf, plt_entry, sizeof (plt_entry));
123
123
124
124
ub32 *loc = (ub32 *)buf;
125
125
i64 offset = sym.get_got_addr (ctx) - sym.get_plt_addr (ctx) - 8 ;
@@ -133,6 +133,8 @@ void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
133
133
u8 *loc = ctx.buf + this ->shdr .sh_offset + offset;
134
134
135
135
switch (rel.r_type ) {
136
+ case R_NONE:
137
+ break ;
136
138
case R_PPC_ADDR32:
137
139
*(ub32 *)loc = val;
138
140
break ;
@@ -153,9 +155,9 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
153
155
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn ->shdr .sh_offset +
154
156
file.reldyn_offset + this ->reldyn_offset );
155
157
156
- u64 GP = 0 ;
158
+ u64 got2 = 0 ;
157
159
if (file.ppc32_got2 )
158
- GP = file.ppc32_got2 ->output_section ->shdr .sh_addr + file.ppc32_got2 ->offset ;
160
+ got2 = file.ppc32_got2 ->output_section ->shdr .sh_addr + file.ppc32_got2 ->offset ;
159
161
160
162
for (i64 i = 0 ; i < rels.size (); i++) {
161
163
const ElfRel<E> &rel = rels[i];
@@ -206,24 +208,24 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
206
208
*(ub32 *)loc |= bits (S + A, 31 , 2 ) << 2 ;
207
209
break ;
208
210
case R_PPC_PLT16_LO:
209
- *(ub16 *)loc = lo (G + GOT - GP - A);
211
+ *(ub16 *)loc = lo (G + GOT - got2 - A);
210
212
break ;
211
213
case R_PPC_PLT16_HI:
212
- *(ub16 *)loc = hi (G + GOT - GP - A);
214
+ *(ub16 *)loc = hi (G + GOT - got2 - A);
213
215
break ;
214
216
case R_PPC_PLT16_HA:
215
- *(ub16 *)loc = ha (G + GOT - GP - A);
217
+ *(ub16 *)loc = ha (G + GOT - got2 - A);
216
218
break ;
217
219
case R_PPC_PLT32:
218
- *(ub32 *)loc = G + GOT - GP - A;
220
+ *(ub32 *)loc = G + GOT - got2 - A;
219
221
break ;
220
222
case R_PPC_REL14:
221
223
*(ub32 *)loc &= 0b1111'1111'1111'1111'0000'0000'0000'0011 ;
222
224
*(ub32 *)loc |= bits (S + A - P, 15 , 2 ) << 2 ;
223
225
break ;
224
226
case R_PPC_REL16:
225
227
case R_PPC_REL16_LO:
226
- *(ub16 *)loc = S + A - P;
228
+ *(ub16 *)loc = lo ( S + A - P) ;
227
229
break ;
228
230
case R_PPC_REL16_HI:
229
231
*(ub16 *)loc = hi (S + A - P);
@@ -256,7 +258,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
256
258
break ;
257
259
case R_PPC_GOT16:
258
260
case R_PPC_GOT16_LO:
259
- *(ub16 *)loc = G + A;
261
+ *(ub16 *)loc = lo ( G + A) ;
260
262
break ;
261
263
case R_PPC_GOT16_HI:
262
264
*(ub16 *)loc = hi (G + A);
@@ -440,21 +442,6 @@ template <>
440
442
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
441
443
u8 *buf = ctx.buf + output_section.shdr .sh_offset + offset;
442
444
443
- static const ub32 plt_thunk[] = {
444
- // Get this thunk's address
445
- 0x7c08'02a6 , // mflr r0
446
- 0x429f'0005 , // bcl 20, 31, 4
447
- 0x7d88'02a6 , // mflr r12
448
- 0x7c08'03a6 , // mtlr r0
449
-
450
- // Load an address from the GOT/GOTPLT entry and jump to that address
451
- 0x3d6c'0000 , // addis r11, r12, OFFSET@higha
452
- 0x396b'0000 , // addi r11, r11, OFFSET@lo
453
- 0x818b'0000 , // lwz r12, 0(r11)
454
- 0x7d89'03a6 , // mtctr r12
455
- 0x4e80'0420 , // bctr
456
- };
457
-
458
445
static const ub32 local_thunk[] = {
459
446
// Get this thunk's address
460
447
0x7c08'02a6 , // mflr r0
@@ -470,21 +457,17 @@ void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
470
457
0x6000'0000 , // nop
471
458
};
472
459
473
- static_assert (E::thunk_size == sizeof (plt_thunk ));
460
+ static_assert (E::thunk_size == sizeof (plt_entry ));
474
461
static_assert (E::thunk_size == sizeof (local_thunk));
475
462
476
463
for (i64 i = 0 ; i < symbols.size (); i++) {
477
464
ub32 *loc = (ub32 *)(buf + i * E::thunk_size);
478
465
Symbol<E> &sym = *symbols[i];
479
466
480
- if (sym.has_got (ctx)) {
481
- memcpy (loc, plt_thunk, sizeof (plt_thunk));
482
- i64 val = sym.get_got_addr (ctx) - get_addr (i) - 8 ;
483
- loc[4 ] |= higha (val);
484
- loc[5 ] |= lo (val);
485
- } else if (sym.has_plt (ctx)) {
486
- memcpy (loc, plt_thunk, sizeof (plt_thunk));
487
- i64 val = sym.get_gotplt_addr (ctx) - get_addr (i) - 8 ;
467
+ if (sym.has_plt (ctx)) {
468
+ memcpy (loc, plt_entry, sizeof (plt_entry));
469
+ u64 got = sym.has_got (ctx) ? sym.get_got_addr (ctx) : sym.get_gotplt_addr (ctx);
470
+ i64 val = got - get_addr (i) - 8 ;
488
471
loc[4 ] |= higha (val);
489
472
loc[5 ] |= lo (val);
490
473
} else {
0 commit comments