Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
rui314 committed Jan 19, 2025
1 parent 4679573 commit 1fbc6f3
Show file tree
Hide file tree
Showing 9 changed files with 35 additions and 31 deletions.
4 changes: 4 additions & 0 deletions lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,10 @@ inline i64 sign_extend(u64 val, i64 n) {
return (i64)(val << (64 - n)) >> (64 - n);
}

// Returns true if `val` is unchanged by sign_extend(val, n), i.e. the
// bits of `val` above bit n-1 are all copies of bit n-1, so the value
// fits in an n-bit two's-complement field. `n` is passed straight to
// sign_extend, which shifts by (64 - n).
inline bool is_int(u64 val, i64 n) {
  // Compare in the unsigned domain, which is what the implicit
  // conversion in a mixed i64/u64 comparison does anyway.
  u64 extended = sign_extend(val, n);
  return extended == val;
}

template <typename T, typename Compare = std::less<T>>
void update_minimum(std::atomic<T> &atomic, u64 new_val, Compare cmp = {}) {
T old_val = atomic.load(std::memory_order_relaxed);
Expand Down
10 changes: 5 additions & 5 deletions src/arch-arm32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -304,10 +304,10 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
i64 val1 = S + A - P;
i64 val2 = align_to(S + A - P, 4);

if (T && sign_extend(val1, 25) == val1) {
if (T && is_int(val1, 25)) {
*(ul16 *)(loc + 2) |= 0x1000; // BL
write_thm_b_imm(loc, val1);
} else if (!T && sign_extend(val2, 25) == val2) {
} else if (!T && is_int(val2, 25)) {
*(ul16 *)(loc + 2) &= ~0x1000; // BLX
write_thm_b_imm(loc, val2);
} else {
Expand Down Expand Up @@ -343,7 +343,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
Fatal(ctx) << *this << ": R_ARM_CALL refers to neither BL nor BLX";

i64 val = S + A - P;
if (sign_extend(val, 26) == val) {
if (is_int(val, 26)) {
if (T) {
*(ul32 *)loc = 0xfa00'0000; // BLX
*(ul32 *)loc |= (bit(val, 1) << 24) | bits(val, 25, 2);
Expand All @@ -370,7 +370,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
// required, we jump to a linker-synthesized thunk which does the
// job with a longer code sequence.
i64 val = S + A - P;
if (sign_extend(val, 26) != val || T)
if (T || !is_int(val, 26) )
val = get_arm_thunk_addr() + A - P;
*(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2);
break;
Expand Down Expand Up @@ -416,7 +416,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
// Just like R_ARM_JUMP24, we need to jump to a thunk if we need to
// switch processor mode.
i64 val = S + A - P;
if (sign_extend(val, 25) != val || !T)
if (!T || !is_int(val, 25))
val = get_thumb_thunk_addr() + A - P;
write_thm_b_imm(loc, val);
break;
Expand Down
2 changes: 1 addition & 1 deletion src/arch-arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
i + 1 < rels.size()) {
i64 val = S + A - P - 4;
const ElfRel<E> &rel2 = rels[i + 1];
if (sign_extend(val, 21) == val &&
if (is_int(val, 21) &&
rel2.r_type == R_AARCH64_ADD_ABS_LO12_NC &&
rel2.r_sym == rel.r_sym &&
rel2.r_offset == rel.r_offset + 4 &&
Expand Down
14 changes: 7 additions & 7 deletions src/arch-loongarch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
case R_LARCH_TLS_DESC_PC_LO12:
if (sym.has_tlsdesc(ctx) && removed_bytes == 0) {
i64 dist = sym.get_tlsdesc_addr(ctx) + A - P;
if (sign_extend(dist, 22) == dist) {
if (is_int(dist, 22)) {
// If we can directly materialize the PC-relative address
// with pcaddi, do that.
*(ul32 *)loc = 0x1800'0000 | get_rd(*(ul32 *)loc); // pcaddi
Expand Down Expand Up @@ -649,7 +649,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {

// Rewrite `addi.d $t0, $t0, <offset>` with `addi.d $t0, $tp, <offset>`
// if the offset is directly accessible using tp. tp is r2.
if (sign_extend(val, 12) == val)
if (is_int(val, 12))
set_rj(loc, 2);
break;
}
Expand Down Expand Up @@ -931,7 +931,7 @@ void shrink_section(Context<E> &ctx, InputSection<E> &isec) {
//
// addi.d $t0, $tp, <tp-offset>
if (i64 val = sym.get_addr(ctx) + r.r_addend - ctx.tp_addr;
sign_extend(val, 12) == val)
is_int(val, 12))
remove(4);
break;
case R_LARCH_PCALA_HI20:
Expand All @@ -954,7 +954,7 @@ void shrink_section(Context<E> &ctx, InputSection<E> &isec) {
u32 insn2 = *(ul32 *)(buf + rels[i].r_offset + 4);
bool is_addi_d = (insn2 & 0xffc0'0000) == 0x02c0'0000;

if ((dist & 0b11) == 0 && sign_extend(dist, 22) == dist &&
if ((dist & 0b11) == 0 && is_int(dist, 22) &&
is_addi_d && get_rd(insn1) == get_rd(insn2) &&
get_rd(insn2) == get_rj(insn2))
remove(4);
Expand All @@ -970,7 +970,7 @@ void shrink_section(Context<E> &ctx, InputSection<E> &isec) {
// If the displacement is PC ± 128 MiB, we can use B or BL instead.
// Note that $zero is $r0 and $ra is $r1.
if (i64 dist = compute_distance(ctx, sym, isec, r);
sign_extend(dist, 28) == dist)
is_int(dist, 28))
if (u32 jirl = *(ul32 *)(buf + rels[i].r_offset + 4);
get_rd(jirl) == 0 || get_rd(jirl) == 1)
remove(4);
Expand All @@ -988,15 +988,15 @@ void shrink_section(Context<E> &ctx, InputSection<E> &isec) {
// pcaddi $t0, <offset>
if (is_relaxable_got_load(ctx, isec, i)) {
i64 dist = compute_distance(ctx, sym, isec, r);
if ((dist & 0b11) == 0 && sign_extend(dist, 22) == dist)
if ((dist & 0b11) == 0 && is_int(dist, 22))
remove(4);
}
break;
case R_LARCH_TLS_DESC_PC_HI20:
if (sym.has_tlsdesc(ctx)) {
u64 P = isec.get_addr() + r.r_offset;
i64 dist = sym.get_tlsdesc_addr(ctx) + r.r_addend - P;
if (sign_extend(dist, 22) == dist)
if (is_int(dist, 22))
remove(4);
} else {
remove(4);
Expand Down
4 changes: 2 additions & 2 deletions src/arch-ppc32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -213,14 +213,14 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
case R_PPC_REL24:
case R_PPC_LOCAL24PC: {
i64 val = S + A - P;
if (sign_extend(val, 26) != val)
if (!is_int(val, 26))
val = sym.get_thunk_addr(ctx, P) - P;
*(ub32 *)loc |= bits(val, 25, 2) << 2;
break;
}
case R_PPC_PLTREL24: {
i64 val = S - P;
if (sym.has_plt(ctx) || sign_extend(val, 26) != val)
if (sym.has_plt(ctx) || !is_int(val, 26))
val = sym.get_thunk_addr(ctx, P) - P;
*(ub32 *)loc |= bits(val, 25, 2) << 2;
break;
Expand Down
2 changes: 1 addition & 1 deletion src/arch-ppc64v1.cc
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
break;
case R_PPC64_REL24: {
i64 val = sym.get_addr(ctx, NO_OPD) + A - P;
if (sym.has_plt(ctx) || sign_extend(val, 26) != val)
if (sym.has_plt(ctx) || !is_int(val, 26))
val = sym.get_thunk_addr(ctx, P) + A - P;

check(val, -(1 << 25), 1 << 25);
Expand Down
4 changes: 2 additions & 2 deletions src/arch-ppc64v2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
*(ul32 *)(loc + 4) = 0xe841'0018; // ld r2, 24(r1)
} else {
i64 val = S + get_local_entry_offset(ctx, sym) + A - P;
if (sign_extend(val, 26) != val)
if (!is_int(val, 26))
val = no_r2save_thunk_addr() + A - P;
*(ul32 *)loc |= bits(val, 25, 2) << 2;
}
Expand All @@ -238,7 +238,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
*(ul32 *)loc |= bits(val, 25, 2) << 2;
} else {
i64 val = S + A - P;
if (sign_extend(val, 26) != val)
if (!is_int(val, 26))
val = no_r2save_thunk_addr() + A - P;
*(ul32 *)loc |= bits(val, 25, 2) << 2;
}
Expand Down
24 changes: 12 additions & 12 deletions src/arch-riscv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
// Rewrite `lw t1, 0(t0)` with `lw t1, 0(x0)` if the address is
// accessible relative to the zero register because if that's the
// case, corresponding LUI might have been removed by relaxation.
if (sign_extend(S + A, 12) == S + A)
if (is_int(S + A, 12))
set_rs1(loc, 0);
break;
case R_RISCV_TPREL_HI20:
Expand All @@ -457,7 +457,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {

// Rewrite `lw t1, 0(t0)` with `lw t1, 0(tp)` if the address is
// directly accessible using tp. tp is x4.
if (sign_extend(val, 12) == val)
if (is_int(val, 12))
set_rs1(loc, 4);
break;
}
Expand Down Expand Up @@ -540,7 +540,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
write_itype(loc, sym2.get_gottp_addr(ctx) + A - P);
} else {
i64 val = S + A - ctx.tp_addr;
if (sign_extend(val, 12) == val)
if (is_int(val, 12))
*(ul32 *)loc = 0x513; // addi a0,zero,<lo12>
else
*(ul32 *)loc = 0x50513; // addi a0,a0,<lo12>
Expand Down Expand Up @@ -888,15 +888,15 @@ void shrink_section(Context<E> &ctx, InputSection<E> &isec) {

i64 rd = get_rd(buf + r.r_offset + 4);

if (use_rvc && rd == 0 && sign_extend(dist, 12) == dist) {
if (use_rvc && rd == 0 && is_int(dist, 12)) {
// If rd is x0 and the jump target is within ±2 KiB, we can use
// C.J, saving 6 bytes.
remove(6);
} else if (use_rvc && !E::is_64 && rd == 1 && sign_extend(dist, 12) == dist) {
} else if (use_rvc && !E::is_64 && rd == 1 && is_int(dist, 12)) {
// If rd is x1 and the jump target is within ±2 KiB, we can use
// C.JAL. This is RV32 only because C.JAL is an RV32-only instruction.
remove(6);
} else if (sign_extend(dist, 21) == dist) {
} else if (is_int(dist, 21)) {
// If the jump target is within ±1 MiB, we can use JAL.
remove(4);
}
Expand All @@ -919,10 +919,10 @@ void shrink_section(Context<E> &ctx, InputSection<E> &isec) {
if (rd == get_rd(buf + r.r_offset + 4)) {
u64 val = sym.get_addr(ctx) + r.r_addend;

if (use_rvc && rd != 0 && sign_extend(val, 6) == val) {
if (use_rvc && rd != 0 && is_int(val, 6)) {
// Replace AUIPC + LD with C.LI.
remove(6);
} else if (sign_extend(val, 12) == val) {
} else if (is_int(val, 12)) {
// Replace AUIPC + LD with ADDI.
remove(4);
}
Expand All @@ -934,13 +934,13 @@ void shrink_section(Context<E> &ctx, InputSection<E> &isec) {
u64 val = sym.get_addr(ctx) + r.r_addend;
i64 rd = get_rd(buf + r.r_offset);

if (sign_extend(val, 12) == val) {
if (is_int(val, 12)) {
// We can replace `lui t0, %hi(foo)` and `add t0, t0, %lo(foo)`
// instruction pair with `add t0, x0, %lo(foo)` if foo's bits
// [32:11] are all one or all zero.
remove(4);
} else if (use_rvc && rd != 0 && rd != 2 &&
sign_extend(val + 0x800, 18) == val + 0x800) {
is_int(val + 0x800, 18)) {
// If the upper 20 bits can actually be represented in 6 bits,
// we can use C.LUI instead of LUI.
remove(2);
Expand Down Expand Up @@ -969,7 +969,7 @@ void shrink_section(Context<E> &ctx, InputSection<E> &isec) {
//
// Here, we remove `lui` and `add` if the offset is within ±2 KiB.
if (i64 val = sym.get_addr(ctx) + r.r_addend - ctx.tp_addr;
sign_extend(val, 12) == val)
is_int(val, 12))
remove(4);
break;
case R_RISCV_TLSDESC_HI20:
Expand All @@ -988,7 +988,7 @@ void shrink_section(Context<E> &ctx, InputSection<E> &isec) {
assert(r.r_type == R_RISCV_TLSDESC_ADD_LO12);
if (!sym2.has_tlsdesc(ctx) && !sym2.has_gottp(ctx))
if (i64 val = sym2.get_addr(ctx) + rel2.r_addend - ctx.tp_addr;
sign_extend(val, 12) == val)
is_int(val, 12))
remove(4);
}
break;
Expand Down
2 changes: 1 addition & 1 deletion src/arch-s390x.cc
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
u64 op = *(ub16 *)(loc - 2);
u64 val = S + A - P;
if ((op & 0xff0f) == 0xc408 && A == 2 && (val & 1) == 0 &&
sign_extend(val, 33) == val) {
is_int(val, 33)) {
*(ub16 *)(loc - 2) = 0xc000 | (op & 0x00f0);
*(ub32 *)loc = val >> 1;
break;
Expand Down

0 comments on commit 1fbc6f3

Please sign in to comment.