Skip to content

Staticlib hide internal symbols#155338

Open
cezarbbb wants to merge 2 commits into
rust-lang:mainfrom
cezarbbb:staticlib-symbol-hygiene
Open

Staticlib hide internal symbols#155338
cezarbbb wants to merge 2 commits into
rust-lang:mainfrom
cezarbbb:staticlib-symbol-hygiene

Conversation

@cezarbbb
Copy link
Copy Markdown
Contributor

@cezarbbb cezarbbb commented Apr 15, 2026

View all comments

According to issue #104707, when building a staticlib, all Rust internal symbols — mangled symbols, #[rustc_std_internal_symbol] items, allocator shims, etc. — leak out of the static archive. In contrast, cdylib correctly exports only #[no_mangle] symbols via a linker version script.

-Zstaticlib-hide-internal-symbols directly post-processes ELF object files in the archive: parsing the SHT_SYMTAB sections and setting STV_HIDDEN visibility on any GLOBAL/WEAK defined symbol that is not in the exported symbol set, without changing the binding. This is an in-place modification (only writing the st_other byte per matching entry), with zero overhead.

Supported on ELF targets (Linux, BSD, etc.) and Apple targets (macOS, iOS, etc.). On unsupported targets (Windows), a warning is emitted and the flag has no effect.

Update: The rename counterpart (-Zstaticlib-rename-internal-symbols) is in #156950.

The test code is as follows:

1.a std rust staticlib:

use std::collections::HashMap;
use std::panic::{catch_unwind, AssertUnwindSafe};

#[no_mangle]
pub extern "C" fn my_add(a: i32, b: i32) -> i32 { a + b }

#[no_mangle]
pub extern "C" fn my_hash_lookup(key: u64) -> u64 {
    let mut map = HashMap::new();
    for i in 0..100u64 { map.insert(i, i.wrapping_mul(2654435761)); }
    *map.get(&key).unwrap_or(&0)
}

pub fn internal_reverse(s: &str) -> String { s.chars().rev().collect() }

#[no_mangle]
pub extern "C" fn my_format_number(n: i32) -> i32 {
    let s = format!("number: {}", n); s.len() as i32
}

#[no_mangle]
pub extern "C" fn my_safe_div(a: i32, b: i32) -> i32 {
    match catch_unwind(AssertUnwindSafe(|| {
        if b == 0 { panic!("division by zero!"); }
        a / b
    })) {
        Ok(result) => result,
        Err(_) => -1,
    }
}

#[no_mangle]
pub extern "C" fn my_uncaught_panic() { panic!("uncaught panic across FFI"); }

1.b downstream c program:

extern int my_add(int a, int b);
extern unsigned long my_hash_lookup(unsigned long key);
extern int my_format_number(int n);
extern int my_safe_div(int a, int b);
extern void my_uncaught_panic(void);

int main() {
    int failures = 0;
    if (my_add(10, 20) != 30) failures++;
    if (my_hash_lookup(5) != 5UL * 2654435761UL) failures++;
    if (my_format_number(42) != 10) failures++;
    if (my_safe_div(100, 5) != 20) failures++;
    if (my_safe_div(100, 0) != -1) failures++;
    pid_t pid = fork();
    if (pid == 0) { alarm(5); my_uncaught_panic(); _exit(0); }
    else { waitpid(pid, &status, 0); }
    return failures;
}

The test results with different compiler flags (which might cause binary size reduction) are as follows:
1.c result with -Zstaticlib-hide-internal-symbols

  settings                   OFF        ON  -Zsave     ALL    OFF.dynsym ON.dynsym 
  ------------------------------------------------------------------------
  default                 1.7M      1.5M  204K (12%)    1735       5    1730
  lto_thin                616K      584K  33K (5%)     246       5     241
  lto_fat                 525K      525K    0 (0%)       6       5       1
  opt_s                   1.7M      1.5M  204K (12%)    1735       5    1730
  opt_z                   1.7M      1.5M  204K (12%)    1735       5    1730
  lto_thin_z              602K      570K  32K (5%)     246       5     241
  lto_fat_z               514K      514K    0 (0%)       6       5       1
  full                    514K      514K    0 (0%)       6       5       1

1.d result with -Zstaticlib-hide-internal-symbols + -Zstaticlib-rename-internal-symbols

  settings                   OFF        ON  -Zsave     ALL    OFF.dynsym ON.dynsym 
  ------------------------------------------------------------------------
  default                 1.7M      1.5M  162K (9%)    1735       5    1730
  lto_thin                616K      599K  18K (2%)     246       5     241
  lto_fat                 525K      535K  -1% (-1%)       6       5       1
  opt_s                   1.7M      1.5M  162K (9%)    1735       5    1730
  opt_z                   1.7M      1.5M  162K (9%)    1735       5    1730
  lto_thin_z              602K      585K  18K (2%)     246       5     241
  lto_fat_z               514K      524K  -1% (-1%)       6       5       1
  full                    514K      523K  -1% (-1%)       6       5       1

2.a no_std rust staticlib

#![no_std]
#![feature(core_intrinsics)]

use core::panic::PanicInfo;

#[panic_handler]
fn panic(_info: &PanicInfo) -> ! { loop {} }

#[no_mangle]
pub extern "C" fn embedded_add(a: i32, b: i32) -> i32 { a.wrapping_add(b) }

#[no_mangle]
pub extern "C" fn embedded_checksum(data: *const u8, len: usize) -> u8 {
    if data.is_null() { return 0; }
    let slice = unsafe { core::slice::from_raw_parts(data, len) };
    let mut sum: u8 = 0;
    for &byte in slice { sum = sum.wrapping_add(byte); }
    sum
}

fn internal_helper() -> i32 { 42 }
#[no_mangle]
pub extern "C" fn call_internal() -> i32 { internal_helper() }

#[no_mangle]
pub extern "C" fn embedded_trigger_abort() { core::intrinsics::abort(); }

2.b downstream c program

extern int embedded_add(int a, int b);
extern unsigned char embedded_checksum(const unsigned char *data, unsigned long len);
extern int call_internal(void);
extern void embedded_trigger_abort(void);

int main() {
    int failures = 0;
    if (embedded_add(10, 20) != 30) failures++;
    unsigned char data[] = {1, 2, 3};
    if (embedded_checksum(data, 3) != 6) failures++;
    if (call_internal() != 42) failures++;
    pid_t pid = fork();
    if (pid == 0) { embedded_trigger_abort(); _exit(0); }
    else { waitpid(pid, &status, 0); }
    return failures;
}

The test results with different compiler flags (which might cause binary size reduction) are as follows:
2.c result with -Zstaticlib-hide-internal-symbols

  settings                   OFF        ON  -Zsave     ALL    OFF.dynsym ON.dynsym 
  ------------------------------------------------------------------------
  default                 485K      429K  56K (11%)     490       4     486
  lto_thin                180K      180K    0 (0%)       4       4       0
  lto_fat                 179K      179K    0 (0%)       4       4       0
  opt_s                   485K      429K  56K (11%)     490       4     486
  opt_z                   485K      429K  56K (11%)     490       4     486
  lto_thin_z              180K      180K    0 (0%)       4       4       0
  lto_fat_z               179K      179K    0 (0%)       4       4       0
  full                    179K      179K    0 (0%)       4       4       0

2.d result with -Zstaticlib-hide-internal-symbols + -Zstaticlib-rename-internal-symbols

  settings                   OFF        ON  -Zsave     ALL    OFF.dynsym ON.dynsym 
  ------------------------------------------------------------------------
  default                 485K      447K  39K (7%)     490       4     486
  lto_thin                180K      189K  -5% (-5%)       4       4       0
  lto_fat                 179K      189K  -5% (-5%)       4       4       0
  opt_s                   485K      448K  38K (7%)     490       4     486
  opt_z                   485K      448K  38K (7%)     490       4     486
  lto_thin_z              180K      189K  -5% (-5%)       4       4       0
  lto_fat_z               179K      189K  -5% (-5%)       4       4       0
  full                    179K      189K  -5% (-5%)       4       4       0

Test results show that this compiler option is beneficial for scenarios where LTO cannot be enabled.

r? @bjorn3 @petrochenkov

@rustbot rustbot added A-LLVM Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues. A-run-make Area: port run-make Makefiles to rmake.rs S-waiting-on-review Status: Awaiting review from the assignee but also interested parties. T-compiler Relevant to the compiler team, which will review and decide on the PR/issue. labels Apr 15, 2026
@rustbot
Copy link
Copy Markdown
Collaborator

rustbot commented Apr 15, 2026

r? @petrochenkov

rustbot has assigned @petrochenkov.
They will have a look at your PR within the next two weeks and either review your PR or reassign to another reviewer.

Use r? to explicitly pick a reviewer

Why was this reviewer chosen?

The reviewer was selected based on:

  • Owners of files modified in this PR: codegen, compiler
  • codegen, compiler expanded to 69 candidates
  • Random selection from 16 candidates

@rustbot

This comment has been minimized.

@rustbot rustbot assigned bjorn3 and unassigned petrochenkov Apr 15, 2026
@bjorn3
Copy link
Copy Markdown
Member

bjorn3 commented Apr 15, 2026

This would also need to rename symbols to avoid conflicts between two rust staticlibs ending up getting linked together, right?

@bjorn3
Copy link
Copy Markdown
Member

bjorn3 commented Apr 15, 2026

The rust_eh_personality symbol is always kept visible to ensure .eh_frame unwinding works correctly for C consumers.

Why exactly is that the case? rust_eh_personality is actually the symbol that is most likely to cause conflicts as it is the only one whose name doesn't get mangled depending on the rustc version.

@cezarbbb cezarbbb force-pushed the staticlib-symbol-hygiene branch from ff707ad to 7ac49d1 Compare April 15, 2026 12:35
@rustbot

This comment has been minimized.

@rust-log-analyzer

This comment has been minimized.

@cezarbbb
Copy link
Copy Markdown
Contributor Author

This would also need to rename symbols to avoid conflicts between two rust staticlibs ending up getting linked together, right?

My primary goal right now is to reduce binary size, so I don't have immediate plans to implement symbol renaming. This means that linking multiple Rust staticlibs together can still result in multiple definition errors. Would you like me to address that in this PR as well? It seems feasible to implement — for example, by rehashing symbols and updating their references accordingly.

@cezarbbb
Copy link
Copy Markdown
Contributor Author

The rust_eh_personality symbol is always kept visible to ensure .eh_frame unwinding works correctly for C consumers.

Why exactly is that the case? rust_eh_personality is actually the symbol that is most likely to cause conflicts as it is the only one whose name doesn't get mangled depending on the rustc version.

I previously assumed this symbol needed to remain externally visible to support scenarios requiring cross-language exception propagation. Do you think we should also set rust_eh_personality as hidden?

@bjorn3
Copy link
Copy Markdown
Member

bjorn3 commented Apr 15, 2026

If it isn't too hard it would be nice to do symbol renaming too. I think doing in-place modification isn't going to work for that though. Adding a unique suffix would require growing the size of the string table.

@bjorn3
Copy link
Copy Markdown
Member

bjorn3 commented Apr 15, 2026

I previously assumed this symbol needed to remain externally visible to support scenarios requiring cross-language exception propagation. Do you think we should also set rust_eh_personality as hidden?

rust_eh_personality is only meant to be referenced by the .eh_frame section of rust object files. The only reason its name isn't mangled is because LLVM hard codes the name to determine the exception table format to emit.

@cezarbbb
Copy link
Copy Markdown
Contributor Author

If it isn't too hard it would be nice to do symbol renaming too. I think doing in-place modification isn't going to work for that though. Adding a unique suffix would require growing the size of the string table.

Got it. I will first fix the rust_eh_personality issue, and then try to implement symbol renaming.

@rust-log-analyzer

This comment has been minimized.

@rust-log-analyzer

This comment has been minimized.

@cezarbbb cezarbbb force-pushed the staticlib-symbol-hygiene branch from 5e1c3a1 to c7d4e98 Compare April 16, 2026 03:27
@SparrowLii
Copy link
Copy Markdown
Member

@bors delegate=try

@rust-bors
Copy link
Copy Markdown
Contributor

rust-bors Bot commented Apr 16, 2026

✌️ @cezarbbb, you can now perform try builds on this pull request!

You can now post @bors try to start a try build.

@cezarbbb
Copy link
Copy Markdown
Contributor Author

@bors try

@rust-bors

This comment has been minimized.

rust-bors Bot pushed a commit that referenced this pull request Apr 16, 2026
`-Zstaticlib-hide-internal-symbols`: Hide non-exported internal symbols from staticlibs
@rust-bors
Copy link
Copy Markdown
Contributor

rust-bors Bot commented Apr 16, 2026

☀️ Try build successful (CI)
Build commit: a9431d3 (a9431d37da1d0346038257cec9d94f2783997621, parent: e8e4541ff19649d95afab52fdde2c2eaa6829965)

@cezarbbb
Copy link
Copy Markdown
Contributor Author

@bors try jobs=x86_64-*

@rust-bors

This comment has been minimized.

@cezarbbb cezarbbb force-pushed the staticlib-symbol-hygiene branch from 786373a to 40c254b Compare May 26, 2026 03:38
@cezarbbb cezarbbb changed the title -Zstaticlib-hide-internal-symbols and Zstaticlib-rename-internal-symbols: hide/rename internal symbols in staticlibs Staticlib hide internal symbols May 26, 2026
@rust-log-analyzer

This comment has been minimized.

@cezarbbb cezarbbb force-pushed the staticlib-symbol-hygiene branch from 40c254b to 54a7c4c Compare May 26, 2026 03:47
@petrochenkov
Copy link
Copy Markdown
Contributor

The column names are bad — I'll rewrite the table with clearer labels when I update the PR description.

Yes, could you put the legend from #155338 (comment) into the PR description before the tables as well?

Comment thread tests/run-make/staticlib-hide-internal-symbols/rmake.rs Outdated
Comment thread tests/run-make/staticlib-hide-internal-symbols/rmake.rs Outdated
Comment thread tests/run-make/staticlib-hide-internal-symbols/rmake.rs Outdated
Comment thread tests/run-make/staticlib-hide-internal-symbols/rmake.rs Outdated
Comment thread tests/run-make/staticlib-hide-internal-symbols/rmake.rs Outdated
Comment thread tests/run-make/staticlib-hide-internal-symbols/rmake.rs Outdated
Comment thread tests/run-make/staticlib-hide-internal-symbols/rmake.rs Outdated
Comment thread tests/run-make/staticlib-hide-internal-symbols/rmake.rs
Comment thread compiler/rustc_codegen_ssa/src/back/link.rs Outdated
@cezarbbb cezarbbb force-pushed the staticlib-symbol-hygiene branch from 54a7c4c to 303ab30 Compare May 27, 2026 03:17
Comment thread compiler/rustc_codegen_ssa/src/back/archive.rs Outdated
Comment thread compiler/rustc_codegen_ssa/src/back/archive.rs Outdated
Comment thread compiler/rustc_codegen_ssa/src/back/archive.rs Outdated
Comment thread compiler/rustc_codegen_ssa/src/back/archive.rs Outdated
Comment thread compiler/rustc_codegen_ssa/src/back/archive.rs Outdated
Comment thread compiler/rustc_codegen_ssa/src/back/archive.rs Outdated
Comment thread compiler/rustc_codegen_ssa/src/back/archive.rs Outdated
Comment thread compiler/rustc_codegen_ssa/src/back/archive.rs Outdated
Comment thread compiler/rustc_codegen_ssa/src/back/archive.rs Outdated
Comment thread compiler/rustc_codegen_ssa/src/back/archive.rs Outdated
Comment thread compiler/rustc_codegen_ssa/src/back/archive.rs Outdated
@petrochenkov
Copy link
Copy Markdown
Contributor

Enough for now.
@rustbot author

@rustbot rustbot added S-waiting-on-author Status: This is awaiting some action (such as code changes or more information) from the author. and removed S-waiting-on-review Status: Awaiting review from the assignee but also interested parties. labels May 27, 2026
@rustbot
Copy link
Copy Markdown
Collaborator

rustbot commented May 27, 2026

Reminder, once the PR becomes ready for a review, use @rustbot ready.

@cezarbbb
Copy link
Copy Markdown
Contributor Author

@rustbot ready

@rustbot rustbot added S-waiting-on-review Status: Awaiting review from the assignee but also interested parties. and removed S-waiting-on-author Status: This is awaiting some action (such as code changes or more information) from the author. labels May 30, 2026
@rust-bors

This comment has been minimized.

cezarbbb added 2 commits June 1, 2026 09:02
- Symbol collection directly utilizes `exported_symbols` for reverse matching
- Using RmetaLink to accurately track archive content instead of only checking `.rcgu.o`
@cezarbbb cezarbbb force-pushed the staticlib-symbol-hygiene branch from dca392e to a9c6743 Compare June 1, 2026 01:36
@rustbot
Copy link
Copy Markdown
Collaborator

rustbot commented Jun 1, 2026

This PR was rebased onto a different main commit. Here's a range-diff highlighting what actually changed.

Rebasing is a normal part of keeping PRs up to date, so no action is needed—this note is just to help reviewers.


ab.build(out_filename);
let exported_symbols = if sess.opts.unstable_opts.staticlib_hide_internal_symbols {
if !matches!(&*sess.target.archive_format, "gnu" | "bsd" | "darwin") {
Copy link
Copy Markdown
Contributor

@petrochenkov petrochenkov Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect the right condition here is "is the target using ELF or Mach-O" rather than something based on archive format?
We should be able to update ELF/Mach-O files in archives regardless of the archive's own format.

The object file format can be obtained from sess.target.binary_format.

View changes since the review

}
};

let is_macho = self.sess.target.llvm_target.contains("-apple-macosx");
Copy link
Copy Markdown
Contributor

@petrochenkov petrochenkov Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let is_macho = self.sess.target.llvm_target.contains("-apple-macosx");
let is_macho = self.sess.target.binary_format == BinaryFormat::MachO;

Also, can change the is_macho checks into matches on sess.target.binary_format, everything except BinaryFormat::{Elf,MachO} should be unreachable!() here.

View changes since the review

if !is_rust_object || exported_symbols.is_none() {
Box::new(data)
} else if is_macho {
match macho_apply_hide(data, exported_symbols.as_ref().unwrap()) {
Copy link
Copy Markdown
Contributor

@petrochenkov petrochenkov Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: better use if let Some(exported_symbols) = exported_symbols or match exported_symbols to avoid the unwraps.

View changes since the review

} else if is_macho {
match macho_apply_hide(data, exported_symbols.as_ref().unwrap()) {
Cow::Borrowed(_) => Box::new(data),
Cow::Owned(v) => Box::new(v),
Copy link
Copy Markdown
Contributor

@petrochenkov petrochenkov Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't Cow itself AsRef<[u8]>?
Then it would be possible to box the whole Cow without matching.

View changes since the review

/// Offset of `st_other` in a 32-bit ELF symbol table entry.
const ELF_ST_OTHER_ELF32: usize = 13;
/// Offset of `n_type` in a Mach-O nlist entry (same for 32-bit and 64-bit).
const MACHO_N_TYPE_OFFSET: usize = 4;
Copy link
Copy Markdown
Contributor

@petrochenkov petrochenkov Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps could use mem::offset_of!(...) here instead of hardcoding the numbers (if that's not too inconvenient).

View changes since the review

let sections = header.sections(endian, data).ok()?;
let symtab = sections.symbols(endian, data, elf::SHT_SYMTAB).ok()?;

if symtab.len() <= 1 {
Copy link
Copy Markdown
Contributor

@petrochenkov petrochenkov Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This check is redundant and probably not useful for performance.

View changes since the review

let strings = symtab.strings();
let mut patches = Vec::new();

for (_, sym) in symtab.enumerate().skip(1) {
Copy link
Copy Markdown
Contributor

@petrochenkov petrochenkov Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This null symbols skipping is likely also redundant and not useful for performance (also confuses the reader).

View changes since the review

continue;
}
let Ok(name_bytes) = sym.name(endian, strings) else { continue };
let Ok(name) = std::str::from_utf8(name_bytes) else { continue };
Copy link
Copy Markdown
Contributor

@petrochenkov petrochenkov Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let Ok(name) = std::str::from_utf8(name_bytes) else { continue };
let Ok(name) = str::from_utf8(name_bytes) else { continue };

View changes since the review

let Ok(name_bytes) = sym.name(endian, strings) else { continue };
let Ok(name) = std::str::from_utf8(name_bytes) else { continue };
if !exported.contains(name) {
let sym_addr = sym as *const Elf::Sym as *const u8 as usize;
Copy link
Copy Markdown
Contributor

@petrochenkov petrochenkov Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let sym_addr = sym as *const Elf::Sym as *const u8 as usize;
let sym_addr = sym as *const Elf::Sym as usize;

View changes since the review

let Ok(name) = std::str::from_utf8(name_bytes) else { continue };
if !exported.contains(name) {
let sym_addr = sym as *const Elf::Sym as *const u8 as usize;
let offset = sym_addr.wrapping_sub(data_ptr) + st_other_offset;
Copy link
Copy Markdown
Contributor

@petrochenkov petrochenkov Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let offset = sym_addr.wrapping_sub(data_ptr) + st_other_offset;
let offset = sym_addr - data_ptr + st_other_offset;

Wrapping shouldn't happen here.

View changes since the review

@petrochenkov
Copy link
Copy Markdown
Contributor

Most of the ELF comments apply to Mach-O too, as usual.
@rustbot author

@rustbot rustbot added S-waiting-on-author Status: This is awaiting some action (such as code changes or more information) from the author. and removed S-waiting-on-review Status: Awaiting review from the assignee but also interested parties. labels Jun 1, 2026
@petrochenkov
Copy link
Copy Markdown
Contributor

#157263 is a better alternative for the is_rust_object: bool flag.
@rustbot blocked

@rustbot rustbot added S-blocked Status: Blocked on something else such as an RFC or other implementation work. and removed S-waiting-on-author Status: This is awaiting some action (such as code changes or more information) from the author. labels Jun 1, 2026
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

A-LLVM Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues. A-run-make Area: port run-make Makefiles to rmake.rs S-blocked Status: Blocked on something else such as an RFC or other implementation work. T-compiler Relevant to the compiler team, which will review and decide on the PR/issue.

Projects

None yet

Development

Successfully merging this pull request may close these issues.

8 participants