From 7a8c14a63488a883e9524ab24e38db6b1c307682 Mon Sep 17 00:00:00 2001
From: Rui Ueyama
Date: Sun, 16 Jan 2022 12:51:51 +0900
Subject: [PATCH] [ELF] Do not use parallel scan to compute section offsets

A function passed to tbb::parallel_scan must be associative, but I
don't think our function satisfied that constraint. So I rewrote code
without tbb::parallel_scan.
---
 elf/passes.cc | 72 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 28 deletions(-)

diff --git a/elf/passes.cc b/elf/passes.cc
index 0c9a2cff74..318b2c9bb1 100644
--- a/elf/passes.cc
+++ b/elf/passes.cc
@@ -5,7 +5,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 
@@ -203,7 +202,6 @@ void compute_merged_section_sizes(Context<E> &ctx) {
 
 template <typename T>
 static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
-  assert(input.size() > 0);
   std::span<T> span(input);
   std::vector<std::span<T>> vec;
 
@@ -469,36 +467,54 @@ template <typename E>
 void compute_section_sizes(Context<E> &ctx) {
   Timer t(ctx, "compute_section_sizes");
 
-  struct T {
-    i64 offset;
-    i64 align;
+  struct Group {
+    i64 size = 0;
+    i64 alignment = 1;
+    i64 offset = 0;
+    std::span<InputSection<E> *> members;
   };
 
   tbb::parallel_for_each(ctx.output_sections,
                          [&](std::unique_ptr<OutputSection<E>> &osec) {
-    T sum = tbb::parallel_scan(
-      tbb::blocked_range<i64>(0, osec->members.size(), 10000),
-      T{0, 1},
-      [&](const tbb::blocked_range<i64> &r, T sum, bool is_final) {
-        for (i64 i = r.begin(); i < r.end(); i++) {
-          InputSection<E> &isec = *osec->members[i];
-          sum.offset = align_to(sum.offset, isec.shdr.sh_addralign);
-          if (is_final)
-            isec.offset = sum.offset;
-          sum.offset += isec.shdr.sh_size;
-          sum.align = std::max<i64>(sum.align, isec.shdr.sh_addralign);
-        }
-        return sum;
-      },
-      [](T lhs, T rhs) {
-        i64 offset = align_to(lhs.offset, rhs.align) + rhs.offset;
-        i64 align = std::max(lhs.align, rhs.align);
-        return T{offset, align};
-      },
-      tbb::simple_partitioner());
-
-    osec->shdr.sh_size = sum.offset;
-    osec->shdr.sh_addralign = sum.align;
+    // Since one output section may contain millions of input sections,
+    // we first split input sections into groups and assign offsets to
+    // groups.
+    std::vector<Group> groups;
+    constexpr i64 group_size = 10000;
+
+    for (std::span<InputSection<E> *> span : split(osec->members, group_size))
+      groups.push_back(Group{.members = span});
+
+    tbb::parallel_for_each(groups, [](Group &group) {
+      for (InputSection<E> *isec : group.members) {
+        group.size = align_to(group.size, isec->shdr.sh_addralign) +
+                     isec->shdr.sh_size;
+        group.alignment = std::max<i64>(group.alignment, isec->shdr.sh_addralign);
+      }
+    });
+
+    i64 offset = 0;
+    i64 align = 1;
+
+    for (i64 i = 0; i < groups.size(); i++) {
+      offset = align_to(offset, groups[i].alignment);
+      groups[i].offset = offset;
+      offset += groups[i].size;
+      align = std::max(align, groups[i].alignment);
+    }
+
+    osec->shdr.sh_size = offset;
+    osec->shdr.sh_addralign = align;
+
+    // Assign offsets to input sections.
+    tbb::parallel_for_each(groups, [](Group &group) {
+      i64 offset = group.offset;
+      for (InputSection<E> *isec : group.members) {
+        offset = align_to(offset, isec->shdr.sh_addralign);
+        isec->offset = offset;
+        offset += isec->shdr.sh_size;
+      }
+    });
   });
 }
 