From 45cf74a916c6939260cc6fd135bd0aea738b7263 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 25 Oct 2025 13:36:33 -0700 Subject: [PATCH] Removed indirection for sub-messages. To do this, we use weak definitions of actual MiniTables for tree shaking, instead of pointers to MiniTables. This means that a tree shaken MiniTable will take up `sizeof(upb_MiniTable)` space, instead of `sizeof(void*)`, which is a size regression. On the other hand, this saves `sizeof(void*)` bytes from every sub-message field, so it may be a size improvement overall. PiperOrigin-RevId: 823966877 --- hpb_generator/generator.cc | 4 +-- upb/mini_descriptor/decode.c | 6 +---- upb/mini_descriptor/link.c | 2 +- upb/mini_table/debug_string.c | 8 +++--- upb/mini_table/internal/message.h | 2 +- upb/mini_table/internal/sub.h | 7 ++--- upb/mini_table/message_benchmark.cc | 2 +- upb/text/encode_debug_test.cc | 4 +-- upb/wire/decode.c | 14 +++------- upb/wire/encode.c | 4 +-- upb_generator/minitable/generator.cc | 39 +++++++++++++++------------- upb_generator/minitable/names.cc | 4 --- upb_generator/minitable/names.h | 6 ----- 13 files changed, 40 insertions(+), 62 deletions(-) diff --git a/hpb_generator/generator.cc b/hpb_generator/generator.cc index f534ade7115b2..4098a3b31bfec 100644 --- a/hpb_generator/generator.cc +++ b/hpb_generator/generator.cc @@ -164,7 +164,7 @@ void WriteHeader(const google::protobuf::FileDescriptor* file, Context& ctx) { } ctx.Emit({{"class_name", ClassName(desc)}, {"minitable_name", - upb::generator::MiniTableMessagePtrVarName(desc->full_name())}, + upb::generator::MiniTableMessageVarName(desc->full_name())}, {"outer_namespace", outer_namespace}, {"c_api_msg_type", upb::generator::CApiMessageType(desc->full_name())}}, @@ -172,7 +172,7 @@ void WriteHeader(const google::protobuf::FileDescriptor* file, Context& ctx) { template <> struct AssociatedUpbTypes<$outer_namespace$$class_name$> { using CMessageType = $c_api_msg_type$; - static inline const upb_MiniTable* kMiniTable = $minitable_name$; + static inline const upb_MiniTable* kMiniTable = &$minitable_name$; }; )cc"); } diff --git a/upb/mini_descriptor/decode.c b/upb/mini_descriptor/decode.c index 292d1b0027190..eba0da039470a 100644 --- a/upb/mini_descriptor/decode.c +++ b/upb/mini_descriptor/decode.c @@ -444,15 +444,11 @@ static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, upb_SubCounts sub_counts) { uint32_t total_count = sub_counts.submsg_count + sub_counts.subenum_count; size_t subs_bytes = sizeof(*d->table.UPB_PRIVATE(subs)) * total_count; - size_t ptrs_bytes = sizeof(upb_MiniTable*) * sub_counts.submsg_count; upb_MiniTableSubInternal* subs = subs_bytes ? upb_MtDecoder_CheckedMalloc(d, subs_bytes) : NULL; - const upb_MiniTable** subs_ptrs = - ptrs_bytes ? upb_MtDecoder_CheckedMalloc(d, ptrs_bytes) : NULL; uint32_t i = 0; for (; i < sub_counts.submsg_count; i++) { - subs_ptrs[i] = NULL; - subs[i].UPB_PRIVATE(submsg) = &subs_ptrs[i]; + subs[i].UPB_PRIVATE(submsg) = NULL; } if (sub_counts.subenum_count) { upb_MiniTableField* f = d->fields; diff --git a/upb/mini_descriptor/link.c b/upb/mini_descriptor/link.c index d28c07eb1f95b..7a7c51840687e 100644 --- a/upb/mini_descriptor/link.c +++ b/upb/mini_descriptor/link.c @@ -59,7 +59,7 @@ bool upb_MiniTable_SetSubMessage(upb_MiniTable* table, // TODO: Add this assert back once YouTube is updated to not call // this function repeatedly. // UPB_ASSERT(upb_MiniTable_GetSubMessageTable(table, field) == NULL); - memcpy((void*)table_subs[idx].UPB_PRIVATE(submsg), &sub, sizeof(void*)); + table_subs[idx].UPB_PRIVATE(submsg) = sub; return true; } diff --git a/upb/mini_table/debug_string.c b/upb/mini_table/debug_string.c index 2fd2db41a2aa4..1d7b49260045d 100644 --- a/upb/mini_table/debug_string.c +++ b/upb/mini_table/debug_string.c @@ -204,8 +204,8 @@ static void upb_MiniTablePrinter_PrintField(upb_MiniTablePrinter* p, if (field->UPB_PRIVATE(submsg_index) != kUpb_NoSub) { if (upb_MiniTableField_CType(field) == kUpb_CType_Message) { int id = upb_MiniTablePrinter_GetIdForRef( - p, *mini_table->UPB_PRIVATE(subs)[field->UPB_PRIVATE(submsg_index)] - .UPB_PRIVATE(submsg)); + p, mini_table->UPB_PRIVATE(subs)[field->UPB_PRIVATE(submsg_index)] + .UPB_PRIVATE(submsg)); upb_MiniTablePrinter_Printf(p, " .submsg = MiniTable#%d\n", id); } else { int id = upb_MiniTablePrinter_GetIdForRef( @@ -292,8 +292,8 @@ static void upb_MiniTablePrinter_PrintMessage(upb_MiniTablePrinter* p, if (field->UPB_PRIVATE(submsg_index) == kUpb_NoSub) continue; if (upb_MiniTableField_CType(field) == kUpb_CType_Message) { upb_MiniTablePrinter_PrintMessage( - p, *mini_table->UPB_PRIVATE(subs)[field->UPB_PRIVATE(submsg_index)] - .UPB_PRIVATE(submsg)); + p, mini_table->UPB_PRIVATE(subs)[field->UPB_PRIVATE(submsg_index)] + .UPB_PRIVATE(submsg)); } else { upb_MiniTablePrinter_PrintEnum( p, mini_table->UPB_PRIVATE(subs)[field->UPB_PRIVATE(submsg_index)] diff --git a/upb/mini_table/internal/message.h b/upb/mini_table/internal/message.h index f1c1641becc8b..4a67aedc29e24 100644 --- a/upb/mini_table/internal/message.h +++ b/upb/mini_table/internal/message.h @@ -160,7 +160,7 @@ UPB_API_INLINE const struct upb_MiniTableField* upb_MiniTable_GetFieldByIndex( UPB_INLINE const struct upb_MiniTable* UPB_PRIVATE( _upb_MiniTable_GetSubTableByIndex)(const struct upb_MiniTable* m, uint32_t i) { - return *m->UPB_PRIVATE(subs)[i].UPB_PRIVATE(submsg); + return m->UPB_PRIVATE(subs)[i].UPB_PRIVATE(submsg); } UPB_API_INLINE const struct upb_MiniTable* upb_MiniTable_SubMessage( diff --git a/upb/mini_table/internal/sub.h b/upb/mini_table/internal/sub.h index 4c21569ba0ad7..2263931a24cca 100644 --- a/upb/mini_table/internal/sub.h +++ b/upb/mini_table/internal/sub.h @@ -11,16 +11,13 @@ // Must be last. #include "upb/port/def.inc" -typedef union { - const struct upb_MiniTable* const* UPB_PRIVATE(submsg); - const struct upb_MiniTableEnum* UPB_PRIVATE(subenum); -} upb_MiniTableSubInternal; - union upb_MiniTableSub { const struct upb_MiniTable* UPB_PRIVATE(submsg); const struct upb_MiniTableEnum* UPB_PRIVATE(subenum); }; +typedef union upb_MiniTableSub upb_MiniTableSubInternal; + #ifdef __cplusplus extern "C" { #endif diff --git a/upb/mini_table/message_benchmark.cc b/upb/mini_table/message_benchmark.cc index 040b1c027b0ca..407d6675f9094 100644 --- a/upb/mini_table/message_benchmark.cc +++ b/upb/mini_table/message_benchmark.cc @@ -17,7 +17,7 @@ static void BM_FindFieldByNumber(benchmark::State& state) { max = 552; } const upb_MiniTable* ptr = - third_0party_0upb_0upb_0mini_0table__TestManyFields_msg_init_ptr; + &third_0party_0upb_0upb_0mini_0table__TestManyFields_msg_init; absl::BitGen bitgen; uint32_t search[1024]; for (auto& s : search) { diff --git a/upb/text/encode_debug_test.cc b/upb/text/encode_debug_test.cc index 740fb3ab3d2e1..20c7def4faa6b 100644 --- a/upb/text/encode_debug_test.cc +++ b/upb/text/encode_debug_test.cc @@ -32,7 +32,7 @@ std::string GetDebugString(const upb_Message* input, } TEST(TextNoReflection, ExtensionsString) { - const upb_MiniTable* mt_main = upb_0test__ModelWithExtensions_msg_init_ptr; + const upb_MiniTable* mt_main = &upb_0test__ModelWithExtensions_msg_init; upb_Arena* arena = upb_Arena_New(); upb_test_ModelExtension1* extension1 = upb_test_ModelExtension1_new(arena); @@ -53,7 +53,7 @@ TEST(TextNoReflection, ExtensionsString) { } TEST(TextNoReflection, ExtensionsInt) { - const upb_MiniTable* mt_main = upb_0test__ModelWithExtensions_msg_init_ptr; + const upb_MiniTable* mt_main = &upb_0test__ModelWithExtensions_msg_init; upb_Arena* arena = upb_Arena_New(); upb_test_ModelExtension2* extension2 = upb_test_ModelExtension2_new(arena); diff --git a/upb/wire/decode.c b/upb/wire/decode.c index 2ee2b1ea774b4..13f630c7ade46 100644 --- a/upb/wire/decode.c +++ b/upb/wire/decode.c @@ -106,7 +106,7 @@ typedef union { // from an array of MiniTableSubs. static const upb_MiniTable* _upb_MiniTableSubs_MessageByField( const upb_MiniTableSubInternal* subs, const upb_MiniTableField* field) { - return *subs[field->UPB_PRIVATE(submsg_index)].UPB_PRIVATE(submsg); + return subs[field->UPB_PRIVATE(submsg_index)].UPB_PRIVATE(submsg); } // Returns the MiniTableEnum corresponding to a given MiniTableField @@ -925,7 +925,7 @@ void _upb_Decoder_CheckUnlinked(upb_Decoder* d, const upb_MiniTable* mt, do { UPB_ASSERT(upb_MiniTableField_CType(oneof) == kUpb_CType_Message); const upb_MiniTable* oneof_sub = - *mt->UPB_PRIVATE(subs)[oneof->UPB_PRIVATE(submsg_index)].UPB_PRIVATE( + mt->UPB_PRIVATE(subs)[oneof->UPB_PRIVATE(submsg_index)].UPB_PRIVATE( submsg); UPB_ASSERT(!oneof_sub); } while (upb_MiniTable_NextOneofField(mt, &oneof)); @@ -1078,7 +1078,6 @@ const char* _upb_Decoder_DecodeKnownField(upb_Decoder* d, const char* ptr, int op, wireval* val) { const upb_MiniTableSubInternal* subs = layout->UPB_PRIVATE(subs); uint8_t mode = field->UPB_PRIVATE(mode); - upb_MiniTableSubInternal ext_sub; if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) { const upb_MiniTableExtension* ext_layout = @@ -1090,14 +1089,7 @@ const char* _upb_Decoder_DecodeKnownField(upb_Decoder* d, const char* ptr, } d->original_msg = msg; msg = &ext->data.UPB_PRIVATE(ext_msg_val); - if (upb_MiniTableField_IsSubMessage(&ext->ext->UPB_PRIVATE(field))) { - ext_sub.UPB_PRIVATE(submsg) = - &ext->ext->UPB_PRIVATE(sub).UPB_PRIVATE(submsg); - } else { - ext_sub.UPB_PRIVATE(subenum) = - ext->ext->UPB_PRIVATE(sub).UPB_PRIVATE(subenum); - } - subs = &ext_sub; + subs = &ext->ext->UPB_PRIVATE(sub); } switch (mode & kUpb_FieldMode_Mask) { diff --git a/upb/wire/encode.c b/upb/wire/encode.c index 1b3b1e2e25f04..e39046dd4805e 100644 --- a/upb/wire/encode.c +++ b/upb/wire/encode.c @@ -49,7 +49,7 @@ // from an array of MiniTableSubs. static const upb_MiniTable* _upb_Encoder_GetSubMiniTable( const upb_MiniTableSubInternal* subs, const upb_MiniTableField* field) { - return *subs[field->UPB_PRIVATE(submsg_index)].UPB_PRIVATE(submsg); + return subs[field->UPB_PRIVATE(submsg_index)].UPB_PRIVATE(submsg); } static uint32_t encode_zz32(int32_t n) { @@ -698,7 +698,7 @@ static char* encode_ext(char* ptr, upb_encstate* e, } else { upb_MiniTableSubInternal sub; if (upb_MiniTableField_IsSubMessage(&ext->UPB_PRIVATE(field))) { - sub.UPB_PRIVATE(submsg) = &ext->UPB_PRIVATE(sub).UPB_PRIVATE(submsg); + sub.UPB_PRIVATE(submsg) = ext->UPB_PRIVATE(sub).UPB_PRIVATE(submsg); } else { sub.UPB_PRIVATE(subenum) = ext->UPB_PRIVATE(sub).UPB_PRIVATE(subenum); } diff --git a/upb_generator/minitable/generator.cc b/upb_generator/minitable/generator.cc index b41a2773e5bdb..6db5a013be15d 100644 --- a/upb_generator/minitable/generator.cc +++ b/upb_generator/minitable/generator.cc @@ -47,10 +47,6 @@ std::string MessageVarName(upb::MessageDefPtr message) { return MiniTableMessageVarName(message.full_name()); } -std::string MessagePtrVarName(upb::MessageDefPtr message) { - return MiniTableMessagePtrVarName(message.full_name()); -} - std::string EnumVarName(upb::EnumDefPtr e) { return MiniTableEnumVarName(e.full_name()); } @@ -87,8 +83,7 @@ void WriteMessageField(upb::FieldDefPtr field, std::string GetSub(upb::FieldDefPtr field, bool is_extension) { if (auto message_def = field.message_type()) { return absl::Substitute("{.UPB_PRIVATE(submsg) = &$0}", - is_extension ? MessageVarName(message_def) - : MessagePtrVarName(message_def)); + MessageVarName(message_def)); } if (auto enum_def = field.enum_subdef()) { @@ -128,10 +123,19 @@ void WriteMessage(upb::MessageDefPtr message, const DefPoolPair& pools, if (options.one_output_per_message && field.IsSubMessage() && IsCrossFile(field) && !upb_MiniTableField_IsMap(f)) { if (seen.insert(pools.GetMiniTable64(field.message_type())).second) { - output( - "__attribute__((weak)) const upb_MiniTable* const $0 =" - " &UPB_PRIVATE(_kUpb_MiniTable_StaticallyTreeShaken);\n", - MessagePtrVarName(field.message_type())); + output(R"cc( + __attribute__((weak)) const upb_MiniTable $0 = { + .UPB_PRIVATE(subs) = NULL, + .UPB_PRIVATE(fields) = NULL, + .UPB_PRIVATE(size) = sizeof(struct upb_Message), + .UPB_PRIVATE(field_count) = 0, + .UPB_PRIVATE(ext) = kUpb_ExtMode_NonExtendable, + .UPB_PRIVATE(dense_below) = 0, + .UPB_PRIVATE(table_mask) = -1, + .UPB_PRIVATE(required_count) = 0, + }; + )cc", + MessageVarName(field.message_type())); } } } @@ -205,8 +209,9 @@ void WriteMessage(upb::MessageDefPtr message, const DefPoolPair& pools, output(" })\n"); } output("};\n\n"); - output("const upb_MiniTable* const $0 = &$1;\n", MessagePtrVarName(message), - MessageVarName(message)); + // output("const upb_MiniTable* const $0 = &$1;\n", + // MessagePtrVarName(message), + // MessageVarName(message)); } void WriteEnum(upb::EnumDefPtr e, Output& output) { @@ -281,8 +286,6 @@ void WriteMiniTableHeader(const DefPoolPair& pools, upb::FileDefPtr file, for (auto message : this_file_messages) { output("extern const upb_MiniTable $0;\n", MessageVarName(message)); - output("extern const upb_MiniTable* const $0;\n", - MessagePtrVarName(message)); } for (auto ext : this_file_exts) { output("extern const upb_MiniTableExtension $0;\n", ExtensionVarName(ext)); @@ -351,10 +354,10 @@ void WriteMiniTableSource(const DefPoolPair& pools, upb::FileDefPtr file, std::vector enums = SortedEnums(file, kClosedEnums); if (options.one_output_per_message) { - for (auto message : messages) { - output("extern const upb_MiniTable* const $0;\n", - MessagePtrVarName(message)); - } + // for (auto message : messages) { + // output("extern const upb_MiniTable* const $0;\n", + // MessagePtrVarName(message)); + // } for (const auto e : enums) { output("extern const upb_MiniTableEnum $0;\n", EnumVarName(e)); } diff --git a/upb_generator/minitable/names.cc b/upb_generator/minitable/names.cc index 976f7723701c8..1dea5972fc110 100644 --- a/upb_generator/minitable/names.cc +++ b/upb_generator/minitable/names.cc @@ -36,10 +36,6 @@ std::string MiniTableMessageVarName(absl::string_view full_name) { return MangleName(full_name) + "_msg_init"; } -std::string MiniTableMessagePtrVarName(absl::string_view full_name) { - return MiniTableMessageVarName(full_name) + "_ptr"; -} - std::string MiniTableEnumVarName(absl::string_view full_name) { return MangleName(full_name) + "_enum_init"; } diff --git a/upb_generator/minitable/names.h b/upb_generator/minitable/names.h index 46173f1ad4a9b..dcab56220c6d5 100644 --- a/upb_generator/minitable/names.h +++ b/upb_generator/minitable/names.h @@ -34,12 +34,6 @@ UPBC_API std::string MiniTableEnumVarName(absl::string_view full_name); UPBC_API std::string MiniTableExtensionVarName(absl::string_view full_name); UPBC_API std::string MiniTableFileVarName(absl::string_view proto_filename); -// This is used for weak linking and tree shaking. Other translation units may -// define weak versions of this symbol that point to a dummy message, to -// gracefully degrade the behavior of the generated code when the message is not -// linked into the current binary. -UPBC_API std::string MiniTableMessagePtrVarName(absl::string_view full_name); - } // namespace generator } // namespace upb