From 94e2d0a85eb871831818c10312eef1566de16e02 Mon Sep 17 00:00:00 2001 From: Peter Edwards Date: Sat, 17 Aug 2024 00:13:39 +0100 Subject: [PATCH 1/3] Fix logic for scanning DW_AT_stmt_list attributes. Do this for all CUs, not just the first one. A previous incarnation of the DWARF parser didn't seem to separate out the translation units, and would implicitly iterate all. The change to making compilation units more visible seems to have broken this. --- src/dwarf.cc | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/dwarf.cc b/src/dwarf.cc index 291265b..b421d48 100644 --- a/src/dwarf.cc +++ b/src/dwarf.cc @@ -724,25 +724,23 @@ void ReadDWARFInlines(const dwarf::File& file, RangeSink* sink, dwarf::InfoReader reader(file); dwarf::CUIter iter = reader.GetCUIter(dwarf::InfoReader::Section::kDebugInfo); dwarf::CU cu; - dwarf::DIEReader die_reader = cu.GetDIEReader(); dwarf::LineInfoReader line_info_reader(file); - if (!iter.NextCU(reader, &cu)) { - THROW("debug info is present, but empty"); - } - - while (auto abbrev = die_reader.ReadCode(cu)) { - absl::optional stmt_list; - die_reader.ReadAttributes( - cu, abbrev, [&stmt_list, &cu](uint16_t tag, dwarf::AttrValue val) { - if (tag == DW_AT_stmt_list) { - stmt_list = val.ToUint(cu); - } - }); - - if (stmt_list) { - line_info_reader.SeekToOffset(*stmt_list, cu.unit_sizes().address_size()); - ReadDWARFStmtList(include_line, &line_info_reader, sink); + while (iter.NextCU(reader, &cu)) { + dwarf::DIEReader die_reader = cu.GetDIEReader(); + while (auto abbrev = die_reader.ReadCode(cu)) { + absl::optional stmt_list; + die_reader.ReadAttributes(cu, abbrev, + [&](uint16_t tag, dwarf::AttrValue val) { + if (tag == DW_AT_stmt_list) { + stmt_list = val.ToUint(cu); + }}); + + if (stmt_list) { + line_info_reader.SeekToOffset(*stmt_list, cu.unit_sizes().address_size()); + ReadDWARFStmtList(include_line, &line_info_reader, sink); + } + break; // only root-level unit entries have 
DW_AT_stmt_list attributes. } } } From 0f727cad7cefb2d75a998d6cfc2bb150981d77d3 Mon Sep 17 00:00:00 2001 From: Peter Edwards Date: Sat, 17 Aug 2024 01:33:51 +0100 Subject: [PATCH 2/3] Fix parsing of DW_FORM_ref_udata This form is a LEB128-encoded value - decode it as such. Later use of attributes of this form may need some work, but without parsing it correctly, we break parsing of further attributes completely. --- src/dwarf/attr.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dwarf/attr.cc b/src/dwarf/attr.cc index 358288f..0813c40 100644 --- a/src/dwarf/attr.cc +++ b/src/dwarf/attr.cc @@ -144,6 +144,7 @@ AttrValue AttrValue::ParseAttr(const CU& cu, uint16_t form, string_view* data) { case DW_FORM_ref8: return AttrValue(form, ReadFixed(data)); case DW_FORM_ref_udata: + return AttrValue(form, ReadLEB128(data)); case DW_FORM_strx1: return AttrValue::UnresolvedString(form, ReadFixed(data)); case DW_FORM_strx2: From 5497473072068cfe53a311cbe472fa401a3ed90c Mon Sep 17 00:00:00 2001 From: Peter Edwards Date: Sat, 17 Aug 2024 01:35:34 +0100 Subject: [PATCH 3/3] Implement debug_line parsing for DWARFv5 There are some differences between the DWARF4 and DWARF5 .debug_line table formats. Implement the DWARF5 changes. Mostly, the changes are to make the parser aware of the more sophisticated format of the directory and file tables, with some minor conditional changes in the header itself. 
--- src/dwarf/line_info.cc | 140 +++++++++++++++++++++++++++++++++-------- 1 file changed, 115 insertions(+), 25 deletions(-) diff --git a/src/dwarf/line_info.cc b/src/dwarf/line_info.cc index c2dfc18..e13b04b 100644 --- a/src/dwarf/line_info.cc +++ b/src/dwarf/line_info.cc @@ -80,12 +80,19 @@ void LineInfoReader::SeekToOffset(uint64_t offset, uint8_t address_size) { sizes_.SetAddressSize(address_size); data = sizes_.ReadInitialLength(&data); sizes_.ReadDWARFVersion(&data); + if (sizes_.dwarf_version() >= 5) { + auto encoded_addr_size = ReadFixed(&data); + auto encoded_selector_size = ReadFixed(&data); + assert(encoded_addr_size == address_size); + (void)encoded_selector_size; + } + uint64_t header_length = sizes_.ReadDWARFOffset(&data); string_view program = data; SkipBytes(header_length, &program); params_.minimum_instruction_length = ReadFixed(&data); - if (sizes_.dwarf_version() == 4) { + if (sizes_.dwarf_version() >= 4) { params_.maximum_operations_per_instruction = ReadFixed(&data); if (params_.maximum_operations_per_instruction == 0) { @@ -109,37 +116,120 @@ void LineInfoReader::SeekToOffset(uint64_t offset, uint8_t address_size) { // Read include_directories. include_directories_.clear(); + filenames_.clear(); + expanded_filenames_.clear(); - // Implicit current directory entry. - include_directories_.push_back(string_view()); + if (sizes_.dwarf_version() <= 4) { + // Implicit current directory entry. + include_directories_.push_back(string_view()); - while (true) { - string_view dir = ReadNullTerminated(&data); - if (dir.empty()) { - break; + while (true) { + string_view dir = ReadNullTerminated(&data); + if (dir.empty()) { + break; + } + include_directories_.push_back(dir); } - include_directories_.push_back(dir); - } - // Read file_names. - filenames_.clear(); - expanded_filenames_.clear(); + // Read file_names. - // Filename 0 is unused. 
- filenames_.push_back(FileName()); - while (true) { - FileName file_name; - file_name.name = ReadNullTerminated(&data); - if (file_name.name.empty()) { - break; + // Filename 0 is unused. + filenames_.push_back(FileName()); + while (true) { + FileName file_name; + file_name.name = ReadNullTerminated(&data); + if (file_name.name.empty()) { + break; + } + file_name.directory_index = ReadLEB128(&data); + file_name.modified_time = ReadLEB128(&data); + file_name.file_size = ReadLEB128(&data); + if (file_name.directory_index >= include_directories_.size()) { + THROW("directory index out of range"); + } + filenames_.push_back(file_name); + } + } else { + // Dwarf V5 and beyond. + // + auto readPath = [&] (DwarfForm form) { + switch (form) { + case DW_FORM_string: + return ReadNullTerminated(&data); + case DW_FORM_line_strp: { + auto offset = sizes_.ReadDWARFOffset(&data); + return ReadDebugStrEntry(file_.debug_line_str, offset); + } + default: + THROW("directory index out of range"); + } + }; + + auto readEntryFormats = [&]() { + std::vector> entryFormats; + auto formatCount = ReadFixed(&data); + for (uint8_t i = 0; i < formatCount; ++i) { + auto type = static_cast( + ReadLEB128(&data)); + auto form = static_cast(ReadLEB128(&data)); + entryFormats.emplace_back(type, form); + } + return entryFormats; + }; + + auto entryFormats = readEntryFormats(); + + auto directoryCount = ReadLEB128(&data); + while (directoryCount--) { + std::string_view path = ""; + for (auto [ type, form ] : entryFormats) { + switch (type) { + case DW_LNCT_path: + path = readPath(form); + break; + default: + THROW("unhandled directory entry format"); + } + } + include_directories_.push_back(path); } - file_name.directory_index = ReadLEB128(&data); - file_name.modified_time = ReadLEB128(&data); - file_name.file_size = ReadLEB128(&data); - if (file_name.directory_index >= include_directories_.size()) { - THROW("directory index out of range"); + auto fileFormats = readEntryFormats(); + auto fileCount = 
ReadLEB128(&data); + while (fileCount--) { + FileName file_name; + auto &idx = file_name.directory_index; + idx = 0; + for (auto &[ type, form ] : fileFormats) { + switch (type) { + case DW_LNCT_path: + file_name.name = readPath(form); + break; + case DW_LNCT_directory_index: { + switch (form) { + case DW_FORM_udata: + idx = ReadLEB128(&data); + break; + case DW_FORM_data1: + idx = ReadFixed(&data); + break; + case DW_FORM_data2: + idx = ReadFixed(&data); + break; + case DW_FORM_data4: + idx = ReadFixed(&data); + break; + default: + THROW("unhandled form for directory index"); + } + break; + } + default: { + THROW("unhandled type for file format"); + } + } + } + filenames_.push_back(file_name); } - filenames_.push_back(file_name); } info_ = LineInfo(params_.default_is_stmt);