Skip to content

Commit

Permalink
Initial release
Browse files Browse the repository at this point in the history
  • Loading branch information
integeruser committed Jun 18, 2019
0 parents commit ee37c11
Show file tree
Hide file tree
Showing 48 changed files with 16,603 additions and 0 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
**/*.dylib
**/__pycache__/
**/*.pyc
FDPutils/build/
!FDPutils/build/CMakeLists.txt
FDPutils/PyFDP/PyFDP.egg-info/
KDPutils/kdputils.egg-info/
25 changes: 25 additions & 0 deletions DWARFutils/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# DWARFutils
This folder contains some utility scripts for working with the DWARF debugging data format. Currently, the scripts parse the output of the `dwarfdump` macOS utility (included here in the repository to preserve compatibility among macOS versions), but they can be easily modified to work with other similar utilities for dumping Mach-O DWARF information like `objdump`.

## Files

### `./dump-dwarf-dies.py`
```
usage: dump-dwarf-dies.py [-h] [--children] [--filter FILTER] dwarffile symbol
```
This convenient script dumps DIEs at a specific offset or with a specific name, optionally filtering the output only to DIEs (as printed by `dwarfdump`) which contain specific strings (e.g. `structure`, `declaration`, or any other).

### `./parse-dwarf-types-to-c-source.py`
```
usage: parse-dwarf-types-to-c-source.py [-h] dwarffile offset [offset ...]
```
This script extracts (as compilable C sources) the definitions of the types (typedefs, structs, unions, enums) and the variables defined in a DWARF file at the specified offsets. The generated C files can be formatted more nicely with any C code beautifier.

### `./relocate-dwarf-variable.py`
```
usage: relocate-dwarf-variable.py [-h] dwarffile varname newaddr
```
This script can be used to change the address of any variable in a DWARF file to a new address. This can be useful, for example, when a binary with DWARF information is debugged with LLDB, since to locate variables in memory the debugger uses the addresses specified in the DWARF sections.

### `./misc/debug.sh` and `./misc/test.sh`
Scripts for testing `./parse-dwarf-types-to-c-source.py`.
28 changes: 28 additions & 0 deletions DWARFutils/dump-dwarf-dies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env python3
import argparse

import dwarfutils

if __name__ == "__main__":
    # Command line: a DWARF file, a symbol (numeric offset or name), and
    # optional switches for dumping children and filtering by tag.
    cli = argparse.ArgumentParser()
    cli.add_argument("dwarffile", type=argparse.FileType())
    cli.add_argument("symbol")
    cli.add_argument("--children", action="store_true")
    cli.add_argument("--filter")
    args = cli.parse_args()

    dwarf_path = args.dwarffile.name
    try:
        # int(..., 0) accepts decimal, 0x-prefixed hex, etc.; anything that
        # does not parse as a number is looked up as a name instead.
        textdies = dwarfutils.extract_dies_by_offset(
            dwarf_path, int(args.symbol, 0), children=args.children
        )
    except ValueError:
        textdies = dwarfutils.extract_dies_by_name(
            dwarf_path, args.symbol, children=args.children
        )

    for textdie in textdies:
        # With --filter, only DIEs whose tag contains the filter string are
        # printed; without it every DIE is printed.
        if args.filter and args.filter not in dwarfutils.extract_tag(textdie):
            continue
        print(textdie)
        print()
Binary file added DWARFutils/dwarfdump
Binary file not shown.
141 changes: 141 additions & 0 deletions DWARFutils/dwarfutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/usr/bin/env python3
import os
import pickle
import re
import subprocess
import sys

# Path of the `dwarfdump` executable used by every helper below. It is looked
# up in the directory of the running script (the resolved real path of
# sys.argv[0]), not in the directory of this module.
dwarfdump = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), "dwarfdump")


def extract_dies_by_name(dwarffilepath, name, children=False, parents=False):
    """Return the textual DIEs that dwarfdump finds for the given name.

    `children` and `parents` are forwarded to `extract_dies`, which turns
    them into the corresponding dwarfdump switches.
    """
    name_selector = "--name={}".format(name)
    return extract_dies([name_selector, dwarffilepath], children, parents)


def extract_dies_by_offset(dwarffilepath, offset, children=False, parents=False):
    """Return the textual DIEs that dwarfdump finds at the given offset.

    `children` and `parents` are forwarded to `extract_dies`, which turns
    them into the corresponding dwarfdump switches.
    """
    offset_selector = "--debug-info={}".format(offset)
    return extract_dies([offset_selector, dwarffilepath], children, parents)


def extract_dies(dwarfdumpargs, children, parents):
    """Run dwarfdump in verbose mode and return its output split into DIEs.

    Args:
        dwarfdumpargs: list of extra command-line arguments (selector and
            file path) appended after ``--verbose``.
        children: if true, ``--show-children`` is added to the command.
        parents: if true, ``--show-parents`` is added to the command.

    Returns:
        A list of strings, one per blank-line-separated chunk of the
        command's output, excluding the first chunk.
    """
    popenargs = [dwarfdump, "--verbose"] + dwarfdumpargs
    if children:
        popenargs.append("--show-children")
    if parents:
        popenargs.append("--show-parents")
    completed = subprocess.run(popenargs, stdout=subprocess.PIPE)
    # Decode leniently: a strict ASCII decode raises UnicodeDecodeError as
    # soon as the output contains a single non-ASCII byte (e.g. in a name or
    # a file path). Pure-ASCII output decodes exactly as before.
    stdout = completed.stdout.decode("utf-8", errors="replace")
    # Chunks are separated by blank lines; the first chunk is dropped
    # (presumably the dwarfdump header rather than a DIE -- verify).
    return stdout.strip().split("\n\n")[1:]


def extract_uuid(dwarffilepath):
    """Return the UUID that ``dwarfdump --uuid`` reports for a DWARF file.

    The UUID is taken to be the second whitespace-separated token of the
    command's output.

    Raises:
        IndexError: if the output contains fewer than two tokens.
    """
    completed = subprocess.run(
        [dwarfdump, "--uuid", dwarffilepath], stdout=subprocess.PIPE
    )
    # Decode leniently: a strict ASCII decode raises UnicodeDecodeError if
    # the output contains any non-ASCII byte (presumably possible when the
    # tool echoes a non-ASCII file path -- verify). Pure-ASCII output decodes
    # exactly as before.
    stdout = completed.stdout.decode("utf-8", errors="replace")
    return stdout.strip().split()[1]


def _compute_cache_filepath(dwarffilepath):
    """Return the path of the pickle cache for a DWARF file.

    The file name embeds the UUID reported for the DWARF file, so each
    distinct UUID gets its own cache file under /tmp.
    """
    uuid = extract_uuid(dwarffilepath)
    return "/tmp/cache-{}.pickle".format(uuid)


def load_cache(dwarffilepath):
    """Load the cached DIEs for a DWARF file.

    Returns:
        The object previously stored with `save_cache`, or an empty dict
        when no usable cache exists (missing, truncated or corrupt file).
    """
    try:
        # The path is computed inside the `try` on purpose, so that a
        # FileNotFoundError raised while computing it is handled as before.
        with open(_compute_cache_filepath(dwarffilepath), "rb") as f:
            # NOTE(security): unpickling executes arbitrary code from the
            # file, and the cache lives at a predictable path under /tmp.
            # Only use this where other local users are trusted.
            return pickle.load(f)
    except FileNotFoundError:
        return {}
    except (EOFError, pickle.UnpicklingError):
        # A truncated or corrupt cache (e.g. an interrupted save) is treated
        # as a cache miss instead of crashing every subsequent run.
        return {}


def save_cache(dwarffilepath, DIEs):
    """Pickle `DIEs` into the cache file associated with the DWARF file."""
    cache_path = _compute_cache_filepath(dwarffilepath)
    with open(cache_path, "wb") as cache_file:
        pickle.dump(DIEs, cache_file)


# Compiled patterns used by the extract_* helpers below to pull individual
# fields out of a textual DIE. They presumably mirror the output format of
# the bundled dwarfdump in --verbose mode -- verify before changing any of
# them. Hexadecimal values are only matched in lowercase.

# Hexadecimal attribute values written as `AT_xxx( 0x... )`.
re_bit_offset = re.compile(r"AT_data_bit_offset\( (0x[0-9a-f]+) \)")
re_bit_size = re.compile(r"AT_bit_size\( (0x[0-9a-f]+) \)")
re_byte_size = re.compile(r"AT_byte_size\( (0x[0-9a-f]+) \)")
re_const_value = re.compile(r"AT_const_value\( (0x[0-9a-f]+) \)")
re_count = re.compile(r"AT_count\( (0x[0-9a-f]+) \)")
# Double-quoted string inside `AT_decl_file( ... )`.
re_decl_file = re.compile(r'AT_decl_file\( .*?"(.+?)" \)')
# Decimal number in nested parentheses inside `AT_decl_line( ... )`.
re_decl_line = re.compile(r"AT_decl_line\( .*?\( ([0-9]+) \) \)")
# Hexadecimal value right before the closing ` )` of
# `AT_data_member_location( ... )`; at least one character must precede it.
re_location = re.compile(
r"AT_data_member_location\( (?:.+?plus-uconst )?.+(0x[0-9a-f]+?) \)"
)
# Double-quoted string inside `AT_name( ... )`.
re_name = re.compile(r'AT_name\( .*?"(.+?)" \)')
# Hexadecimal offset followed by a colon at the very start of the DIE text.
re_offset = re.compile(r"^(0x[0-9a-f]+)\:")
# First `TAG_...` word, up to (not including) the following space.
re_tag = re.compile(r"(TAG\_.+?) ")
# `AT_type( ... {0x...} ( description ) )`: captures the offset in braces and
# the parenthesised description with surrounding spaces stripped.
re_ttype = re.compile(r"AT_type\( .*?\{(0x[0-9a-f]+)\} \( +(.+?) +\) \)")


def extract_bit_size(textdie):
    """Return the AT_bit_size value of a textual DIE as an int.

    The value is parsed as hexadecimal. Raises AttributeError when the DIE
    text contains no matching attribute.
    """
    match = re_bit_size.search(textdie)
    return int(match.group(1), 16)


def extract_byte_size(textdie):
    """Return the AT_byte_size value of a textual DIE as an int.

    The value is parsed as hexadecimal. Raises AttributeError when the DIE
    text contains no matching attribute.
    """
    match = re_byte_size.search(textdie)
    return int(match.group(1), 16)


def extract_const_value(textdie):
    """Return the AT_const_value value of a textual DIE as an int.

    The value is parsed as hexadecimal. Raises AttributeError when the DIE
    text contains no matching attribute.
    """
    match = re_const_value.search(textdie)
    return int(match.group(1), 16)


def extract_count(textdie):
    """Return the AT_count value of a textual DIE as an int.

    The value is parsed as hexadecimal. Raises AttributeError when the DIE
    text contains no matching attribute.
    """
    match = re_count.search(textdie)
    return int(match.group(1), 16)


def extract_decl_file(textdie):
    """Return the quoted file string of the AT_decl_file attribute.

    Raises AttributeError when the DIE text contains no matching attribute.
    """
    return re_decl_file.search(textdie).group(1)


def extract_decl_line(textdie):
    """Return the AT_decl_line value of a textual DIE as an int.

    The value is parsed as decimal. Raises AttributeError when the DIE text
    contains no matching attribute.
    """
    match = re_decl_line.search(textdie)
    return int(match.group(1))


def extract_location(textdie):
    """Return the AT_data_member_location value of a textual DIE as an int.

    The captured hexadecimal value is converted to an int. Raises
    AttributeError when the DIE text contains no matching attribute.
    """
    match = re_location.search(textdie)
    return int(match.group(1), 16)


def extract_bit_location(textdie):
    """Return the AT_data_bit_offset of a textual DIE as (byte, bit).

    The hexadecimal bit offset is split into a whole-byte index and the
    remaining bit index within that byte. Raises AttributeError when the DIE
    text contains no matching attribute.
    """
    match = re_bit_offset.search(textdie)
    total_bits = int(match.group(1), 16)
    byte_index, bit_index = divmod(total_bits, 0x8)
    return (byte_index, bit_index)


def extract_name(textdie):
    """Return the quoted string of the AT_name attribute of a textual DIE.

    Raises AttributeError when the DIE text contains no matching attribute.
    """
    return re_name.search(textdie).group(1)


def extract_offset(textdie):
    """Return the offset that prefixes a textual DIE as an int.

    The offset is the hexadecimal number followed by a colon at the very
    start of the text. Raises AttributeError when the text does not start
    that way.
    """
    match = re_offset.search(textdie)
    return int(match.group(1), 16)


def extract_tag(textdie):
    """Return the first ``TAG_...`` word found in a textual DIE.

    Raises AttributeError when the DIE text contains no tag.
    """
    return re_tag.search(textdie).group(1)


def extract_type(textdie):
    """Return the AT_type attribute of a textual DIE as (offset, name).

    The offset is the hexadecimal number in braces converted to an int; the
    name is the parenthesised description that follows it. Raises
    AttributeError when the DIE text contains no matching attribute.
    """
    match = re_ttype.search(textdie)
    raw_offset = match.group(1)
    description = match.group(2)
    return (int(raw_offset, 16), description)
20 changes: 20 additions & 0 deletions DWARFutils/misc/debug.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Generate C sources for the given DWARF offsets with
# parse-dwarf-types-to-c-source.py, then compile them with clang.
# Usage: debug.sh OFFSETS
set -e
# Shadows the `dirname` command: ignores its arguments and prints the
# directory of this script's resolved real path (computed with Python).
dirname () { python -c "import os; print(os.path.dirname(os.path.realpath('$0')))"; }
# Work from the script's own directory so the relative paths below resolve.
cd "$(dirname "$0")"

DWARFUTILS_DWARFFILE="../../data/10-14-2-18C54/DWARF/kernel"

# Abort with a usage message when no offset is given.
: ${1?"Usage: $0 OFFSETS"}
OFFSETS="$*"

echo "DWARFUTILS_DWARFFILE=\"$DWARFUTILS_DWARFFILE\""
echo "OFFSETS=\"$OFFSETS\""

# $OFFSETS is left unquoted so that each offset becomes a separate argument.
# The Python one-liner extracts the path printed after "Output directory: "
# (dropping one character on each side of it) from the parser's output.
DWARFUTILS_SRCDIRECTORY=$(../parse-dwarf-types-to-c-source.py "$DWARFUTILS_DWARFFILE" $OFFSETS \
| python -c 'import re, sys; print(re.search("Output directory: .(.+?).$", sys.stdin.read()).group(1))' )
echo $DWARFUTILS_SRCDIRECTORY
cd "$DWARFUTILS_SRCDIRECTORY" >/dev/null
# Compile every generated .c file as C into a shared library with debug info.
clang -g -x c -shared *.c
# Reformat the generated sources in place, only when clang-format is installed.
command -v clang-format >/dev/null && clang-format -i -style="{AlignConsecutiveDeclarations: true}" *.c
# Return to the previous directory without printing it.
cd - 1>/dev/null
12 changes: 12 additions & 0 deletions DWARFutils/misc/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Run debug.sh once for each offset in a fixed list; because of `set -e` the
# run stops at the first offset for which debug.sh fails.
set -e
# Shadows the `dirname` command: ignores its arguments and prints the
# directory of this script's resolved real path (computed with Python).
dirname () { python -c "import os; print(os.path.dirname(os.path.realpath('$0')))"; }
# Work from the script's own directory so that ./debug.sh resolves.
cd "$(dirname "$0")"

# Offsets to test, each passed to debug.sh on its own.
OFFSETS=( 0x00028090 0x00026D0E 0x000F0124 0x0002E494 0x0002E00B 0x00027DF6 0x0000E7E4 0x001EB43F 0x00DC1EA5 0x000272F1)
for OFFSET in "${OFFSETS[@]}"
do
echo "Testing $OFFSET"
./debug.sh "$OFFSET"
echo
done
Loading

0 comments on commit ee37c11

Please sign in to comment.