diff --git a/Documentation/debugging/index.rst b/Documentation/debugging/index.rst index 50bc1c6e5ade3..0f65ebe3a665e 100644 --- a/Documentation/debugging/index.rst +++ b/Documentation/debugging/index.rst @@ -16,6 +16,7 @@ This page contains a collection of guides on how to debug problems with NuttX. coresight.rst stackcheck.rst stackrecord.rst + stackusage.rst disabling_stackdumpdebug.rst debuggingflash_nuttxonarm.rst cortexmhardfaults.rst diff --git a/Documentation/debugging/stackusage.rst b/Documentation/debugging/stackusage.rst new file mode 100644 index 0000000000000..9a024aadc4abb --- /dev/null +++ b/Documentation/debugging/stackusage.rst @@ -0,0 +1,159 @@ +=========================== +Static Stack Usage Analysis +=========================== + +Overview +======== + +``tools/stackusage.py`` performs static stack usage analysis by reading +DWARF ``.debug_frame`` data from an ELF file. It extracts per-function +stack sizes from CFA (Canonical Frame Address) offsets and optionally +builds a call graph via disassembly to compute worst-case total stack +depth. + +- **Self** – stack bytes used by the function itself (max CFA offset). +- **Total** – worst-case stack depth through the deepest call chain + (self + callees). A marker prefix flags uncertain values. + +Dependencies +============ + +The tool invokes standard toolchain binaries: + +- **readelf** – symbol table and DWARF frame info +- **objdump** – disassembly for call graph analysis +- **addr2line** – source file and line resolution + +Both GNU and LLVM toolchains are supported. Use ``-p`` to set the +toolchain prefix (e.g. ``-p arm-none-eabi-`` for GCC, +``-p llvm-`` for LLVM). + +The ELF must contain DWARF debug info (``-g`` or ``-gdwarf``). +No special Kconfig option is needed. 
+ +Usage +===== + +Analyze a native ELF (no prefix needed):: + + python3 tools/stackusage.py nuttx + +Cross-compiled ELF with GCC toolchain:: + + python3 tools/stackusage.py -p arm-none-eabi- nuttx + +Cross-compiled ELF with LLVM toolchain:: + + python3 tools/stackusage.py -p llvm- nuttx + +Show top 20 functions:: + + python3 tools/stackusage.py -p arm-none-eabi- -n 20 nuttx + +Estimate recursion depth of 10:: + + python3 tools/stackusage.py -p arm-none-eabi- -r 10 nuttx + +Command Line Options +==================== + +.. code-block:: text + + positional arguments: + elf path to ELF file with DWARF debug info + + options: + -p, --prefix PREFIX toolchain prefix (e.g. arm-none-eabi- or llvm-) + -n, --rank N show top N functions (default: 0 = all) + -r, --recursion-depth N + assumed recursion depth (default: 0) + +Text Output +=========== + +The default output is an aligned table. Each function's deepest +backtrace is shown with one frame per row. The ``Self`` column shows +each frame's own stack cost. The ``Backtrace`` column shows the +function name followed by its code size in parentheses (when available +from the symbol table), e.g. ``main(128)``. The entry point of each +call chain is suffixed with ``~``. + +Example (``nucleo-f429zi:trace``, ``-n 3``):: + + Total Self Backtrace File:Line + ----- ---- --------------------------- ------------------------------------------- + @2344 56 telnetd_main(236)~ apps/system/telnetd/telnetd.c:42 + ^24 nsh_telnetmain(128) apps/nshlib/nsh_telnetd.c:48 + ^48 nsh_session(400) apps/nshlib/nsh_session.c:73 + ... + @224 nsh_parse_cmdparm(1024) apps/nshlib/nsh_parse.c:2362 + @96 nsh_execute(512) apps/nshlib/nsh_parse.c:510 + ^56 nsh_builtin(320) apps/nshlib/nsh_builtin.c:76 + 88 exec_builtin(256) apps/builtin/exec_builtin.c:61 + ... + ^64 file_vopen(192) nuttx/fs/vfs/fs_open.c:124 + ... 
+ @2328 16 sh_main(64)~ apps/system/nsh/sh_main.c:40 + 16 nsh_system_ctty(96) apps/nshlib/nsh_system.c:105 + ^32 nsh_system_(160) apps/nshlib/nsh_system.c:41 + ^48 nsh_session(400) apps/nshlib/nsh_session.c:73 + ... + @2312 24 nsh_main(80)~ apps/system/nsh/nsh_main.c:54 + ^24 nsh_consolemain(48) apps/nshlib/nsh_consolemain.c:65 + ^48 nsh_session(400) apps/nshlib/nsh_session.c:73 + ... + +Uncertainty markers on both Total and Self columns indicate the most +significant reason: + +======= ========================================== +Marker Meaning +======= ========================================== +``~`` entry point of the call chain (suffix) +``?`` no DWARF data (self counted as zero) +``*`` dynamic stack (alloca or VLA) +``@`` recursion detected +``^`` indirect call (function pointer) +======= ========================================== + +Uncertainty Reasons +=================== + +====================================== ========================================= +Reason Description +====================================== ========================================= +recursion: A->B->...->A Recursive cycle detected. Use ``-r N`` + to estimate. +indirect call (function pointer) Callee unknown at compile time. +no DWARF data No ``.debug_frame`` entry; self counted + as zero. +dynamic stack (alloca/VLA) Function uses ``alloca()`` or + variable-length arrays; self is a + minimum. +====================================== ========================================= + +Uncertainty propagates upward: if any callee in the deepest path is +uncertain the caller is also marked uncertain. + +Recursion Depth Estimation +========================== + +By default (``-r 0``) recursive back-edges contribute zero stack. +With ``-r N`` (N > 0) the tool estimates:: + + cycle_body_cost × N + +For example ``A(64) -> B(32) -> A``:: + + cycle_body_cost = 64 + 32 = 96 + -r 10 → 96 × 10 = 960 bytes + +The result is still marked uncertain. 
+ +Supported Architectures +======================= + +Any architecture supported by the toolchain's ``readelf``, +``objdump``, and ``addr2line`` is supported. This includes +ARM, AArch64, x86, x86_64, MIPS, RISC-V, Xtensa, PowerPC, SPARC, +TriCore, SuperH, and others. diff --git a/Kconfig b/Kconfig index b5774f42ba0db..def0bcf71a06e 100644 --- a/Kconfig +++ b/Kconfig @@ -2631,7 +2631,7 @@ config STACK_USAGE ---help--- Generate an extra file that specifies the maximum amount of stack used, on a per-function basis. Please use https://github.com/HBehrens/puncover - or tools/showstack.sh to generate the more useful report. + to generate the more useful report. config STACK_USAGE_WARNING int "Detect use of large stack variables" diff --git a/tools/showstack.sh b/tools/showstack.sh deleted file mode 100755 index ad1f5e22700e2..0000000000000 --- a/tools/showstack.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env bash -############################################################################ -# tools/showstack.sh -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. The -# ASF licenses this file to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance with the -# License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-# -############################################################################ - -function analyse() -{ - sufiles=$(find $1 -name "*.su") - if [[ $sufiles != "" ]] - then - echo $sufiles | xargs -n1 cat | awk -F'\t' '{print $2", "$1}' | sort -n -r | head -n $2 - fi -} - -if [[ $# == 0 ]]; then - echo "usage: $0 ..." - exit -fi - -rank=20 - -if [[ "$1" =~ ^[0-9]+$ ]]; then - rank=$1 - shift -fi - -for dir in $@; do - analyse $dir $rank -done diff --git a/tools/stackusage.py b/tools/stackusage.py new file mode 100755 index 0000000000000..d63f82ea19092 --- /dev/null +++ b/tools/stackusage.py @@ -0,0 +1,678 @@ +#!/usr/bin/env python3 +# tools/stackusage.py +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. The +# ASF licenses this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance with the +# License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + +"""Static stack usage analyzer using DWARF debug info and disassembly. + +Extracts per-function stack frame sizes from DWARF .debug_frame CFA offsets +(via readelf -wF) and builds call graphs from objdump -d disassembly. +Reports worst-case total stack depth through the call chain, flagging +uncertain cases such as recursion and indirect calls (function pointers). + +Works with both GNU and LLVM toolchains (readelf/objdump/addr2line). +No Python package dependencies required. 
import argparse
import os
import re
import subprocess
import sys

# Call mnemonics across all supported architectures.
# Direct vs indirect is determined by whether objdump annotates
# the target with a "<symbol>" name.
CALL_MNEMONICS = {
    "bl",
    "blx",
    "blr",  # ARM / AArch64
    "call",
    "callq",  # x86 / x86_64 / SPARC
    "jal",
    "jalr",  # MIPS / RISC-V
    "bctrl",  # PowerPC
    "jmpl",  # SPARC
    "bsr",
    "jsr",  # SuperH
    "call0",
    "call4",
    "call8",
    "call12",  # Xtensa
    "callx0",
    "callx4",
    "callx8",
    "callx12",  # Xtensa indirect
    "calli",  # TriCore
}

# Function names that indicate dynamic stack allocation.
ALLOCA_NAMES = frozenset(
    {
        "alloca",
        "__alloca",
        "__builtin_alloca",
        "__builtin_alloca_with_align",
    }
)

# Uncertainty reason strings shared by the analysis and the report.
_REASON_NO_DWARF = "no DWARF data"
_REASON_DYNAMIC = "dynamic stack (alloca/VLA)"
_REASON_INDIRECT = "indirect call (function pointer)"

# Patterns used inside per-line parsing loops, compiled once.
_FDE_PC_RE = re.compile(r"pc=([0-9a-f]+)\.\.", re.IGNORECASE)
_CFA_OFFSET_RE = re.compile(r"[a-zA-Z]\w*\+(\d+)")
_FUNC_LABEL_RE = re.compile(r"^[0-9a-f]+ <(.+)>:\s*$")
_CALL_TARGET_RE = re.compile(r"<([^>+]+)>")
_DISCRIMINATOR_RE = re.compile(r"\s+\(discriminator \d+\)\s*$")


def _parse_symbol_size(text):
    """Parse the Size column of a ``readelf -s`` row.

    Sizes are normally decimal, but GNU readelf switches to a
    ``0x``-prefixed hexadecimal form for large values, so both spellings
    are accepted.  Raises ValueError for anything else.
    """

    if text.lower().startswith("0x"):
        return int(text, 16)
    return int(text)


def build_symbol_map(prefix, elf):
    """Build address<->name maps by parsing ``readelf -hsW`` output.

    The ELF header (``-h``) supplies the machine name; the symbol table
    (``-s``) supplies every defined FUNC symbol.  When several symbols
    share one address the longest name wins.  On 32-bit ARM the Thumb
    bit (bit 0) is cleared from every address.

    Returns (addr_to_name, name_to_addr, func_sizes, is_thumb, machine).
    All maps are empty when readelf exits with a non-zero status.
    """

    out = subprocess.run(
        [prefix + "readelf", "-hsW", elf],
        capture_output=True,
        text=True,
    )
    if out.returncode != 0:
        return {}, {}, {}, False, ""

    addr_to_name = {}
    name_to_addr = {}
    func_sizes = {}
    is_thumb = False
    machine = ""

    for line in out.stdout.splitlines():
        # Detect machine type from readelf header
        if "Machine:" in line:
            machine = line.split(":", 1)[1].strip()
            if "ARM" in machine and "AArch64" not in machine:
                is_thumb = True
            continue

        # Symbol table line format (readelf -sW):
        #   Num:    Value  Size Type    Bind   Vis      Ndx Name
        #     1: 08000100    64 FUNC    GLOBAL DEFAULT    1 main
        parts = line.split()
        if len(parts) < 8 or parts[3] != "FUNC" or parts[6] == "UND":
            continue

        try:
            addr = int(parts[1], 16)
            size = _parse_symbol_size(parts[2])
        except ValueError:
            continue

        name = parts[7]
        if name.startswith("$"):
            # Mapping symbols ($a, $t, $d, ...) are not functions.
            continue

        if is_thumb:
            addr &= ~1

        known = addr_to_name.get(addr)
        if known is not None and len(known) >= len(name):
            continue

        addr_to_name[addr] = name
        name_to_addr[name] = addr
        func_sizes[addr] = size

    return addr_to_name, name_to_addr, func_sizes, is_thumb, machine


def parse_stack_sizes(prefix, elf, addr_to_name, name_to_addr, func_sizes):
    """Parse ``readelf -wF`` output for per-function CFA stack sizes.

    Each FDE is identified by its ``pc=START..END`` header.  The largest
    ``reg+N`` offset seen in the rows that follow is taken as the
    function's own stack usage.  Only the first FDE seen for a given
    function name is kept.

    Returns funcs dict: name -> {file, line, self, code_size, qualifier,
    reasons}.
    """

    out = subprocess.run(
        [prefix + "readelf", "-wF", elf],
        capture_output=True,
        text=True,
    )

    funcs = {}

    def flush(addr, self_size):
        """Record the FDE that just ended, if it maps to a new function."""
        name = addr_to_name.get(addr)
        if not name or name in funcs:
            return
        code_size = func_sizes.get(addr, 0)
        if not code_size:
            # Fall back to the address the symbol table recorded for
            # this name.
            sym_addr = name_to_addr.get(name)
            if sym_addr is not None:
                code_size = func_sizes.get(sym_addr, 0)
        funcs[name] = {
            "file": "?",
            "line": "?",
            "self": self_size,
            "code_size": code_size,
            "qualifier": "static",
            "reasons": [],
        }

    current_addr = None
    max_offset = 0

    for line in out.stdout.splitlines():
        # FDE header: look for pc=ADDR..ADDR
        header = _FDE_PC_RE.search(line)
        if header:
            if current_addr is not None:
                flush(current_addr, max_offset)
            current_addr = int(header.group(1), 16)
            # Also try with Thumb bit cleared
            if current_addr not in addr_to_name:
                alt = current_addr & ~1
                if alt in addr_to_name:
                    current_addr = alt
            max_offset = 0
            continue

        if current_addr is None:
            continue

        # CFA row: extract offset from e.g. "sp+16" or "r13+24"
        row = _CFA_OFFSET_RE.search(line)
        if row:
            max_offset = max(max_offset, int(row.group(1)))

    # Flush last FDE
    if current_addr is not None:
        flush(current_addr, max_offset)

    return funcs


def resolve_sources(prefix, elf, funcs, name_to_addr):
    """Batch-resolve source file:line via ``addr2line -fe``.

    Updates the ``file`` and ``line`` fields of *funcs* in place.
    Functions whose address is unknown, or for which addr2line reports
    ``??``, keep their ``"?"`` placeholders.
    """

    addrs = []
    names = []
    for name in funcs:
        addr = name_to_addr.get(name)
        if addr is not None:
            addrs.append("%x" % addr)
            names.append(name)

    if not addrs:
        return

    out = subprocess.run(
        [prefix + "addr2line", "-fe", elf],
        capture_output=True,
        text=True,
        input="\n".join(addrs) + "\n",
    )

    # addr2line outputs two lines per address: function_name, file:line.
    # zip() stops at the shorter sequence, so truncated output is safe.
    locations = out.stdout.splitlines()[1::2]
    for name, loc in zip(names, locations):
        # Drop the " (discriminator N)" suffix that addr2line may append,
        # so it does not end up inside the line-number field.
        loc = _DISCRIMINATOR_RE.sub("", loc)
        if not loc or loc in ("??:0", "??:?"):
            continue
        path, sep, lineno = loc.rpartition(":")
        if sep:
            funcs[name]["file"] = path
            funcs[name]["line"] = lineno


# (machine-name keywords, stack-pointer regex), tried in order against the
# upper-cased readelf machine string.  AArch64 is listed before ARM so that
# the plain "ARM" keyword never captures an AArch64 machine name.
_SP_PATTERNS = (
    (("AARCH64",), r"\bsp\b"),
    (("ARM",), r"\bsp\b|\br13\b"),
    (("80386", "X86", "AMD"), r"%[er]sp\b"),
    (("MIPS",), r"\$sp\b|\$29\b"),
    (("RISC-V",), r"\bsp\b|\bx2\b"),
    (("XTENSA", "TENSILICA"), r"\ba1\b"),
    (("POWER", "PPC"), r"\br1\b"),
    (("SPARC",), r"\bsp\b|%sp\b|%o6\b"),
    (("SH", "SUPER", "RENESAS"), r"\br15\b"),
    (("TRICORE",), r"\ba10\b|\bsp\b"),
)


def _sp_pattern(machine):
    """Return a compiled regex matching the stack pointer register.

    Architecture-specific SP register names:
      ARM:     sp, r13
      AArch64: sp
      x86:     %esp, %rsp
      MIPS:    $sp, $29
      RISC-V:  sp, x2
      Xtensa:  a1
      PowerPC: r1
      SPARC:   %sp, %o6
      SuperH:  r15
      TriCore: a10, sp

    Unknown machines fall back to a bare ``sp`` match.
    """

    m = machine.upper()
    for keywords, pattern in _SP_PATTERNS:
        if any(key in m for key in keywords):
            return re.compile(pattern, re.IGNORECASE)
    # Fallback: common SP names
    return re.compile(r"\bsp\b", re.IGNORECASE)


def _is_dynamic_sp_sub(mnemonic, operand_str, sp_re):
    """Return True if instruction dynamically adjusts the stack pointer.

    Detects VLA / alloca patterns where a sub instruction adjusts SP
    by a register operand (not an immediate constant).

    This is a textual heuristic: any ``sub*`` instruction that mentions
    the stack pointer and carries no immediate marker is reported, whether
    the stack pointer is the destination or only a source operand.
    """

    if not mnemonic.startswith("sub"):
        return False
    ops = operand_str.lower()
    if not sp_re.search(ops):
        return False
    # ARM/AArch64: immediates use # prefix
    if "#" in ops:
        return False
    # x86 AT&T syntax: $ means immediate
    if "%" in ops and "$" in ops:
        return False
    return True


def parse_call_graph(prefix, elf, machine):
    """Build call graph by parsing ``objdump -d`` output.

    Returns (graph, dynamic_funcs) where:
      graph: dict mapping caller -> set of (callee_name_or_None, is_indirect).
      dynamic_funcs: set of function names with dynamic stack (alloca/VLA).

    A call whose operand carries no plain ``<symbol>`` annotation is
    recorded as ``(None, True)``, i.e. an indirect call.
    """

    sp_re = _sp_pattern(machine)
    graph = {}
    dynamic_funcs = set()
    current_func = None

    # The context manager closes the pipe and waits for objdump to exit.
    with subprocess.Popen(
        [prefix + "objdump", "-d", elf],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        text=True,
    ) as proc:
        for line in proc.stdout:
            # Function boundary: "0000abcd <func_name>:"
            label = _FUNC_LABEL_RE.match(line)
            if label:
                current_func = label.group(1)
                graph.setdefault(current_func, set())
                continue

            if current_func is None:
                continue

            # Instruction line: "  addr:\tbytes\tmnemonic operands"
            parts = line.split("\t")
            if len(parts) < 3:
                continue

            tokens = parts[2].strip().split(None, 1)
            if not tokens:
                # Empty mnemonic field: nothing to classify on this line.
                continue
            mnemonic = tokens[0].lower()

            # Operand string (used by both call and dynamic-SP checks)
            operand_str = tokens[1] if len(tokens) > 1 else ""
            if len(parts) > 3:
                operand_str += " " + parts[3]

            # Check for call instructions
            if mnemonic in CALL_MNEMONICS:
                target = _CALL_TARGET_RE.search(operand_str)
                if not target:
                    graph[current_func].add((None, True))
                elif target.group(1) in ALLOCA_NAMES:
                    dynamic_funcs.add(current_func)
                else:
                    graph[current_func].add((target.group(1), False))
                continue

            # Dynamic stack pointer adjustment (VLA / inlined alloca)
            if _is_dynamic_sp_sub(mnemonic, operand_str, sp_re):
                dynamic_funcs.add(current_func)

    return graph, dynamic_funcs


def _callee_sort_key(edge):
    """Order call edges so that traversal does not depend on set order."""
    callee, is_indirect = edge
    return (is_indirect, callee or "")


def compute_worst_stack(funcs, graph, recursion_depth):
    """Compute worst-case total stack for every function via memoized DFS.

    Functions and call edges are visited in sorted order so the result
    is reproducible from run to run.  Iterating the underlying sets
    directly would make tie-breaking between equally deep callees, and
    the memoized results inside recursive cycles, depend on string hash
    randomization.

    Args:
        funcs: dict from parse_stack_sizes
        graph: dict from parse_call_graph
        recursion_depth: how many times a recursive cycle body is counted
                         (0 = back-edges contribute nothing)

    Returns dict mapping function name to result dict with keys:
        self, total, uncertain, reasons, stack, self_reasons
    """

    cache = {}

    def dfs(func, path):
        if func in cache:
            return cache[func]

        info = funcs.get(func)
        self_size = info["self"] if info else 0
        self_reasons = set(info["reasons"]) if info else {_REASON_NO_DWARF}
        reasons = set(self_reasons)
        uncertain = bool(reasons)

        worst_callee = 0
        best_substack = []
        cur_path = path + [func]

        for callee, is_indirect in sorted(
            graph.get(func, ()), key=_callee_sort_key
        ):
            if is_indirect:
                uncertain = True
                reasons.add(_REASON_INDIRECT)
                self_reasons.add(_REASON_INDIRECT)
                continue

            if callee in cur_path:
                # Recursion detected: callee is already on the active path.
                uncertain = True
                cycle = cur_path[cur_path.index(callee):] + [callee]
                reason = "recursion: %s" % "->".join(cycle)
                reasons.add(reason)
                self_reasons.add(reason)
                if recursion_depth > 0:
                    cycle_nodes = cycle[:-1]
                    cycle_cost = sum(
                        funcs[c]["self"] if c in funcs else 0
                        for c in cycle_nodes
                    )
                    cycle_total = cycle_cost * recursion_depth
                    if cycle_total > worst_callee:
                        worst_callee = cycle_total
                        best_substack = [
                            (node, "recursive estimate")
                            for _ in range(recursion_depth)
                            for node in cycle_nodes
                        ]
                continue

            callee_total, callee_unc, callee_reasons, callee_stack, _ = dfs(
                callee, cur_path
            )
            if callee_unc:
                uncertain = True
                reasons.update(callee_reasons)
            if callee_total > worst_callee:
                worst_callee = callee_total
                best_substack = callee_stack

        result = (
            self_size + worst_callee,
            uncertain,
            reasons,
            [(func, None)] + best_substack,
            self_reasons,
        )
        cache[func] = result
        return result

    results = {}
    for func in sorted(set(funcs) | set(graph)):
        total, uncertain, reasons, stack, self_reasons = dfs(func, [])
        # Skip symbols with no DWARF data and zero stack
        if func not in funcs and total == 0:
            continue
        results[func] = {
            "self": funcs[func]["self"] if func in funcs else 0,
            "total": total,
            "uncertain": uncertain,
            "reasons": sorted(reasons),
            "stack": stack,
            "self_reasons": sorted(self_reasons),
        }

    return results


# (marker, predicate) pairs in descending priority.
_MARKER_RULES = (
    ("?", lambda reason: reason == _REASON_NO_DWARF),
    ("*", lambda reason: reason.startswith("dynamic stack")),
    ("@", lambda reason: reason.startswith("recursion:")),
    ("^", lambda reason: reason == _REASON_INDIRECT),
)


def reason_marker(reasons):
    """Return a single-char marker for uncertainty.

    Marker priority (highest first):
      ? = no DWARF data
      * = dynamic stack (alloca/VLA)
      @ = recursion
      ^ = indirect call (function pointer)

    Returns an empty string when none of the reasons is recognised.
    """

    for marker, matches in _MARKER_RULES:
        if any(matches(reason) for reason in reasons):
            return marker
    return ""


def frame_info(funcs, results, frame):
    """Return (size_str, func_name, file:line) for one stack frame.

    size_str is prefixed with a marker indicating the function's own
    uncertainty type (see reason_marker).  func_name includes the code
    size in parentheses when available, e.g. ``main(128)``.

    A frame whose function has no entry in *funcs* is rendered as
    ``("?", name, "?:?")``.
    """

    func, _note = frame
    info = funcs.get(func)
    if not info:
        return ("?", func, "?:?")

    res = results.get(func)
    self_reasons = res["self_reasons"] if res else info["reasons"]
    code_size = info.get("code_size", 0)
    name = "%s(%d)" % (func, code_size) if code_size else func
    return (
        "%s%d" % (reason_marker(self_reasons), info["self"]),
        name,
        "%s:%s" % (info["file"], info["line"]),
    )


def format_text(results, funcs, rank):
    """Format results as aligned table with deepest call stack frames.

    Functions are listed by descending total, ties broken by name.  Only
    the first *rank* functions are shown when *rank* is positive.  The
    first frame of every backtrace carries a ``~`` suffix, and names
    longer than 40 characters are truncated with ``...``.
    """

    # Compute common path prefix to shorten File:Line column
    paths = [
        f["file"]
        for f in funcs.values()
        if f["file"] not in ("?", "") and f["file"].startswith("/")
    ]
    if paths:
        prefix = os.path.commonpath(paths)
        if not prefix.endswith("/"):
            prefix += "/"
    else:
        prefix = ""

    maxname = 40

    def trunc(s):
        return s[: maxname - 3] + "..." if len(s) > maxname else s

    def shorten(loc):
        return loc[len(prefix):] if prefix and loc.startswith(prefix) else loc

    items = sorted(results.items(), key=lambda x: (-x[1]["total"], x[0]))
    if rank > 0:
        items = items[:rank]

    hdr = ("Total", "Self", "Backtrace", "File:Line")
    w = [len(h) for h in hdr]

    rows = []
    for _func, info in items:
        # Build the cells exactly as they will be printed, so the column
        # widths account for the "~" entry suffix and for truncation.
        frames = []
        for i, frame in enumerate(info["stack"]):
            sz, fn, loc = frame_info(funcs, results, frame)
            shown = trunc(fn + "~") if i == 0 else trunc(fn)
            frames.append((sz, shown, shorten(loc)))

        # Derive Total marker from the displayed stack frames' own reasons,
        # not from all propagated reasons across every branch.
        stack_reasons = []
        for f, _note in info["stack"]:
            res = results.get(f)
            if res:
                stack_reasons.extend(res["self_reasons"])
            elif f not in funcs:
                stack_reasons.append(_REASON_NO_DWARF)
        marker = reason_marker(stack_reasons) if info["uncertain"] else ""
        total_str = "%s%d" % (marker, info["total"])

        w[0] = max(w[0], len(total_str))
        for sz, shown, loc in frames:
            w[1] = max(w[1], len(sz))
            w[2] = max(w[2], len(shown))
            w[3] = max(w[3], len(loc))
        rows.append((total_str, frames))

    fmt = "%%%ds %%%ds %%-%ds %%s" % (w[0], w[1], w[2])
    lines = [fmt % hdr]
    lines.append(fmt % ("-" * w[0], "-" * w[1], "-" * w[2], "-" * w[3]))

    for total_str, frames in rows:
        for i, (sz, shown, loc) in enumerate(frames):
            lines.append(fmt % (total_str if i == 0 else "", sz, shown, loc))
        if not frames:
            lines.append(fmt % (total_str, "", "", ""))

    return "\n".join(lines)


def main():
    """Command-line entry point: analyze an ELF and print the report.

    Exits with status 1 when the ELF is missing, when a toolchain binary
    cannot be executed, or when no symbols / frame data are found.
    """

    parser = argparse.ArgumentParser(
        description="Analyze per-function stack usage from DWARF debug info, "
        "combined with disassembly-based call graph analysis to compute "
        "worst-case total (self + callees) stack depth.",
    )
    parser.add_argument(
        "elf",
        help="path to ELF file with DWARF debug info",
    )
    parser.add_argument(
        "-p",
        "--prefix",
        default="",
        help="toolchain prefix (e.g. arm-none-eabi- or llvm-)",
    )
    parser.add_argument(
        "-n",
        "--rank",
        type=int,
        default=0,
        help="show top N functions (default: 0 = all)",
    )
    parser.add_argument(
        "-r",
        "--recursion-depth",
        type=int,
        default=0,
        help="assumed recursion depth for stack estimation (default: 0). "
        "When > 0, recursive cycle cost is multiplied by this value.",
    )
    args = parser.parse_args()

    if not os.path.isfile(args.elf):
        print("ELF file not found: %s" % args.elf, file=sys.stderr)
        sys.exit(1)

    try:
        # Phase 1: Symbol table
        addr_to_name, name_to_addr, func_sizes, _is_thumb, machine = (
            build_symbol_map(args.prefix, args.elf)
        )
        if not addr_to_name:
            print("No function symbols found in ELF", file=sys.stderr)
            sys.exit(1)

        # Phase 2: Stack sizes from DWARF .debug_frame
        funcs = parse_stack_sizes(
            args.prefix,
            args.elf,
            addr_to_name,
            name_to_addr,
            func_sizes,
        )
        if not funcs:
            print(
                "No stack size data found in DWARF .debug_frame",
                file=sys.stderr,
            )
            sys.exit(1)

        # Phase 3: Source locations via addr2line
        resolve_sources(args.prefix, args.elf, funcs, name_to_addr)

        # Phase 4: Call graph via objdump
        graph, dynamic_funcs = parse_call_graph(
            args.prefix, args.elf, machine
        )
    except OSError as err:
        # Typically a missing readelf/objdump/addr2line (wrong --prefix).
        print("Failed to run toolchain binary: %s" % err, file=sys.stderr)
        sys.exit(1)

    # Mark functions with dynamic stack allocation (alloca/VLA)
    for name in dynamic_funcs:
        if name in funcs:
            funcs[name]["reasons"].append(_REASON_DYNAMIC)

    results = compute_worst_stack(funcs, graph, args.recursion_depth)

    print(format_text(results, funcs, args.rank))


if __name__ == "__main__":
    main()