Skip to content

Commit 5477af2

Browse files
richl9brenns10
authored and committed
lockup: Print tasks which have been on-cpu for too long
Orabug: 37187006 Signed-off-by: Richard Li <[email protected]>
1 parent 2388184 commit 5477af2

File tree

2 files changed

+77
-0
lines changed

2 files changed

+77
-0
lines changed

drgn_tools/lockup.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Copyright (c) 2024, Oracle and/or its affiliates.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
3+
import argparse
4+
5+
from drgn import Program
6+
from drgn.helpers.common import escape_ascii_string
7+
from drgn.helpers.linux.cpumask import for_each_online_cpu
8+
from drgn.helpers.linux.percpu import per_cpu
9+
10+
from drgn_tools.bt import bt
11+
from drgn_tools.corelens import CorelensModule
12+
from drgn_tools.task import task_lastrun2now
13+
from drgn_tools.util import timestamp_str
14+
15+
16+
def scan_lockup(
    prog: Program, min_run_time_seconds: float = 1, skip_swapper: bool = True
) -> None:
    """
    Report the current task of each online CPU that has been running for at
    least ``min_run_time_seconds``.

    For every CPU that is reported, this prints the runqueue address, a
    one-line summary of the current task (PID, task address, priority,
    command and run time) and the task's stack trace. A final line states
    how many tasks were reported.

    :param prog: drgn program
    :param min_run_time_seconds: minimum run time, in seconds, for a task to
      be reported; fractional values are accepted
    :param skip_swapper: when true, a CPU whose current task is named
      ``swapper/<cpu>`` is not reported
    """
    # The value of task_lastrun2now() is compared against this threshold, so
    # it is treated as nanoseconds here. Computed once: it is loop-invariant.
    min_run_time_ns = min_run_time_seconds * 1e9

    nr_processes = 0
    for cpu in for_each_online_cpu(prog):
        runqueue = per_cpu(prog["runqueues"], cpu)
        curr_task = runqueue.curr[0]

        # Apply the run-time filter first so the remaining task fields are
        # only read for tasks that can actually be reported.
        run_time = task_lastrun2now(curr_task)
        if run_time < min_run_time_ns:
            continue

        comm = escape_ascii_string(curr_task.comm.string_())
        if skip_swapper and comm == f"swapper/{cpu}":
            continue

        pid = curr_task.pid.value_()
        prio = curr_task.prio.value_()
        curr_task_addr = runqueue.curr.value_()

        print(f"CPU {cpu} RUNQUEUE: {runqueue.address_of_().value_():x}")
        # Two positional arguments on purpose: print() joins them with a
        # single space, which is the existing output format.
        print(
            f" PID: {pid:<6d} TASK: {curr_task_addr:x} PRIO: {prio}"
            f' COMMAND: "{comm}"',
            f" LOCKUP TIME: {timestamp_str(run_time)}",
        )
        print("\nCalltrace:")
        bt(task_or_prog=curr_task.address_of_())
        print()
        nr_processes += 1

    print(
        f"We found {nr_processes} processes running more than {min_run_time_seconds} seconds."
    )
53+
54+
55+
class LockUp(CorelensModule):
    """Print tasks which have been on-cpu for too long"""

    name = "lockup"

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """Register the ``--time`` / ``-t`` threshold option on ``parser``."""
        time_option = {
            "type": float,
            "default": 2,
            "help": "list all the processes that have been running more than <time> seconds",
        }
        parser.add_argument("--time", "-t", **time_option)

    def run(self, prog: Program, args: argparse.Namespace) -> None:
        """Run the lockup scan on ``prog`` with the threshold in ``args.time``."""
        threshold_seconds = args.time
        scan_lockup(prog, threshold_seconds)

tests/test_lockup.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Copyright (c) 2024, Oracle and/or its affiliates.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
3+
from drgn_tools import lockup
4+
5+
6+
def test_lockup(prog):
    """Smoke test: scan_lockup() with default settings must not raise."""
    lockup.scan_lockup(prog=prog)

0 commit comments

Comments
 (0)