From f640c97c9b2f2b9b5fdc792531534574a69de218 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Tue, 31 Oct 2017 09:43:17 +0100 Subject: [PATCH] dmesg: detect new RIP pattern To help auto bisect a number of boot errors. For example, [ 956.671551] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 [ 956.671557] IP: pgtable_trans_huge_withdraw+0x4c/0xc0 ... [ 956.671650] RIP: pgtable_trans_huge_withdraw+0x4c/0xc0 RSP: ffffc90026b07c20 We failed to auto bisect it since the important "RIP:pgtable_trans_huge_withdraw" feature is missing. The remaining ones like "dmesg.BUG:unable_to_handle_kernel" are way too common. wfg@inn /result/stress-ng/1s-memory-performance/lkp-bdw-ep6/debian-x86_64-2016-08-31.cgz/x86_64-rhel-7.2/gcc-6/bb176f67090ca54869fc1262c913aa69d2ede070/0% cat dmesg.json { "dmesg.boot_failures": [ 1 ], "dmesg.BUG:unable_to_handle_kernel": [ 1 ], "dmesg.Oops:#[##]": [ 1 ], "dmesg.Kernel_panic-not_syncing:Fatal_exception": [ 1 ], ... After the patch, $ /c/lkp-tests/stats/dmesg dmesg-lkp-bdw-ep6:20171029153441:x86_64-rhel-7.2:gcc-6:4.14.0-rc6:1 boot_failures: 1 # BUG: unable to handle kernel BUG:unable_to_handle_kernel: 1 message:BUG:unable_to_handle_kernel: [ 328.471917] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 pattern:BUG:unable_to_handle_kernel: BUG: unable to handle kernel # Oops: Oops:#[##]: 1 message:Oops:#[##]: [ 328.471930] Oops: 0000 [#1] SMP pattern:Oops:#[##]: Oops: + # RIP: pgtable_trans_huge_withdraw+0x + RIP:pgtable_trans_huge_withdraw: 1 + message:RIP:pgtable_trans_huge_withdraw: [ 328.471980] RIP: 0010:pgtable_trans_huge_withdraw+0x4c/0xc0 + pattern:RIP:pgtable_trans_huge_withdraw: RIP: pgtable_trans_huge_withdraw+0x # Kernel panic - not syncing: Fatal exception Kernel_panic-not_syncing:Fatal_exception: 1 message:Kernel_panic-not_syncing:Fatal_exception: [ 328.489702] Kernel panic - not syncing: Fatal 
exception timestamp:last: 328.496311 timestamp:BUG:unable_to_handle_kernel: 328.471917 timestamp:Oops:#[##]: 328.471930 timestamp:RIP:pgtable_trans_huge_withdraw: 328.471980 timestamp:Kernel_panic-not_syncing:Fatal_exception: 328.489702 CC: "Kirill A. Shutemov" Signed-off-by: Fengguang Wu Signed-off-by: Philip Li --- etc/oops-pattern | 1 + lib/dmesg.rb | 1 + 2 files changed, 2 insertions(+) diff --git a/etc/oops-pattern b/etc/oops-pattern index 052c7a44e..6ec50722a 100644 --- a/etc/oops-pattern +++ b/etc/oops-pattern @@ -47,6 +47,7 @@ IP-Config: Auto-configuration of network failed EIP is at [a-zA-Z0-9._]+\+0x.*/0x.* EIP: [a-zA-Z0-9._]+\+0x[a-f0-9]+/0x[a-f0-9]+ RIP: [0-9a-f]{4}:\[.*\] [a-zA-Z0-9._]+\+0x.*/0x.* +RIP: [0-9a-f]{4}:[a-zA-Z0-9._]+\+0x.*/0x.* PANIC: early exception PANIC: double fault, Unknown interrupt or fault at: diff --git a/lib/dmesg.rb b/lib/dmesg.rb index 74479723c..03bde1116 100755 --- a/lib/dmesg.rb +++ b/lib/dmesg.rb @@ -361,6 +361,7 @@ def analyze_error_id(line) error_id.gsub!(/([a-z]:)[0-9]+\b/, '\1') # WARNING: at arch/x86/kernel/cpu/perf_event.c:1077 x86_pmu_start+0xaa/0x110() error_id.gsub!(/#:\[<#>\]\[<#>\]/, '') # RIP: 0010:[] [] validate_chain+0xed/0xe80 + error_id.gsub!(/RIP:#:/, 'RIP:') # RIP: 0010:__might_sleep+0x72/0x80 [error_id, bug_to_bisect] end