[Intel-gfx] [PATCH] Add Dmesg Triage Feature: further triage i-g-t kmsg log to reduce result noise resulted from piglit dmesg defect
ethan gao
ethan.gao at intel.com
Wed Mar 18 19:36:51 PDT 2015
tests/igt.py: add igt env to enable or disable dmesg triage
framework/test/base.py: trigger dmesg triage depending on dmesg log occurrence
framework/dmesg.py: employ dmesg triage simply for Linux dmesg
dmesg_triage/*: deal with kmsg log with pre-defined dmesg oops pattern
In general, if dmesg triage is enabled and new dmesg output appears while an i-g-t testcase
is running, that output is captured and reduced to a tag and a head, which can be used to look
up the full trace details when necessary. In addition, based on the dmesg tag or head, noise in
the final result of a testcase can be reduced by applying defined rules or strategies.
Signed-off-by: ethan gao <ethan.gao at intel.com>
---
dmesg_triage/debug.sh | 67 ++++++++++++++
dmesg_triage/dmesg.rb | 117 ++++++++++++++++++++++++
dmesg_triage/kmsg_triage | 182 ++++++++++++++++++++++++++++++++++++++
dmesg_triage/libdmesg.sh | 156 ++++++++++++++++++++++++++++++++
dmesg_triage/oops-context-pattern | 37 ++++++++
dmesg_triage/oops-pattern | 59 ++++++++++++
framework/dmesg.py | 60 +++++++++++++
framework/test/base.py | 4 +
tests/igt.py | 7 ++
9 files changed, 689 insertions(+)
create mode 100644 dmesg_triage/debug.sh
create mode 100644 dmesg_triage/dmesg.rb
create mode 100755 dmesg_triage/kmsg_triage
create mode 100644 dmesg_triage/libdmesg.sh
create mode 100644 dmesg_triage/oops-context-pattern
create mode 100644 dmesg_triage/oops-pattern
diff --git a/dmesg_triage/debug.sh b/dmesg_triage/debug.sh
new file mode 100644
index 0000000..ec8280e
--- /dev/null
+++ b/dmesg_triage/debug.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Copyright (c) 2015 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+[[ "$BASHPID" ]] || echo "======= Run me with bash ======="
+
+# Dump the shell's variables to stderr.
+# The body runs in a subshell, so "set -o posix" and the local overrides
+# below do not affect the caller.
+dump_shell_vars()
+{
+ (
+ set -o posix
+ # Shadow these three names with empty arrays before dumping.
+ # NOTE(review): presumably this keeps large caller-side arrays out of the
+ # dump -- confirm against the scripts that source this file.
+ local branch_commit_files=()
+ local sparse_lines=()
+ local remote_url=()
+ set >&2
+ )
+}
+
+# Print the current bash call stack to stderr, one "file:line: function"
+# entry per caller frame. Frame 0 (this function itself) is not printed.
+dump_call_stack()
+{
+ local stack_depth=${#FUNCNAME[@]}
+ local i
+ for ((i = 0; i < $((stack_depth-1)); i++)); do
+ # BASH_LINENO[i] is the line in BASH_SOURCE[i+1] from which frame i was called.
+ echo " ${BASH_SOURCE[i+1]}:${BASH_LINENO[i]}: ${FUNCNAME[i+1]}" >&2
+ done
+}
+
+# nr_stack_dumps=0
+# Print debugging context: the call stack (stderr), a blank separator line
+# (stdout), then all shell variables (stderr).
+dump_stack()
+{
+ # echo "Stack dump: $BASH_COMMAND"
+ dump_call_stack
+ echo
+ dump_shell_vars
+}
+
+# Print a timestamped message to stdout.
+#   $*: the message text
+# NOTE(review): "color" and "reset_color" are not defined in this file; they
+# are presumably set by the sourcing script. When unset they expand to
+# nothing and the message is printed without colour -- confirm.
+notice()
+{
+ local time_str="$(date +'%F %H:%M:%S') "
+ echo -e ${color[MAGENTA]}"${time_str}$*"$reset_color
+}
+
+die()
+{
+ notice "$*"
+
+ dump_stack
+ email "$*"
+ exit
+}
diff --git a/dmesg_triage/dmesg.rb b/dmesg_triage/dmesg.rb
new file mode 100644
index 0000000..d7aecae
--- /dev/null
+++ b/dmesg_triage/dmesg.rb
@@ -0,0 +1,117 @@
+#!/usr/bin/ruby
+
+# Copyright (c) 2015 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# [Description]: Find out all crash dmesg clauses with tailored pattern
+# [Author]: ethan.gao at intel.com
+
+
+
+require "fileutils"
+require "tempfile"
+
+# Normalize one raw dmesg line.
+#
+# The argument is modified in place (chomp!/sub!): the trailing newline is
+# removed, a "/kbuild/src/<dir>/" path prefix is dropped and compiler
+# suffixes such as ".isra.3" in front of "+0x" are stripped.
+#
+# Returns the normalized line. When a second "[ seconds.micros] ..." message
+# is embedded after other text on the same line, the returned string contains
+# a newline separating the two parts.
+def fixup_dmesg(line)
+ line.chomp!
+
+ # remove absolute path names
+ line.sub!(%r{/kbuild/src/[^/]+/}, '')
+
+ line.sub!(/\.(isra|constprop|part)\.[0-9]+\+0x/, '+0x')
+
+ # break up mixed messages
+ case line
+ when /^<[0-9]>/
+ # lines starting with a "<N>" log-level prefix are left untouched
+ when /(.+)(\[ *[0-9]{1,6}\.[0-9]{6}\] .*)/
+ line = $1 + "\n" + $2
+ end
+
+ return line
+end
+
+# Rewrite dmesg_file in place with every line passed through fixup_dmesg.
+#
+# The fixed lines are written to a temporary file created in the same
+# directory, which is given mode 0664 and then moved over the original.
+#
+# Returns an array holding the fixed lines.
+def fixup_dmesg_file(dmesg_file)
+ tmpfile = Tempfile.new '.fixup-dmesg-', File.dirname(dmesg_file)
+ dmesg_lines = []
+ File.open(dmesg_file, 'rb') do |f|
+ f.each_line { |line|
+ line = fixup_dmesg(line)
+ dmesg_lines << line
+ tmpfile.puts line
+ }
+ end
+ tmpfile.chmod 0664
+ tmpfile.close
+ FileUtils.mv tmpfile.path, dmesg_file, :force => true
+ return dmesg_lines
+end
+
+# Collect the "head" lines of kernel problems found in a dmesg file.
+#
+# dmesg        - path of the dmesg log to scan
+# grep_options - extra options handed to grep (e.g. '-o')
+#
+# First greps with DMESG_ROOT/oops-pattern, drops "NMI handler ... took too
+# long to run" lines and removes duplicates, comparing lines with their
+# "<N>" / "[timestamp] " prefix stripped. If nothing is found, it falls back
+# to filesystem-specific pattern files when the log mentions EXT4, XFS or
+# btrfs.
+#
+# Returns the matching lines as one string, or '' when nothing matched.
+#
+# DMESG_ROOT is not defined in this file; it must be defined by the script
+# that requires it.
+# NOTE(review): dmesg and grep_options are interpolated into shell commands
+# without escaping, so paths with spaces or shell metacharacters will break.
+# NOTE(review): ext4-crit-pattern, xfs-alert-pattern and btrfs-crit-pattern
+# are not shipped next to oops-pattern in this change -- confirm they exist.
+def grep_crash_head(dmesg, grep_options = '')
+ oops = %x[ grep -a -f #{DMESG_ROOT}/oops-pattern #{grep_options} #{dmesg} |
+ grep -v -e 'INFO: NMI handler .* took too long to run' |
+ awk '{line = $0; sub(/^(<[0-9]>)?\[[ 0-9.]+\] /, "", line); if (!x[line]++) print;}'
+ ]
+ return oops unless oops.empty?
+
+
+ if system "grep -q -F 'EXT4-fs (' #{dmesg}"
+ oops = `grep -a -f #{DMESG_ROOT}/ext4-crit-pattern #{grep_options} #{dmesg}`
+ return oops unless oops.empty?
+ end
+
+ if system "grep -q -F 'XFS (' #{dmesg}"
+ oops = `grep -a -f #{DMESG_ROOT}/xfs-alert-pattern #{grep_options} #{dmesg}`
+ return oops unless oops.empty?
+ end
+
+ if system "grep -q -F 'btrfs: ' #{dmesg}"
+ oops = `grep -a -f #{DMESG_ROOT}/btrfs-crit-pattern #{grep_options} #{dmesg}`
+ return oops unless oops.empty?
+ end
+
+ return ''
+end
+
+# Collect every line of dmesg_file that matches the oops patterns, plus the
+# filesystem-specific patterns for each filesystem mentioned in dmesg_lines.
+#
+# dmesg_file  - path of the dmesg log handed to grep
+# dmesg_lines - array of the log's lines, used only to detect which
+#               filesystems ('EXT4-fs (', 'XFS (', 'btrfs: ') appear
+#
+# Returns the concatenated grep output as one string. Unlike
+# grep_crash_head, no duplicate removal or filtering is applied.
+def grep_printk_errors(dmesg_file, dmesg_lines)
+ oops = `grep -a -f #{DMESG_ROOT}/oops-pattern #{dmesg_file}`
+ dmesg = dmesg_lines.join "\n"
+ oops += `grep -a -f #{DMESG_ROOT}/ext4-crit-pattern #{dmesg_file}` if dmesg.index 'EXT4-fs ('
+ oops += `grep -a -f #{DMESG_ROOT}/xfs-alert-pattern #{dmesg_file}` if dmesg.index 'XFS ('
+ oops += `grep -a -f #{DMESG_ROOT}/btrfs-crit-pattern #{dmesg_file}` if dmesg.index 'btrfs: '
+ return oops
+end
+
+# Turn an error line into an identifier that stays the same across runs.
+#
+# Works on a chomped copy of line (the argument itself is not modified):
+# variable parts such as kernel versions, dates, hex numbers, SHA-1 sums and
+# plain numbers are replaced by '#', whitespace is collapsed and converted to
+# '_', and trailing punctuation is stripped.
+#
+# Returns the resulting identifier string.
+def common_error_id(line)
+ line = line.chomp
+ line.gsub! /\b3\.[0-9]+[-a-z0-9.]+/, '#' # linux version: 3.17.0-next-20141008-g099669ed
+ line.gsub! /\b[1-9][0-9]-[A-Z][a-z]+-[0-9]{4}\b/, '#' # Date: 28-Dec-2013
+ line.gsub! /\b0x[0-9a-f]+\b/, '#' # hex number
+ line.gsub! /\b[a-f0-9]{40}\b/, '#' # SHA-1
+ line.gsub! /\b[0-9][0-9.]*/, '#' # number
+ line.gsub! /#x\b/, '0x' # restore a bare "0x" that the number rule turned into "#x"
+ line.gsub! /[ \t]/, ' ' # tabs become spaces
+ line.gsub! /\ \ +/, ' ' # collapse runs of spaces
+ line.gsub! /([^a-zA-Z0-9])\ /, '\1' # drop a space that follows punctuation
+ line.gsub! /\ ([^a-zA-Z])/, '\1' # drop a space that precedes a non-letter
+ line.gsub! /^\ /, ''
+ line.gsub! /\ _/, '_'
+ line.gsub! /\ /, '_' # remaining spaces become underscores
+ line.gsub! /[-_.,;:#!\[\(]+$/, '' # strip trailing punctuation
+ line
+end
diff --git a/dmesg_triage/kmsg_triage b/dmesg_triage/kmsg_triage
new file mode 100755
index 0000000..5832871
--- /dev/null
+++ b/dmesg_triage/kmsg_triage
@@ -0,0 +1,182 @@
+#!/usr/bin/ruby
+
+# Copyright (c) 2015 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# [Description]: Triage various dmesg trace log for an unique tag
+# [Author]: ethan.gao at intel.com
+
+
+DMESG_ROOT = ENV['DMESG_TRIAGE_ROOT'] || File.dirname(File.dirname(File.realpath($0)))
+require "#{DMESG_ROOT}/dmesg.rb"
+
+if ENV['KMSG_TRIAGE_LOG']
+ KMSG = ENV['KMSG_TRIAGE_LOG']
+ kmsg_file = "#{KMSG}/.dmesg"
+
+ if File.exist? kmsg_file
+ dmesg_file = kmsg_file
+ end
+
+elsif ARGV[0]
+ dmesg_file = ARGV[0]
+else
+ exit
+end
+
+if not File.exist?(dmesg_file) or File.size(dmesg_file) == 0
+ puts "No required dmesg file found !"
+ exit
+end
+
+# Build a loose search pattern from an oops line.
+#
+# The line is split into words, which are processed in order:
+# * a "func.isra.N+0x..." style word contributes the bare function name and
+#   ends the scan;
+# * a "func+0x..." or "key=..." word contributes "func+0x" / "key=" and ends
+#   the scan;
+# * any other word containing a character outside [a-zA-Z/:._-] becomes
+#   '.*' (consecutive '.*' entries are merged);
+# * every remaining word is kept as is.
+# Leading '.*' entries and one trailing '.*' are removed.
+#
+# Returns the pieces joined with single spaces, or '' for a blank line.
+def oops_to_bisect_pattern(line)
+ words = line.split
+ return '' if words.empty?
+ patterns = []
+ words.each { |w|
+ case w
+ when /([a-zA-Z0-9_]+)\.(isra|constprop|part)\.[0-9]+\+0x/
+ patterns << $1
+ break
+ when /([a-zA-Z0-9_]+\+0x)/, /([a-zA-Z0-9_]+=)/
+ patterns << $1
+ break
+ when /[^a-zA-Z\/:._-]/
+ patterns << '.*' if patterns[-1] != '.*'
+ else
+ patterns << w
+ end
+ }
+ patterns.shift while patterns[0] == '.*'
+ patterns.pop if patterns[-1] == '.*'
+ patterns.join(' ')
+end
+
+error_ids = {}
+if $PROGRAM_NAME =~ /kmsg_triage/
+ output = grep_crash_head dmesg_file, '-o'
+end
+exit if output.empty?
+
+# Walk every crash head found above, reduce it to a normalized error id and
+# remember the first bisect pattern seen for that id in error_ids.
+output.each_line { |line|
+ line.chomp!
+
+ # skip messages that are treated as noise
+ next if line =~ /INFO: Stall ended before state dump start/
+ next if line =~ /INFO: NMI handler .* took too long to run:/
+ next if line =~ /Killed process \d+ \(/ # duplicated with "Out of memory: Kill process "
+
+ # print_hex_dump
+ next if line =~ /(\s[0-9a-f]{2}){16}/
+ next if line =~ /(\s[0-9a-f]{4}){8}/
+ next if line =~ /(\s[0-9a-f]{8}){4}/
+
+ # skip lines containing characters other than tab, newline, NUL or
+ # printable ones, then drop any NUL bytes
+ next if line =~ /[^\t\n\0[:print:]]/
+ line.tr! "\0", ''
+
+ # Map well-known message shapes to a shortened line (used for the error id)
+ # and a pattern describing the bug (bug_to_bisect). The first matching
+ # "when" wins, so the order of the branches matters.
+ case line
+ when /(INFO: rcu[_a-z]* self-detected stall on CPU)/,
+ /(INFO: rcu[_a-z]* detected stalls on CPUs\/tasks:)/
+ line = $1
+ bug_to_bisect = $1
+ when /(BUG: unable to handle kernel)/,
+ /(BUG: unable to handle kernel) NULL pointer dereference/,
+ /(BUG: unable to handle kernel) paging request/
+ line = $1
+ bug_to_bisect = $1
+ when /(BUG: scheduling while atomic:)/,
+ /(BUG: Bad page map in process)/,
+ /(BUG: Bad page state in process)/,
+ /(BUG: soft lockup - CPU#\d+ stuck for \d+s!)/,
+ /(BUG: spinlock .* on CPU#\d+)/
+ line = $1
+ bug_to_bisect = $1
+ when /(BUG: ).* (still has locks held)/,
+ /(INFO: task ).* (blocked for more than \d+ seconds)/
+ line = $1 + $2
+ bug_to_bisect = $2
+ when /WARNING:.* at .* ([a-zA-Z.0-9_]+\+0x)/
+ bug_to_bisect = 'WARNING:.* at .* ' + $1.sub(/\.(isra|constprop|part)\.[0-9]+\+0x/, '')
+ line =~ /(at .*)/
+ line = "WARNING: " + $1
+ when /(Kernel panic - not syncing: No working init found.) Try passing init= option to kernel. /,
+ /(Kernel panic - not syncing: No init found.) Try passing init= option to kernel. /
+ line = $1
+ bug_to_bisect = line
+ when /(Out of memory: Kill process) \d+ \(/
+ line = $1
+ bug_to_bisect = $1
+ when /(Writer stall state) \d+ g\d+ c\d+ f/
+ line = $1
+ bug_to_bisect = $1
+ when /(used greatest stack depth:)/
+ line = $1
+ bug_to_bisect = $1
+ # printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} still in use (%d) [unmount of %s %s]\n"
+ when /(BUG: Dentry ).* (still in use) .* \[unmount of /
+ line = $1 + $2
+ bug_to_bisect = $1 + '.* ' + $2
+ when /([a-zA-Z]+[ a-zA-Z]*: [a-f0-9]{4}) \[#[0-9]+\] [A-Z_ ]*$/
+ line = $1
+ bug_to_bisect = $1
+ when /^backtrace:([a-zA-Z0-9_]+)/
+ bug_to_bisect = $1 + '+0x'
+ else
+ bug_to_bisect = oops_to_bisect_pattern line
+ end
+
+ # drop everything in front of the first letter
+ error_id = line.sub(/^[^a-zA-Z]+/, "")
+
+ error_id.gsub! /\ \]$/, "" # [ INFO: possible recursive locking detected ]
+ #error_id.gsub! /\/c\/kernel-tests\/src\/[^\/]+\//, ''
+ #error_id.gsub! /\/c\/(wfg|yliu)\/[^\/]+\//, ''
+ #error_id.gsub! /\/lkp\/[^\/]+\/linux[0-9]*\//, ''
+ #error_id.gsub! /\/kernel-tests\/linux[0-9]*\//, ''
+ error_id.gsub! /\.(isra|constprop|part)\.[0-9]+/, ''
+
+ # replace addresses and values by '#', drop "+0x.../0x..." style offsets
+ error_id.gsub! /\b[0-9a-f]{8}\b/, "#"
+ error_id.gsub! /\b[0-9a-f]{16}\b/, "#"
+ error_id.gsub! /(=)[0-9a-f]+\b/, '\1#'
+ error_id.gsub! /[+\/]0x[0-9a-f]+\b/, ''
+ error_id.gsub! /[+\/][0-9a-f]+\b/, ''
+
+ #error_id = common_error_id(error_id) + ': 1'
+ error_id = common_error_id(error_id)
+
+ error_id.gsub! /([a-z]:)[0-9]+\b/, '\1' # WARNING: at arch/x86/kernel/cpu/perf_event.c:1077 x86_pmu_start+0xaa/0x110()
+ error_id.gsub! /#:\[<#>\]\[<#>\]/, '' # RIP: 0010:[<ffffffff91906d8d>] [<ffffffff91906d8d>] validate_chain+0xed/0xe80
+
+ # ids of three characters or fewer are discarded
+ next if error_id.size <= 3
+
+ # keep the pattern of the first occurrence of each id
+ error_ids[error_id] ||= bug_to_bisect
+}
+
+exit if error_ids.empty?
+
+puts "Found necessary kmsg error ids" if $PROGRAM_NAME =~ /kmsg_triage/
+
+# Directory receiving the result file: KMSG_TRIAGE_LOG when set, otherwise
+# DMESG_ROOT.
+KMSG_ERROR_ID = ENV['KMSG_TRIAGE_LOG'] || "#{DMESG_ROOT}"
+# This stores each error id as one "<error id>@<head>" line. The block form
+# of File.open closes the file even when a write raises.
+File.open(File.join("#{KMSG_ERROR_ID}", ".dmesg_error_ids"), "w") do |f|
+  error_ids.each { |error_id, head|
+    f.write("#{error_id}@#{head}\n")
+  }
+end
+
diff --git a/dmesg_triage/libdmesg.sh b/dmesg_triage/libdmesg.sh
new file mode 100644
index 0000000..7a53fcf
--- /dev/null
+++ b/dmesg_triage/libdmesg.sh
@@ -0,0 +1,156 @@
+# !/bin/bash
+
+# Copyright (c) 2015 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# [Description]: Defined utilities to check out specified dmesg segment
+# [Author]: ethan.gao at intel.com
+
+
+# Default DMESG_ROOT to the current directory when the caller did not set it.
+if [[ -z "${DMESG_ROOT}" ]]; then
+    DMESG_ROOT=$(pwd)
+fi
+
+# Quoted so that a DMESG_ROOT containing spaces still resolves.
+source "$DMESG_ROOT/debug.sh"
+
+# Generic failure status returned by the helpers below.
+ERR=1
+
+# desc: capture all the heads of various kmsg exceptional log
+# param:
+# [dmesg]: the dmesg log which you'll employ
+grep_crash_head()
+{
+ [[ ! "$*" || "$*" =~ ^-[a-z]+$ ]] && {
+ echo "input of grep_crash_head is empty" >&2
+ dump_stack
+ return 1
+ }
+ grep -a -f $DMESG_ROOT/oops-pattern "$@" && return
+
+ grep -B8 'Call Trace:$' "$@" |
+ grep -B1 -e 'Pid: [0-9]+, comm: ' \
+ -e 'CPU: [0-9]+ PID: [0-9]+ Comm: ' |
+ grep -v -e ' [cC]omm: ' \
+ -e '^--$' \
+ -e '^$' \
+ -e '^\[[ 0-9.]*\] $' \
+ -e 'The following trace is a kernel self test and not a bug'
+}
+
+# desc: filter the kmsg exceptional log messages
+# param:
+# [dmesg]: the dmesg log which you'll engage
+grep_crash_dmesg()
+{
+ [[ "$@" ]] || {
+ echo "input of grep_crash_dmesg is empty" >&2
+ dump_stack
+ return 1
+ }
+
+ grep -a -f $DMESG_ROOT/oops-pattern \
+ -f $DMESG_ROOT/oops-context-pattern "$@"
+}
+
+# desc: filter the first kmsg exceptional log
+# param:
+# [dmesg]: the dmesg source you'll use
+#
+# Feeds "grep_crash_dmesg -C3" output through awk, which prints lines until
+# one of these happens:
+#   - a "--" group separator from grep is reached;
+#   - an "---[ end trace ...]---" line is seen (it and the following line
+#     are still printed);
+#   - more than 200 lines have been read.
+# NOTE(review): nr_first_head starts at 0 and is only ever set to the NR of
+# an earlier line, so "nr_first_head > NR + 1" looks like it can never be
+# true -- confirm the intended condition for stopping at a second head.
+first_crash_dmesg()
+{
+ grep_crash_dmesg -C3 "$@" | awk 'BEGIN { nr_first_head=0; };
+ /^--$/ { exit };
+ /---\[ end trace .*\]---/ { print; getline; print; exit };
+ /(kernel BUG at |Kernel panic -|\<BUG: |WARNING: |INFO: )/ { if (nr_first_head > NR + 1) exit; nr_first_head = NR; };
+ { print };
+ NR > 200 { exit };'
+}
+
+# desc: retrieve dmesg trace info based on dmesg tag
+# param:
+#   [tag]: a dmesg tag or head, used as an awk regular expression
+#   [dmesg]: the file which contains your full dmesg log
+# return: $ERR when not called with exactly two arguments
+#
+# For every line matching the tag, prints that line and the lines following
+# it, stopping after an "---[ end trace ...]" line or once about 60 lines
+# past the match have been printed. Ends with "---[kernel msg end]---" when
+# at least one match was found, otherwise prints "Not Found".
+head_to_dmesg()
+{
+    [[ $# -eq 2 ]] || {
+        echo "ERROR:input of head_to_dmesg is empty !" >&2
+        echo "Usage: head_to_dmesg <dmesg_head> <dmesg_log_file>"
+        return $ERR
+    }
+
+    # local, so the names do not leak into the shell sourcing this library
+    local dmesg_head=$1
+    local dmesg_file=$2
+
+    awk -v pattern="${dmesg_head}" '
+    BEGIN { nr_dmesg_target = 0; }
+    {
+        if ( $0 !~ pattern){
+            next;
+        }
+        else {
+            nr_dmesg_target = FNR; print;
+        }
+
+        do
+        {
+            if (getline > 0)
+                print;
+            else
+                break;
+
+            if ($0 ~ /---\[ end trace .*\]/){
+                break;
+            }
+        } while (NR - nr_dmesg_target < 60)
+    }
+    END {
+        if (nr_dmesg_target > 0) {
+            print "---[kernel msg end]---";
+        }
+        else {
+            print "Not Found";
+        }
+    }' "${dmesg_file}"
+}
+
+
+# desc: sort out the call trace funcs from the given dmesg log
+# param:
+#   [dmesg]: the dmesg source you'll use, if a file; without an argument the
+#            log is read from stdin
+#
+# Prints the function name of every "func+0x.../0x..." entry, with adjacent
+# duplicates removed.
+CALL_TRACE_FUNCS='[a-zA-Z0-9._]+\+0x[0-9a-f]+\/0x[0-9a-f]+'
+call_trace_funcs()
+{
+    local dmesg=$1
+    # ${dmesg:+...} passes a quoted file name when one was given and nothing
+    # at all otherwise, so grep still falls back to reading stdin.
+    grep -Eo "$CALL_TRACE_FUNCS" ${dmesg:+"$dmesg"} | cut -f1 -d+ | uniq
+}
+
+# desc: sort out funcs from the first call trace stack of dmesg
+# param:
+# [dmesg]: the dmesg call trace log
+#
+# awk starts counting at a "Call Trace:" or "state was registered at:" line,
+# prints the function name of each following "func+0x.../0x..." entry and
+# stops at the first line that is not such an entry once more than one
+# function has been printed. sed then strips ".isra.N" / ".constprop.N" /
+# ".part.N" suffixes and uniq drops adjacent duplicates.
+# NOTE(review): the "(?\s)?" group in the match() regex is not portable awk
+# syntax; it presumably means an optional "? " marker in front of the
+# function name -- confirm with the awk implementation in use.
+first_call_trace()
+{
+ local dmesg=$1
+ awk 'BEGIN { in_trace=0; };
+ /(Call Trace:|state was registered at:)/ { in_trace++; nr=0; next; };
+ /'"$CALL_TRACE_FUNCS"'/{ if (match($0, />\] (?\s)?(_)?[a-zA-Z_][a-zA-Z0-9._]+\+0x/))
+ { print substr($0, RSTART+3, RLENGTH-6); nr++; }; next; };
+ // {if (in_trace > 0 && nr > 1) exit;};' $dmesg |
+ sed -r -e 's/\.(isra|constprop|part)\.[0-9]+//g' \
+ | uniq
+}
diff --git a/dmesg_triage/oops-context-pattern b/dmesg_triage/oops-context-pattern
new file mode 100644
index 0000000..f7dc209
--- /dev/null
+++ b/dmesg_triage/oops-context-pattern
@@ -0,0 +1,37 @@
+invoked oom-killer:
+INFO: possible recursive locking detected
+ is trying to acquire lock:
+ state was registered at:
+Possible unsafe locking scenario:
+ CPU0$
+ ----$
+ lock(.*);
+\*\*\* DEADLOCK \*\*\*
+May be due to missing lock nesting notation
+ <Interrupt>
+but task is already holding lock:
+which lock already depends on the new lock.
+other info that might help us debug this:
+stack backtrace:
+ EFLAGS:
+RBP:
+ knlGS:
+ CR3:
+Stack:
+Code:
+sending NMI to all CPUs:
+Modules linked in:
+Hardware name:
+Call Trace:
+\[<[a-f0-9]\+>\] [a-z0-9._]\++0x[0-9a-f]\+/0x[0-9a-f]\+
+\[<[a-f0-9]\+>\] ? [a-z0-9._]\++0x[0-9a-f]\+/0x[0-9a-f]\+
+\( [0-9a-f]\{16\}\)\{7\}
+slab_unreclaimable:
+Swap cache stats:
+pages RAM
+ locks held by
+ lock held by
+RCU used illegally from
+ =========================$
+Object [0-9a-f]\+: [0-9a-f ]\+ \.\.\.
+Redzone [0-9a-f]\+: [0-9a-f ]\+
diff --git a/dmesg_triage/oops-pattern b/dmesg_triage/oops-pattern
new file mode 100644
index 0000000..9ce4148
--- /dev/null
+++ b/dmesg_triage/oops-pattern
@@ -0,0 +1,59 @@
+[a-zA-Z ]\+: [a-f0-9]\{4\} \[#[0-9]\+\] [A-Z_ ]*$
+kernel BUG at .*
+Kernel panic - not syncing: .*
+^BUG: .*
+\[ *[0-9.]\+\] BUG: .*
+BUG .* (.*): .*
+Oops: .*
+WARNING: at .*
+WARNING: CPU: [0-9]\+ PID: [0-9]\+ at .*
+^INFO: .*detected stall.*
+^INFO: .* cpu=.*
+^INFO: Slab 0x.*
+^INFO: Object 0x.*
+^INFO: 0x.*. First byte 0x.*
+\[ *[0-9.]\+\] INFO: .*
+\[ INFO: [^i].* \]
+RCU used illegally from offline CPU!
+RCU used illegally from idle CPU!
+RCU used illegally from extended quiescent state!
+inconsistent {.*} -> {.*} usage\.
+initcall .* returned with .*
+\[ BUG: [^b].*! \]
+ is trying to release lock (.*) at
+ is trying to contend lock (.*) at
+ is trying to lock: .*
+ but this task is not holding: .*
+ is leaving the kernel with locks still held!
+ is exiting with locks still held!
+ is freeing memory .*, with a lock still held there!
+genirq: Flags mismatch irq .*
+Out of memory: Kill process
+page allocation failure: order:[0-9]\+, mode:0x.*
+pagealloc: single bit error
+pagealloc: memory corruption
+audit: (.*) error: .*
+getblk(): invalid block size [0-9]\+ requested
+KGDB: re-enter exception: ALL breakpoints killed
+IP-Config: Auto-configuration of network failed
+EIP is at [a-zA-Z0-9._]\++0x.*/0x.*
+RIP: [0-9a-f]\{4\}:\[.*\] [a-zA-Z0-9._]\++0x.*/0x.*
+PANIC: early exception
+PANIC: double fault,
+Unknown interrupt or fault at:
+End of test: FAILURE
+End of test: RCU_HOTPLUG
+S0Box: more than 5 loops in s0box_interrupt
+Initramfs unpacking failed:
+assertion failed
+Assertion failed
+assertion failure
+Assertion failure
+32-bit relocation outside of kernel!
+64-bit relocation outside of kernel!
+Kernel is not a valid ELF file
+Failed to allocate space for phdrs
+Destination address inappropriately aligned
+Destination address too large
+Wrong destination address
+ -- System halted
diff --git a/framework/dmesg.py b/framework/dmesg.py
index 1a5f629..d23ba23 100644
--- a/framework/dmesg.py
+++ b/framework/dmesg.py
@@ -41,6 +41,7 @@ import sys
import subprocess
import warnings
import abc
+import os, errno
__all__ = [
'BaseDmesg',
@@ -201,6 +202,65 @@ class LinuxDmesg(BaseDmesg):
# Attempt to store the last element of dmesg, unless there was no dmesg
self._last_message = dmesg[-1] if dmesg else None
+ def dmesg_triage(self, result, config):
+ """ Triage dmesg log to sort out different tag and head
+
+ Retrieve the contents of dmesg log, then work out a tag and a head, which
+ can be used to rollback the original dmesg trace details.
+
+ """
+ try:
+ os.stat(config["DMESG_TRIAGE_SRC"])
+ except OSError as e:
+ if e.errno == errno.ENOENT or e.errno == errno.ENODIR:
+ sys.stderr.write("[Dmesg Triage Err]: %s" % str(e))
+ return result
+
+ if not os.path.exists(config["DMESG_TRIAGE_TMP"]):
+ try:
+ os.makedirs(config["DMESG_TRIAGE_TMP"])
+ except OSError as e:
+ sys.stderr.write("[Dmesg Triage Warn]: %s" % str(e))
+ config["DMESG_TRIAGE_TMP"] = "/tmp"
+
+ os.environ["DMESG_TRIAGE_ROOT"] = config["DMESG_TRIAGE_SRC"]
+ os.environ["KMSG_TRIAGE_LOG"] = config["DMESG_TRIAGE_TMP"]
+
+ fd = open("%s" % os.path.join(config["DMESG_TRIAGE_TMP"], ".dmesg"), 'wb')
+ try:
+ fd.write(result["dmesg"])
+ fd.flush()
+ finally:
+ fd.close()
+
+ try:
+ os.chdir(config["DMESG_TRIAGE_SRC"])
+ ret = subprocess.call("which ruby > /dev/null", shell=True)
+ if ret != 0:
+ sys.stderr.write("[Dmesg Triage Err]: No ruby binary found, please install and try again !\n")
+ return result
+ subprocess.check_call("ruby kmsg_triage", shell=True)
+ except subprocess.CalledProcessError as e:
+ sys.stderr.write("[Dmesg Triage Err]: %s" % str(e))
+ return result
+
+ if not os.path.exists("%s" % os.path.join(config["DMESG_TRIAGE_TMP"], ".dmesg_error_ids")):
+ return result
+
+ error_ids = dict()
+ with open(os.path.join(config["DMESG_TRIAGE_TMP"], ".dmesg_error_ids")) as f:
+ for err in iter(f.readline, ''):
+ if re.search(r'@', err) is None:
+ f.close()
+ return result
+ err_id = re.split(r'@', err)
+ error_ids[err_id[0]] = err_id[1].strip("\n").replace("0x", "*")
+
+ f.close()
+
+ result["dmesg_triage_info"] = error_ids
+ return result
+
class DummyDmesg(BaseDmesg):
""" An dummy class for dmesg on non unix-like systems
diff --git a/framework/test/base.py b/framework/test/base.py
index efc20cb..a57c7ea 100644
--- a/framework/test/base.py
+++ b/framework/test/base.py
@@ -157,6 +157,10 @@ class Test(object):
self.run()
self.result['time'] = time.time() - time_start
self.result = dmesg.update_result(self.result)
+ if self.result.has_key("dmesg") and self.env["ENABLE_DMESG_TRIAGE"]:
+ self.env["DMESG_TRIAGE_SRC"] = os.path.join(self.OPTS.env["PIGLIT_SOURCE_DIR"], self.env["DMESG_TRIAGE_SRC"])
+ self.result = dmesg.dmesg_triage(self.result, self.env)
+
# This is a rare case where a bare exception is okay, since we're
# using it to log exceptions
except:
diff --git a/tests/igt.py b/tests/igt.py
index 2047781..704ee75 100644
--- a/tests/igt.py
+++ b/tests/igt.py
@@ -104,6 +104,13 @@ class IGTTest(Test):
[os.path.join(IGT_TEST_ROOT, binary)] + arguments)
self.timeout = 600
+ # enable or disable dmesg triage feature here
+ self.env["ENABLE_DMESG_TRIAGE"] = True
+ if self.env["ENABLE_DMESG_TRIAGE"]:
+ # Initialize dmesg triage resources
+ self.env["DMESG_TRIAGE_TMP"] = "/tmp/dmesg"
+ self.env["DMESG_TRIAGE_SRC"] = "dmesg_triage"
+
def interpret_result(self):
if self.result['returncode'] == 0:
self.result['result'] = 'pass'
--
1.9.1
More information about the Intel-gfx
mailing list