[Intel-gfx] [PATCH] Add Dmesg Triage Feature: further triage i-g-t kmsg log to reduce result noise resulted from piglit dmesg defect

ethan gao ethan.gao at intel.com
Wed Mar 18 19:36:51 PDT 2015


tests/igt.py: add igt env to enable or disable dmesg triage
framework/test/base.py: trigger dmesg triage depending on dmesg log occurrence
framework/dmesg.py: employ dmesg triage simply for Linux dmesg
dmesg_triage/*: deal with kmsg log with pre-defined dmesg oops pattern

In general, if dmesg triage is enabled and there is new dmesg along with i-g-t testcases
running, the new dmesg will be captured and worked out a tag and head to rollback the
detail when necessary, in addition, on the basis of different dmesg tag or head, the final
result noise of a testcase can be reduced with defined rules or strategy.

Signed-off-by: ethan gao <ethan.gao at intel.com>
---
 dmesg_triage/debug.sh             |  67 ++++++++++++++
 dmesg_triage/dmesg.rb             | 117 ++++++++++++++++++++++++
 dmesg_triage/kmsg_triage          | 182 ++++++++++++++++++++++++++++++++++++++
 dmesg_triage/libdmesg.sh          | 156 ++++++++++++++++++++++++++++++++
 dmesg_triage/oops-context-pattern |  37 ++++++++
 dmesg_triage/oops-pattern         |  59 ++++++++++++
 framework/dmesg.py                |  60 +++++++++++++
 framework/test/base.py            |   4 +
 tests/igt.py                      |   7 ++
 9 files changed, 689 insertions(+)
 create mode 100644 dmesg_triage/debug.sh
 create mode 100644 dmesg_triage/dmesg.rb
 create mode 100755 dmesg_triage/kmsg_triage
 create mode 100644 dmesg_triage/libdmesg.sh
 create mode 100644 dmesg_triage/oops-context-pattern
 create mode 100644 dmesg_triage/oops-pattern

diff --git a/dmesg_triage/debug.sh b/dmesg_triage/debug.sh
new file mode 100644
index 0000000..ec8280e
--- /dev/null
+++ b/dmesg_triage/debug.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Copyright (c) 2015 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+[[ "$BASHPID" ]] || echo "======= Run me with bash ======="
+
+dump_shell_vars()
+{
+	(
+		set -o posix
+		local branch_commit_files=()
+		local sparse_lines=()
+		local remote_url=()
+		set >&2
+	)
+}
+
+dump_call_stack()
+{
+	local stack_depth=${#FUNCNAME[@]}
+	local i
+	for ((i = 0; i < $((stack_depth-1)); i++)); do
+		echo "  ${BASH_SOURCE[i+1]}:${BASH_LINENO[i]}: ${FUNCNAME[i+1]}" >&2
+	done
+}
+
+# nr_stack_dumps=0
+dump_stack()
+{
+	# echo "Stack dump: $BASH_COMMAND"
+	dump_call_stack
+	echo
+	dump_shell_vars
+}
+
+notice()
+{
+	local time_str="$(date +'%F %H:%M:%S') "
+	echo -e ${color[MAGENTA]}"${time_str}$*"$reset_color
+}
+
+die()
+{
+	notice "$*"
+
+	dump_stack
+	email "$*"
+	exit
+}
diff --git a/dmesg_triage/dmesg.rb b/dmesg_triage/dmesg.rb
new file mode 100644
index 0000000..d7aecae
--- /dev/null
+++ b/dmesg_triage/dmesg.rb
@@ -0,0 +1,117 @@
+#!/usr/bin/ruby
+
+# Copyright (c) 2015 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# [Description]: Find out all crash dmesg clauses with tailored pattern
+# [Author]: ethan.gao at intel.com
+
+
+
+require "fileutils"
+require "tempfile"
+
+def fixup_dmesg(line)
+	line.chomp!
+
+	# remove absolute path names
+	line.sub!(%r{/kbuild/src/[^/]+/}, '')
+
+	line.sub!(/\.(isra|constprop|part)\.[0-9]+\+0x/, '+0x')
+
+	# break up mixed messages
+	case line
+	when /^<[0-9]>/
+	when /(.+)(\[ *[0-9]{1,6}\.[0-9]{6}\] .*)/
+		line = $1 + "\n" + $2
+	end
+
+	return line
+end
+
+def fixup_dmesg_file(dmesg_file)
+	tmpfile = Tempfile.new '.fixup-dmesg-', File.dirname(dmesg_file)
+	dmesg_lines = []
+	File.open(dmesg_file, 'rb') do |f|
+		f.each_line { |line|
+			line = fixup_dmesg(line)
+			dmesg_lines << line
+			tmpfile.puts line
+		}
+	end
+	tmpfile.chmod 0664
+	tmpfile.close
+	FileUtils.mv tmpfile.path, dmesg_file, :force => true
+	return dmesg_lines
+end
+
+def grep_crash_head(dmesg, grep_options = '')
+	oops = %x[ grep -a -f #{DMESG_ROOT}/oops-pattern #{grep_options} #{dmesg} |
+		   grep -v -e 'INFO: NMI handler .* took too long to run' |
+		   awk '{line = $0; sub(/^(<[0-9]>)?\[[ 0-9.]+\] /, "", line); if (!x[line]++) print;}'
+	]
+	return oops unless oops.empty?
+
+
+	if system "grep -q -F 'EXT4-fs ('	#{dmesg}"
+		oops = `grep -a -f #{DMESG_ROOT}/ext4-crit-pattern	#{grep_options} #{dmesg}`
+		return oops unless oops.empty?
+	end
+
+	if system "grep -q -F 'XFS ('	#{dmesg}"
+		oops = `grep -a -f #{DMESG_ROOT}/xfs-alert-pattern	#{grep_options} #{dmesg}`
+		return oops unless oops.empty?
+	end
+
+	if system "grep -q -F 'btrfs: '	#{dmesg}"
+		oops = `grep -a -f #{DMESG_ROOT}/btrfs-crit-pattern	#{grep_options} #{dmesg}`
+		return oops unless oops.empty?
+	end
+
+	return ''
+end
+
+def grep_printk_errors(dmesg_file, dmesg_lines)
+	oops = `grep -a -f #{DMESG_ROOT}/oops-pattern #{dmesg_file}`
+	dmesg = dmesg_lines.join "\n"
+	oops += `grep -a -f #{DMESG_ROOT}/ext4-crit-pattern	#{dmesg_file}` if dmesg.index 'EXT4-fs ('
+	oops += `grep -a -f #{DMESG_ROOT}/xfs-alert-pattern	#{dmesg_file}` if dmesg.index 'XFS ('
+	oops += `grep -a -f #{DMESG_ROOT}/btrfs-crit-pattern	#{dmesg_file}` if dmesg.index 'btrfs: '
+	return oops
+end
+
+def common_error_id(line)
+	line = line.chomp
+	line.gsub! /\b3\.[0-9]+[-a-z0-9.]+/, '#'			# linux version: 3.17.0-next-20141008-g099669ed
+	line.gsub! /\b[1-9][0-9]-[A-Z][a-z]+-[0-9]{4}\b/, '#'		# Date: 28-Dec-2013
+	line.gsub! /\b0x[0-9a-f]+\b/, '#'				# hex number
+	line.gsub! /\b[a-f0-9]{40}\b/, '#'				# SHA-1
+	line.gsub! /\b[0-9][0-9.]*/, '#'				# number
+	line.gsub! /#x\b/, '0x'
+	line.gsub! /[ \t]/, ' '
+	line.gsub! /\ \ +/, ' '
+	line.gsub! /([^a-zA-Z0-9])\ /, '\1'
+	line.gsub! /\ ([^a-zA-Z])/, '\1'
+	line.gsub! /^\ /, ''
+	line.gsub! /\  _/, '_'
+	line.gsub! /\ /, '_'
+	line.gsub! /[-_.,;:#!\[\(]+$/, ''
+	line
+end
diff --git a/dmesg_triage/kmsg_triage b/dmesg_triage/kmsg_triage
new file mode 100755
index 0000000..5832871
--- /dev/null
+++ b/dmesg_triage/kmsg_triage
@@ -0,0 +1,182 @@
+#!/usr/bin/ruby
+
+# Copyright (c) 2015 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# [Description]: Triage various dmesg trace log for an unique tag 
+# [Author]: ethan.gao at intel.com
+
+
+DMESG_ROOT = ENV['DMESG_TRIAGE_ROOT'] || File.dirname(File.dirname(File.realpath($0)))
+require "#{DMESG_ROOT}/dmesg.rb"
+
+if ENV['KMSG_TRIAGE_LOG']
+	KMSG = ENV['KMSG_TRIAGE_LOG']
+	kmsg_file = "#{KMSG}/.dmesg"
+	
+	if File.exist? kmsg_file
+		dmesg_file = kmsg_file
+	end
+
+elsif ARGV[0]
+	dmesg_file = ARGV[0]
+else
+	exit
+end
+
+if not File.exist?(dmesg_file) or File.size(dmesg_file) == 0
+	puts "No required dmesg file found !"
+	exit
+end
+
+def oops_to_bisect_pattern(line)
+		words = line.split
+		return '' if words.empty?
+		patterns = []
+		words.each { |w|
+			case w
+			when /([a-zA-Z0-9_]+)\.(isra|constprop|part)\.[0-9]+\+0x/
+				patterns << $1
+				break
+			when /([a-zA-Z0-9_]+\+0x)/, /([a-zA-Z0-9_]+=)/
+				patterns << $1
+				break
+			when /[^a-zA-Z\/:._-]/
+				patterns << '.*' if patterns[-1] != '.*'
+			else
+				patterns << w
+			end
+		}
+		patterns.shift while patterns[0] == '.*'
+		patterns.pop   if patterns[-1] == '.*'
+		patterns.join(' ')
+end
+
+error_ids = {}
+if $PROGRAM_NAME =~ /kmsg_triage/
+	output = grep_crash_head dmesg_file, '-o'
+end
+exit if output.empty?
+
+output.each_line { |line|
+	line.chomp!
+
+	next if line =~ /INFO: Stall ended before state dump start/
+	next if line =~ /INFO: NMI handler .* took too long to run:/
+	next if line =~ /Killed process \d+ \(/					# duplicated with "Out of memory: Kill process "
+
+	# print_hex_dump
+	next if line =~ /(\s[0-9a-f]{2}){16}/
+	next if line =~ /(\s[0-9a-f]{4}){8}/
+	next if line =~ /(\s[0-9a-f]{8}){4}/
+
+	next if line =~ /[^\t\n\0[:print:]]/
+	line.tr! "\0", ''
+
+	case line
+	when /(INFO: rcu[_a-z]* self-detected stall on CPU)/,
+	     /(INFO: rcu[_a-z]* detected stalls on CPUs\/tasks:)/
+		line = $1
+		bug_to_bisect = $1
+	when /(BUG: unable to handle kernel)/,
+	     /(BUG: unable to handle kernel) NULL pointer dereference/,
+	     /(BUG: unable to handle kernel) paging request/
+		line = $1
+		bug_to_bisect = $1
+	when /(BUG: scheduling while atomic:)/,
+	     /(BUG: Bad page map in process)/,
+	     /(BUG: Bad page state in process)/,
+	     /(BUG: soft lockup - CPU#\d+ stuck for \d+s!)/,
+	     /(BUG: spinlock .* on CPU#\d+)/
+		line = $1
+		bug_to_bisect = $1
+	when /(BUG: ).* (still has locks held)/,
+	     /(INFO: task ).* (blocked for more than \d+ seconds)/
+		line = $1 + $2
+		bug_to_bisect = $2
+	when /WARNING:.* at .* ([a-zA-Z.0-9_]+\+0x)/
+		bug_to_bisect = 'WARNING:.* at .* ' + $1.sub(/\.(isra|constprop|part)\.[0-9]+\+0x/, '')
+		line =~ /(at .*)/
+		line = "WARNING: " + $1
+	when /(Kernel panic - not syncing: No working init found.)  Try passing init= option to kernel. /,
+	     /(Kernel panic - not syncing: No init found.)  Try passing init= option to kernel. /
+		line = $1
+		bug_to_bisect = line
+	when /(Out of memory: Kill process) \d+ \(/
+		line = $1
+		bug_to_bisect = $1
+	when /(Writer stall state) \d+ g\d+ c\d+ f/
+		line = $1
+		bug_to_bisect = $1
+	when /(used greatest stack depth:)/
+		line = $1
+		bug_to_bisect = $1
+	# printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} still in use (%d) [unmount of %s %s]\n"
+	when  /(BUG: Dentry ).* (still in use) .* \[unmount of /
+		line = $1 + $2
+		bug_to_bisect = $1 + '.* ' + $2
+	when /([a-zA-Z]+[ a-zA-Z]*: [a-f0-9]{4}) \[#[0-9]+\] [A-Z_ ]*$/
+		line = $1
+		bug_to_bisect = $1
+	when /^backtrace:([a-zA-Z0-9_]+)/
+		bug_to_bisect = $1 + '+0x'
+	else
+		bug_to_bisect = oops_to_bisect_pattern line
+	end
+
+	error_id = line.sub(/^[^a-zA-Z]+/, "")
+
+	error_id.gsub! /\ \]$/, ""					# [ INFO: possible recursive locking detected ]
+	#error_id.gsub! /\/c\/kernel-tests\/src\/[^\/]+\//, ''
+	#error_id.gsub! /\/c\/(wfg|yliu)\/[^\/]+\//, ''
+	#error_id.gsub! /\/lkp\/[^\/]+\/linux[0-9]*\//, ''
+	#error_id.gsub! /\/kernel-tests\/linux[0-9]*\//, ''
+	error_id.gsub! /\.(isra|constprop|part)\.[0-9]+/, ''
+
+	error_id.gsub! /\b[0-9a-f]{8}\b/, "#"
+	error_id.gsub! /\b[0-9a-f]{16}\b/, "#"
+	error_id.gsub! /(=)[0-9a-f]+\b/, '\1#'
+	error_id.gsub! /[+\/]0x[0-9a-f]+\b/, ''
+	error_id.gsub! /[+\/][0-9a-f]+\b/, ''
+
+	#error_id = common_error_id(error_id) + ': 1'
+	error_id = common_error_id(error_id)
+
+	error_id.gsub! /([a-z]:)[0-9]+\b/, '\1'				# WARNING: at arch/x86/kernel/cpu/perf_event.c:1077 x86_pmu_start+0xaa/0x110()
+	error_id.gsub! /#:\[<#>\]\[<#>\]/, ''				# RIP: 0010:[<ffffffff91906d8d>]  [<ffffffff91906d8d>] validate_chain+0xed/0xe80
+
+	next if error_id.size <= 3
+
+	error_ids[error_id] ||= bug_to_bisect
+}
+
+exit if error_ids.empty?
+
+puts "Found necessary kmsg error ids" if $PROGRAM_NAME =~ /kmsg_triage/
+
+KMSG_ERROR_ID = ENV['KMSG_TRIAGE_LOG'] || "#{DMESG_ROOT}"
+# This stores each error id
+f = File.new(File.join("#{KMSG_ERROR_ID}", ".dmesg_error_ids"), "w")
+error_ids.each { |error_id, head|
+	f.write("#{error_id}@#{head}\n")
+}
+
+f.close
+
diff --git a/dmesg_triage/libdmesg.sh b/dmesg_triage/libdmesg.sh
new file mode 100644
index 0000000..7a53fcf
--- /dev/null
+++ b/dmesg_triage/libdmesg.sh
@@ -0,0 +1,156 @@
+# !/bin/bash
+
+# Copyright (c) 2015 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# [Description]: Defined utilities to check out specified dmesg segment 
+# [Author]: ethan.gao at intel.com
+
+
+if [[ -z "${DMESG_ROOT}" ]]; then
+	DMESG_ROOT=`pwd`
+fi
+
+source $DMESG_ROOT/debug.sh
+
+ERR=1
+
+# desc: capture all the heads of various kmsg exceptional log
+# param:
+#   [dmesg]: the dmesg log which you'll employ
+grep_crash_head()
+{
+	[[ ! "$*" || "$*" =~ ^-[a-z]+$ ]] && {
+		echo "input of grep_crash_head is empty" >&2
+		dump_stack
+		return 1
+	}
+	grep -a -f $DMESG_ROOT/oops-pattern "$@" && return
+
+	grep -B8 'Call Trace:$' "$@" |
+	grep -B1 -e 'Pid: [0-9]+, comm: ' \
+		 -e 'CPU: [0-9]+ PID: [0-9]+ Comm: ' |
+	grep -v -e ' [cC]omm: ' \
+		-e '^--$' \
+		-e '^$' \
+		-e '^\[[ 0-9.]*\] $' \
+		-e 'The following trace is a kernel self test and not a bug'
+}
+
+# desc: filter the kmsg exceptional log messages
+# param:
+#   [dmesg]: the dmesg log which you'll engage
+grep_crash_dmesg()
+{
+	[[ "$@" ]] || {
+		echo "input of grep_crash_dmesg is empty" >&2
+		dump_stack
+		return 1
+	}
+
+	grep -a -f $DMESG_ROOT/oops-pattern \
+		-f $DMESG_ROOT/oops-context-pattern "$@"
+}
+
+# desc: filter the first kmsg exceptional log 
+# param:
+#   [dmesg]: the dmesg source you'll use
+first_crash_dmesg()
+{
+	grep_crash_dmesg -C3 "$@" | awk 'BEGIN { nr_first_head=0; };
+					/^--$/ { exit };
+					/---\[ end trace .*\]---/ { print; getline; print; exit };
+					/(kernel BUG at |Kernel panic -|\<BUG: |WARNING: |INFO: )/ { if (nr_first_head > NR + 1) exit; nr_first_head = NR; };
+					{ print };
+					NR > 200 { exit };'
+}
+
+# desc: retrieve dmesg trace info based on dmesg tag
+# param:
+#   [tag]: a dmesg tag or head
+#   [dmesg]: the file which contains your full dmesg log
+head_to_dmesg()
+{
+	[[ $# -eq 2 ]] || {
+		echo "ERROR:input of head_to_dmesg is empty !" >&2
+		echo "Usage: head_to_dmesg <dmesg_head> <dmesg_log_file>"
+		return $ERR
+	}
+	
+	dmesg_head=$1
+	dmesg_file=$2
+
+	awk -v pattern="${dmesg_head}" '
+	BEGIN { nr_dmesg_target = 0; }
+	{
+		if ( $0 !~ pattern){
+			next;
+		} 
+		else { 
+			nr_dmesg_target = FNR; print;
+		}
+
+		do
+		{
+			if (getline > 0) 
+				print; 
+			else
+				break;
+
+			if ($0 ~ /---\[ end trace .*\]/){
+				break;
+			}
+		} while (NR - nr_dmesg_target < 60)
+	}	
+	END { 
+		if (nr_dmesg_target > 0) {
+			print "---[kernel msg end]---";
+		}
+		else {
+			print "Not Found";
+		}
+	}' "${dmesg_file}"
+}
+
+
+# desc: sort out the call trace funcs from the given dmesg log
+# param:
+#   [dmesg]: the dmesg source you'll use, if a file 
+CALL_TRACE_FUNCS='[a-zA-Z0-9._]+\+0x[0-9a-f]+\/0x[0-9a-f]+'
+call_trace_funcs()
+{
+	local dmesg=$1
+	grep -Eo "$CALL_TRACE_FUNCS" $1 | cut -f1 -d+ | uniq
+}
+
+# desc: sort out funcs from the first call trace stack of dmesg
+# param:
+#   [dmesg]: the dmesg call trace log
+first_call_trace()
+{
+	local dmesg=$1
+	awk 'BEGIN { in_trace=0; };
+		/(Call Trace:|state was registered at:)/ { in_trace++; nr=0; next; };
+		/'"$CALL_TRACE_FUNCS"'/{ if (match($0, />\] (?\s)?(_)?[a-zA-Z_][a-zA-Z0-9._]+\+0x/))
+						{ print substr($0, RSTART+3, RLENGTH-6); nr++; }; next; };
+		// {if (in_trace > 0 && nr > 1) exit;};' $dmesg |
+		sed -r -e 's/\.(isra|constprop|part)\.[0-9]+//g' \
+			| uniq
+}
diff --git a/dmesg_triage/oops-context-pattern b/dmesg_triage/oops-context-pattern
new file mode 100644
index 0000000..f7dc209
--- /dev/null
+++ b/dmesg_triage/oops-context-pattern
@@ -0,0 +1,37 @@
+invoked oom-killer:
+INFO: possible recursive locking detected
+ is trying to acquire lock:
+ state was registered at:
+Possible unsafe locking scenario:
+       CPU0$
+       ----$
+  lock(.*);
+\*\*\* DEADLOCK \*\*\*
+May be due to missing lock nesting notation
+  <Interrupt>
+but task is already holding lock:
+which lock already depends on the new lock.
+other info that might help us debug this:
+stack backtrace:
+ EFLAGS: 
+RBP: 
+ knlGS:
+ CR3: 
+Stack:
+Code: 
+sending NMI to all CPUs:
+Modules linked in:
+Hardware name: 
+Call Trace:
+\[<[a-f0-9]\+>\] [a-z0-9._]\++0x[0-9a-f]\+/0x[0-9a-f]\+
+\[<[a-f0-9]\+>\] ? [a-z0-9._]\++0x[0-9a-f]\+/0x[0-9a-f]\+
+\( [0-9a-f]\{16\}\)\{7\}
+slab_unreclaimable:
+Swap cache stats:
+pages RAM
+ locks held by 
+ lock held by 
+RCU used illegally from 
+ =========================$
+Object [0-9a-f]\+: [0-9a-f ]\+  \.\.\.
+Redzone [0-9a-f]\+: [0-9a-f ]\+ 
diff --git a/dmesg_triage/oops-pattern b/dmesg_triage/oops-pattern
new file mode 100644
index 0000000..9ce4148
--- /dev/null
+++ b/dmesg_triage/oops-pattern
@@ -0,0 +1,59 @@
+[a-zA-Z ]\+: [a-f0-9]\{4\} \[#[0-9]\+\] [A-Z_ ]*$
+kernel BUG at .*
+Kernel panic - not syncing: .*
+^BUG: .*
+\[ *[0-9.]\+\] BUG: .*
+BUG .* (.*): .*
+Oops: .*
+WARNING: at .*
+WARNING: CPU: [0-9]\+ PID: [0-9]\+ at .*
+^INFO: .*detected stall.*
+^INFO: .* cpu=.*
+^INFO: Slab 0x.*
+^INFO: Object 0x.*
+^INFO: 0x.*. First byte 0x.*
+\[ *[0-9.]\+\] INFO: .*
+\[ INFO: [^i].* \]
+RCU used illegally from offline CPU!
+RCU used illegally from idle CPU!
+RCU used illegally from extended quiescent state!
+inconsistent {.*} -> {.*} usage\.
+initcall .* returned with .*
+\[ BUG: [^b].*! \]
+ is trying to release lock (.*) at
+ is trying to contend lock (.*) at
+ is trying to lock: .*
+ but this task is not holding: .*
+ is leaving the kernel with locks still held!
+ is exiting with locks still held!
+ is freeing memory .*, with a lock still held there!
+genirq: Flags mismatch irq .*
+Out of memory: Kill process 
+page allocation failure: order:[0-9]\+, mode:0x.*
+pagealloc: single bit error
+pagealloc: memory corruption
+audit: (.*) error: .*
+getblk(): invalid block size [0-9]\+ requested
+KGDB: re-enter exception: ALL breakpoints killed
+IP-Config: Auto-configuration of network failed
+EIP is at [a-zA-Z0-9._]\++0x.*/0x.*
+RIP: [0-9a-f]\{4\}:\[.*\] [a-zA-Z0-9._]\++0x.*/0x.*
+PANIC: early exception 
+PANIC: double fault, 
+Unknown interrupt or fault at:
+End of test: FAILURE
+End of test: RCU_HOTPLUG
+S0Box: more than 5 loops in s0box_interrupt
+Initramfs unpacking failed: 
+assertion failed
+Assertion failed
+assertion failure
+Assertion failure
+32-bit relocation outside of kernel!
+64-bit relocation outside of kernel!
+Kernel is not a valid ELF file
+Failed to allocate space for phdrs
+Destination address inappropriately aligned
+Destination address too large
+Wrong destination address
+ -- System halted
diff --git a/framework/dmesg.py b/framework/dmesg.py
index 1a5f629..d23ba23 100644
--- a/framework/dmesg.py
+++ b/framework/dmesg.py
@@ -41,6 +41,7 @@ import sys
 import subprocess
 import warnings
 import abc
+import os, errno
 
 __all__ = [
     'BaseDmesg',
@@ -201,6 +202,65 @@ class LinuxDmesg(BaseDmesg):
         # Attempt to store the last element of dmesg, unless there was no dmesg
         self._last_message = dmesg[-1] if dmesg else None
 
+    def dmesg_triage(self, result, config):
+        """ Triage dmesg log to sort out different tag and head
+
+        Retrieve the contents of dmesg log, then work out a tag and a head, which
+        can be used to rollback the original dmesg trace details.
+
+        """
+        try:
+            os.stat(config["DMESG_TRIAGE_SRC"])
+        except OSError as e:
+            if e.errno == errno.ENOENT or e.errno == errno.ENODIR:
+                sys.stderr.write("[Dmesg Triage Err]: %s" % str(e))
+                return result
+
+        if not os.path.exists(config["DMESG_TRIAGE_TMP"]):
+            try:
+                os.makedirs(config["DMESG_TRIAGE_TMP"])
+            except OSError as e:
+                sys.stderr.write("[Dmesg Triage Warn]: %s" % str(e))
+                config["DMESG_TRIAGE_TMP"] = "/tmp" 	
+
+        os.environ["DMESG_TRIAGE_ROOT"] = config["DMESG_TRIAGE_SRC"]
+        os.environ["KMSG_TRIAGE_LOG"] = config["DMESG_TRIAGE_TMP"]
+
+        fd = open("%s" % os.path.join(config["DMESG_TRIAGE_TMP"], ".dmesg"), 'wb')
+        try:
+            fd.write(result["dmesg"])
+            fd.flush()
+        finally:
+            fd.close()
+
+        try:
+            os.chdir(config["DMESG_TRIAGE_SRC"])
+            ret = subprocess.call("which ruby > /dev/null", shell=True)
+            if ret != 0:
+                sys.stderr.write("[Dmesg Triage Err]: No ruby binary found, please install and try again !\n")
+                return result
+            subprocess.check_call("ruby kmsg_triage", shell=True)
+        except subprocess.CalledProcessError as e:
+            sys.stderr.write("[Dmesg Triage Err]: %s" % str(e))
+            return result
+
+        if not os.path.exists("%s" % os.path.join(config["DMESG_TRIAGE_TMP"], ".dmesg_error_ids")):
+            return result
+
+        error_ids = dict()
+        with open(os.path.join(config["DMESG_TRIAGE_TMP"], ".dmesg_error_ids")) as f:
+            for err in iter(f.readline, ''):
+                if re.search(r'@', err) is None:
+                    f.close()
+                    return result
+                err_id = re.split(r'@', err)
+                error_ids[err_id[0]] = err_id[1].strip("\n").replace("0x", "*")
+             
+            f.close()
+
+        result["dmesg_triage_info"] = error_ids
+        return result
+    
 
 class DummyDmesg(BaseDmesg):
     """ An dummy class for dmesg on non unix-like systems
diff --git a/framework/test/base.py b/framework/test/base.py
index efc20cb..a57c7ea 100644
--- a/framework/test/base.py
+++ b/framework/test/base.py
@@ -157,6 +157,10 @@ class Test(object):
                 self.run()
                 self.result['time'] = time.time() - time_start
                 self.result = dmesg.update_result(self.result)
+                if self.result.has_key("dmesg") and self.env["ENABLE_DMESG_TRIAGE"]:
+                    self.env["DMESG_TRIAGE_SRC"] = os.path.join(self.OPTS.env["PIGLIT_SOURCE_DIR"], self.env["DMESG_TRIAGE_SRC"])
+                    self.result = dmesg.dmesg_triage(self.result, self.env)
+
             # This is a rare case where a bare exception is okay, since we're
             # using it to log exceptions
             except:
diff --git a/tests/igt.py b/tests/igt.py
index 2047781..704ee75 100644
--- a/tests/igt.py
+++ b/tests/igt.py
@@ -104,6 +104,13 @@ class IGTTest(Test):
             [os.path.join(IGT_TEST_ROOT, binary)] + arguments)
         self.timeout = 600
 
+        # enable or disable dmesg triage feature here
+        self.env["ENABLE_DMESG_TRIAGE"] = True
+        if self.env["ENABLE_DMESG_TRIAGE"]:
+            # Initialize dmesg triage resources
+            self.env["DMESG_TRIAGE_TMP"] = "/tmp/dmesg"
+            self.env["DMESG_TRIAGE_SRC"] = "dmesg_triage"
+
     def interpret_result(self):
         if self.result['returncode'] == 0:
             self.result['result'] = 'pass'
-- 
1.9.1



More information about the Intel-gfx mailing list