[Piglit] [PATCH 6/8] framework: When a test fails, try it again to see if it's consistent.

Eric Anholt eric at anholt.net
Wed Sep 18 15:00:31 PDT 2013


Drivers often have some small set of testcases that sometimes fail
through no fault of the patchset being tested.  Learning which
testcases are unstable, and thus don't need to be investigated per
patchset, is overhead for new developers, and even experienced
developers waste too much time starting down the path of debugging
before remembering that it's just an intermittent failure.

To reduce this problem, when a test fails, run it again (up to 5
times total) to see if we ever get a pass.  If so, log the failure at
about the same severity as a 'warn' result, but under a distinct
result name ('unstable') so it's obvious what's going on.

v2: use xrange for the for loop (suggested by Dylan), only print the
    test status once (and thus allow "unstable" to be printed), fix
    junit execution.

Reviewed-by: Chad Versace <chad.versace at linux.intel.com> (v1)
Reviewed-by: Dylan Baker <baker.dylan.c at gmail.com> (v1)
---
 framework/core.py       | 79 +++++++++++++++++++++++++++++++++----------------
 framework/summary.py    | 17 +++++++----
 piglit-summary-html.py  |  4 +--
 piglit-summary-junit.py | 17 ++++++-----
 templates/index.css     |  5 +++-
 5 files changed, 80 insertions(+), 42 deletions(-)

diff --git a/framework/core.py b/framework/core.py
index 25e84c1..5cb24aa 100644
--- a/framework/core.py
+++ b/framework/core.py
@@ -415,6 +415,7 @@ class Test:
         '''
         self.runConcurrent = runConcurrent
         self.skip_test = False
+        self.current_path = None
 
     def run(self):
         raise NotImplementedError
@@ -430,6 +431,33 @@ class Test:
         if self.runConcurrent:
             ConcurrentTestPool().put(self.doRun, args=args)
 
+    def status(self, msg):
+        log(msg=msg, channel=self.current_path)
+
+    def runOnce(self, env):
+        try:
+            time_start = time.time()
+            result = self.run(env.valgrind)
+            time_end = time.time()
+            if 'time' not in result:
+                result['time'] = time_end - time_start
+            if 'result' not in result:
+                result['result'] = 'fail'
+            if not isinstance(result, TestResult):
+                result = TestResult(result)
+                result['result'] = 'warn'
+                result['note'] = 'Result not returned as an instance ' \
+                             'of TestResult'
+        except:
+            result = TestResult()
+            result['result'] = 'fail'
+            result['exception'] = str(sys.exc_info()[0]) + \
+                str(sys.exc_info()[1])
+            result['traceback'] = \
+                "".join(traceback.format_tb(sys.exc_info()[2]))
+
+        return result
+
     def doRun(self, env, path, json_writer):
         '''
         Run the test immediately.
@@ -438,34 +466,33 @@ class Test:
             Fully qualified test name as a string.  For example,
             ``spec/glsl-1.30/preprocessor/compiler/keywords/void.frag``.
         '''
-        def status(msg):
-            log(msg=msg, channel=path)
+
+        self.current_path = path
 
         # Run the test
         if env.execute:
-            try:
-                status("running")
-                time_start = time.time()
-                result = self.run(env.valgrind)
-                time_end = time.time()
-                if 'time' not in result:
-                    result['time'] = time_end - time_start
-                if 'result' not in result:
-                    result['result'] = 'fail'
-                if not isinstance(result, TestResult):
-                    result = TestResult(result)
-                    result['result'] = 'warn'
-                    result['note'] = 'Result not returned as an instance ' \
-                                     'of TestResult'
-            except:
-                result = TestResult()
-                result['result'] = 'fail'
-                result['exception'] = str(sys.exc_info()[0]) + \
-                    str(sys.exc_info()[1])
-                result['traceback'] = \
-                    "".join(traceback.format_tb(sys.exc_info()[2]))
-
-            status(result['result'])
+            self.status("running")
+
+            # If the test fails, try executing it a few more times to
+            # see if it ever passes.  Drivers often have race-condition
+            # bugs that show up only intermittently and don't get fixed
+            # quickly.  By detecting when a test failure is one of
+            # these, we can save a developer a bunch of time debugging
+            # a piglit result that has nothing to do with whatever
+            # change they're testing.
+            lastresult = None
+            for i in xrange(5):
+                result = self.runOnce(env)
+                if result['result'] in ['pass', 'skip']:
+                    if lastresult is not None:
+                        result = lastresult
+                        result['result'] = 'unstable'
+                    break
+                if 'exception' in result and 'KeyboardInterrupt' in result['exception']:
+                    break
+                lastresult = result
+
+            self.status(result['result'])
 
             if 'subtest' in result and len(result['subtest'].keys()) > 1:
                 for test in result['subtest'].keys():
@@ -474,7 +501,7 @@ class Test:
             else:
                 json_writer.write_dict_item(path, result)
         else:
-            status("dry-run")
+            self.status("dry-run")
 
     # Returns True iff the given error message should be ignored
     def isIgnored(self, error):
diff --git a/framework/summary.py b/framework/summary.py
index be9c96e..42f8a8e 100644
--- a/framework/summary.py
+++ b/framework/summary.py
@@ -325,10 +325,12 @@ class Summary:
                     return 2
                 elif status == 'warn':
                     return 3
-                elif status == 'fail':
+                elif status == 'unstable':
                     return 4
-                elif status == 'crash':
+                elif status == 'fail':
                     return 5
+                elif status == 'crash':
+                    return 6
 
             openGroup('fake')
             openGroup('all')
@@ -412,12 +414,14 @@ class Summary:
                 return 1
             elif status == 'warn':
                 return 2
-            elif status == 'fail':
+            elif status == 'unstable':
                 return 3
-            elif status == 'skip':
+            elif status == 'fail':
                 return 4
-            elif status == 'crash':
+            elif status == 'skip':
                 return 5
+            elif status == 'crash':
+                return 6
             elif status == 'special':
                 return 0
 
@@ -452,7 +456,7 @@ class Summary:
         Private: Find the total number of pass, fail, crash, skip, and warn in
         the *last* set of results stored in self.results.
         """
-        self.totals = {'pass': 0, 'fail': 0, 'crash': 0, 'skip': 0, 'warn': 0}
+        self.totals = {'pass': 0, 'fail': 0, 'crash': 0, 'skip': 0, 'warn': 0, 'unstable': 0}
 
         for test in self.results[-1].tests.values():
             self.totals[test['result']] += 1
@@ -597,6 +601,7 @@ class Summary:
         print "      crash: %d" % self.totals['crash']
         print "       skip: %d" % self.totals['skip']
         print "       warn: %d" % self.totals['warn']
+        print "   unstable: %d" % self.totals['unstable']
         if self.tests['changes']:
             print "    changes: %d" % len(self.tests['changes'])
             print "      fixes: %d" % len(self.tests['fixes'])
diff --git a/piglit-summary-html.py b/piglit-summary-html.py
index b9a2996..a6daf67 100755
--- a/piglit-summary-html.py
+++ b/piglit-summary-html.py
@@ -45,7 +45,7 @@ def main():
     parser.add_argument("-e", "--exclude-details",
                         default=[],
                         action="append",
-                        choices=['skip', 'pass', 'warn', 'crash' 'fail',
+                        choices=['skip', 'pass', 'warn', 'unstable', 'crash', 'fail',
                                  'all'],
                         help="Optionally exclude the generation of HTML pages "
                              "for individual test pages with the status(es) "
@@ -67,7 +67,7 @@ def main():
 
     # If exclude-results has all, then change it to be all
     if 'all' in args.exclude_details:
-        args.exclude_details = ['skip', 'pass', 'warn', 'crash', 'fail']
+        args.exclude_details = ['skip', 'pass', 'warn', 'unstable', 'crash', 'fail']
 
     # if overwrite is requested delete the output directory
     if path.exists(args.summaryDir) and args.overwrite:
diff --git a/piglit-summary-junit.py b/piglit-summary-junit.py
index d9e4b9c..4d578c9 100755
--- a/piglit-summary-junit.py
+++ b/piglit-summary-junit.py
@@ -39,9 +39,10 @@ class PassVector:
     pass/fail/skip/etc.
     """
 
-    def __init__(self, p, w, f, s, c):
+    def __init__(self, p, w, u, f, s, c):
         self.passnr = p
         self.warnnr = w
+        self.unstablenr = u
         self.failnr = f
         self.skipnr = s
         self.crashnr = c
@@ -49,6 +50,7 @@ class PassVector:
     def add(self, o):
         self.passnr += o.passnr
         self.warnnr += o.warnnr
+        self.unstablenr += o.unstablenr
         self.failnr += o.failnr
         self.skipnr += o.skipnr
         self.crashnr += o.crashnr
@@ -89,11 +91,12 @@ results is an array of TestResult instances, one per testrun
                 result.status = result['result']
 
             vectormap = {
-                    'pass': PassVector(1,0,0,0,0),
-                    'warn': PassVector(0,1,0,0,0),
-                    'fail': PassVector(0,0,1,0,0),
-                    'skip': PassVector(0,0,0,1,0),
-                    'crash': PassVector(0,0,0,0,1)
+                    'pass':     PassVector(1,0,0,0,0,0),
+                    'warn':     PassVector(0,1,0,0,0,0),
+                    'unstable': PassVector(0,0,1,0,0,0),
+                    'fail':     PassVector(0,0,0,1,0,0),
+                    'skip':     PassVector(0,0,0,0,1,0),
+                    'crash':    PassVector(0,0,0,0,0,1)
             }
 
             if result.status not in vectormap:
@@ -144,7 +147,7 @@ results is an array of GroupResult instances, one per testrun
         # Perform some initial annotations
         for j in range(len(self.results)):
             result = self.results[j]
-            result.passvector = PassVector(0, 0, 0, 0, 0)
+            result.passvector = PassVector(0, 0, 0, 0, 0, 0)
             result.testrun = self.summary.testruns[j]
 
         # Collect, create and annotate children
diff --git a/templates/index.css b/templates/index.css
index 875333f..6194a90 100644
--- a/templates/index.css
+++ b/templates/index.css
@@ -36,7 +36,7 @@ td:first-child > div {
 	background-color: #c8c838
 }
 
-td.skip, td.warn, td.fail, td.pass, td.trap, td.abort, td.crash {
+td.skip, td.warn, td.unstable, td.fail, td.pass, td.trap, td.abort, td.crash {
 	text-align: right;
 }
 
@@ -60,6 +60,9 @@ tr:nth-child(even) td.skip  { background-color: #a0a0a0; }
 tr:nth-child(odd)  td.warn  { background-color: #ff9020; }
 tr:nth-child(even) td.warn  { background-color: #ef8010; }
 
+tr:nth-child(odd)  td.unstable  { background-color: #ff9020; }
+tr:nth-child(even) td.unstable  { background-color: #ef8010; }
+
 tr:nth-child(odd)  td.fail  { background-color: #ff2020; }
 tr:nth-child(even) td.fail  { background-color: #e00505; }
 
-- 
1.8.4.rc3


