[Piglit] [PATCH 1/2] framework: When a test fails, try it again to see if it's consistent.
Eric Anholt
eric at anholt.net
Tue Sep 17 10:54:00 PDT 2013
Drivers often have some small set of testcases that sometimes fail due
to no fault of the patchset being tested. Learning what set of
testcases is unstable and thus doesn't need to be investigated per
patchset is an overhead for new developers, and even among experienced
developers we waste too much time starting down the path of debugging
before we remember that it's just an intermittent failure.
To reduce this problem, when a test fails, retry it (up to 5 runs
total) to see if it ever passes. If it eventually does, log the
failure at about the same severity as a 'warn' result, but under a
distinct 'unstable' result name so it's obvious what's going on.
---
framework/core.py | 82 +++++++++++++++++++++++++++++++++----------------
framework/summary.py | 17 ++++++----
piglit-summary-html.py | 4 +--
piglit-summary-junit.py | 15 +++++----
templates/index.css | 5 ++-
5 files changed, 82 insertions(+), 41 deletions(-)
diff --git a/framework/core.py b/framework/core.py
index 150a70c..14d0119 100644
--- a/framework/core.py
+++ b/framework/core.py
@@ -417,6 +417,7 @@ class Test:
'''
self.runConcurrent = runConcurrent
self.skip_test = False
+ self.current_path = None
def run(self):
raise NotImplementedError
@@ -432,6 +433,36 @@ class Test:
if self.runConcurrent:
ConcurrentTestPool().put(self.doRun, args=args)
+ def status(self, msg):
+ log(msg=msg, channel=self.current_path)
+
+ def runOnce(self, env):
+ try:
+ self.status("running")
+ time_start = time.time()
+ result = self.run(env.valgrind)
+ time_end = time.time()
+ if 'time' not in result:
+ result['time'] = time_end - time_start
+ if 'result' not in result:
+ result['result'] = 'fail'
+ if not isinstance(result, TestResult):
+ result = TestResult(result)
+ result['result'] = 'warn'
+ result['note'] = 'Result not returned as an instance ' \
+ 'of TestResult'
+ except:
+ result = TestResult()
+ result['result'] = 'fail'
+ result['exception'] = str(sys.exc_info()[0]) + \
+ str(sys.exc_info()[1])
+ result['traceback'] = \
+ "".join(traceback.format_tb(sys.exc_info()[2]))
+
+ self.status(result['result'])
+
+ return result
+
def doRun(self, env, path, json_writer):
'''
Run the test immediately.
@@ -440,34 +471,33 @@ class Test:
Fully qualified test name as a string. For example,
``spec/glsl-1.30/preprocessor/compiler/keywords/void.frag``.
'''
- def status(msg):
- log(msg=msg, channel=path)
+
+ self.current_path = path
# Run the test
if env.execute:
- try:
- status("running")
- time_start = time.time()
- result = self.run(env.valgrind)
- time_end = time.time()
- if 'time' not in result:
- result['time'] = time_end - time_start
- if 'result' not in result:
- result['result'] = 'fail'
- if not isinstance(result, TestResult):
- result = TestResult(result)
- result['result'] = 'warn'
- result['note'] = 'Result not returned as an instance ' \
- 'of TestResult'
- except:
- result = TestResult()
- result['result'] = 'fail'
- result['exception'] = str(sys.exc_info()[0]) + \
- str(sys.exc_info()[1])
- result['traceback'] = \
- "".join(traceback.format_tb(sys.exc_info()[2]))
-
- status(result['result'])
+ # If the test fails, try executing it a few more times to
+ # see if it ever passes. Often drivers have bugs for race
+ # conditions that show up intermittently, which don't get
+ # fixed quickly. By detecting when a test failure is one
+ # of these, we can save a developer a bunch of time in
+ # debugging a piglit result not related to whatever change
+ # they're testing.
+ i = 0
+ lastresult = None
+ while True:
+ result = self.runOnce(env)
+ if result['result'] in ['pass', 'skip']:
+ if lastresult is not None:
+ result = lastresult
+ result['result'] = 'unstable'
+ break
+ if 'exception' in result and 'KeyboardInterrupt' in result['exception']:
+ break
+ lastresult = result
+ i = i + 1
+ if i >= 5:
+ break
if 'subtest' in result and len(result['subtest'].keys()) > 1:
for test in result['subtest'].keys():
@@ -476,7 +506,7 @@ class Test:
else:
json_writer.write_dict_item(path, result)
else:
- status("dry-run")
+ self.status("dry-run")
# Returns True iff the given error message should be ignored
def isIgnored(self, error):
diff --git a/framework/summary.py b/framework/summary.py
index 1cdbab7..3d09007 100644
--- a/framework/summary.py
+++ b/framework/summary.py
@@ -325,10 +325,12 @@ class Summary:
return 2
elif status == 'warn':
return 3
- elif status == 'fail':
+ elif status == 'unstable':
return 4
- elif status == 'crash':
+ elif status == 'fail':
return 5
+ elif status == 'crash':
+ return 6
openGroup('fake')
openGroup('all')
@@ -421,12 +423,14 @@ class Summary:
return 1
elif status == 'warn':
return 2
- elif status == 'fail':
+ elif status == 'unstable':
return 3
- elif status == 'skip':
+ elif status == 'fail':
return 4
- elif status == 'crash':
+ elif status == 'skip':
return 5
+ elif status == 'crash':
+ return 6
elif status == 'special':
return 0
@@ -480,7 +484,7 @@ class Summary:
Private: Find the total number of pass, fail, crash, skip, and warn in
the *last* set of results stored in self.results.
"""
- self.totals = {'pass': 0, 'fail': 0, 'crash': 0, 'skip': 0, 'warn': 0}
+ self.totals = {'pass': 0, 'fail': 0, 'crash': 0, 'skip': 0, 'warn': 0, 'unstable': 0}
for test in self.results[-1].tests.values():
self.totals[test['result']] += 1
@@ -625,6 +629,7 @@ class Summary:
print " crash: %d" % self.totals['crash']
print " skip: %d" % self.totals['skip']
print " warn: %d" % self.totals['warn']
+ print " unstable: %d" % self.totals['unstable']
if self.tests['changes']:
print " changes: %d" % len(self.tests['changes'])
print " fixes: %d" % len(self.tests['fixes'])
diff --git a/piglit-summary-html.py b/piglit-summary-html.py
index b9a2996..a6daf67 100755
--- a/piglit-summary-html.py
+++ b/piglit-summary-html.py
@@ -45,7 +45,7 @@ def main():
parser.add_argument("-e", "--exclude-details",
default=[],
action="append",
- choices=['skip', 'pass', 'warn', 'crash' 'fail',
+ choices=['skip', 'pass', 'warn', 'unstable', 'crash', 'fail',
'all'],
help="Optionally exclude the generation of HTML pages "
"for individual test pages with the status(es) "
@@ -67,7 +67,7 @@ def main():
# If exclude-results has all, then change it to be all
if 'all' in args.exclude_details:
- args.exclude_details = ['skip', 'pass', 'warn', 'crash', 'fail']
+ args.exclude_details = ['skip', 'pass', 'warn', 'unstable', 'crash', 'fail']
# if overwrite is requested delete the output directory
if path.exists(args.summaryDir) and args.overwrite:
diff --git a/piglit-summary-junit.py b/piglit-summary-junit.py
index d9e4b9c..d24ec1a 100755
--- a/piglit-summary-junit.py
+++ b/piglit-summary-junit.py
@@ -39,9 +39,10 @@ class PassVector:
pass/fail/skip/etc.
"""
- def __init__(self, p, w, f, s, c):
+ def __init__(self, p, w, u, f, s, c):
self.passnr = p
self.warnnr = w
+ self.unstablenr = u
self.failnr = f
self.skipnr = s
self.crashnr = c
@@ -49,6 +50,7 @@ class PassVector:
def add(self, o):
self.passnr += o.passnr
self.warnnr += o.warnnr
+ self.unstablenr += o.unstablenr
self.failnr += o.failnr
self.skipnr += o.skipnr
self.crashnr += o.crashnr
@@ -89,11 +91,12 @@ results is an array of TestResult instances, one per testrun
result.status = result['result']
vectormap = {
- 'pass': PassVector(1,0,0,0,0),
- 'warn': PassVector(0,1,0,0,0),
- 'fail': PassVector(0,0,1,0,0),
- 'skip': PassVector(0,0,0,1,0),
- 'crash': PassVector(0,0,0,0,1)
+ 'pass': PassVector(1,0,0,0,0,0),
+ 'warn': PassVector(0,1,0,0,0,0),
+ 'unstable': PassVector(0,0,1,0,0,0),
+ 'fail': PassVector(0,0,0,1,0,0),
+ 'skip': PassVector(0,0,0,0,1,0),
+ 'crash': PassVector(0,0,0,0,0,1)
}
if result.status not in vectormap:
diff --git a/templates/index.css b/templates/index.css
index 875333f..6194a90 100644
--- a/templates/index.css
+++ b/templates/index.css
@@ -36,7 +36,7 @@ td:first-child > div {
background-color: #c8c838
}
-td.skip, td.warn, td.fail, td.pass, td.trap, td.abort, td.crash {
+td.skip, td.warn, td.unstable, td.fail, td.pass, td.trap, td.abort, td.crash {
text-align: right;
}
@@ -60,6 +60,9 @@ tr:nth-child(even) td.skip { background-color: #a0a0a0; }
tr:nth-child(odd) td.warn { background-color: #ff9020; }
tr:nth-child(even) td.warn { background-color: #ef8010; }
+tr:nth-child(odd) td.unstable { background-color: #ff9020; }
+tr:nth-child(even) td.unstable { background-color: #ef8010; }
+
tr:nth-child(odd) td.fail { background-color: #ff2020; }
tr:nth-child(even) td.fail { background-color: #e00505; }
--
1.8.4.rc3
More information about the Piglit
mailing list