[Piglit] [PATCH 1/2] framework: When a test fails, try it again to see if it's consistent.
Dylan Baker
baker.dylan.c at gmail.com
Tue Sep 17 11:50:15 PDT 2013
On Tuesday 17 September 2013 10:54:00 Eric Anholt wrote:
> Drivers often have some small set of testcases that sometimes fail due
> to no fault of the patchset being tested. Learning what set of
> testcases is unstable and thus doesn't need to be investigated per
> patchset is an overhead for new developers, and even among experienced
> developers we waste too much time starting down the path of debugging
> before we remember that it's just an intermittent failure.
>
> To reduce this problem, try the test again (up to 5 times total) to
> see if we ever get a pass. If so, log a failure at about the same
> level as a 'warn' result, but with a different result name so it's
> obvious what's going on.
Instead of "unstable" might I recommend "inconsistent"?
> ---
> framework/core.py | 82
> +++++++++++++++++++++++++++++++++---------------- framework/summary.py |
> 17 ++++++----
> piglit-summary-html.py | 4 +--
> piglit-summary-junit.py | 15 +++++----
> templates/index.css | 5 ++-
> 5 files changed, 82 insertions(+), 41 deletions(-)
>
> diff --git a/framework/core.py b/framework/core.py
> index 150a70c..14d0119 100644
> --- a/framework/core.py
> +++ b/framework/core.py
> @@ -417,6 +417,7 @@ class Test:
> '''
> self.runConcurrent = runConcurrent
> self.skip_test = False
> + self.current_path = None
>
> def run(self):
> raise NotImplementedError
> @@ -432,6 +433,36 @@ class Test:
> if self.runConcurrent:
> ConcurrentTestPool().put(self.doRun, args=args)
>
> + def status(self, msg):
> + log(msg=msg, channel=self.current_path)
> +
> + def runOnce(self, env):
> + try:
> + self.status("running")
> + time_start = time.time()
> + result = self.run(env.valgrind)
> + time_end = time.time()
> + if 'time' not in result:
> + result['time'] = time_end - time_start
> + if 'result' not in result:
> + result['result'] = 'fail'
> + if not isinstance(result, TestResult):
> + result = TestResult(result)
> + result['result'] = 'warn'
> + result['note'] = 'Result not returned as an instance ' \
> + 'of TestResult'
> + except:
> + result = TestResult()
> + result['result'] = 'fail'
> + result['exception'] = str(sys.exc_info()[0]) + \
> + str(sys.exc_info()[1])
> + result['traceback'] = \
> + "".join(traceback.format_tb(sys.exc_info()[2]))
> +
> + self.status(result['result'])
> +
> + return result
> +
> def doRun(self, env, path, json_writer):
> '''
> Run the test immediately.
> @@ -440,34 +471,33 @@ class Test:
> Fully qualified test name as a string. For example,
> ``spec/glsl-1.30/preprocessor/compiler/keywords/void.frag``.
> '''
> - def status(msg):
> - log(msg=msg, channel=path)
> +
> + self.current_path = path
>
> # Run the test
> if env.execute:
> - try:
> - status("running")
> - time_start = time.time()
> - result = self.run(env.valgrind)
> - time_end = time.time()
> - if 'time' not in result:
> - result['time'] = time_end - time_start
> - if 'result' not in result:
> - result['result'] = 'fail'
> - if not isinstance(result, TestResult):
> - result = TestResult(result)
> - result['result'] = 'warn'
> - result['note'] = 'Result not returned as an instance '
> \ - 'of TestResult'
> - except:
> - result = TestResult()
> - result['result'] = 'fail'
> - result['exception'] = str(sys.exc_info()[0]) + \
> - str(sys.exc_info()[1])
> - result['traceback'] = \
> - "".join(traceback.format_tb(sys.exc_info()[2]))
> -
> - status(result['result'])
> + # If the test fails, try executing it a few more times to
> + # see if it ever passes. Often drivers have bugs for race
> + # conditions that show up intermittently, which don't get
> + # fixed quickly. By detecting when a test failure is one
> + # of these, we can save a developer a bunch of time in
> + # debugging a piglit result not related to whatever change
> + # they're testing.
> + i = 0
> + lastresult = None
> + while True:
> + result = self.runOnce(env)
> + if result['result'] in ['pass', 'skip']:
> + if lastresult is not None:
> + result = lastresult
> + result['result'] = 'unstable'
> + break
> + if 'exception' in result and 'KeyboardInterrupt' in
> result['exception']: + break
> + lastresult = result
> + i = i + 1
> + if i >= 5:
> + break
Using a for loop here would be much clearer. Why not: for i in xrange(0, 5)
instead of while True?
>
> if 'subtest' in result and len(result['subtest'].keys()) > 1:
> for test in result['subtest'].keys():
> @@ -476,7 +506,7 @@ class Test:
> else:
> json_writer.write_dict_item(path, result)
> else:
> - status("dry-run")
> + self.status("dry-run")
>
> # Returns True iff the given error message should be ignored
> def isIgnored(self, error):
> diff --git a/framework/summary.py b/framework/summary.py
> index 1cdbab7..3d09007 100644
> --- a/framework/summary.py
> +++ b/framework/summary.py
> @@ -325,10 +325,12 @@ class Summary:
> return 2
> elif status == 'warn':
> return 3
> - elif status == 'fail':
> + elif status == 'unstable':
> return 4
> - elif status == 'crash':
> + elif status == 'fail':
> return 5
> + elif status == 'crash':
> + return 6
>
> openGroup('fake')
> openGroup('all')
> @@ -421,12 +423,14 @@ class Summary:
> return 1
> elif status == 'warn':
> return 2
> - elif status == 'fail':
> + elif status == 'unstable':
> return 3
> - elif status == 'skip':
> + elif status == 'fail':
> return 4
> - elif status == 'crash':
> + elif status == 'skip':
> return 5
> + elif status == 'crash':
> + return 6
> elif status == 'special':
> return 0
>
> @@ -480,7 +484,7 @@ class Summary:
> Private: Find the total number of pass, fail, crash, skip, and warn
> in the *last* set of results stored in self.results.
> """
> - self.totals = {'pass': 0, 'fail': 0, 'crash': 0, 'skip': 0, 'warn':
> 0} + self.totals = {'pass': 0, 'fail': 0, 'crash': 0, 'skip': 0,
> 'warn': 0, 'unstable' : 0}
>
> for test in self.results[-1].tests.values():
> self.totals[test['result']] += 1
> @@ -625,6 +629,7 @@ class Summary:
> print " crash: %d" % self.totals['crash']
> print " skip: %d" % self.totals['skip']
> print " warn: %d" % self.totals['warn']
> + print " unstable: %d" % self.totals['unstable']
> if self.tests['changes']:
> print " changes: %d" % len(self.tests['changes'])
> print " fixes: %d" % len(self.tests['fixes'])
> diff --git a/piglit-summary-html.py b/piglit-summary-html.py
> index b9a2996..a6daf67 100755
> --- a/piglit-summary-html.py
> +++ b/piglit-summary-html.py
> @@ -45,7 +45,7 @@ def main():
> parser.add_argument("-e", "--exclude-details",
> default=[],
> action="append",
> - choices=['skip', 'pass', 'warn', 'crash' 'fail',
> + choices=['skip', 'pass', 'warn', 'unstable',
> 'crash' 'fail', 'all'],
> help="Optionally exclude the generation of HTML
> pages " "for individual test pages with the status(es) " @@ -67,7 +67,7 @@
> def main():
>
> # If exclude-results has all, then change it to be all
> if 'all' in args.exclude_details:
> - args.exclude_details = ['skip', 'pass', 'warn', 'crash', 'fail']
> + args.exclude_details = ['skip', 'pass', 'warn', 'unstable',
> 'crash', 'fail']
>
> # if overwrite is requested delete the output directory
> if path.exists(args.summaryDir) and args.overwrite:
> diff --git a/piglit-summary-junit.py b/piglit-summary-junit.py
> index d9e4b9c..d24ec1a 100755
> --- a/piglit-summary-junit.py
> +++ b/piglit-summary-junit.py
> @@ -39,9 +39,10 @@ class PassVector:
> pass/fail/skip/etc.
> """
>
> - def __init__(self, p, w, f, s, c):
> + def __init__(self, p, w, u, f, s, c):
> self.passnr = p
> self.warnnr = w
> + self.unstablenr = u
> self.failnr = f
> self.skipnr = s
> self.crashnr = c
> @@ -49,6 +50,7 @@ class PassVector:
> def add(self, o):
> self.passnr += o.passnr
> self.warnnr += o.warnnr
> + self.unstablenr += o.unstablenr
> self.failnr += o.failnr
> self.skipnr += o.skipnr
> self.crashnr += o.crashnr
> @@ -89,11 +91,12 @@ results is an array of TestResult instances, one per
> testrun result.status = result['result']
>
> vectormap = {
> - 'pass': PassVector(1,0,0,0,0),
> - 'warn': PassVector(0,1,0,0,0),
> - 'fail': PassVector(0,0,1,0,0),
> - 'skip': PassVector(0,0,0,1,0),
> - 'crash': PassVector(0,0,0,0,1)
> + 'pass': PassVector(1,0,0,0,0,0),
> + 'warn': PassVector(0,1,0,0,0,0),
> + 'unstable': PassVector(0,0,1,0,0,0),
> + 'fail': PassVector(0,0,0,1,0,0),
> + 'skip': PassVector(0,0,0,0,1,0),
> + 'crash': PassVector(0,0,0,0,0,1)
> }
>
> if result.status not in vectormap:
> diff --git a/templates/index.css b/templates/index.css
> index 875333f..6194a90 100644
> --- a/templates/index.css
> +++ b/templates/index.css
> @@ -36,7 +36,7 @@ td:first-child > div {
> background-color: #c8c838
> }
>
> -td.skip, td.warn, td.fail, td.pass, td.trap, td.abort, td.crash {
> +td.skip, td.warn, td.unstable, td.fail, td.pass, td.trap, td.abort,
> td.crash { text-align: right;
> }
>
> @@ -60,6 +60,9 @@ tr:nth-child(even) td.skip { background-color: #a0a0a0; }
> tr:nth-child(odd) td.warn { background-color: #ff9020; }
> tr:nth-child(even) td.warn { background-color: #ef8010; }
>
> +tr:nth-child(odd) td.unstable { background-color: #ff9020; }
> +tr:nth-child(even) td.unstable { background-color: #ef8010; }
> +
> tr:nth-child(odd) td.fail { background-color: #ff2020; }
> tr:nth-child(even) td.fail { background-color: #e00505; }
This is also a good candidate for an option switch in piglit-run.
Consider all of these points, except changing the while loop to a for loop,
as optional suggestions. With that change this patch is:
Reviewed-by: Dylan Baker <baker.dylan.c at gmail.com>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 490 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/piglit/attachments/20130917/c9fff853/attachment-0001.pgp>
More information about the Piglit
mailing list