[Piglit] [PATCH 3/3] Add the ability to resume an interrupted test run where it left off.

Wed Jan 4 10:25:13 PST 2012

On 21 December 2011 11:46, Kenneth Graunke <kenneth at whitecape.org> wrote:

> GPUs like to hang, especially when barraged with lots of mean Piglit
> tests.  Usually this results in the poor developer having to figure out
> what test hung, blacklist it via -x, and start the whole test run over.
> This can waste a huge amount of time, especially when many tests hang.
>
> This patch adds the ability to resume a Piglit run where you left off.
>
> The workflow is:
> $ piglit-run.py -t foo tests/quick.tests results/foobar-1
> <interrupt the test run somehow>
> $ piglit-run.py -r -x bad-test results/foobar-1
>
> To accomplish this, piglit-run.py now stores the test profile
> (quick.tests) and -t/-x options in the JSON results file so it can tell
> what you were originally running.  When run with the --resume option, it
> re-reads the results file to obtain this information (repairing broken
> JSON if necessary), rewrites the existing results, and runs any
> remaining tests.
>
> Suggested future work is to add an --incomplete=<skip|retry> option
> to automatically skip or retry any tests that didn't finish.
> Conveniently, these are exactly the ones that needed JSON repair.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  framework/core.py |    5 ++++
>  piglit-run.py     |   60
> +++++++++++++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 59 insertions(+), 6 deletions(-)
>
> diff --git a/framework/core.py b/framework/core.py
> index e64edbd..ea24ea3 100644
> --- a/framework/core.py
> +++ b/framework/core.py
> @@ -266,6 +266,7 @@ class ResultFileInOldFormatError(Exception):
>  class TestrunResult:
>        def __init__(self):
>                self.serialized_keys = [
> +                       'options',
>                        'name',
>                        'tests',
>                        'glxinfo',
> @@ -392,6 +393,7 @@ class Environment:
>                self.execute = True
>                self.filter = []
>                self.exclude_filter = []
> +               self.exclude_tests = []
>

Since this is potentially going to have thousands of items in it, it would
probably be better to make it a set than a list (self.exclude_tests =
set()).

>
>        def run(self, command):
>                try:
> @@ -457,6 +459,9 @@ class Test:
>                        if True in map(lambda f: f.search(path) != None,
> env.exclude_filter):
>                                return None
>
> +               if path in env.exclude_tests:
> +                       return None
> +
>                def status(msg):
>                        log(msg = msg, channel = path)
>
> diff --git a/piglit-run.py b/piglit-run.py
> index 39e2ee0..5f50b3d 100755
> --- a/piglit-run.py
> +++ b/piglit-run.py
> @@ -28,6 +28,7 @@ import re
>  import sys, os
>  import time
>  import traceback
> +import json
>
>  sys.path.append(path.dirname(path.realpath(sys.argv[0])))
>  import framework.core as core
> @@ -39,10 +40,12 @@ from framework.threads import synchronized_self
>  def usage():
>        USAGE = """\
>  Usage: %(progName)s [options] [profile.tests] [results]
> +       %(progName)s [options] -r [results]
>
>  Options:
>   -h, --help                Show this message
>   -d, --dry-run             Do not execute the tests
> +  -r, --resume              Resume an interrupted test run.
>   -t regexp, --tests=regexp Run only matching tests (can be used more
>                             than once)
>   -x regexp, --exclude-tests=regexp Excludey matching tests (can be used
> @@ -60,6 +63,10 @@ Example:
>   %(progName)s -t ^glean/ -t tex tests/all.tests results/all
>          Run all tests that are in the 'glean' group or whose path contains
>                 the substring 'tex'
> +
> +  %(progName)s -r -x bad-test results/all
> +         Resume an interrupted test run whose results are stored in the
> +        directory results/all, skipping bad-test.
>  """
>        print USAGE % {'progName': sys.argv[0]}
>        sys.exit(1)
> @@ -71,25 +78,33 @@ def main():
>                option_list = [
>                         "help",
>                         "dry-run",
> +                        "resume",
>                         "tests=",
>                         "name=",
>                         "exclude-tests=",
>                         "concurrent=",
>                         ]
> -               options, args = getopt(sys.argv[1:], "hdt:n:x:c:",
> option_list)
> +               options, args = getopt(sys.argv[1:], "hdrt:n:x:c:",
> option_list)
>        except GetoptError:
>                usage()
>
>        OptionName = ''
> +       OptionResume = False
> +       test_filter = []
> +       exclude_filter = []
>
>        for name, value in options:
>                if name in ('-h', '--help'):
>                        usage()
>                elif name in ('-d', '--dry-run'):
>                        env.execute = False
> +               elif name in ('-r', '--resume'):
> +                       OptionResume = True
>                elif name in ('-t', '--tests'):
> +                       test_filter[:0] = [value]
>                        env.filter[:0] = [re.compile(value)]
>                elif name in ('-x', '--exclude-tests'):
> +                       exclude_filter[:0] = [value]
>                        env.exclude_filter[:0] = [re.compile(value)]
>

The standard Python way to add an item to a list is

my_list.append(item)

rather than

my_list[:0] = [item]

The latter has two problems: (1) inefficiency (it forces the list to be
copied), and (2) it reverses the list order.

As long as we're changing this code, can we build up the lists using append?

>                elif name in ('-n', '--name'):
>                        OptionName = value
> @@ -101,11 +116,26 @@ def main():
>                        else:
>                                usage()
>
> -       if len(args) != 2:
> -               usage()
> +       if OptionResume:
> +               if test_filter or OptionName:
> +                       print "-r is not compatible with -t or -n."
> +                       usage()
> +               if len(args) != 1:
> +                       usage()
> +               resultsDir = args[0]
>
> -       profileFilename = args[0]
> -       resultsDir = args[1]
> +               # Load settings from the old results JSON
> +               old_results = core.loadTestResults(resultsDir)
> +               profileFilename = old_results.options['profile']
> +               for value in old_results.options['filter']:
> +                       env.filter[:0] = [re.compile(value)]
> +               for value in old_results.options['exclude_filter']:
> +                       env.exclude_filter[:0] = [re.compile(value)]
>

I think these two loops need to also populate test_filter and
exclude_filter.  Otherwise, if a test is resumed twice, the second resume
will lose the filter information.  Also my above comments about append
apply here too.

> +       else:
> +               if len(args) != 2:
> +                       usage()
> +               profileFilename = args[0]
> +               resultsDir = args[1]
>
>        # Change to the piglit's path
>        piglit_dir = path.dirname(path.realpath(sys.argv[0]))
> @@ -127,15 +157,33 @@ def main():
>        json_writer = core.JSONWriter(result_file)
>        json_writer.open_dict()
>
> +       # Write out command line options for use in resuming.
> +       json_writer.write_dict_key('options')
> +       json_writer.open_dict()
> +       json_writer.write_dict_item('profile', profileFilename)
> +       json_writer.write_dict_key('filter')
> +       result_file.write(json.dumps(test_filter))
> +       json_writer.write_dict_key('exclude_filter')
> +       result_file.write(json.dumps(exclude_filter))
> +       json_writer.close_dict()
> +
>        json_writer.write_dict_item('name', results.name)
>        for (key, value) in env.collectData().items():
>                json_writer.write_dict_item(key, value)
>
>        profile = core.loadTestProfile(profileFilename, resultsDir)
> -       time_start = time.time()
>
>        json_writer.write_dict_key('tests')
>        json_writer.open_dict()
> +       # If resuming an interrupted test run, re-write all of the existing
> +       # results since we clobbered the results file.  Also, exclude them
> +       # from being run again.
> +       if OptionResume:
> +               for (key, value) in old_results.tests.items():
> +                       json_writer.write_dict_item(key, value)
> +                       env.exclude_tests[:0] = [key]
>

Assuming we change env.exclude_tests to a set, the proper way to add "key"
to it is

env.exclude_tests.add(key)

> +
> +       time_start = time.time()
>        profile.run(env, json_writer)
>        json_writer.close_dict()
>
> --
> 1.7.7.3
>
> _______________________________________________
> Piglit mailing list
> Piglit at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/piglit
>

Other than the above comments, this patch series is:

Reviewed-by: Paul Berry <stereotype441 at gmail.com>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/piglit/attachments/20120104/056b19fc/attachment.htm>