[Piglit] [PATCH 3/3] Add the ability to resume an interrupted test run where it left off.

Kenneth Graunke kenneth at whitecape.org
Wed Dec 21 11:46:26 PST 2011


GPUs like to hang, especially when barraged with lots of mean Piglit
tests.  Usually this results in the poor developer having to figure out
which test hung, blacklist it via -x, and start the whole test run over.
This can waste a huge amount of time, especially when many tests hang.

This patch adds the ability to resume a Piglit run where you left off.

The workflow is:
$ piglit-run.py -t foo tests/quick.tests results/foobar-1
<interrupt the test run somehow>
$ piglit-run.py -r -x bad-test results/foobar-1

To accomplish this, piglit-run.py now stores the test profile
(quick.tests) and -t/-x options in the JSON results file so it can tell
what you were originally running.  When run with the --resume option, it
re-reads the results file to obtain this information (repairing broken
JSON if necessary), rewrites the existing results, and runs any
remaining tests.
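
For reference, the results file from the example above would then begin
roughly like this (the key names match the 'options' dict written by the
code below; the values are purely illustrative):

{
    "options": {
        "profile": "tests/quick.tests",
        "filter": ["foo"],
        "exclude_filter": []
    },
    "name": "foobar-1",
    ...
}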

Suggested future work is to add an --incomplete=<skip|retry> option
to automatically skip or retry any tests that didn't finish.
Conveniently, these are exactly the ones that needed JSON repair.
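
As a rough sketch of that idea, purely for illustration (the --incomplete
option, OptionIncomplete, and an incomplete_tests list are hypothetical and
not part of this patch):

	# Hypothetical: suppose loadTestResults() also reported which test
	# entries it had to discard while repairing truncated JSON.
	incomplete_tests = old_results.incomplete_tests   # assumed attribute
	if OptionIncomplete == 'skip':
		# Treat unfinished tests like -x exclusions so they are not re-run.
		env.exclude_tests[:0] = incomplete_tests
	# With 'retry', leave them out of the exclusion list so profile.run()
	# simply runs them again, which is what --resume does today.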

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 framework/core.py |    5 ++++
 piglit-run.py     |   60 +++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/framework/core.py b/framework/core.py
index e64edbd..ea24ea3 100644
--- a/framework/core.py
+++ b/framework/core.py
@@ -266,6 +266,7 @@ class ResultFileInOldFormatError(Exception):
 class TestrunResult:
 	def __init__(self):
 		self.serialized_keys = [
+			'options',
 			'name',
 			'tests',
 			'glxinfo',
@@ -392,6 +393,7 @@ class Environment:
 		self.execute = True
 		self.filter = []
 		self.exclude_filter = []
+		self.exclude_tests = []
 
 	def run(self, command):
 		try:
@@ -457,6 +459,9 @@ class Test:
 			if True in map(lambda f: f.search(path) != None, env.exclude_filter):
 				return None
 
+		if path in env.exclude_tests:
+			return None
+
 		def status(msg):
 			log(msg = msg, channel = path)
 
diff --git a/piglit-run.py b/piglit-run.py
index 39e2ee0..5f50b3d 100755
--- a/piglit-run.py
+++ b/piglit-run.py
@@ -28,6 +28,7 @@ import re
 import sys, os
 import time
 import traceback
+import json
 
 sys.path.append(path.dirname(path.realpath(sys.argv[0])))
 import framework.core as core
@@ -39,10 +40,12 @@ from framework.threads import synchronized_self
 def usage():
 	USAGE = """\
 Usage: %(progName)s [options] [profile.tests] [results]
+       %(progName)s [options] -r [results]
 
 Options:
   -h, --help                Show this message
   -d, --dry-run             Do not execute the tests
+  -r, --resume              Resume an interrupted test run.
   -t regexp, --tests=regexp Run only matching tests (can be used more
                             than once)
  -x regexp, --exclude-tests=regexp Exclude matching tests (can be used
@@ -60,6 +63,10 @@ Example:
   %(progName)s -t ^glean/ -t tex tests/all.tests results/all
          Run all tests that are in the 'glean' group or whose path contains
 		 the substring 'tex'
+
+  %(progName)s -r -x bad-test results/all
+         Resume an interrupted test run whose results are stored in the
+	 directory results/all, skipping bad-test.
 """
 	print USAGE % {'progName': sys.argv[0]}
 	sys.exit(1)
@@ -71,25 +78,33 @@ def main():
 		option_list = [
 			 "help",
 			 "dry-run",
+			 "resume",
 			 "tests=",
 			 "name=",
 			 "exclude-tests=",
 			 "concurrent=",
 			 ]
-		options, args = getopt(sys.argv[1:], "hdt:n:x:c:", option_list)
+		options, args = getopt(sys.argv[1:], "hdrt:n:x:c:", option_list)
 	except GetoptError:
 		usage()
 
 	OptionName = ''
+	OptionResume = False
+	test_filter = []
+	exclude_filter = []
 
 	for name, value in options:
 		if name in ('-h', '--help'):
 			usage()
 		elif name in ('-d', '--dry-run'):
 			env.execute = False
+		elif name in ('-r', '--resume'):
+			OptionResume = True
 		elif name in ('-t', '--tests'):
+			test_filter[:0] = [value]
 			env.filter[:0] = [re.compile(value)]
 		elif name in ('-x', '--exclude-tests'):
+			exclude_filter[:0] = [value]
 			env.exclude_filter[:0] = [re.compile(value)]
 		elif name in ('-n', '--name'):
 			OptionName = value
@@ -101,11 +116,26 @@ def main():
 			else:
 				usage()
 
-	if len(args) != 2:
-		usage()
+	if OptionResume:
+		if test_filter or OptionName:
+			print "-r is not compatible with -t or -n."
+			usage()
+		if len(args) != 1:
+			usage()
+		resultsDir = args[0]
 
-	profileFilename = args[0]
-	resultsDir = args[1]
+		# Load settings from the old results JSON
+		old_results = core.loadTestResults(resultsDir)
+		profileFilename = old_results.options['profile']
+		for value in old_results.options['filter']:
+			env.filter[:0] = [re.compile(value)]
+		for value in old_results.options['exclude_filter']:
+			env.exclude_filter[:0] = [re.compile(value)]
+	else:
+		if len(args) != 2:
+			usage()
+		profileFilename = args[0]
+		resultsDir = args[1]
 
 	# Change to the piglit's path
 	piglit_dir = path.dirname(path.realpath(sys.argv[0]))
@@ -127,15 +157,33 @@ def main():
 	json_writer = core.JSONWriter(result_file)
 	json_writer.open_dict()
 
+	# Write out command line options for use in resuming.
+	json_writer.write_dict_key('options')
+	json_writer.open_dict()
+	json_writer.write_dict_item('profile', profileFilename)
+	json_writer.write_dict_key('filter')
+	result_file.write(json.dumps(test_filter))
+	json_writer.write_dict_key('exclude_filter')
+	result_file.write(json.dumps(exclude_filter))
+	json_writer.close_dict()
+
 	json_writer.write_dict_item('name', results.name)
 	for (key, value) in env.collectData().items():
 		json_writer.write_dict_item(key, value)
 
 	profile = core.loadTestProfile(profileFilename, resultsDir)
-	time_start = time.time()
 
 	json_writer.write_dict_key('tests')
 	json_writer.open_dict()
+	# If resuming an interrupted test run, re-write all of the existing
+	# results since we clobbered the results file.  Also, exclude them
+	# from being run again.
+	if OptionResume:
+		for (key, value) in old_results.tests.items():
+			json_writer.write_dict_item(key, value)
+			env.exclude_tests[:0] = [key]
+
+	time_start = time.time()
 	profile.run(env, json_writer)
 	json_writer.close_dict()
 
-- 
1.7.7.3