[Ezbench-dev] [PATCH 14/15] smartezbench: resume incomplete runs, if possible
Martin Peres
martin.peres at linux.intel.com
Mon Jan 30 20:54:12 UTC 2017
---
python-modules/ezbench/smartezbench.py | 71 +++++++++++++++++++++++++---------
1 file changed, 52 insertions(+), 19 deletions(-)
diff --git a/python-modules/ezbench/smartezbench.py b/python-modules/ezbench/smartezbench.py
index 5f3474b..0355a07 100644
--- a/python-modules/ezbench/smartezbench.py
+++ b/python-modules/ezbench/smartezbench.py
@@ -653,6 +653,7 @@ class SmartEzbench:
exit_code = 1
task_tree = list()
events_str = []
+ resumable_tasks = []
# Make sure we catch *any* error, because we need to send stuff in the
# Queue if we do not want the parent process to get stuck
@@ -678,8 +679,6 @@ class SmartEzbench:
for key in result.results():
full_name = Test.partial_name(result.test.full_name, [key])
SmartEzbench.__remove_task_from_tasktree__(task_tree, commit.full_sha1, full_name, len(result.result(key)))
- # HACK: Remove this when all the new reports use the full_sha1 for storage
- SmartEzbench.__remove_task_from_tasktree__(task_tree, commit.sha1, full_name, len(result.result(key)))
# Delete the tests on commits that do not compile
for commit in report.commits:
@@ -690,13 +689,21 @@ class SmartEzbench:
del task_tree[commit.sha1]
exit_code = 0
+ resumable_tasks = report.journal.incomplete_tests()
except Exception as e:
traceback.print_exc(file=sys.stderr)
sys.stderr.write("\n")
pass
# Return the result
- q.put((exit_code, task_tree, events_str))
+ q.put((exit_code, task_tree, events_str, resumable_tasks))
+
+ def pop_from_resumable_tasks(self, resumable_tasks, version, test):
+ for i in range(0, len(resumable_tasks)):
+ if resumable_tasks[i]["version"] == version and resumable_tasks[i]["test"] == test:
+ task = resumable_tasks.pop(i)
+ return task.get("result_file", None)
+ return None
def run(self):
self.__log(Criticality.II, "----------------------")
@@ -731,7 +738,7 @@ class SmartEzbench:
p = multiprocessing.Process(target=SmartEzbench.__generate_task_and_events_list__,
args=(q, self.state, self.log_folder, self.repo()))
p.start()
- exit_code, task_tree, self._events_str = q.get()
+ exit_code, task_tree, self._events_str, resumable_tasks = q.get()
p.join()
if len(task_tree) == 0:
@@ -740,6 +747,7 @@ class SmartEzbench:
task_tree_str = pprint.pformat(task_tree)
self.__log(Criticality.II, "Task list: {tsk_str}".format(tsk_str=task_tree_str))
+ self.__log(Criticality.II, "Incomplete runs: {}".format(resumable_tasks))
# Lock the report for further changes (like for profiles)
self.__write_attribute__('beenRunBefore', True)
@@ -784,21 +792,46 @@ class SmartEzbench:
self._task_current.started()
for r in range(0, e.rounds):
self._task_lock.release()
- try:
- time, cmd_output = runner.run(e.commit, e.test, False)
- except RunnerError as error:
- err_code = error.args[0]['err_code']
- # We got an error, let's see what we can do about it!
- if (err_code.value != RunnerErrorCode.NO_ERROR and
- err_code.value < RunnerErrorCode.COMP_DEP_UNK_ERROR.value):
- # Error we cannot do anything about, probably a setup issue
- # Let's mark the run as aborted until the user resets it!
- self.__log(Criticality.EE, "The run returned the error {}".format(err_code))
- self.set_running_mode(RunningMode.ERROR)
- elif (err_code == RunnerErrorCode.COMPILATION_FAILED or
- err_code == RunnerErrorCode.DEPLOYMENT_FAILED):
- # Cancel any other test on this commit
- self._task_list = [x for x in self._task_list if not x.commit == e.commit]
+
+ # Try to resume tasks before trying to add a new run
+ while True:
+ try:
+ result_file = self.pop_from_resumable_tasks(resumable_tasks, e.commit, e.test)
+ if result_file is not None:
+ time, cmd_output = runner.resume(e.commit, e.test, result_file, False)
+ else:
+ time, cmd_output = runner.run(e.commit, e.test, False)
+ except RunnerError as error:
+ err_code = error.args[0]['err_code']
+ err_str = error.args[0]['err_str']
+ # We got an error, let's see what we can do about it!
+ if (err_code == RunnerErrorCode.CMD_TEST_EXEC_TYPE_UNSUPPORTED or
+ err_code == RunnerErrorCode.CMD_TEST_EXEC_TYPE_NEED_VALID_RESULT_FILE or
+ err_code == RunnerErrorCode.CMD_RESULT_ALREADY_COMPLETE):
+ # The resume failed, try all the other ones until we try a normal run
+ continue
+ elif err_code == RunnerErrorCode.REBOOT_NEEDED:
+ # TODO: have some sort of hooks here to warn the rest of the world
+ # that we are about to reboot
+ self._task_list = []
+ self._task_current = None
+ self.__log(Criticality.II, "Rebooting...")
+ runner.reboot()
+ sys.exit(0)
+ elif (err_code.value != RunnerErrorCode.NO_ERROR and
+ err_code.value < RunnerErrorCode.COMP_DEP_UNK_ERROR.value):
+ # Error we cannot do anything about, probably a setup issue
+ # Let's mark the run as aborted until the user resets it!
+ self.__log(Criticality.EE, error.args[0]['err_code'])
+ self.set_running_mode(RunningMode.ERROR)
+ elif (err_code == RunnerErrorCode.COMPILATION_FAILED or
+ err_code == RunnerErrorCode.DEPLOYMENT_FAILED):
+ # Cancel any other test on this commit
+ self._task_list = [x for x in self._task_list if not x.commit == e.commit]
+
+ # Loop only if a resume failed
+ break
+
self._task_lock.acquire()
self._task_current = None
--
2.11.0
More information about the Ezbench-dev mailing list