[Ezbench-dev] [PATCH 1/2] smartezbench: resume incomplete runs, if possible
Petri Latvala
petri.latvala at intel.com
Thu Feb 2 09:12:54 UTC 2017
From: Martin Peres <martin.peres at linux.intel.com>
v2 (Petri): Allow resuming runs with subtests
---
The first version wasn't resuming when a bisection jammed the
machine. I churned the code a bit so that all resumable tasks are
scheduled first, preferring those for the deployed version, and it
seems to work now.
python-modules/ezbench/smartezbench.py | 68 ++++++++++++++++++++++++++--------
1 file changed, 52 insertions(+), 16 deletions(-)
diff --git a/python-modules/ezbench/smartezbench.py b/python-modules/ezbench/smartezbench.py
index 5f3474b..4e0f6dc 100644
--- a/python-modules/ezbench/smartezbench.py
+++ b/python-modules/ezbench/smartezbench.py
@@ -25,7 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
-from collections import namedtuple
+from collections import namedtuple, deque
from datetime import datetime, timedelta
from enum import Enum
import numpy as np
@@ -92,10 +92,11 @@ def list_smart_ezbench_report_names(ezbench_dir, updatedSince = 0):
return reports
class TaskEntry:
- def __init__(self, commit, test, rounds):
+ def __init__(self, commit, test, rounds, resumeResultFile = None):
self.commit = commit
self.test = test
self.rounds = rounds
+ self.resumeResultFile = resumeResultFile
self.start_date = None
self.exec_time = None
self.build_time = None
@@ -563,8 +564,8 @@ class SmartEzbench:
return c, tl, self._events_str
- def __prioritize_runs(self, task_tree, deployed_version):
- task_list = list()
+ def __prioritize_runs(self, task_tree, deployed_version, resumable_tasks):
+ task_list = deque()
# Aggregate all the subtests
for commit in task_tree:
@@ -587,7 +588,22 @@ class SmartEzbench:
task_tree[commit]["tests"][full_name] = dict()
task_tree[commit]["tests"][full_name]["rounds"] = test_rounds[basename]
- # Schedule the tests using the already-deployed version
+ # Schedule resumable tasks. First the already-deployed
+ # versions, other versions later
+ for task in resumable_tasks:
+ result_file = task.get("result_file", None)
+ if result_file is not None:
+ entry = TaskEntry(deployed_version, task["test"], 1, result_file)
+ else:
+ continue
+
+ if deployed_version == task["version"]:
+ task_list.appendleft(entry)
+ else:
+ task_list.append(entry)
+
+ # Schedule the tests using the already-deployed version after
+ # all resumable tasks
if deployed_version is not None and deployed_version in task_tree:
for test in task_tree[deployed_version]["tests"]:
rounds = task_tree[deployed_version]["tests"][test]["rounds"]
@@ -678,8 +694,8 @@ class SmartEzbench:
for key in result.results():
full_name = Test.partial_name(result.test.full_name, [key])
SmartEzbench.__remove_task_from_tasktree__(task_tree, commit.full_sha1, full_name, len(result.result(key)))
- # HACK: Remove this when all the new reports use the full_sha1 for storage
- SmartEzbench.__remove_task_from_tasktree__(task_tree, commit.sha1, full_name, len(result.result(key)))
+
+ resumable_tasks = report.journal.incomplete_tests()
# Delete the tests on commits that do not compile
for commit in report.commits:
@@ -696,7 +712,7 @@ class SmartEzbench:
pass
# Return the result
- q.put((exit_code, task_tree, events_str))
+ q.put((exit_code, task_tree, events_str, resumable_tasks))
def run(self):
self.__log(Criticality.II, "----------------------")
@@ -731,7 +747,7 @@ class SmartEzbench:
p = multiprocessing.Process(target=SmartEzbench.__generate_task_and_events_list__,
args=(q, self.state, self.log_folder, self.repo()))
p.start()
- exit_code, task_tree, self._events_str = q.get()
+ exit_code, task_tree, self._events_str, resumable_tasks = q.get()
p.join()
if len(task_tree) == 0:
@@ -740,13 +756,14 @@ class SmartEzbench:
task_tree_str = pprint.pformat(task_tree)
self.__log(Criticality.II, "Task list: {tsk_str}".format(tsk_str=task_tree_str))
+ self.__log(Criticality.II, "Incomplete runs: {}".format(resumable_tasks))
# Lock the report for further changes (like for profiles)
self.__write_attribute__('beenRunBefore', True)
# Prioritize --> return a list of commits to do in order
self._task_lock.acquire()
- self._task_list = self.__prioritize_runs(task_tree, deployed_commit)
+ self._task_list = self.__prioritize_runs(task_tree, deployed_commit, resumable_tasks)
# Call the hook file, telling we started running
self.__call_hook__('start_running_tests')
@@ -763,7 +780,7 @@ class SmartEzbench:
self.__done_running__(runner)
return False
- self._task_current = e = self._task_list.pop(0)
+ self._task_current = e = self._task_list.popleft()
short_name=e.test[:80].rsplit('|', 1)[0]+'...'
self.__log(Criticality.DD,
"make {count} runs for test {test} using commit {commit}".format(count=e.rounds,
@@ -784,21 +801,40 @@ class SmartEzbench:
self._task_current.started()
for r in range(0, e.rounds):
self._task_lock.release()
+
try:
- time, cmd_output = runner.run(e.commit, e.test, False)
+ if e.resumeResultFile is not None:
+ time, cmd_output = runner.resume(e.commit, e.test, e.resumeResultFile, False)
+ else:
+ time, cmd_output = runner.run(e.commit, e.test, False)
except RunnerError as error:
err_code = error.args[0]['err_code']
+ err_str = error.args[0]['err_str']
# We got an error, let's see what we can do about it!
- if (err_code.value != RunnerErrorCode.NO_ERROR and
- err_code.value < RunnerErrorCode.COMP_DEP_UNK_ERROR.value):
+ if (err_code == RunnerErrorCode.CMD_TEST_EXEC_TYPE_UNSUPPORTED or
+ err_code == RunnerErrorCode.CMD_TEST_EXEC_TYPE_NEED_VALID_RESULT_FILE or
+ err_code == RunnerErrorCode.CMD_RESULT_ALREADY_COMPLETE):
+ # The resume failed, nothing to do
+ pass
+ elif err_code == RunnerErrorCode.REBOOT_NEEDED:
+ # TODO: have some sort of hooks here to warn the rest of the world
+ # that we are about to reboot
+ self._task_list = []
+ self._task_current = None
+ self.__log(Criticality.II, "Rebooting...")
+ runner.reboot()
+ sys.exit(0)
+ elif (err_code.value != RunnerErrorCode.NO_ERROR and
+ err_code.value < RunnerErrorCode.COMP_DEP_UNK_ERROR.value):
# Error we cannot do anything about, probably a setup issue
# Let's mark the run as aborted until the user resets it!
- self.__log(Criticality.EE, "The run returned the error {}".format(err_code))
+ self.__log(Criticality.EE, error.args[0]['err_code'])
self.set_running_mode(RunningMode.ERROR)
elif (err_code == RunnerErrorCode.COMPILATION_FAILED or
- err_code == RunnerErrorCode.DEPLOYMENT_FAILED):
+ err_code == RunnerErrorCode.DEPLOYMENT_FAILED):
# Cancel any other test on this commit
self._task_list = [x for x in self._task_list if not x.commit == e.commit]
+
self._task_lock.acquire()
self._task_current = None
--
2.9.3
More information about the Ezbench-dev
mailing list