Mesa (main): ci/lava: Sleep before, not after, API calls

Thu Apr 28 07:34:16 UTC 2022

Module: Mesa
Branch: main
Commit: b3ba448ba529caa9d88f180794d479cc441d5f2f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3ba448ba529caa9d88f180794d479cc441d5f2f

Author: Daniel Stone <daniels at collabora.com>
Date:   Wed Feb 16 17:52:23 2022 +0000

ci/lava: Sleep before, not after, API calls

We rate-limit LAVA API calls as they are standard polling calls rather
than blocking for changes. However, when we sleep after making the calls
rather than before, we can block when we want to exit - e.g. after
getting the final logs, we will still sleep even though we can drop out.

Fix this by moving the sleeps to before the API calls, rather than after.
This means that the first calls (when we're waiting to be scheduled, or
haven't got our first log lines yet), will be delayed compared to
previously, but that's not going to slow us down as even in the best
case we won't be executing in a device within the first 15 seconds.

Signed-off-by: Daniel Stone <daniels at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15938>

---

 .gitlab-ci/lava/lava_job_submitter.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py
index 0975a3a0d8d..1f9eb8bffcf 100755
--- a/.gitlab-ci/lava/lava_job_submitter.py
+++ b/.gitlab-ci/lava/lava_job_submitter.py
@@ -242,10 +242,10 @@ def wait_until_job_is_started(proxy, job_id):
     current_state = "Submitted"
     waiting_states = ["Submitted", "Scheduling", "Scheduled"]
     while current_state in waiting_states:
+        time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC)
         job_state = _call_proxy(proxy.scheduler.job_state, job_id)
         current_state = job_state["job_state"]
 
-        time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC)
     print_log(f"Job {job_id} started.")
 
 def follow_job_execution(proxy, job_id):
@@ -253,6 +253,11 @@ def follow_job_execution(proxy, job_id):
     finished = False
     last_time_logs = datetime.now()
     while not finished:
+        # `proxy.scheduler.jobs.logs` does not block, even when there is no
+        # new log to be fetched. To avoid dosing the LAVA dispatcher
+        # machine, let's add a sleep to save them some stamina.
+        time.sleep(LOG_POLLING_TIME_SEC)
+
         (finished, data) = _call_proxy(proxy.scheduler.jobs.logs, job_id, line_count)
         if logs := yaml.load(str(data), Loader=loader(False)):
             # Reset the timeout
@@ -261,18 +266,12 @@ def follow_job_execution(proxy, job_id):
                 print("{} {}".format(line["dt"], line["msg"]))
 
             line_count += len(logs)
-
         else:
             time_limit = timedelta(seconds=DEVICE_HANGING_TIMEOUT_SEC)
             if datetime.now() - last_time_logs > time_limit:
                 print_log("LAVA job {} doesn't advance (machine got hung?). Retry.".format(job_id))
                 return False
 
-        # `proxy.scheduler.jobs.logs` does not block, even when there is no
-        # new log to be fetched. To avoid dosing the LAVA dispatcher
-        # machine, let's add a sleep to save them some stamina.
-        time.sleep(LOG_POLLING_TIME_SEC)
-
     return True
 
 def show_job_data(proxy, job_id):