Mesa (main): ci/bare-metal: Add test phase timeouts to all boards.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jun 21 22:18:46 UTC 2022


Module: Mesa
Branch: main
Commit: 5f09b1ebe98840fec90e9f8a0ee8ea07a31c381b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5f09b1ebe98840fec90e9f8a0ee8ea07a31c381b

Author: Emma Anholt <emma at anholt.net>
Date:   Thu Jun 16 14:38:50 2022 -0700

ci/bare-metal: Add test phase timeouts to all boards.

This should help with the "marge got stuck for an hour and all I got was
this failed job with no results/" situation that happens when a system
intermittently wedges.

This replaces the BM_POE_TIMEOUT ("did we get something on serial in the
last 3 minutes?") that rpi had, in favor of checking that the whole test
phase gets through within a time limit: 20 minutes by default, overridable
per job by setting TEST_PHASE_TIMEOUT (in minutes).

Acked-by: Juan A. Suarez <jasuarez at igalia.com>
Reviewed-by: Christian Gmeiner <christian.gmeiner at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17096>

---

 .gitlab-ci/bare-metal/cros-servo.sh          |  3 ++-
 .gitlab-ci/bare-metal/cros_servo_run.py      | 11 +++++++----
 .gitlab-ci/bare-metal/fastboot.sh            |  1 +
 .gitlab-ci/bare-metal/fastboot_run.py        | 15 ++++++++-------
 .gitlab-ci/bare-metal/poe-powered.sh         |  2 +-
 .gitlab-ci/bare-metal/poe_run.py             | 13 +++++++------
 src/broadcom/ci/gitlab-ci.yml                |  5 -----
 src/freedreno/ci/gitlab-ci.yml               |  6 ++++--
 src/gallium/drivers/nouveau/ci/gitlab-ci.yml |  1 -
 9 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/.gitlab-ci/bare-metal/cros-servo.sh b/.gitlab-ci/bare-metal/cros-servo.sh
index 53fbc817666..5e9a3ed7509 100755
--- a/.gitlab-ci/bare-metal/cros-servo.sh
+++ b/.gitlab-ci/bare-metal/cros-servo.sh
@@ -90,7 +90,8 @@ echo "$BM_CMDLINE" > /tftp/cmdline
 set +e
 python3 $BM/cros_servo_run.py \
         --cpu $BM_SERIAL \
-        --ec $BM_SERIAL_EC
+        --ec $BM_SERIAL_EC \
+        --test-timeout ${TEST_PHASE_TIMEOUT:-20}
 ret=$?
 set -e
 
diff --git a/.gitlab-ci/bare-metal/cros_servo_run.py b/.gitlab-ci/bare-metal/cros_servo_run.py
index 7215253fb7a..a5593c46d44 100755
--- a/.gitlab-ci/bare-metal/cros_servo_run.py
+++ b/.gitlab-ci/bare-metal/cros_servo_run.py
@@ -31,13 +31,14 @@ import threading
 
 
 class CrosServoRun:
-    def __init__(self, cpu, ec):
+    def __init__(self, cpu, ec, test_timeout):
         self.cpu_ser = SerialBuffer(
             cpu, "results/serial.txt", "R SERIAL-CPU> ")
         # Merge the EC serial into the cpu_ser's line stream so that we can
         # effectively poll on both at the same time and not have to worry about
         self.ec_ser = SerialBuffer(
             ec, "results/serial-ec.txt", "R SERIAL-EC> ", line_queue=self.cpu_ser.line_queue)
+        self.test_timeout = test_timeout
 
     def close(self):
         self.ec_ser.close()
@@ -90,7 +91,7 @@ class CrosServoRun:
             return 2
 
         tftp_failures = 0
-        for line in self.cpu_ser.lines(timeout=120 * 60, phase="test"):
+        for line in self.cpu_ser.lines(timeout=self.test_timeout, phase="test"):
             if re.search("---. end Kernel panic", line):
                 return 1
 
@@ -150,7 +151,7 @@ class CrosServoRun:
 
         self.print_error(
             "Reached the end of the CPU serial log without finding a result")
-        return 1
+        return 2
 
 
 def main():
@@ -159,9 +160,11 @@ def main():
                         help='CPU Serial device', required=True)
     parser.add_argument(
         '--ec', type=str, help='EC Serial device', required=True)
+    parser.add_argument(
+        '--test-timeout', type=int, help='Test phase timeout (minutes)', required=True)
     args = parser.parse_args()
 
-    servo = CrosServoRun(args.cpu, args.ec)
+    servo = CrosServoRun(args.cpu, args.ec, args.test_timeout * 60)
 
     while True:
         retval = servo.run()
diff --git a/.gitlab-ci/bare-metal/fastboot.sh b/.gitlab-ci/bare-metal/fastboot.sh
index 86d3a9b6c69..d4571819c03 100755
--- a/.gitlab-ci/bare-metal/fastboot.sh
+++ b/.gitlab-ci/bare-metal/fastboot.sh
@@ -133,6 +133,7 @@ fi
 set +e
 $BM/fastboot_run.py \
   --dev="$BM_SERIAL" \
+  --test-timeout ${TEST_PHASE_TIMEOUT:-20} \
   --fbserial="$BM_FASTBOOT_SERIAL" \
   --powerup="$BM_POWERUP" \
   --powerdown="$BM_POWERDOWN"
diff --git a/.gitlab-ci/bare-metal/fastboot_run.py b/.gitlab-ci/bare-metal/fastboot_run.py
index ca27b6fe7e3..8721515b100 100755
--- a/.gitlab-ci/bare-metal/fastboot_run.py
+++ b/.gitlab-ci/bare-metal/fastboot_run.py
@@ -30,14 +30,13 @@ import threading
 
 
 class FastbootRun:
-    def __init__(self, args):
+    def __init__(self, args, test_timeout):
         self.powerup = args.powerup
-        # We would like something like a 1 minute timeout, but the piglit traces
-        # jobs stall out for long periods of time.
         self.ser = SerialBuffer(
-            args.dev, "results/serial-output.txt", "R SERIAL> ", timeout=600)
+            args.dev, "results/serial-output.txt", "R SERIAL> ")
         self.fastboot = "fastboot boot -s {ser} artifacts/fastboot.img".format(
             ser=args.fbserial)
+        self.test_timeout = test_timeout
 
     def close(self):
         self.ser.close()
@@ -76,7 +75,7 @@ class FastbootRun:
             return 1
 
         print_more_lines = -1
-        for line in self.ser.lines(timeout=20 * 60, phase="test"):
+        for line in self.ser.lines(timeout=self.test_timeout, phase="test"):
             if print_more_lines == 0:
                 return 2
             if print_more_lines > 0:
@@ -138,9 +137,11 @@ def main():
                         help='shell command for powering off', required=True)
     parser.add_argument('--fbserial', type=str,
                         help='fastboot serial number of the board', required=True)
+    parser.add_argument('--test-timeout', type=int,
+                        help='Test phase timeout (minutes)', required=True)
     args = parser.parse_args()
 
-    fastboot = FastbootRun(args)
+    fastboot = FastbootRun(args, args.test_timeout * 60)
 
     while True:
         retval = fastboot.run()
@@ -148,7 +149,7 @@ def main():
         if retval != 2:
             break
 
-        fastboot = FastbootRun(args)
+        fastboot = FastbootRun(args, args.test_timeout * 60)
 
     fastboot.logged_system(args.powerdown)
 
diff --git a/.gitlab-ci/bare-metal/poe-powered.sh b/.gitlab-ci/bare-metal/poe-powered.sh
index 4bd0057459f..aea132e4cba 100755
--- a/.gitlab-ci/bare-metal/poe-powered.sh
+++ b/.gitlab-ci/bare-metal/poe-powered.sh
@@ -131,7 +131,7 @@ while [ $((ATTEMPTS--)) -gt 0 ]; do
           --dev="$BM_SERIAL" \
           --powerup="$BM_POWERUP" \
           --powerdown="$BM_POWERDOWN" \
-          --timeout="${BM_POE_TIMEOUT:-60}"
+          --test-timeout ${TEST_PHASE_TIMEOUT:-20}
   ret=$?
 
   if [ $ret -eq 2 ]; then
diff --git a/.gitlab-ci/bare-metal/poe_run.py b/.gitlab-ci/bare-metal/poe_run.py
index 30cbc7795b0..ab216ec36a5 100755
--- a/.gitlab-ci/bare-metal/poe_run.py
+++ b/.gitlab-ci/bare-metal/poe_run.py
@@ -30,11 +30,12 @@ import threading
 
 
 class PoERun:
-    def __init__(self, args):
+    def __init__(self, args, test_timeout):
         self.powerup = args.powerup
         self.powerdown = args.powerdown
         self.ser = SerialBuffer(
-            args.dev, "results/serial-output.txt", "", args.timeout)
+            args.dev, "results/serial-output.txt", "")
+        self.test_timeout = test_timeout
 
     def print_error(self, message):
         RED = '\033[0;31m'
@@ -60,7 +61,7 @@ class PoERun:
                 "Something wrong; couldn't detect the boot start up sequence")
             return 2
 
-        for line in self.ser.lines(timeout=20 * 60, phase="test"):
+        for line in self.ser.lines(timeout=self.test_timeout, phase="test"):
             if re.search("---. end Kernel panic", line):
                 return 1
 
@@ -93,11 +94,11 @@ def main():
                         help='shell command for rebooting', required=True)
     parser.add_argument('--powerdown', type=str,
                         help='shell command for powering off', required=True)
-    parser.add_argument('--timeout', type=int, default=60,
-                        help='time in seconds to wait for activity', required=False)
+    parser.add_argument(
+        '--test-timeout', type=int, help='Test phase timeout (minutes)', required=True)
     args = parser.parse_args()
 
-    poe = PoERun(args)
+    poe = PoERun(args, args.test_timeout * 60)
     retval = poe.run()
 
     poe.logged_system(args.powerdown)
diff --git a/src/broadcom/ci/gitlab-ci.yml b/src/broadcom/ci/gitlab-ci.yml
index e86fba280c8..bf64b62a790 100644
--- a/src/broadcom/ci/gitlab-ci.yml
+++ b/src/broadcom/ci/gitlab-ci.yml
@@ -40,7 +40,6 @@ vc4-rpi3-egl:armhf:
     - .piglit-test
     - .vc4-rpi3-test:armhf
   variables:
-    BM_POE_TIMEOUT: 180
     HWCI_START_XORG: 1
     PIGLIT_PLATFORM: mixed_glx_egl
 
@@ -68,7 +67,6 @@ vc4-rpi3-piglit-quick_shader:armhf:
   variables:
     HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
     BM_BOOTFS: /boot/raspberrypi_armhf
-    BM_POE_TIMEOUT: 300
     FLAKES_CHANNEL: "#videocore-ci"
     GPU_VERSION: broadcom-rpi4
     HWCI_KERNEL_MODULES: v3d,vc4
@@ -113,7 +111,6 @@ v3d-rpi4-piglit:armhf:
   variables:
     HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
     BM_BOOTFS: /boot/raspberrypi_armhf
-    BM_POE_TIMEOUT: 300
     FLAKES_CHANNEL: "#videocore-ci"
     GPU_VERSION: broadcom-rpi4
     HWCI_KERNEL_MODULES: v3d,vc4
@@ -134,7 +131,6 @@ v3d-rpi4-traces:arm64:
     - .test-manual-mr
   variables:
     BM_BOOTFS: /boot/raspberrypi_arm64
-    BM_POE_TIMEOUT: 300
     GPU_VERSION: broadcom-rpi4
     HWCI_KERNEL_MODULES: v3d,vc4
     HWCI_START_XORG: 1
@@ -164,7 +160,6 @@ v3dv-rpi4-vk:arm64:
   variables:
     HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
     BM_BOOTFS: /boot/raspberrypi_arm64
-    BM_POE_TIMEOUT: 300
     DEQP_EXPECTED_RENDERER: "V3D.4.2"
     DEQP_FRACTION: 10
     DEQP_VER: vk
diff --git a/src/freedreno/ci/gitlab-ci.yml b/src/freedreno/ci/gitlab-ci.yml
index 911c8146804..bb6aa07923d 100644
--- a/src/freedreno/ci/gitlab-ci.yml
+++ b/src/freedreno/ci/gitlab-ci.yml
@@ -231,8 +231,9 @@ a630_vk:
     DEQP_SUITE: freedreno-a630-vk
 
 a630_vk_full:
-  # We use a longer timeout to keep the parallel down so that we don't lock up
-  # too many runners for a long time when a dev is trying out at full VK status.
+  # We use a longer timeout (2 hour job, 90 minute deqp) to keep the parallel
+  # down so that we don't lock up too many runners for a long time when a dev is
+  # testing full VK status.
   timeout: 2h
   extends:
     - a630_vk
@@ -240,6 +241,7 @@ a630_vk_full:
   parallel: 2
   variables:
     DEQP_SUITE: freedreno-a630-vk-full
+    TEST_PHASE_TIMEOUT: 90
 
 a630_vk_asan:
   extends:
diff --git a/src/gallium/drivers/nouveau/ci/gitlab-ci.yml b/src/gallium/drivers/nouveau/ci/gitlab-ci.yml
index 97de5cfae9a..91b0530ead9 100644
--- a/src/gallium/drivers/nouveau/ci/gitlab-ci.yml
+++ b/src/gallium/drivers/nouveau/ci/gitlab-ci.yml
@@ -6,7 +6,6 @@
   variables:
     HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
     BM_BOOTFS: /baremetal-files/jetson-nano/
-    BM_POE_TIMEOUT: 300
     BM_CMDLINE: "console=ttyS0,115200n8 rw nfsrootdebug init=/init"
     FLAKES_CHANNEL: "#nouveau-ci"
     GPU_VERSION: nouveau-gm20b



More information about the mesa-commit mailing list