Mesa (main): ci/bare-metal: Add test phase timeouts to all boards.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Jun 21 22:18:46 UTC 2022
Module: Mesa
Branch: main
Commit: 5f09b1ebe98840fec90e9f8a0ee8ea07a31c381b
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5f09b1ebe98840fec90e9f8a0ee8ea07a31c381b
Author: Emma Anholt <emma at anholt.net>
Date: Thu Jun 16 14:38:50 2022 -0700
ci/bare-metal: Add test phase timeouts to all boards.
This should help with "marge got stuck for an hour and all I got was this
failed job with no results/" when a system intermittently wedges.
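This replaces the BM_POE_TIMEOUT serial-inactivity check ("did we get
something on serial in the last few minutes?") that the rpi and nouveau
jetson-nano jobs had, in favor of checking that the whole test phase gets
through in TEST_PHASE_TIMEOUT minutes (20 by default, overridable per job).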
Acked-by: Juan A. Suarez <jasuarez at igalia.com>
Reviewed-by: Christian Gmeiner <christian.gmeiner at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17096>
---
.gitlab-ci/bare-metal/cros-servo.sh | 3 ++-
.gitlab-ci/bare-metal/cros_servo_run.py | 11 +++++++----
.gitlab-ci/bare-metal/fastboot.sh | 1 +
.gitlab-ci/bare-metal/fastboot_run.py | 15 ++++++++-------
.gitlab-ci/bare-metal/poe-powered.sh | 2 +-
.gitlab-ci/bare-metal/poe_run.py | 13 +++++++------
src/broadcom/ci/gitlab-ci.yml | 5 -----
src/freedreno/ci/gitlab-ci.yml | 6 ++++--
src/gallium/drivers/nouveau/ci/gitlab-ci.yml | 1 -
9 files changed, 30 insertions(+), 27 deletions(-)
diff --git a/.gitlab-ci/bare-metal/cros-servo.sh b/.gitlab-ci/bare-metal/cros-servo.sh
index 53fbc817666..5e9a3ed7509 100755
--- a/.gitlab-ci/bare-metal/cros-servo.sh
+++ b/.gitlab-ci/bare-metal/cros-servo.sh
@@ -90,7 +90,8 @@ echo "$BM_CMDLINE" > /tftp/cmdline
set +e
python3 $BM/cros_servo_run.py \
--cpu $BM_SERIAL \
- --ec $BM_SERIAL_EC
+ --ec $BM_SERIAL_EC \
+ --test-timeout ${TEST_PHASE_TIMEOUT:-20}
ret=$?
set -e
diff --git a/.gitlab-ci/bare-metal/cros_servo_run.py b/.gitlab-ci/bare-metal/cros_servo_run.py
index 7215253fb7a..a5593c46d44 100755
--- a/.gitlab-ci/bare-metal/cros_servo_run.py
+++ b/.gitlab-ci/bare-metal/cros_servo_run.py
@@ -31,13 +31,14 @@ import threading
class CrosServoRun:
- def __init__(self, cpu, ec):
+ def __init__(self, cpu, ec, test_timeout):
self.cpu_ser = SerialBuffer(
cpu, "results/serial.txt", "R SERIAL-CPU> ")
# Merge the EC serial into the cpu_ser's line stream so that we can
# effectively poll on both at the same time and not have to worry about
self.ec_ser = SerialBuffer(
ec, "results/serial-ec.txt", "R SERIAL-EC> ", line_queue=self.cpu_ser.line_queue)
+ self.test_timeout = test_timeout
def close(self):
self.ec_ser.close()
@@ -90,7 +91,7 @@ class CrosServoRun:
return 2
tftp_failures = 0
- for line in self.cpu_ser.lines(timeout=120 * 60, phase="test"):
+ for line in self.cpu_ser.lines(timeout=self.test_timeout, phase="test"):
if re.search("---. end Kernel panic", line):
return 1
@@ -150,7 +151,7 @@ class CrosServoRun:
self.print_error(
"Reached the end of the CPU serial log without finding a result")
- return 1
+ return 2
def main():
@@ -159,9 +160,11 @@ def main():
help='CPU Serial device', required=True)
parser.add_argument(
'--ec', type=str, help='EC Serial device', required=True)
+ parser.add_argument(
+ '--test-timeout', type=int, help='Test phase timeout (minutes)', required=True)
args = parser.parse_args()
- servo = CrosServoRun(args.cpu, args.ec)
+ servo = CrosServoRun(args.cpu, args.ec, args.test_timeout * 60)
while True:
retval = servo.run()
diff --git a/.gitlab-ci/bare-metal/fastboot.sh b/.gitlab-ci/bare-metal/fastboot.sh
index 86d3a9b6c69..d4571819c03 100755
--- a/.gitlab-ci/bare-metal/fastboot.sh
+++ b/.gitlab-ci/bare-metal/fastboot.sh
@@ -133,6 +133,7 @@ fi
set +e
$BM/fastboot_run.py \
--dev="$BM_SERIAL" \
+ --test-timeout ${TEST_PHASE_TIMEOUT:-20} \
--fbserial="$BM_FASTBOOT_SERIAL" \
--powerup="$BM_POWERUP" \
--powerdown="$BM_POWERDOWN"
diff --git a/.gitlab-ci/bare-metal/fastboot_run.py b/.gitlab-ci/bare-metal/fastboot_run.py
index ca27b6fe7e3..8721515b100 100755
--- a/.gitlab-ci/bare-metal/fastboot_run.py
+++ b/.gitlab-ci/bare-metal/fastboot_run.py
@@ -30,14 +30,13 @@ import threading
class FastbootRun:
- def __init__(self, args):
+ def __init__(self, args, test_timeout):
self.powerup = args.powerup
- # We would like something like a 1 minute timeout, but the piglit traces
- # jobs stall out for long periods of time.
self.ser = SerialBuffer(
- args.dev, "results/serial-output.txt", "R SERIAL> ", timeout=600)
+ args.dev, "results/serial-output.txt", "R SERIAL> ")
self.fastboot = "fastboot boot -s {ser} artifacts/fastboot.img".format(
ser=args.fbserial)
+ self.test_timeout = test_timeout
def close(self):
self.ser.close()
@@ -76,7 +75,7 @@ class FastbootRun:
return 1
print_more_lines = -1
- for line in self.ser.lines(timeout=20 * 60, phase="test"):
+ for line in self.ser.lines(timeout=self.test_timeout, phase="test"):
if print_more_lines == 0:
return 2
if print_more_lines > 0:
@@ -138,9 +137,11 @@ def main():
help='shell command for powering off', required=True)
parser.add_argument('--fbserial', type=str,
help='fastboot serial number of the board', required=True)
+ parser.add_argument('--test-timeout', type=int,
+ help='Test phase timeout (minutes)', required=True)
args = parser.parse_args()
- fastboot = FastbootRun(args)
+ fastboot = FastbootRun(args, args.test_timeout * 60)
while True:
retval = fastboot.run()
@@ -148,7 +149,7 @@ def main():
if retval != 2:
break
- fastboot = FastbootRun(args)
+ fastboot = FastbootRun(args, args.test_timeout * 60)
fastboot.logged_system(args.powerdown)
diff --git a/.gitlab-ci/bare-metal/poe-powered.sh b/.gitlab-ci/bare-metal/poe-powered.sh
index 4bd0057459f..aea132e4cba 100755
--- a/.gitlab-ci/bare-metal/poe-powered.sh
+++ b/.gitlab-ci/bare-metal/poe-powered.sh
@@ -131,7 +131,7 @@ while [ $((ATTEMPTS--)) -gt 0 ]; do
--dev="$BM_SERIAL" \
--powerup="$BM_POWERUP" \
--powerdown="$BM_POWERDOWN" \
- --timeout="${BM_POE_TIMEOUT:-60}"
+ --test-timeout ${TEST_PHASE_TIMEOUT:-20}
ret=$?
if [ $ret -eq 2 ]; then
diff --git a/.gitlab-ci/bare-metal/poe_run.py b/.gitlab-ci/bare-metal/poe_run.py
index 30cbc7795b0..ab216ec36a5 100755
--- a/.gitlab-ci/bare-metal/poe_run.py
+++ b/.gitlab-ci/bare-metal/poe_run.py
@@ -30,11 +30,12 @@ import threading
class PoERun:
- def __init__(self, args):
+ def __init__(self, args, test_timeout):
self.powerup = args.powerup
self.powerdown = args.powerdown
self.ser = SerialBuffer(
- args.dev, "results/serial-output.txt", "", args.timeout)
+ args.dev, "results/serial-output.txt", "")
+ self.test_timeout = test_timeout
def print_error(self, message):
RED = '\033[0;31m'
@@ -60,7 +61,7 @@ class PoERun:
"Something wrong; couldn't detect the boot start up sequence")
return 2
- for line in self.ser.lines(timeout=20 * 60, phase="test"):
+ for line in self.ser.lines(timeout=self.test_timeout, phase="test"):
if re.search("---. end Kernel panic", line):
return 1
@@ -93,11 +94,11 @@ def main():
help='shell command for rebooting', required=True)
parser.add_argument('--powerdown', type=str,
help='shell command for powering off', required=True)
- parser.add_argument('--timeout', type=int, default=60,
- help='time in seconds to wait for activity', required=False)
+ parser.add_argument(
+ '--test-timeout', type=int, help='Test phase timeout (minutes)', required=True)
args = parser.parse_args()
- poe = PoERun(args)
+ poe = PoERun(args, args.test_timeout * 60)
retval = poe.run()
poe.logged_system(args.powerdown)
diff --git a/src/broadcom/ci/gitlab-ci.yml b/src/broadcom/ci/gitlab-ci.yml
index e86fba280c8..bf64b62a790 100644
--- a/src/broadcom/ci/gitlab-ci.yml
+++ b/src/broadcom/ci/gitlab-ci.yml
@@ -40,7 +40,6 @@ vc4-rpi3-egl:armhf:
- .piglit-test
- .vc4-rpi3-test:armhf
variables:
- BM_POE_TIMEOUT: 180
HWCI_START_XORG: 1
PIGLIT_PLATFORM: mixed_glx_egl
@@ -68,7 +67,6 @@ vc4-rpi3-piglit-quick_shader:armhf:
variables:
HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
BM_BOOTFS: /boot/raspberrypi_armhf
- BM_POE_TIMEOUT: 300
FLAKES_CHANNEL: "#videocore-ci"
GPU_VERSION: broadcom-rpi4
HWCI_KERNEL_MODULES: v3d,vc4
@@ -113,7 +111,6 @@ v3d-rpi4-piglit:armhf:
variables:
HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
BM_BOOTFS: /boot/raspberrypi_armhf
- BM_POE_TIMEOUT: 300
FLAKES_CHANNEL: "#videocore-ci"
GPU_VERSION: broadcom-rpi4
HWCI_KERNEL_MODULES: v3d,vc4
@@ -134,7 +131,6 @@ v3d-rpi4-traces:arm64:
- .test-manual-mr
variables:
BM_BOOTFS: /boot/raspberrypi_arm64
- BM_POE_TIMEOUT: 300
GPU_VERSION: broadcom-rpi4
HWCI_KERNEL_MODULES: v3d,vc4
HWCI_START_XORG: 1
@@ -164,7 +160,6 @@ v3dv-rpi4-vk:arm64:
variables:
HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
BM_BOOTFS: /boot/raspberrypi_arm64
- BM_POE_TIMEOUT: 300
DEQP_EXPECTED_RENDERER: "V3D.4.2"
DEQP_FRACTION: 10
DEQP_VER: vk
diff --git a/src/freedreno/ci/gitlab-ci.yml b/src/freedreno/ci/gitlab-ci.yml
index 911c8146804..bb6aa07923d 100644
--- a/src/freedreno/ci/gitlab-ci.yml
+++ b/src/freedreno/ci/gitlab-ci.yml
@@ -231,8 +231,9 @@ a630_vk:
DEQP_SUITE: freedreno-a630-vk
a630_vk_full:
- # We use a longer timeout to keep the parallel down so that we don't lock up
- # too many runners for a long time when a dev is trying out at full VK status.
+ # We use a longer timeout (2 hour job, 90 minute deqp) to keep the parallel
+ # down so that we don't lock up too many runners for a long time when a dev is
+ # testing full VK status.
timeout: 2h
extends:
- a630_vk
@@ -240,6 +241,7 @@ a630_vk_full:
parallel: 2
variables:
DEQP_SUITE: freedreno-a630-vk-full
+ TEST_PHASE_TIMEOUT: 90
a630_vk_asan:
extends:
diff --git a/src/gallium/drivers/nouveau/ci/gitlab-ci.yml b/src/gallium/drivers/nouveau/ci/gitlab-ci.yml
index 97de5cfae9a..91b0530ead9 100644
--- a/src/gallium/drivers/nouveau/ci/gitlab-ci.yml
+++ b/src/gallium/drivers/nouveau/ci/gitlab-ci.yml
@@ -6,7 +6,6 @@
variables:
HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
BM_BOOTFS: /baremetal-files/jetson-nano/
- BM_POE_TIMEOUT: 300
BM_CMDLINE: "console=ttyS0,115200n8 rw nfsrootdebug init=/init"
FLAKES_CHANNEL: "#nouveau-ci"
GPU_VERSION: nouveau-gm20b
More information about the mesa-commit
mailing list