[Intel-gfx] [PATCH i-g-t v2 11/16] tests/core_hotunplug: Follow failed subtests with health checks
Janusz Krzysztofik
janusz.krzysztofik at linux.intel.com
Fri Aug 7 09:19:57 UTC 2020
Subtests now forcibly call or request igt_abort on failures in order to
avoid silently leaving an exercised device in an unusable state.
However, a failure inside a subtest doesn't always mean the device is
no longer working correctly and reboot is needed. On the other hand,
if a subtest just fails without aborting, that doesn't mean in turn the
device is healthy. We should still perform a device health check
in that case before deciding on next steps.
Reuse the 'failure' structure field as a mark which is set when a
subtest enters a chunk of critical steps which must be followed by a
successful health check in order to avoid aborting the test execution.
Then, move health checks from inside each subtest body to its
individual follow-up igt_fixture section from where device file
descriptors potentially left open are closed and the health check is
run if theigt_subtest section has been exited with the marker set.
Also, precede health check operations with a driver rebind or bus
rescan attempt as needed.
v2: Start each recovery phase from unconditionally closing file
descriptors potentially left open by a subtest before it entered
its critical section,
- replace igt_require() with 'if() return;' construct in recover() to
reduce noise,
- replace "subtest failure" message used as a request for healthcheck
with a more appropriate "need healthcheck" for clarity,
- rebase on current upstream master.
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski at intel.com> # v1
---
tests/core_hotunplug.c | 108 +++++++++++++++++++++++++++++------------
1 file changed, 76 insertions(+), 32 deletions(-)
diff --git a/tests/core_hotunplug.c b/tests/core_hotunplug.c
index b9982b330..313c44784 100644
--- a/tests/core_hotunplug.c
+++ b/tests/core_hotunplug.c
@@ -52,6 +52,9 @@ struct hotunplug {
static int local_close(int fd)
{
+ if (fd < 0) /* not open - return current status */
+ return fd;
+
errno = 0;
if (close(fd)) /* close failure - return -errno (never -1) */
return -errno;
@@ -91,11 +94,9 @@ static void driver_unbind(struct hotunplug *priv, const char *prefix)
{
igt_debug("%sunbinding the driver from the device\n", prefix);
- priv->failure = "Driver unbind timeout!";
- igt_set_timeout(60, priv->failure);
+ igt_set_timeout(60, "Driver unbind timeout!");
igt_sysfs_set(priv->fd.sysfs_drv, "unbind", priv->dev_bus_addr);
igt_reset_timeout();
- priv->failure = NULL;
}
/* Re-bind the driver to the device */
@@ -103,11 +104,9 @@ static void driver_bind(struct hotunplug *priv)
{
igt_debug("rebinding the driver to the device\n");
- priv->failure = "Driver re-bind timeout!";
- igt_set_timeout(60, priv->failure);
+ igt_set_timeout(60, "Driver re-bind timeout!");
igt_sysfs_set(priv->fd.sysfs_drv, "bind", priv->dev_bus_addr);
igt_reset_timeout();
- priv->failure = NULL;
}
/* Remove (virtually unplug) the device from its bus */
@@ -122,11 +121,9 @@ static void device_unplug(struct hotunplug *priv, const char *prefix)
igt_debug("%sunplugging the device\n", prefix);
- priv->failure = "Device unplug timeout!";
- igt_set_timeout(60, priv->failure);
+ igt_set_timeout(60, "Device unplug timeout!");
igt_sysfs_set(priv->fd.sysfs_dev, "remove", "1");
igt_reset_timeout();
- priv->failure = NULL;
priv->fd.sysfs_dev = local_close(priv->fd.sysfs_dev);
igt_warn_on_f(priv->fd.sysfs_dev != -1,
@@ -138,11 +135,15 @@ static void bus_rescan(struct hotunplug *priv)
{
igt_debug("recovering the device\n");
- priv->failure = "Bus rescan timeout!";
- igt_set_timeout(60, priv->failure);
+ igt_set_timeout(60, "Bus rescan timeout!");
igt_sysfs_set(priv->fd.sysfs_bus, "../rescan", "1");
igt_reset_timeout();
- priv->failure = NULL;
+}
+
+static void cleanup(struct hotunplug *priv)
+{
+ priv->fd.drm = local_close(priv->fd.drm);
+ priv->fd.sysfs_dev = local_close(priv->fd.sysfs_dev);
}
static void healthcheck(struct hotunplug *priv)
@@ -150,6 +151,18 @@ static void healthcheck(struct hotunplug *priv)
/* preserve error code potentially stored before in priv->fd.drm */
int fd_drm;
+ if (faccessat(priv->fd.sysfs_bus, priv->dev_bus_addr, F_OK, 0)) {
+ priv->failure = "Bus rescan failed!";
+ bus_rescan(priv);
+ priv->failure = NULL;
+ }
+
+ if (faccessat(priv->fd.sysfs_drv, priv->dev_bus_addr, F_OK, 0)) {
+ priv->failure = "Driver re-bind failed!";
+ driver_bind(priv);
+ priv->failure = NULL;
+ }
+
/* device name may have changed, rebuild IGT device list */
igt_devices_scan(true);
@@ -169,6 +182,17 @@ static void healthcheck(struct hotunplug *priv)
igt_assert_f(fd_drm == -1, "Device close failed\n");
}
+static void recover(struct hotunplug *priv)
+{
+ cleanup(priv);
+
+ if (!priv->failure)
+ return;
+ priv->failure = NULL;
+
+ healthcheck(priv);
+}
+
static void post_healthcheck(struct hotunplug *priv)
{
igt_abort_on_f(priv->failure, "%s\n", priv->failure);
@@ -195,20 +219,20 @@ static void set_filter_from_device(int fd)
static void unbind_rebind(struct hotunplug *priv)
{
+ priv->failure = "need healthcheck";
+
driver_unbind(priv, "");
driver_bind(priv);
-
- healthcheck(priv);
}
static void unplug_rescan(struct hotunplug *priv)
{
+ priv->failure = "need healthcheck";
+
device_unplug(priv, "");
bus_rescan(priv);
-
- healthcheck(priv);
}
static void hotunbind_lateclose(struct hotunplug *priv)
@@ -217,6 +241,8 @@ static void hotunbind_lateclose(struct hotunplug *priv)
priv->fd.drm = __drm_open_driver(DRIVER_ANY);
igt_assert_fd(priv->fd.drm);
+ priv->failure = "need healthcheck";
+
driver_unbind(priv, "hot ");
driver_bind(priv);
@@ -224,8 +250,6 @@ static void hotunbind_lateclose(struct hotunplug *priv)
igt_debug("late closing the unbound device instance\n");
priv->fd.drm = local_close(priv->fd.drm);
igt_warn_on_f(priv->fd.drm != -1, "Device close failed\n");
-
- healthcheck(priv);
}
static void hotunplug_lateclose(struct hotunplug *priv)
@@ -234,6 +258,8 @@ static void hotunplug_lateclose(struct hotunplug *priv)
priv->fd.drm = __drm_open_driver(DRIVER_ANY);
igt_assert_fd(priv->fd.drm);
+ priv->failure = "need healthcheck";
+
device_unplug(priv, "hot ");
bus_rescan(priv);
@@ -241,8 +267,6 @@ static void hotunplug_lateclose(struct hotunplug *priv)
igt_debug("late closing the removed device instance\n");
priv->fd.drm = local_close(priv->fd.drm);
igt_warn_on_f(priv->fd.drm != -1, "Device close failed\n");
-
- healthcheck(priv);
}
/* Main */
@@ -276,30 +300,50 @@ igt_main
prepare(&priv);
}
- igt_describe("Check if the driver can be cleanly unbound from a device believed to be closed");
- igt_subtest("unbind-rebind")
- unbind_rebind(&priv);
+ igt_subtest_group {
+ igt_describe("Check if the driver can be cleanly unbound from a device believed to be closed");
+ igt_subtest("unbind-rebind")
+ unbind_rebind(&priv);
+
+ igt_fixture
+ recover(&priv);
+ }
igt_fixture
post_healthcheck(&priv);
- igt_describe("Check if a device believed to be closed can be cleanly unplugged");
- igt_subtest("unplug-rescan")
- unplug_rescan(&priv);
+ igt_subtest_group {
+ igt_describe("Check if a device believed to be closed can be cleanly unplugged");
+ igt_subtest("unplug-rescan")
+ unplug_rescan(&priv);
+
+ igt_fixture
+ recover(&priv);
+ }
igt_fixture
post_healthcheck(&priv);
- igt_describe("Check if the driver can be cleanly unbound from a still open device, then released");
- igt_subtest("hotunbind-lateclose")
- hotunbind_lateclose(&priv);
+ igt_subtest_group {
+ igt_describe("Check if the driver can be cleanly unbound from a still open device, then released");
+ igt_subtest("hotunbind-lateclose")
+ hotunbind_lateclose(&priv);
+
+ igt_fixture
+ recover(&priv);
+ }
igt_fixture
post_healthcheck(&priv);
- igt_describe("Check if a still open device can be cleanly unplugged, then released");
- igt_subtest("hotunplug-lateclose")
- hotunplug_lateclose(&priv);
+ igt_subtest_group {
+ igt_describe("Check if a still open device can be cleanly unplugged, then released");
+ igt_subtest("hotunplug-lateclose")
+ hotunplug_lateclose(&priv);
+
+ igt_fixture
+ recover(&priv);
+ }
igt_fixture {
post_healthcheck(&priv);
--
2.21.1
More information about the Intel-gfx
mailing list