[Intel-gfx] [PATCH i-g-t v2 11/16] tests/core_hotunplug: Follow failed subtests with health checks

Janusz Krzysztofik janusz.krzysztofik at linux.intel.com
Fri Aug 7 09:19:57 UTC 2020


Subtests now forcibly call or request igt_abort on failures in order to
avoid silently leaving an exercised device in an unusable state.
However, a failure inside a subtest doesn't always mean the device is
no longer working correctly and reboot is needed.  On the other hand,
if a subtest just fails without aborting, that doesn't mean in turn the
device is healthy.  We should still perform a device health check
in that case before deciding on next steps.

Reuse the 'failure' structure field as a mark which is set when a
subtest enters a chunk of critical steps which must be followed by a
successful health check in order to avoid aborting the test execution.
Then, move health checks from inside each subtest body to its
individual follow-up igt_fixture section from where device file
descriptors potentially left open are closed and the health check is
run if theigt_subtest section has been exited with the marker set.
Also, precede health check operations with a driver rebind or bus
rescan attempt as needed.

v2: Start each recovery phase from unconditionally closing file
    descriptors potentially left open by a subtest before it entered
    its critical section,
  - replace igt_require() with 'if() return;' construct in recover() to
    reduce noise,
  - replace "subtest failure" message used as a request for healthcheck
    with a more appropriate "need healthcheck" for clarity,
  - rebase on current upstream master.

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski at intel.com> # v1
---
 tests/core_hotunplug.c | 108 +++++++++++++++++++++++++++++------------
 1 file changed, 76 insertions(+), 32 deletions(-)

diff --git a/tests/core_hotunplug.c b/tests/core_hotunplug.c
index b9982b330..313c44784 100644
--- a/tests/core_hotunplug.c
+++ b/tests/core_hotunplug.c
@@ -52,6 +52,9 @@ struct hotunplug {
 
 static int local_close(int fd)
 {
+	if (fd < 0)	/* not open - return current status */
+		return fd;
+
 	errno = 0;
 	if (close(fd))	/* close failure - return -errno (never -1) */
 		return -errno;
@@ -91,11 +94,9 @@ static void driver_unbind(struct hotunplug *priv, const char *prefix)
 {
 	igt_debug("%sunbinding the driver from the device\n", prefix);
 
-	priv->failure = "Driver unbind timeout!";
-	igt_set_timeout(60, priv->failure);
+	igt_set_timeout(60, "Driver unbind timeout!");
 	igt_sysfs_set(priv->fd.sysfs_drv, "unbind", priv->dev_bus_addr);
 	igt_reset_timeout();
-	priv->failure = NULL;
 }
 
 /* Re-bind the driver to the device */
@@ -103,11 +104,9 @@ static void driver_bind(struct hotunplug *priv)
 {
 	igt_debug("rebinding the driver to the device\n");
 
-	priv->failure = "Driver re-bind timeout!";
-	igt_set_timeout(60, priv->failure);
+	igt_set_timeout(60, "Driver re-bind timeout!");
 	igt_sysfs_set(priv->fd.sysfs_drv, "bind", priv->dev_bus_addr);
 	igt_reset_timeout();
-	priv->failure = NULL;
 }
 
 /* Remove (virtually unplug) the device from its bus */
@@ -122,11 +121,9 @@ static void device_unplug(struct hotunplug *priv, const char *prefix)
 
 	igt_debug("%sunplugging the device\n", prefix);
 
-	priv->failure = "Device unplug timeout!";
-	igt_set_timeout(60, priv->failure);
+	igt_set_timeout(60, "Device unplug timeout!");
 	igt_sysfs_set(priv->fd.sysfs_dev, "remove", "1");
 	igt_reset_timeout();
-	priv->failure = NULL;
 
 	priv->fd.sysfs_dev = local_close(priv->fd.sysfs_dev);
 	igt_warn_on_f(priv->fd.sysfs_dev != -1,
@@ -138,11 +135,15 @@ static void bus_rescan(struct hotunplug *priv)
 {
 	igt_debug("recovering the device\n");
 
-	priv->failure = "Bus rescan timeout!";
-	igt_set_timeout(60, priv->failure);
+	igt_set_timeout(60, "Bus rescan timeout!");
 	igt_sysfs_set(priv->fd.sysfs_bus, "../rescan", "1");
 	igt_reset_timeout();
-	priv->failure = NULL;
+}
+
+static void cleanup(struct hotunplug *priv)
+{
+	priv->fd.drm = local_close(priv->fd.drm);
+	priv->fd.sysfs_dev = local_close(priv->fd.sysfs_dev);
 }
 
 static void healthcheck(struct hotunplug *priv)
@@ -150,6 +151,18 @@ static void healthcheck(struct hotunplug *priv)
 	/* preserve error code potentially stored before in priv->fd.drm */
 	int fd_drm;
 
+	if (faccessat(priv->fd.sysfs_bus, priv->dev_bus_addr, F_OK, 0)) {
+		priv->failure = "Bus rescan failed!";
+		bus_rescan(priv);
+		priv->failure = NULL;
+	}
+
+	if (faccessat(priv->fd.sysfs_drv, priv->dev_bus_addr, F_OK, 0)) {
+		priv->failure = "Driver re-bind failed!";
+		driver_bind(priv);
+		priv->failure = NULL;
+	}
+
 	/* device name may have changed, rebuild IGT device list */
 	igt_devices_scan(true);
 
@@ -169,6 +182,17 @@ static void healthcheck(struct hotunplug *priv)
 	igt_assert_f(fd_drm == -1, "Device close failed\n");
 }
 
+static void recover(struct hotunplug *priv)
+{
+	cleanup(priv);
+
+	if (!priv->failure)
+		return;
+	priv->failure = NULL;
+
+	healthcheck(priv);
+}
+
 static void post_healthcheck(struct hotunplug *priv)
 {
 	igt_abort_on_f(priv->failure, "%s\n", priv->failure);
@@ -195,20 +219,20 @@ static void set_filter_from_device(int fd)
 
 static void unbind_rebind(struct hotunplug *priv)
 {
+	priv->failure = "need healthcheck";
+
 	driver_unbind(priv, "");
 
 	driver_bind(priv);
-
-	healthcheck(priv);
 }
 
 static void unplug_rescan(struct hotunplug *priv)
 {
+	priv->failure = "need healthcheck";
+
 	device_unplug(priv, "");
 
 	bus_rescan(priv);
-
-	healthcheck(priv);
 }
 
 static void hotunbind_lateclose(struct hotunplug *priv)
@@ -217,6 +241,8 @@ static void hotunbind_lateclose(struct hotunplug *priv)
 	priv->fd.drm = __drm_open_driver(DRIVER_ANY);
 	igt_assert_fd(priv->fd.drm);
 
+	priv->failure = "need healthcheck";
+
 	driver_unbind(priv, "hot ");
 
 	driver_bind(priv);
@@ -224,8 +250,6 @@ static void hotunbind_lateclose(struct hotunplug *priv)
 	igt_debug("late closing the unbound device instance\n");
 	priv->fd.drm = local_close(priv->fd.drm);
 	igt_warn_on_f(priv->fd.drm != -1, "Device close failed\n");
-
-	healthcheck(priv);
 }
 
 static void hotunplug_lateclose(struct hotunplug *priv)
@@ -234,6 +258,8 @@ static void hotunplug_lateclose(struct hotunplug *priv)
 	priv->fd.drm = __drm_open_driver(DRIVER_ANY);
 	igt_assert_fd(priv->fd.drm);
 
+	priv->failure = "need healthcheck";
+
 	device_unplug(priv, "hot ");
 
 	bus_rescan(priv);
@@ -241,8 +267,6 @@ static void hotunplug_lateclose(struct hotunplug *priv)
 	igt_debug("late closing the removed device instance\n");
 	priv->fd.drm = local_close(priv->fd.drm);
 	igt_warn_on_f(priv->fd.drm != -1, "Device close failed\n");
-
-	healthcheck(priv);
 }
 
 /* Main */
@@ -276,30 +300,50 @@ igt_main
 		prepare(&priv);
 	}
 
-	igt_describe("Check if the driver can be cleanly unbound from a device believed to be closed");
-	igt_subtest("unbind-rebind")
-		unbind_rebind(&priv);
+	igt_subtest_group {
+		igt_describe("Check if the driver can be cleanly unbound from a device believed to be closed");
+		igt_subtest("unbind-rebind")
+			unbind_rebind(&priv);
+
+		igt_fixture
+			recover(&priv);
+	}
 
 	igt_fixture
 		post_healthcheck(&priv);
 
-	igt_describe("Check if a device believed to be closed can be cleanly unplugged");
-	igt_subtest("unplug-rescan")
-		unplug_rescan(&priv);
+	igt_subtest_group {
+		igt_describe("Check if a device believed to be closed can be cleanly unplugged");
+		igt_subtest("unplug-rescan")
+			unplug_rescan(&priv);
+
+		igt_fixture
+			recover(&priv);
+	}
 
 	igt_fixture
 		post_healthcheck(&priv);
 
-	igt_describe("Check if the driver can be cleanly unbound from a still open device, then released");
-	igt_subtest("hotunbind-lateclose")
-		hotunbind_lateclose(&priv);
+	igt_subtest_group {
+		igt_describe("Check if the driver can be cleanly unbound from a still open device, then released");
+		igt_subtest("hotunbind-lateclose")
+			hotunbind_lateclose(&priv);
+
+		igt_fixture
+			recover(&priv);
+	}
 
 	igt_fixture
 		post_healthcheck(&priv);
 
-	igt_describe("Check if a still open device can be cleanly unplugged, then released");
-	igt_subtest("hotunplug-lateclose")
-		hotunplug_lateclose(&priv);
+	igt_subtest_group {
+		igt_describe("Check if a still open device can be cleanly unplugged, then released");
+		igt_subtest("hotunplug-lateclose")
+			hotunplug_lateclose(&priv);
+
+		igt_fixture
+			recover(&priv);
+	}
 
 	igt_fixture {
 		post_healthcheck(&priv);
-- 
2.21.1



More information about the Intel-gfx mailing list