[PATCH v8 1/1 i-g-t] tests: Add a new test for driver/device hot reload

Janusz Krzysztofik janusz.krzysztofik at linux.intel.com
Sat Apr 27 10:48:07 UTC 2019


From: Janusz Krzysztofik <janusz.krzysztofik at intel.com>

Put some workload on a device, then try to either remove (unplug) the
device from its bus, or unbind the device's driver from it, possibly
followed by module unload, depending on which specific subtest has been
selected.  If that succeeds, rescan the device's bus if needed and perform
health checks on the device, with the driver possibly loaded back.

If module unload is requested, the workload is run in a sub-process,
not directly from the test, as it is expected to crash while still
keeping the device open for as long as its process has not exited.

The driver hot unbind / device hot unplug operation is expected to
succeed and the background workload sub-process to crash in a
reasonable time, however long timeouts are used to let kernel level
timeouts pop up first if hit by a bug.

The test is ready to be extended with arbitrary workload functions as
needed.  For now, only a workload based on igt_dummyload is implemented,
hence subtests work only with the i915 driver and are skipped on other
hardware, unless its driver provides an implementation of
igt_spin_new() and friends, or other workloads are implemented.

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at intel.com>
---
 tests/Makefile.sources  |   1 +
 tests/core_hot_reload.c | 408 ++++++++++++++++++++++++++++++++++++++++
 tests/meson.build       |   1 +
 3 files changed, 410 insertions(+)
 create mode 100644 tests/core_hot_reload.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 7f921f6c..452d8ed7 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -16,6 +16,7 @@ TESTS_progs = \
 	core_getclient \
 	core_getstats \
 	core_getversion \
+	core_hot_reload \
 	core_setmaster_vs_auth \
 	debugfs_test \
 	drm_import_export \
diff --git a/tests/core_hot_reload.c b/tests/core_hot_reload.c
new file mode 100644
index 00000000..6673f55c
--- /dev/null
+++ b/tests/core_hot_reload.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_dummyload.h"
+#include "igt_kmod.h"
+#include "igt_sysfs.h"
+
+#include <getopt.h>
+#include <limits.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+#include <sys/wait.h>
+
+/**
+ * A post-action device recovery function:
+ * @priv: pointer to the private data handed back by the paired test action
+ *
+ * Make the device re-appear after a test action made it disappear
+ */
+typedef void (*recover_t)(const void *priv);
+
+/**
+ * A test action function:
+ * @device: file descriptor of an open device sysfs directory
+ * @module: module name, non-NULL indicates post-action module unload requested
+ * @recover: for returning a post-action device recovery function, or NULL
+ * @priv: for returning a pointer to data to be passed to @recover
+ *
+ * Make the device disappear
+ */
+typedef void (*action_t)(int device, const char *module,
+			 recover_t *recover, const void **priv);
+
+/**
+ * A workload completion wait function:
+ * @device: open device file descriptor
+ * @priv: pointer to the private data handed back by the workload function
+ *
+ * Wait for completion of the background workload
+ */
+typedef void (*workload_wait_t)(int device, void *priv);
+
+/**
+ * A workload function:
+ * @device: open device file descriptor
+ * @arg: an optional string argument passed to the workload function
+ * @workload_wait: for returning a pointer to workload completion wait function
+ * @priv: for returning a pointer to data to be passed to @workload_wait
+ *
+ * Put some long lasting load on the device
+ */
+typedef void (*workload_t)(int device, const char *arg,
+			   workload_wait_t *workload_wait, void **priv);
+
+/**
+ * Pairs of test action / device recovery functions
+ */
+
+/* Unbind / re-bind */
+
+struct rebind_data {
+	int driver;	/* open driver sysfs dir fd, closed on re-bind */
+	char *device;	/* bus specific device address, not heap allocated */
+};
+
+/* Recovery for driver_unbind(): write the device address to driver's "bind" */
+static void driver_bind(const void *priv)
+{
+	const struct rebind_data *rebind = priv;
+	const int driver_dir = rebind->driver;
+
+	igt_set_timeout(60, "Driver re-bind timeout!");
+	igt_sysfs_set(driver_dir, "bind", rebind->device);
+	close(driver_dir);
+}
+
+/* Unbind the driver from the device and report how to re-bind it */
+static void driver_unbind(int device, const char *module,
+			  recover_t *recover, const void **priv)
+{
+	static char path[PATH_MAX];
+	static struct rebind_data data;
+	int len;
+
+	/* collect information required for driver bind/unbind */
+	data.driver = openat(device, "device/driver", O_DIRECTORY);
+	igt_assert(data.driver >= 0);
+	len = readlinkat(device, "device", path, sizeof(path) - 1);
+	igt_assert(len > 0);	/* on error, -1 would index before path[] */
+	path[len] = '\0';
+	data.device = strrchr(path, '/') + 1;
+
+	/* unbind the driver */
+	igt_set_timeout(60, "Driver unbind timeout!");
+	igt_sysfs_set(data.driver, "unbind", data.device);
+
+	/* pass back info on how to recover the device */
+	if (module) {
+		close(data.driver);	/* module goes away: no re-bind */
+		*recover = NULL;
+	} else {
+		*recover = driver_bind;
+		*priv = &data;
+	}
+}
+
+/* Unplug / re-plug */
+
+/* Recovery for device_unplug(): ask the bus to rescan for its devices */
+static void bus_rescan(const void *priv)
+{
+	/* priv points at the open bus sysfs directory fd */
+	const int bus_dir = *(const int *)priv;
+
+	igt_set_timeout(60, "Bus rescan timeout!");
+	igt_sysfs_set(bus_dir, "rescan", "1");
+	close(bus_dir);
+}
+
+/* Remove (virtually unplug) the device from its bus; @module is not used */
+static void device_unplug(int device, const char *module,
+			  recover_t *recover, const void **priv)
+{
+	static int bus;	/* static: its address is handed back via @priv */
+
+	/* collect information required for bus rescan before the removal */
+	bus = openat(device, "device/subsystem", O_DIRECTORY);
+	igt_assert(bus >= 0);
+
+	/* remove the device by writing "1" to its "remove" attribute */
+	igt_set_timeout(60, "Device unplug timeout!");
+	igt_sysfs_set(device, "device/remove", "1");
+
+	/* recovery is always a bus rescan, with or without module unload */
+	*recover = bus_rescan;
+	*priv = &bus;
+}
+
+/* Every test action function has to be listed in the table below */
+static const struct {
+	const char *name;	/* unique test action name used in test names */
+	action_t function;	/* test action function pointer */
+} actions[] = {
+	{ .name = "unbind", .function = driver_unbind },
+	{ .name = "unplug", .function = device_unplug },
+};
+
+/**
+ * Pairs of workload / wait completion functions
+ */
+
+/* A workload using igt_spin_new() */
+
+/* Wait for completion of the dummy load */
+static void dummy_wait(int device, void *priv)
+{
+	const igt_spin_t *spinner = priv;
+
+	/* block until the spinner stops running; errors must not fail here */
+	if (gem_wait(device, spinner->handle, NULL) != 0)
+		__gem_set_domain(device, spinner->handle,
+				 I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+}
+
+/* Start a dummy load on the device; @arg is not used by this workload */
+static void dummy_load(int device, const char *arg,
+		       workload_wait_t *workload_wait, void **priv)
+{
+	/*
+	 * submit a spinning job and hand it back as private data, together
+	 * with the function that knows how to wait for it
+	 */
+	*priv = igt_spin_new(device);
+
+	*workload_wait = dummy_wait;
+}
+
+/**
+ * Table of all available workloads; every workload function has to be listed.
+ * The same function may appear under several workload names, which is useful
+ * only if each of those names comes with a different argument.
+ */
+static const struct {
+	const char *name;	/* unique workload name used in test names */
+	workload_t function;	/* workload function pointer */
+	const char *arg;	/* optional constant string argument */
+} workloads[] = {
+	{ .name = "spin", .function = dummy_load, .arg = NULL },
+};
+
+/**
+ * Framework
+ */
+
+static void healthcheck(int chipset)
+{
+	int device;
+	/* the device is expected to be back, so opening it must succeed */
+	device = __drm_open_driver(chipset);
+	igt_assert(device >= 0);
+	/* the engine test is run on i915 only */
+	if (chipset == DRIVER_INTEL)
+		gem_test_engine(device, ALL_ENGINES);
+
+	close(device);
+}
+
+static void module_unload(int chipset, const char *module)
+{
+	if (chipset != DRIVER_INTEL)	/* other drivers: unload by name */
+		igt_assert(igt_kmod_unload(module, 0) == 0);
+	else				/* i915: use its dedicated helper */
+		igt_assert(igt_i915_driver_unload() == IGT_EXIT_SUCCESS);
+}
+
+static void run_action(int device, action_t action, const char *module,
+		      recover_t *recover, const void **priv)
+{
+	int dir;
+	/* actions take the device sysfs directory, not the device fd */
+	dir = igt_sysfs_open(device);
+	igt_assert(dir >= 0);
+	/* the action reports back via @recover / @priv how to undo itself */
+	action(dir, module, recover, priv);
+
+	close(dir);
+}
+
+static void wait_helper(int device, void *priv)
+{
+	/*
+	 * block until the workload helper process behind @priv has exited
+	 */
+	igt_ignore_warn(igt_wait_helper(priv));
+}
+
+static void run_workload(int device, workload_t workload, const char *arg,
+			 const char *module, workload_wait_t *workload_wait,
+			 void **priv)
+{
+	if (module) {
+		/* use a subprocess: the device stays open until it has exited */
+		static struct igt_helper_process proc;
+		int wstatus, ret;
+
+		memset(&proc, 0, sizeof(proc));
+
+		igt_fork_helper(&proc) {
+			/* suppress igt_log messages */
+			igt_log_level = IGT_LOG_NONE;
+
+			/* intercept igt_fail/skip() long jumps */
+			if (sigsetjmp(igt_subtest_jmpbuf, 1) == 0) {
+				workload(device, arg, workload_wait, priv);
+
+				(*workload_wait)(device, *priv);
+
+				/* success if not diverted by igt_fail/skip() */
+				igt_success();
+			}
+
+			/* pass exit code back to the caller */
+			igt_exit();
+		}
+		/* let the background process start doing its job or fail */
+		sleep(2);
+		/* an already exited helper means the workload ended too early */
+		ret = waitpid(proc.pid, &wstatus, WNOHANG);
+		if (ret < 0)
+			igt_fail(IGT_EXIT_INVALID);
+		if (ret) {
+			if (!WIFEXITED(wstatus))
+				igt_fail(IGT_EXIT_INVALID);
+			if (WEXITSTATUS(wstatus) == IGT_EXIT_SUCCESS)
+				igt_fail(IGT_EXIT_INVALID);
+			if (WEXITSTATUS(wstatus) == IGT_EXIT_SKIP)
+				igt_skip("Workload skipped\n");
+			igt_fail(WEXITSTATUS(wstatus));
+		}
+
+		/* pass back info on how to wait for helper completion */
+		*workload_wait = wait_helper;
+		*priv = &proc;
+	} else {
+		/* run the requested workload directly */
+		workload(device, arg, workload_wait, priv);
+	}
+}
+
+static void run_subtest(int chipset, int workload, int action,
+			const char *module)
+{
+	workload_wait_t workload_wait = NULL;
+	void *workload_priv = NULL;
+	recover_t recover = NULL;
+	const void *recover_priv = NULL;
+	int device;
+	/* subtest name is <workload>-<action>, "-unload" appended if @module */
+	igt_subtest_f("%s-%s%s", workloads[workload].name, actions[action].name,
+		      module ? "-unload" : "") {
+		device = __drm_open_driver(chipset);
+		igt_assert(device >= 0);
+
+		/* spawn the requested workload */
+		igt_debug("spawning background workload\n");
+		run_workload(device, workloads[workload].function,
+			     workloads[workload].arg, module,
+			     &workload_wait, &workload_priv);
+
+		/* run the requested test action */
+		igt_debug("running test action\n");
+		run_action(device, actions[action].function, module,
+			   &recover, &recover_priv);
+
+		if (workload_wait) {
+			igt_debug("waiting for workload completion\n");
+			workload_wait(device, workload_priv);
+		}
+		/* an open device fd would block module unload */
+		close(device);
+
+		if (module) {
+			igt_debug("unloading %s\n", module);
+			module_unload(chipset, module);
+		}
+
+		if (recover) {
+			igt_debug("recovering device\n");
+			recover(recover_priv);
+		}
+		igt_reset_timeout();	/* an action may arm it with no recovery */
+
+		igt_debug("running healthcheck\n");
+		healthcheck(chipset);
+	}
+}
+
+igt_main {
+	int device, chipset = DRIVER_ANY;
+	const char *module = "";	/* non-NULL for subtest listing */
+	int i, j;
+
+	igt_fixture {
+		char path[PATH_MAX];
+		int dir, len;
+
+		/**
+		 * Since some subtests depend on successful unload of a driver
+		 * module, don't use drm_open_driver() as it keeps a device file
+		 * descriptor open for exit handler use and that effectively
+		 * prevents the module from being unloaded.
+		 */
+		device = __drm_open_driver(DRIVER_ANY);
+		igt_assert(device >= 0);
+
+		if (is_i915_device(device)) {
+			chipset = DRIVER_INTEL;
+			module = strdup("i915");
+		} else {
+			chipset = DRIVER_ANY;
+			/* Capture module name to be unloaded */
+			dir = igt_sysfs_open(device);
+			len = readlinkat(dir, "device/driver/module", path,
+					 sizeof(path) - 1);
+			close(dir);
+			igt_assert(len > 0);
+			path[len] = '\0';
+			module = strdup(strrchr(path, '/') + 1);
+		}
+		close(device);
+
+		igt_info("Running the test on driver \"%s\", chipset mask %#0x\n",
+			 module, chipset);
+	}
+	/* subtest names are also generated when the fixture did not run */
+	for (i = 0; i < sizeof(workloads) / sizeof(*workloads); i++) {
+		for (j = 0; j < sizeof(actions) / sizeof(*actions); j++) {
+			/* with module unload */
+			run_subtest(chipset, i, j, module);
+			/* without module unload */
+			run_subtest(chipset, i, j, NULL);
+		}
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 711979b4..0d418035 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -3,6 +3,7 @@ test_progs = [
 	'core_getclient',
 	'core_getstats',
 	'core_getversion',
+	'core_hot_reload',
 	'core_setmaster_vs_auth',
 	'debugfs_test',
 	'drm_import_export',
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list