[Intel-gfx] [PATCH i-g-t 3/5] i915: Exercise preemption timeout controls in sysfs

Chris Wilson chris at chris-wilson.co.uk
Fri Feb 28 10:43:38 UTC 2020


We [will] expose various per-engine scheduling controls. One of which,
'preempt_timeout_ms', defines how we wait for a preemption request to be
honoured by the currently executing context. If it fails to relieve the
GPU within the required timeout, the engine is reset and the miscreant
forcibly evicted.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 lib/i915/gem_context.c             |  64 ++++--
 lib/i915/gem_context.h             |   2 +
 lib/i915/gem_engine_topology.c     |  48 +++++
 lib/i915/gem_engine_topology.h     |   3 +
 tests/Makefile.sources             |   3 +
 tests/i915/sysfs_preempt_timeout.c | 309 +++++++++++++++++++++++++++++
 tests/intel-ci/blacklist.txt       |   1 +
 tests/meson.build                  |   1 +
 8 files changed, 414 insertions(+), 17 deletions(-)
 create mode 100644 tests/i915/sysfs_preempt_timeout.c

diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
index 50dfee3d1..169e43d2e 100644
--- a/lib/i915/gem_context.c
+++ b/lib/i915/gem_context.c
@@ -43,6 +43,21 @@
  * software features improving submission model (context priority).
  */
 
+static int create_ext_ioctl(int i915,
+			    struct drm_i915_gem_context_create_ext *arg)
+{
+	int err;
+
+	err = 0;
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
+		err = -errno;
+		igt_assume(err);
+	}
+
+	errno = 0;
+	return err;
+}
+
 /**
  * gem_has_contexts:
  * @fd: open i915 drm file descriptor
@@ -324,17 +339,14 @@ __gem_context_clone(int i915,
 		.flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
 		.extensions = to_user_pointer(&clone),
 	};
-	int err = 0;
+	int err;
 
-	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg)) {
-		err = -errno;
-		igt_assume(err);
-	}
+	err = create_ext_ioctl(i915, &arg);
+	if (err)
+		return err;
 
 	*out = arg.ctx_id;
-
-	errno = 0;
-	return err;
+	return 0;
 }
 
 static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
@@ -382,16 +394,8 @@ bool gem_has_context_clone(int i915)
 		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
 		.extensions = to_user_pointer(&ext),
 	};
-	int err;
-
-	err = 0;
-	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create)) {
-		err = -errno;
-		igt_assume(err);
-	}
-	errno = 0;
 
-	return err == -ENOENT;
+	return create_ext_ioctl(i915, &create) == -ENOENT;
 }
 
 /**
@@ -492,3 +496,29 @@ gem_context_copy_engines(int src_fd, uint32_t src, int dst_fd, uint32_t dst)
 	param.ctx_id = dst;
 	gem_context_set_param(dst_fd, &param);
 }
+
+uint32_t gem_context_create_for_engine(int i915, unsigned int class, unsigned int inst)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = {
+		.engines = { { .engine_class = class, .engine_instance = inst } }
+	};
+	struct drm_i915_gem_context_create_ext_setparam p_engines = {
+		.base = {
+			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+			.next_extension = 0, /* end of chain */
+		},
+		.param = {
+			.param = I915_CONTEXT_PARAM_ENGINES,
+			.value = to_user_pointer(&engines),
+			.size = sizeof(engines),
+		},
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&p_engines),
+	};
+
+	igt_assert_eq(create_ext_ioctl(i915, &create), 0);
+	igt_assert_neq(create.ctx_id, 0);
+	return create.ctx_id;
+}
diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
index 15e5db281..b478c47a1 100644
--- a/lib/i915/gem_context.h
+++ b/lib/i915/gem_context.h
@@ -34,6 +34,8 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
 void gem_context_destroy(int fd, uint32_t ctx_id);
 int __gem_context_destroy(int fd, uint32_t ctx_id);
 
+uint32_t gem_context_create_for_engine(int fd, unsigned int class, unsigned int inst);
+
 int __gem_context_clone(int i915,
 			uint32_t src, unsigned int share,
 			unsigned int flags,
diff --git a/lib/i915/gem_engine_topology.c b/lib/i915/gem_engine_topology.c
index 640777ae4..64a6b0472 100644
--- a/lib/i915/gem_engine_topology.c
+++ b/lib/i915/gem_engine_topology.c
@@ -22,6 +22,8 @@
  */
 
 #include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
 #include <unistd.h>
 
 #include "drmtest.h"
@@ -411,3 +413,49 @@ uint32_t gem_engine_mmio_base(int i915, const char *engine)
 
 	return mmio;
 }
+
+void dyn_sysfs_engines(int i915, int engines, const char *file,
+		       void (*test)(int, int))
+{
+	char buf[512];
+	int len;
+
+	lseek(engines, 0, SEEK_SET);
+	while ((len = syscall(SYS_getdents64, engines, buf, sizeof(buf))) > 0) {
+		void *ptr = buf;
+
+		while (len) {
+			struct linux_dirent64 {
+				ino64_t        d_ino;
+				off64_t        d_off;
+				unsigned short d_reclen;
+				unsigned char  d_type;
+				char           d_name[];
+			} *de = ptr;
+			char *name;
+			int engine;
+
+			ptr += de->d_reclen;
+			len -= de->d_reclen;
+
+			engine = openat(engines, de->d_name, O_RDONLY);
+			name = igt_sysfs_get(engine, "name");
+			if (!name) {
+				close(engine);
+				continue;
+			}
+
+			igt_dynamic(name) {
+				if (file) {
+					struct stat st;
+
+					igt_require(fstatat(engine, file, &st, 0) == 0);
+				}
+
+				test(i915, engine);
+			}
+
+			close(engine);
+		}
+	}
+}
diff --git a/lib/i915/gem_engine_topology.h b/lib/i915/gem_engine_topology.h
index 219c84b72..b624d607e 100644
--- a/lib/i915/gem_engine_topology.h
+++ b/lib/i915/gem_engine_topology.h
@@ -80,4 +80,7 @@ int gem_engine_property_scanf(int i915, const char *engine, const char *attr,
 			      const char *fmt, ...);
 uint32_t gem_engine_mmio_base(int i915, const char *engine);
 
+void dyn_sysfs_engines(int i915, int engines, const char *file,
+		       void (*test)(int i915, int engine));
+
 #endif /* GEM_ENGINE_TOPOLOGY_H */
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 10913e86b..cc1d1fdd1 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -103,6 +103,9 @@ TESTS_progs = \
 	vgem_slow \
 	$(NULL)
 
+TESTS_progs += sysfs_preempt_timeout
+sysfs_preempt_timeout_SOURCES = i915/sysfs_preempt_timeout.c
+
 TESTS_progs += gem_bad_reloc
 gem_bad_reloc_SOURCES = i915/gem_bad_reloc.c
 
diff --git a/tests/i915/sysfs_preempt_timeout.c b/tests/i915/sysfs_preempt_timeout.c
new file mode 100644
index 000000000..71fe46387
--- /dev/null
+++ b/tests/i915/sysfs_preempt_timeout.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "drmtest.h" /* gem_quiescent_gpu()! */
+#include "i915/gem_engine_topology.h"
+#include "igt_dummyload.h"
+#include "igt_sysfs.h"
+#include "ioctl_wrappers.h" /* igt_require_gem()! */
+#include "sw_sync.h"
+
+#include "igt_debugfs.h"
+
+#define ATTR "preempt_timeout_ms"
+
+static bool __enable_hangcheck(int dir, bool state)
+{
+	return igt_sysfs_set(dir, "enable_hangcheck", state ? "1" : "0");
+}
+
+static bool enable_hangcheck(int i915, bool state)
+{
+	bool success;
+	int dir;
+
+	dir = igt_sysfs_open_parameters(i915);
+	if (dir < 0) /* no parameters, must be default! */
+		return false;
+
+	success = __enable_hangcheck(dir, state);
+	close(dir);
+
+	return success;
+}
+
+static void set_preempt_timeout(int engine, unsigned int value)
+{
+	unsigned int delay;
+
+	igt_sysfs_printf(engine, ATTR, "%u", value);
+	igt_sysfs_scanf(engine, ATTR, "%u", &delay);
+	igt_assert_eq(delay, value);
+}
+
+static void test_idempotent(int i915, int engine)
+{
+	unsigned int delays[] = { 0, 1, 1000, 1234, 654321 };
+	unsigned int saved;
+
+	/* Quick test that store/show reports the same values */
+
+	igt_assert(igt_sysfs_scanf(engine, ATTR, "%u", &saved) == 1);
+	igt_debug("Initial %s:%u\n", ATTR, saved);
+
+	for (int i = 0; i < ARRAY_SIZE(delays); i++)
+		set_preempt_timeout(engine, delays[i]);
+
+	set_preempt_timeout(engine, saved);
+}
+
+static void test_invalid(int i915, int engine)
+{
+	unsigned int saved, delay;
+
+	/* Quick test that values that are not representable are rejected */
+
+	igt_assert(igt_sysfs_scanf(engine, ATTR, "%u", &saved) == 1);
+	igt_debug("Initial %s:%u\n", ATTR, saved);
+
+	igt_sysfs_printf(engine, ATTR, PRIu64, -1);
+	igt_sysfs_scanf(engine, ATTR, "%u", &delay);
+	igt_assert_eq(delay, saved);
+
+	igt_sysfs_printf(engine, ATTR, "%d", -1);
+	igt_sysfs_scanf(engine, ATTR, "%u", &delay);
+	igt_assert_eq(delay, saved);
+
+	igt_sysfs_printf(engine, ATTR, PRIu64, 40ull << 32);
+	igt_sysfs_scanf(engine, ATTR, "%u", &delay);
+	igt_assert_eq(delay, saved);
+}
+
+static void set_unbannable(int i915, uint32_t ctx)
+{
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = ctx,
+		.param = I915_CONTEXT_PARAM_BANNABLE,
+	};
+
+	igt_assert_eq(__gem_context_set_param(i915, &p), 0);
+}
+
+static uint32_t create_context(int i915, unsigned int class, unsigned int inst, int prio)
+{
+	uint32_t ctx;
+
+	ctx = gem_context_create_for_engine(i915, class, inst);
+	set_unbannable(i915, ctx);
+	gem_context_set_priority(i915, ctx, prio);
+
+	return ctx;
+}
+
+static uint64_t __test_timeout(int i915, int engine, unsigned int timeout)
+{
+	unsigned int class, inst;
+	struct timespec ts = {};
+	igt_spin_t *spin[2];
+	uint64_t elapsed;
+	uint32_t ctx[2];
+
+	igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1);
+	igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1);
+
+	set_preempt_timeout(engine, timeout);
+
+	ctx[0] = create_context(i915, class, inst, -1023);
+	spin[0] = igt_spin_new(i915, ctx[0],
+			       .flags = (IGT_SPIN_NO_PREEMPTION |
+					 IGT_SPIN_POLL_RUN |
+					 IGT_SPIN_FENCE_OUT));
+	igt_spin_busywait_until_started(spin[0]);
+
+	ctx[1] = create_context(i915, class, inst, 1023);
+	igt_nsec_elapsed(&ts);
+	spin[1] = igt_spin_new(i915, ctx[1], .flags = IGT_SPIN_POLL_RUN);
+	igt_spin_busywait_until_started(spin[1]);
+	elapsed = igt_nsec_elapsed(&ts);
+
+	igt_spin_free(i915, spin[1]);
+
+	igt_assert_eq(sync_fence_wait(spin[0]->out_fence, 1), 0);
+	igt_assert_eq(sync_fence_status(spin[0]->out_fence), -EIO);
+
+	igt_spin_free(i915, spin[0]);
+
+	gem_context_destroy(i915, ctx[1]);
+	gem_context_destroy(i915, ctx[0]);
+	gem_quiescent_gpu(i915);
+
+	return elapsed;
+}
+
+static void test_timeout(int i915, int engine)
+{
+	int delays[] = { 1, 50, 100, 500 };
+	unsigned int saved;
+
+	/*
+	 * Send down some non-preemptable workloads and then request a
+	 * switch to a higher priority context. The HW will not be able to
+	 * respond, so the kernel will be forced to reset the hog. This
+	 * timeout should match our specification, and so we can measure
+	 * the delay from requesting the preemption to its completion.
+	 */
+
+	igt_assert(igt_sysfs_scanf(engine, ATTR, "%u", &saved) == 1);
+	igt_debug("Initial %s:%u\n", ATTR, saved);
+
+	gem_quiescent_gpu(i915);
+	igt_require(enable_hangcheck(i915, false));
+
+	for (int i = 0; i < ARRAY_SIZE(delays); i++) {
+		uint64_t elapsed;
+
+		elapsed = __test_timeout(i915, engine, delays[i]);
+		igt_info("%s:%d, elapsed=%.3fms\n",
+			 ATTR, delays[i], elapsed * 1e-6);
+
+		/*
+		 * We need to give a couple of jiffies slack for the scheduler timeouts
+		 * and then a little more slack fr the overhead in submitting and
+		 * measuring. 50ms should cover all of our sins and be useful
+		 * tolerance.
+		 */
+		igt_assert_f(elapsed / 1000 / 1000 < delays[i] + 50,
+			     "Forced preemption timeout exceeded request!\n");
+	}
+
+	igt_assert(enable_hangcheck(i915, true));
+	gem_quiescent_gpu(i915);
+	set_preempt_timeout(engine, saved);
+}
+
+static void test_off(int i915, int engine)
+{
+	unsigned int class, inst;
+	igt_spin_t *spin[2];
+	unsigned int saved;
+	uint32_t ctx[2];
+
+	/*
+	 * We support setting the timeout to 0 to disable the reset on
+	 * preemption failure. Having established that we can do forced
+	 * preemption on demand, we use the same setup (non-preeemptable hog
+	 * followed by a high priority context) and verify that the hog is
+	 * never reset. Never is a long time, so we settle for 150s.
+	 */
+
+	igt_assert(igt_sysfs_scanf(engine, ATTR, "%u", &saved) == 1);
+	igt_debug("Initial %s:%u\n", ATTR, saved);
+
+	gem_quiescent_gpu(i915);
+	igt_require(enable_hangcheck(i915, false));
+
+	igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1);
+	igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1);
+
+	set_preempt_timeout(engine, 0);
+
+	ctx[0] = create_context(i915, class, inst, -1023);
+	spin[0] = igt_spin_new(i915, ctx[0],
+			       .flags = (IGT_SPIN_NO_PREEMPTION |
+					 IGT_SPIN_POLL_RUN |
+					 IGT_SPIN_FENCE_OUT));
+	igt_spin_busywait_until_started(spin[0]);
+
+	ctx[1] = create_context(i915, class, inst, 1023);
+	spin[1] = igt_spin_new(i915, ctx[1], .flags = IGT_SPIN_POLL_RUN);
+
+	for (int i = 0; i < 150; i++) {
+		igt_assert_eq(sync_fence_status(spin[0]->out_fence), 0);
+		sleep(1);
+	}
+
+	set_preempt_timeout(engine, 1);
+
+	igt_spin_busywait_until_started(spin[1]);
+	igt_spin_free(i915, spin[1]);
+
+	igt_assert_eq(sync_fence_wait(spin[0]->out_fence, 1), 0);
+	igt_assert_eq(sync_fence_status(spin[0]->out_fence), -EIO);
+
+	igt_spin_free(i915, spin[0]);
+
+	gem_context_destroy(i915, ctx[1]);
+	gem_context_destroy(i915, ctx[0]);
+
+	igt_assert(enable_hangcheck(i915, true));
+	gem_quiescent_gpu(i915);
+
+	set_preempt_timeout(engine, saved);
+}
+
+igt_main
+{
+	static const struct {
+		const char *name;
+		void (*fn)(int, int);
+	} tests[] = {
+		{ "idempotent", test_idempotent },
+		{ "invalid", test_invalid },
+		{ "timeout", test_timeout },
+		{ "off", test_off },
+		{ }
+	};
+	int i915 = -1, engines = -1;
+
+	igt_fixture {
+		int sys;
+
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+		igt_allow_hang(i915, 0, 0);
+
+		sys = igt_sysfs_open(i915);
+		igt_require(sys != -1);
+
+		engines = openat(sys, "engine", O_RDONLY);
+		igt_require(engines != -1);
+
+		close(sys);
+	}
+
+	for (typeof(*tests) *t = tests; t->name; t++)
+		igt_subtest_with_dynamic(t->name)
+			dyn_sysfs_engines(i915, engines, ATTR, t->fn);
+
+	igt_fixture {
+		close(engines);
+		close(i915);
+	}
+}
diff --git a/tests/intel-ci/blacklist.txt b/tests/intel-ci/blacklist.txt
index 390e4fa0c..01cd176ee 100644
--- a/tests/intel-ci/blacklist.txt
+++ b/tests/intel-ci/blacklist.txt
@@ -59,6 +59,7 @@ igt at gem_sync@(?!.*basic).*
 igt at gem_tiled_swapping@(?!non-threaded).*
 igt at gem_userptr_blits@(major|minor|forked|mlocked|swapping).*
 igt at gem_wait@.*hang.*
+igt at sysfs_preemption_timeout@off.*
 ###############################################
 # GEM: Not worth fixing
 ###############################################
diff --git a/tests/meson.build b/tests/meson.build
index dcc5e7b6a..0af1ed1e1 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -226,6 +226,7 @@ i915_progs = [
 	'i915_query',
 	'i915_selftest',
 	'i915_suspend',
+	'sysfs_preempt_timeout',
 ]
 
 test_deps = [ igt_deps ]
-- 
2.25.1



More information about the Intel-gfx mailing list