[Intel-gfx] [PATCH i-g-t 8/9] i915: Exercise timeslice sysfs property

Thu Oct 24 10:54:48 UTC 2019

We [will] expose various per-engine scheduling controls. One of which,
'timeslice_duration_ms', defines the scheduling quantum. If a context
exhausts its timeslice, it will be preempted in favour of running one of
its compatriots.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 tests/Makefile.sources                |   1 +
 tests/i915/sysfs_timeslice_duration.c | 517 ++++++++++++++++++++++++++
 tests/meson.build                     |   1 +
 3 files changed, 519 insertions(+)
 create mode 100644 tests/i915/sysfs_timeslice_duration.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 74fd8be5b..b22d63265 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -100,6 +100,7 @@ TESTS_progs = \
 	vgem_slow \
 	i915/sysfs_heartbeat_interval \
 	i915/sysfs_preempt_timeout \
+	i915/sysfs_timeslice_duration \
 	$(NULL)
 
 TESTS_progs += gem_bad_reloc
diff --git a/tests/i915/sysfs_timeslice_duration.c b/tests/i915/sysfs_timeslice_duration.c
new file mode 100644
index 000000000..f10b3a839
--- /dev/null
+++ b/tests/i915/sysfs_timeslice_duration.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "drmtest.h" /* gem_quiescent_gpu()! */
+#include "i915/gem_engine_topology.h"
+#include "i915/gem_mman.h"
+#include "igt_dummyload.h"
+#include "igt_sysfs.h"
+#include "ioctl_wrappers.h" /* igt_require_gem()! */
+#include "intel_chipset.h"
+#include "intel_reg.h"
+#include "sw_sync.h"
+
+#define MI_SEMAPHORE_WAIT		(0x1c << 23)
+#define   MI_SEMAPHORE_POLL             (1 << 15)
+#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
+#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
+#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
+#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
+#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
+#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
+
+static bool __enable_hangcheck(int dir, bool state)
+{
+	return igt_sysfs_set(dir, "enable_hangcheck", state ? "1" : "0");
+}
+
+static bool enable_hangcheck(int i915, bool state)
+{
+	bool success;
+	int dir;
+
+	dir = igt_sysfs_open_parameters(i915);
+	if (dir < 0) /* no parameters, must be default! */
+		return false;
+
+	success = __enable_hangcheck(dir, state);
+	close(dir);
+
+	return success;
+}
+
+static void set_timeslice(int engine, unsigned int value)
+{
+	unsigned int delay;
+
+	igt_sysfs_printf(engine, "timeslice_duration_ms", "%u", value);
+	igt_sysfs_scanf(engine, "timeslice_duration_ms", "%u", &delay);
+	igt_assert_eq(delay, value);
+}
+
+static void test_idempotent(int i915, int engine)
+{
+	const unsigned int delays[] = { 0, 1, 1234, 654321 };
+	unsigned int saved;
+
+	/* Quick test to verify the kernel reports the same values as we write */
+
+	igt_assert(igt_sysfs_scanf(engine, "timeslice_duration_ms", "%u", &saved) == 1);
+	igt_debug("Initial timeslice_duration_ms:%u\n", saved);
+
+	for (int i = 0; i < ARRAY_SIZE(delays); i++)
+		set_timeslice(engine, delays[i]);
+
+	set_timeslice(engine, saved);
+}
+
+static void test_invalid(int i915, int engine)
+{
+	unsigned int saved, delay;
+
+	/* Quick test that non-representable delays are rejected */
+
+	igt_assert(igt_sysfs_scanf(engine, "timeslice_duration_ms", "%u", &saved) == 1);
+	igt_debug("Initial timeslice_duration_ms:%u\n", saved);
+
+	igt_sysfs_printf(engine, "timeslice_duration_ms", PRIu64, -1);
+	igt_sysfs_scanf(engine, "timeslice_duration_ms", "%u", &delay);
+	igt_assert_eq(delay, saved);
+
+	igt_sysfs_printf(engine, "timeslice_duration_ms", "%d", -1);
+	igt_sysfs_scanf(engine, "timeslice_duration_ms", "%u", &delay);
+	igt_assert_eq(delay, saved);
+
+	igt_sysfs_printf(engine, "timeslice_duration_ms", PRIu64, 123ull << 32);
+	igt_sysfs_scanf(engine, "timeslice_duration_ms", "%u", &delay);
+	igt_assert_eq(delay, saved);
+}
+
+static void set_unbannable(int i915, uint32_t ctx)
+{
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = ctx,
+		.param = I915_CONTEXT_PARAM_BANNABLE,
+	};
+
+	igt_assert_eq(__gem_context_set_param(i915, &p), 0);
+}
+
+static uint32_t create_context(int i915, unsigned int class, unsigned int inst, int prio)
+{
+	uint32_t ctx;
+
+	ctx = gem_context_create_for_engine(i915, class, inst);
+	set_unbannable(i915, ctx);
+	gem_context_set_priority(i915, ctx, prio);
+
+	return ctx;
+}
+
+static int cmp_u32(const void *_a, const void *_b)
+{
+	const uint32_t *a = _a, *b = _b;
+
+	return *a - *b;
+}
+
+static double clockrate(int i915)
+{
+	int freq;
+	drm_i915_getparam_t gp = {
+		.value = &freq,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+
+	igt_require(igt_ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp) == 0);
+	return 1e9 / freq;
+}
+
+static uint64_t __test_duration(int i915, int engine, unsigned int timeout)
+{
+	struct drm_i915_gem_exec_object2 obj[3] = {
+		{
+			.handle = gem_create(i915, 4096),
+			.offset = 0,
+			.flags = EXEC_OBJECT_PINNED,
+		},
+		{
+			.handle = gem_create(i915, 4096),
+			.offset = 4096,
+			.flags = EXEC_OBJECT_PINNED,
+		},
+		{ gem_create(i915, 4096) }
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffer_count = ARRAY_SIZE(obj),
+		.buffers_ptr = to_user_pointer(obj),
+	};
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	double duration = clockrate(i915);
+	unsigned int class, inst, mmio;
+	uint32_t *cs, *map;
+	uint32_t ctx[2];
+	int start;
+	int i;
+
+	igt_require(gem_scheduler_has_preemption(i915));
+	igt_require(gen >= 8); /* MI_SEMAPHORE_WAIT */
+
+	igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1);
+	igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1);
+	igt_require(igt_sysfs_scanf(engine, "mmio_base", "%x", &mmio) == 1);
+
+	set_timeslice(engine, timeout);
+
+	ctx[0] = create_context(i915, class, inst, 0);
+	ctx[1] = create_context(i915, class, inst, 0);
+
+	map = gem_mmap__cpu(i915, obj[2].handle, 0, 4096, PROT_WRITE);
+
+	cs = map;
+	for (i = 0; i < 10; i++) {
+		*cs++ = MI_SEMAPHORE_WAIT |
+			MI_SEMAPHORE_POLL |
+			MI_SEMAPHORE_SAD_NEQ_SDD |
+			(4 - 2 + (gen >= 12));
+		*cs++ = 0;
+		*cs++ = obj[0].offset + sizeof(uint32_t) * i;
+		*cs++ = 0;
+		if (gen >= 12)
+			*cs++ = 0;
+
+		*cs++ = 0x24 << 23 | 2; /* SRM */
+		*cs++ = mmio + 0x358;
+		*cs++ = obj[1].offset + sizeof(uint32_t) * i;
+		*cs++ = 0;
+
+		*cs++ = MI_STORE_DWORD_IMM;
+		*cs++ = obj[0].offset +
+			4096 - sizeof(uint32_t) * i - sizeof(uint32_t);
+		*cs++ = 0;
+		*cs++ = 1;
+	}
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	cs += 16 - ((cs - map) & 15);
+	start = (cs - map) * sizeof(*cs);
+	for (i = 0; i < 10; i++) {
+		*cs++ = MI_STORE_DWORD_IMM;
+		*cs++ = obj[0].offset + sizeof(uint32_t) * i;
+		*cs++ = 0;
+		*cs++ = 1;
+
+		*cs++ = MI_SEMAPHORE_WAIT |
+			MI_SEMAPHORE_POLL |
+			MI_SEMAPHORE_SAD_NEQ_SDD |
+			(4 - 2 + (gen >= 12));
+		*cs++ = 0;
+		*cs++ = obj[0].offset +
+			4096 - sizeof(uint32_t) * i - sizeof(uint32_t);
+		*cs++ = 0;
+		if (gen >= 12)
+			*cs++ = 0;
+	}
+	*cs++ = MI_BATCH_BUFFER_END;
+	igt_assert(cs - map < 4096 / sizeof(*cs));
+	munmap(map, 4096);
+
+	eb.rsvd1 = ctx[0];
+	gem_execbuf(i915, &eb);
+
+	eb.rsvd1 = ctx[1];
+	eb.batch_start_offset = start;
+	gem_execbuf(i915, &eb);
+
+	gem_sync(i915, obj[2].handle);
+
+	gem_set_domain(i915, obj[1].handle,
+		       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+	map = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
+	for (i = 0; i < 9; i++)
+		map[i] = map[i + 1] - map[i];
+	qsort(map, 9, sizeof(*map), cmp_u32);
+	duration *= map[4] / 2; /* 2 sema-waits between timestamp updates */
+	munmap(map, 4096);
+
+	for (i = 0; i < ARRAY_SIZE(ctx); i++)
+		gem_context_destroy(i915, ctx[i]);
+
+	for (i = 0; i < ARRAY_SIZE(obj); i++)
+		gem_close(i915, obj[i].handle);
+
+	return duration;
+}
+
+static void test_duration(int i915, int engine)
+{
+	int delays[] = { 1, 50, 100, 500 };
+	unsigned int saved;
+
+	/*
+	 * Timeslicing at its very basic level is sharing the GPU by
+	 * running one context for interval before running another. After
+	 * each interval the running context is swapped for another runnable
+	 * context.
+	 *
+	 * We can measure this directly by watching the xCS_TIMESTAMP and
+	 * recording its value every time we switch into the context, using
+	 * a couple of semaphores to busyspin for the timeslice.
+	 */
+
+	igt_assert(igt_sysfs_scanf(engine, "timeslice_duration_ms", "%u", &saved) == 1);
+	igt_debug("Initial timeslice_duration_ms:%u\n", saved);
+
+	gem_quiescent_gpu(i915);
+
+	for (int i = 0; i < ARRAY_SIZE(delays); i++) {
+		uint64_t elapsed;
+
+		elapsed = __test_duration(i915, engine, delays[i]);
+		igt_info("timeslice_duration_ms:%d, elapsed=%.3fms\n",
+			 delays[i], elapsed * 1e-6);
+
+		/*
+		 * We need to give a couple of jiffies slack for the scheduler timeouts
+		 * and then a little more slack fr the overhead in submitting and
+		 * measuring. 50ms should cover all of our sins and be useful
+		 * tolerance.
+		 */
+		igt_assert_f(elapsed / 1000 / 1000 < delays[i] + 50,
+			     "Timeslice exceeded request!\n");
+	}
+
+	gem_quiescent_gpu(i915);
+	set_timeslice(engine, saved);
+}
+
+static uint64_t __test_timeout(int i915, int engine, unsigned int timeout)
+{
+	unsigned int class, inst;
+	struct timespec ts = {};
+	igt_spin_t *spin[2];
+	uint64_t elapsed;
+	uint32_t ctx[2];
+
+	igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1);
+	igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1);
+
+	set_timeslice(engine, timeout);
+
+	ctx[0] = create_context(i915, class, inst, 0);
+	spin[0] = igt_spin_new(i915, ctx[0],
+			       .flags = (IGT_SPIN_NO_PREEMPTION |
+					 IGT_SPIN_POLL_RUN |
+					 IGT_SPIN_FENCE_OUT));
+	igt_spin_busywait_until_started(spin[0]);
+
+	ctx[1] = create_context(i915, class, inst, 0);
+	igt_nsec_elapsed(&ts);
+	spin[1] = igt_spin_new(i915, ctx[1], .flags = IGT_SPIN_POLL_RUN);
+	igt_spin_busywait_until_started(spin[1]);
+	elapsed = igt_nsec_elapsed(&ts);
+
+	igt_spin_free(i915, spin[1]);
+
+	igt_assert_eq(sync_fence_wait(spin[0]->out_fence, 1), 0);
+	igt_assert_eq(sync_fence_status(spin[0]->out_fence), -EIO);
+
+	igt_spin_free(i915, spin[0]);
+
+	gem_context_destroy(i915, ctx[1]);
+	gem_context_destroy(i915, ctx[0]);
+	gem_quiescent_gpu(i915);
+
+	return elapsed;
+}
+
+static void test_timeout(int i915, int engine)
+{
+	int delays[] = { 1, 50, 100, 500 };
+	unsigned int saved;
+
+	/*
+	 * Timeslicing requires us to preempt the running context in order to
+	 * switch into its contemporary. If we couple a unpreemptable hog
+	 * with a fast forced reset, we can measure the timeslice by how long
+	 * it takes for the hog to be reset and the high priority context
+	 * to complete.
+	 */
+
+	igt_require(igt_sysfs_printf(engine, "preempt_timeout_ms", "%u", 1) == 1);
+	igt_assert(igt_sysfs_scanf(engine, "timeslice_duration_ms", "%u", &saved) == 1);
+	igt_debug("Initial timeslice_duration_ms:%u\n", saved);
+
+	gem_quiescent_gpu(i915);
+	igt_require(enable_hangcheck(i915, false));
+
+	for (int i = 0; i < ARRAY_SIZE(delays); i++) {
+		uint64_t elapsed;
+
+		elapsed = __test_timeout(i915, engine, delays[i]);
+		igt_info("timeslice_duration_ms:%d, elapsed=%.3fms\n",
+			 delays[i], elapsed * 1e-6);
+
+		/*
+		 * We need to give a couple of jiffies slack for the scheduler timeouts
+		 * and then a little more slack fr the overhead in submitting and
+		 * measuring. 50ms should cover all of our sins and be useful
+		 * tolerance.
+		 */
+		igt_assert_f(elapsed / 1000 / 1000 < delays[i] + 50,
+			     "Timeslice exceeded request!\n");
+	}
+
+	igt_assert(enable_hangcheck(i915, true));
+	gem_quiescent_gpu(i915);
+	set_timeslice(engine, saved);
+}
+
+static void test_off(int i915, int engine)
+{
+	unsigned int class, inst;
+	unsigned int saved;
+	igt_spin_t *spin[2];
+	uint32_t ctx[2];
+
+	/*
+	 * As always, there are some who must run uninterrupted and simply do
+	 * not want to share the GPU even for a microsecond. Those greedy
+	 * clients can disable timeslicing entirely, and so set the timeslice
+	 * to 0. We test that a hog is not preempted within the 150s of
+	 * our boredom threshold.
+	 */
+
+	igt_require(igt_sysfs_printf(engine, "preempt_timeout_ms", "%u", 1) == 1);
+	igt_assert(igt_sysfs_scanf(engine, "timeslice_duration_ms", "%u", &saved) == 1);
+	igt_debug("Initial timeslice_duration_ms:%u\n", saved);
+
+	gem_quiescent_gpu(i915);
+	igt_require(enable_hangcheck(i915, false));
+
+	igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1);
+	igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1);
+
+	set_timeslice(engine, 0);
+
+	ctx[0] = create_context(i915, class, inst, 0);
+	spin[0] = igt_spin_new(i915, ctx[0],
+			       .flags = (IGT_SPIN_NO_PREEMPTION |
+					 IGT_SPIN_POLL_RUN |
+					 IGT_SPIN_FENCE_OUT));
+	igt_spin_busywait_until_started(spin[0]);
+
+	ctx[1] = create_context(i915, class, inst, 0);
+	spin[1] = igt_spin_new(i915, ctx[1], .flags = IGT_SPIN_POLL_RUN);
+
+	for (int i = 0; i < 150; i++) {
+		igt_assert_eq(sync_fence_status(spin[0]->out_fence), 0);
+		sleep(1);
+	}
+
+	set_timeslice(engine, 1);
+
+	igt_spin_busywait_until_started(spin[1]);
+	igt_spin_free(i915, spin[1]);
+
+	igt_assert_eq(sync_fence_wait(spin[0]->out_fence, 1), 0);
+	igt_assert_eq(sync_fence_status(spin[0]->out_fence), -EIO);
+
+	igt_spin_free(i915, spin[0]);
+
+	gem_context_destroy(i915, ctx[1]);
+	gem_context_destroy(i915, ctx[0]);
+
+	igt_assert(enable_hangcheck(i915, true));
+	gem_quiescent_gpu(i915);
+
+	set_timeslice(engine, saved);
+}
+
+igt_main
+{
+	const struct intel_execution_engine2 *it;
+	int i915 = -1, engines = -1;
+
+	igt_fixture {
+		int sys;
+
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+
+		sys = igt_sysfs_open(i915);
+		igt_require(sys != -1);
+
+		engines = openat(sys, "engine", O_RDONLY);
+		igt_require(engines != -1);
+
+		close(sys);
+	}
+
+	__for_each_static_engine(it) {
+		igt_subtest_group {
+			int engine = -1;
+			char *name = NULL;
+
+			igt_fixture {
+				struct stat st;
+
+				engine = openat(engines, it->name, O_RDONLY);
+				igt_require(fstatat(engine,
+							"timeslice_duration_ms",
+							&st, 0) == 0);
+
+				name = igt_sysfs_get(engine, "name");
+			}
+			if (!name)
+				continue;
+
+			igt_subtest_f("%s-idempotent", name)
+				test_idempotent(i915, engine);
+			igt_subtest_f("%s-invalid", name)
+				test_invalid(i915, engine);
+			igt_subtest_f("%s-duration", name)
+				test_duration(i915, engine);
+			igt_subtest_f("%s-timeout", name)
+				test_timeout(i915, engine);
+			igt_subtest_f("%s-off", name)
+				test_off(i915, engine);
+
+			free(name);
+			close(engine);
+		}
+	}
+
+	igt_fixture {
+		close(engines);
+		close(i915);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 9eeab530d..f151c0a5e 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -240,6 +240,7 @@ i915_progs = [
 	'i915_suspend',
 	'sysfs_heartbeat_interval',
 	'sysfs_preempt_timeout',
+	'sysfs_timeslice_duration',
 ]
 
 test_deps = [ igt_deps ]
-- 
2.24.0.rc0