[Intel-gfx] [CI] drm/i915/selftests: Spin on all engines simultaneously

Thu Oct 31 23:29:16 UTC 2019

Vanshidhar Konda asked for the simplest test "to verify that the kernel
can submit and hardware can execute batch buffers on all the command
streamers in parallel." We have a number of tests in userspace that
submit load to each engine and verify that it is present, but strictly
we have no selftest to prove that the kernel can _simultaneously_
execute on all known engines. (We have tests to demonstrate that we can
submit to HW in parallel, but we don't insist that they execute in
parallel.)

v2: Improve the igt_spinner support for older gen.

Suggested-by: Vanshidhar Konda <vanshidhar.r.konda at intel.com>
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Vanshidhar Konda <vanshidhar.r.konda at intel.com>
Cc: Matthew Auld <matthew.auld at intel.com>
Reviewed-by: Vanshidhar Konda <vanshidhar.r.konda at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h               |  6 ++
 drivers/gpu/drm/i915/selftests/i915_request.c | 76 +++++++++++++++++++
 drivers/gpu/drm/i915/selftests/igt_spinner.c  | 36 ++++++---
 3 files changed, 109 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a22d969cb352..0c3ab6020bc6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -891,6 +891,10 @@ struct intel_cdclk_state {
 	u8 voltage_level;
 };
 
+struct i915_selftest_stash {
+	atomic_t counter;
+};
+
 struct drm_i915_private {
 	struct drm_device drm;
 
@@ -1286,6 +1290,8 @@ struct drm_i915_private {
 	/* Mutex to protect the above hdcp component related values. */
 	struct mutex hdcp_comp_mutex;
 
+	I915_SELFTEST_DECLARE(struct i915_selftest_stash selftest;)
+
 	/*
 	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 	 * will be rejected. Instead look for a better place.
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 30ae34f62176..4efc24c3eed7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -32,6 +32,7 @@
 #include "i915_random.h"
 #include "i915_selftest.h"
 #include "igt_live_test.h"
+#include "igt_spinner.h"
 #include "lib_sw_fence.h"
 
 #include "mock_drm.h"
@@ -1115,12 +1116,85 @@ static int __live_parallel_engineN(void *arg)
 	return 0;
 }
 
+static bool wake_all(struct drm_i915_private *i915)
+{
+	if (atomic_dec_and_test(&i915->selftest.counter)) {
+		wake_up_var(&i915->selftest.counter);
+		return true;
+	}
+
+	return false;
+}
+
+static int wait_for_all(struct drm_i915_private *i915)
+{
+	if (wake_all(i915))
+		return 0;
+
+	if (wait_var_event_timeout(&i915->selftest.counter,
+				   !atomic_read(&i915->selftest.counter),
+				   i915_selftest.timeout_jiffies))
+		return 0;
+
+	return -ETIME;
+}
+
+static int __live_parallel_spin(void *arg)
+{
+	struct intel_engine_cs *engine = arg;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	int err = 0;
+
+	/*
+	 * Create a spinner running for eternity on each engine. If a second
+	 * spinner is incorrectly placed on the same engine, it will not be
+	 * able to start in time.
+	 */
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		wake_all(engine->i915);
+		return -ENOMEM;
+	}
+
+	rq = igt_spinner_create_request(&spin,
+					engine->kernel_context,
+					MI_NOOP); /* no preemption */
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		if (err == -ENODEV)
+			err = 0;
+		wake_all(engine->i915);
+		goto out_spin;
+	}
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (igt_wait_for_spinner(&spin, rq)) {
+		/* Occupy this engine for the whole test */
+		err = wait_for_all(engine->i915);
+	} else {
+		pr_err("Failed to start spinner on %s\n", engine->name);
+		err = -EINVAL;
+	}
+	igt_spinner_end(&spin);
+
+	if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0)
+		err = -EIO;
+	i915_request_put(rq);
+
+out_spin:
+	igt_spinner_fini(&spin);
+	return err;
+}
+
 static int live_parallel_engines(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	static int (* const func[])(void *arg) = {
 		__live_parallel_engine1,
 		__live_parallel_engineN,
+		__live_parallel_spin,
 		NULL,
 	};
 	const unsigned int nengines = num_uabi_engines(i915);
@@ -1146,6 +1220,8 @@ static int live_parallel_engines(void *arg)
 		if (err)
 			break;
 
+		atomic_set(&i915->selftest.counter, nengines);
+
 		idx = 0;
 		for_each_uabi_engine(engine, i915) {
 			tsk[idx] = kthread_run(*fn, engine,
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index ee8450b871da..20ca1134c1b3 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -15,8 +15,6 @@ int igt_spinner_init(struct igt_spinner *spin, struct intel_gt *gt)
 	void *vaddr;
 	int err;
 
-	GEM_BUG_ON(INTEL_GEN(gt->i915) < 8);
-
 	memset(spin, 0, sizeof(*spin));
 	spin->gt = gt;
 
@@ -100,6 +98,9 @@ igt_spinner_create_request(struct igt_spinner *spin,
 
 	GEM_BUG_ON(spin->gt != ce->vm->gt);
 
+	if (!intel_engine_can_store_dword(ce->engine))
+		return ERR_PTR(-ENODEV);
+
 	vma = i915_vma_instance(spin->obj, ce->vm, NULL);
 	if (IS_ERR(vma))
 		return ERR_CAST(vma);
@@ -132,16 +133,33 @@ igt_spinner_create_request(struct igt_spinner *spin,
 
 	batch = spin->batch;
 
-	*batch++ = MI_STORE_DWORD_IMM_GEN4;
-	*batch++ = lower_32_bits(hws_address(hws, rq));
-	*batch++ = upper_32_bits(hws_address(hws, rq));
-	*batch++ = rq->fence.seqno;
+	if (INTEL_GEN(rq->i915) >= 8) {
+		*batch++ = MI_STORE_DWORD_IMM_GEN4;
+		*batch++ = lower_32_bits(hws_address(hws, rq));
+		*batch++ = upper_32_bits(hws_address(hws, rq));
+		*batch++ = rq->fence.seqno;
+	} else if (INTEL_GEN(rq->i915) >= 4) {
+		*batch++ = MI_STORE_DWORD_IMM_GEN4;
+		*batch++ = 0;
+		*batch++ = hws_address(hws, rq);
+		*batch++ = rq->fence.seqno;
+	} else {
+		*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+		*batch++ = hws_address(hws, rq);
+		*batch++ = rq->fence.seqno;
+	}
 
 	*batch++ = arbitration_command;
 
-	*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
-	*batch++ = lower_32_bits(vma->node.start);
-	*batch++ = upper_32_bits(vma->node.start);
+	if (INTEL_GEN(rq->i915) >= 8) {
+		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
+		*batch++ = lower_32_bits(vma->node.start);
+		*batch++ = upper_32_bits(vma->node.start);
+	} else {
+		*batch++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+		*batch++ = vma->node.start;
+	}
+
 	*batch++ = MI_BATCH_BUFFER_END; /* not reached */
 
 	intel_gt_chipset_flush(engine->gt);
-- 
2.24.0.rc2