[PATCH 3/4] drm/i915: Introduce a selftest for struct i915_active error propagation

Thomas Hellström thomas.hellstrom at linux.intel.com
Sat Jun 19 18:00:31 UTC 2021


From: Chris Wilson <chris.p.wilson at intel.com>

Now that we propagate errors along the struct i915_active::excl fence,
introduce a selftest verifying that the propagation works as intended.
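
For reference, a rough sketch of the behaviour the new subtests exercise
(names as used in the test code below):

	prev = i915_active_set_exclusive(ref, &rq->fence);
	/* prev is the previously tracked fence, NULL, or ERR_PTR(old error) */
	i915_request_set_error_once(rq, -EIO);
	/* ...once rq signals, ref->excl.fence should read ERR_PTR(-EIO) */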

v2:
- Don't return from the error path in single_error(); just set the error.
- Wait for work completion in double_chain_error().
- Spin-wait for the dma_fence callback to modify the fence pointer.

Co-developed-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Signed-off-by: Chris Wilson <chris.p.wilson at intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
---
 drivers/gpu/drm/i915/selftests/i915_active.c | 272 +++++++++++++++++++
 1 file changed, 272 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 61bf4560d8af..1c2cb0454e60 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -10,6 +10,7 @@
 #include "gt/intel_gt.h"
 
 #include "i915_selftest.h"
+#include "i915_sw_fence_work.h"
 
 #include "igt_flush_test.h"
 #include "lib_sw_fence.h"
@@ -246,12 +247,283 @@ static int live_active_barrier(void *arg)
 	return err;
 }
 
+/* Wait until the dma_fence callback modifies the excl i915_active_fence */
+static void active_excl_spin_wait(struct i915_active *ref)
+{
+	while (!IS_ERR_OR_NULL(rcu_access_pointer(ref->excl.fence))) {
+		if (!cond_resched())
+			cpu_relax();
+	}
+}
+
+static int single_error(struct i915_active *ref,
+			struct intel_engine_cs *engine,
+			int expect, int next)
+{
+	struct i915_request *rq;
+	struct dma_fence *prev;
+	int err = 0;
+
+	/* Straightforward check that we preserve an immediate error */
+
+	rq = i915_request_create(engine->kernel_context);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	prev = i915_active_set_exclusive(ref, &rq->fence);
+	if (PTR_ERR_OR_ZERO(prev) != expect) {
+		pr_err("Previous error fence was not recorded!\n");
+		err = -EINVAL;
+	}
+
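+	/* Set the error that should be recorded on ref->excl once rq signals. */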
+	i915_request_set_error_once(rq, next);
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	if (i915_request_wait(rq, 0, HZ / 5) < 0)
+		err = -ETIME;
+	i915_request_put(rq);
+	active_excl_spin_wait(ref);
+
+	return err;
+}
+
+static int chain_work(struct dma_fence_work *base)
+{
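+	/* No-op payload; the work exists only to carry signaling and errors. */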
+	return 0;
+}
+
+static const struct dma_fence_work_ops chain_ops = {
+	.name = "chain",
+	.work = chain_work,
+};
+
+static struct dma_fence_work *chain_create(void)
+{
+	struct dma_fence_work *chain;
+
+	chain = kmalloc(sizeof(*chain), GFP_KERNEL);
+	if (!chain)
+		return NULL;
+
+	dma_fence_work_init(chain, &chain_ops);
+	return chain;
+}
+
+static int chain_request_error(struct i915_active *ref,
+			       struct intel_engine_cs *engine,
+			       int expect, int next)
+{
+	struct dma_fence_work *chain;
+	struct i915_request *rq;
+	struct dma_fence *prev;
+	int err = 0;
+
+	/*
+	 * Check that an error is propagated from external work along
+	 * the exclusive timeline and recorded back on the active.
+	 */
+
+	chain = chain_create();
+	if (!chain)
+		return -ENOMEM;
+
+	prev = i915_active_set_exclusive(ref, &chain->dma);
+	if (PTR_ERR_OR_ZERO(prev) != expect) {
+		pr_err("Previous error fence was not propagated!\n");
+		return -EINVAL;
+	}
+
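+	/* Flag the work item with an error before committing it; the request below should inherit it. */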
+	i915_sw_fence_set_error_once(&chain->chain, next);
+
+	rq = i915_request_create(engine->kernel_context);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	prev = i915_active_set_exclusive(ref, &rq->fence);
+	if (prev != &chain->dma) {
+		pr_err("Exclusive fence not chained!\n");
+		dma_fence_work_commit_imm(chain);
+		if (!IS_ERR_OR_NULL(prev))
+			dma_fence_put(prev);
+		i915_request_add(rq);
+		return -EINVAL;
+	}
+
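+	/* Order the request after the work so the error can flow along the chain. */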
+	err = i915_request_await_dma_fence(rq, prev);
+	dma_fence_put(prev);
+	dma_fence_work_commit_imm(chain);
+	if (err) {
+		i915_request_add(rq);
+		return err;
+	}
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 5) < 0)
+		err = -ETIME;
+	i915_request_put(rq);
+	active_excl_spin_wait(ref);
+
+	return err;
+}
+
+static int double_chain_error(struct i915_active *ref,
+			      struct intel_engine_cs *engine,
+			      int expect, int next)
+{
+	struct dma_fence_work *chain[2];
+	struct dma_fence *prev;
+	int err;
+
+	/*
+	 * Check that an error is propagated along multiple external work
+	 * items and recorded back on the active.
+	 */
+
+	chain[0] = chain_create();
+	if (!chain[0])
+		return -ENOMEM;
+
+	prev = i915_active_set_exclusive(ref, &chain[0]->dma);
+	if (PTR_ERR_OR_ZERO(prev) != expect) {
+		pr_err("Previous error fence was not propagated!\n");
+		err = -EINVAL;
+		goto out_chain0;
+	}
+
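+	/* Flag the first work item with the error to be propagated. */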
+	i915_sw_fence_set_error_once(&chain[0]->chain, next);
+
+	chain[1] = chain_create();
+	if (!chain[1]) {
+		err = -ENOMEM;
+		goto out_chain0;
+	}
+
+	prev = i915_active_set_exclusive(ref, &chain[1]->dma);
+	if (prev != &chain[0]->dma) {
+		pr_err("Exclusive fence not chained!\n");
+		err = -EINVAL;
+		goto out_prev;
+	}
+
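+	/* Chain the second work item after the first, propagating the error work-to-work. */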
+	err = i915_sw_fence_await_dma_fence(&chain[1]->chain,
+					    prev, 0, GFP_KERNEL);
+out_prev:
+	if (!IS_ERR_OR_NULL(prev))
+		dma_fence_put(prev);
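+	/* Hold a reference to the last work fence so we can wait on it below. */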
+	prev = dma_fence_get(&chain[1]->dma);
+	dma_fence_work_commit_imm(chain[1]);
+out_chain0:
+	dma_fence_work_commit_imm(chain[0]);
+	if (!IS_ERR_OR_NULL(prev)) {
+		if (!dma_fence_wait_timeout(prev, false, HZ / 5))
+			err = -ETIME;
+		dma_fence_put(prev);
+		active_excl_spin_wait(ref);
+	}
+
+	return err < 0 ? err : 0;
+}
+
+static int live_active_error(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct live_active *live;
+	const int errors[] = {
+		-EFAULT,
+		-EIO,
+		-EINTR
+	};
+	int expect;
+	int err;
+
+	/*
+	 * We track the last fence error along active->excl. Let's
+	 * check that we can indeed retrieve an old error and that
+	 * error does not cause problems...
+	 */
+
+	live = __live_alloc(i915);
+	if (!live)
+		return -ENOMEM;
+
+	i915_active_acquire(&live->base);
+
+	pr_info("Testing simple error capture from i915_request\n");
+	expect = 0;
+	for_each_uabi_engine(engine, i915) {
+		int next = errors[engine->id % ARRAY_SIZE(errors)];
+
+		err = single_error(&live->base, engine, expect, next);
+		if (err)
+			goto out;
+
+		expect = next;
+	}
+	if (rcu_access_pointer(live->base.excl.fence) != ERR_PTR(expect)) {
+		pr_err("Exclusive error not preserved!\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	pr_info("Testing error propagation along work/request chain\n");
+	for_each_uabi_engine(engine, i915) {
+		int next = errors[engine->id % ARRAY_SIZE(errors)];
+
+		err = chain_request_error(&live->base, engine, expect, next);
+		if (err)
+			goto out;
+
+		expect = next;
+	}
+	if (rcu_access_pointer(live->base.excl.fence) != ERR_PTR(expect)) {
+		pr_err("Exclusive error not preserved!\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	pr_info("Testing error propagation along work/work chain\n");
+	for_each_uabi_engine(engine, i915) {
+		int next = errors[engine->id % ARRAY_SIZE(errors)];
+
+		err = double_chain_error(&live->base, engine, expect, next);
+		if (err)
+			goto out;
+
+		expect = next;
+	}
+	if (rcu_access_pointer(live->base.excl.fence) != ERR_PTR(expect)) {
+		pr_err("Exclusive error not preserved!\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	i915_active_release(&live->base);
+
+	if (igt_flush_test(i915))
+		err = -EIO;
+
+	if (err == 0 &&
+	    rcu_access_pointer(live->base.excl.fence) != ERR_PTR(expect)) {
+		pr_err("Exclusive error not retained on idling!\n");
+		err = -EINVAL;
+	}
+
+	__live_put(live);
+
+	return err;
+}
+
 int i915_active_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_active_wait),
 		SUBTEST(live_active_retire),
 		SUBTEST(live_active_barrier),
+		SUBTEST(live_active_error),
 	};
 
 	if (intel_gt_is_wedged(&i915->gt))
-- 
2.31.1


