[PATCH 3/4] drm/i915: Introduce a selftest for struct i915_active error propagation
Thomas Hellström
thomas.hellstrom at linux.intel.com
Sat Jun 19 18:00:31 UTC 2021
From: Chris Wilson <chris.p.wilson at intel.com>
Now that we propagate errors along the struct i915_active::excl fence,
introduce a selftest verifying that this works as intended.
v2:
- Don't return from error path in single_error(), just set the error.
- Wait for work completion in double_chain_error()
- Spin wait for the dma_fence callback to modify the fence pointer.
Co-developed-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Signed-off-by: Chris Wilson <chris.p.wilson at intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
---
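Note (illustration only, not part of the patch): the semantics the test
relies on, as introduced earlier in this series, are that once the last
exclusive fence signals with an error, ref->excl.fence is left holding
ERR_PTR(err). A later observer can then pick the recorded error back up
roughly as in the sketch below; peek_excl_error() is a hypothetical
helper used purely for illustration.

static int peek_excl_error(struct i915_active *ref)
{
	struct dma_fence *f = rcu_access_pointer(ref->excl.fence);

	/* The error of the last exclusive fence is kept as an ERR_PTR. */
	if (IS_ERR(f))
		return PTR_ERR(f);

	return 0; /* no error recorded, or a fence is still tracked */
}

The selftest below checks exactly this: after driving requests and work
items that fail with known errors, live->base.excl.fence must still read
back as ERR_PTR(expect), including after idling.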
drivers/gpu/drm/i915/selftests/i915_active.c | 272 +++++++++++++++++++
1 file changed, 272 insertions(+)
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 61bf4560d8af..1c2cb0454e60 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -10,6 +10,7 @@
#include "gt/intel_gt.h"
#include "i915_selftest.h"
+#include "i915_sw_fence_work.h"
#include "igt_flush_test.h"
#include "lib_sw_fence.h"
@@ -246,12 +247,283 @@ static int live_active_barrier(void *arg)
return err;
}
+/* Wait until the dma_fence callback modifies the excl i915_active_fence */
+static void active_excl_spin_wait(struct i915_active *ref)
+{
+ while (!IS_ERR_OR_NULL(rcu_access_pointer(ref->excl.fence))) {
+ if (!cond_resched())
+ cpu_relax();
+ }
+}
+
+static int single_error(struct i915_active *ref,
+ struct intel_engine_cs *engine,
+ int expect, int next)
+{
+ struct i915_request *rq;
+ struct dma_fence *prev;
+ int err = 0;
+
+ /* Straight-forward check that we preserve an immediate error */
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ prev = i915_active_set_exclusive(ref, &rq->fence);
+ if (PTR_ERR_OR_ZERO(prev) != expect) {
+ pr_err("Previous error fence was not recorded!\n");
+ err = -EINVAL;
+ }
+
+ i915_request_set_error_once(rq, next);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+ active_excl_spin_wait(ref);
+
+ return err;
+}
+
+static int chain_work(struct dma_fence_work *base)
+{
+ return 0;
+}
+
+static const struct dma_fence_work_ops chain_ops = {
+ .name = "chain",
+ .work = chain_work,
+};
+
+static struct dma_fence_work *chain_create(void)
+{
+ struct dma_fence_work *chain;
+
+ chain = kmalloc(sizeof(*chain), GFP_KERNEL);
+ if (!chain)
+ return NULL;
+
+ dma_fence_work_init(chain, &chain_ops);
+ return chain;
+}
+
+static int chain_request_error(struct i915_active *ref,
+ struct intel_engine_cs *engine,
+ int expect, int next)
+{
+ struct dma_fence_work *chain;
+ struct i915_request *rq;
+ struct dma_fence *prev;
+ int err = 0;
+
+ /*
+ * Check that an error is propagated from external work along
+ * the exclusive timeline and recorded back on the active.
+ */
+
+ chain = chain_create();
+ if (!chain)
+ return -ENOMEM;
+
+ prev = i915_active_set_exclusive(ref, &chain->dma);
+ if (PTR_ERR_OR_ZERO(prev) != expect) {
+ pr_err("Previous error fence was not propagated!\n");
+ return -EINVAL;
+ }
+
+ i915_sw_fence_set_error_once(&chain->chain, next);
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ prev = i915_active_set_exclusive(ref, &rq->fence);
+ if (prev != &chain->dma) {
+ pr_err("Exclusive fence not chained!\n");
+ dma_fence_work_commit_imm(chain);
+ dma_fence_put(prev);
+ i915_request_add(rq);
+ return -EINVAL;
+ }
+
+ err = i915_request_await_dma_fence(rq, prev);
+ dma_fence_put(prev);
+ dma_fence_work_commit_imm(chain);
+ if (err) {
+ i915_request_add(rq);
+ return err;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+ active_excl_spin_wait(ref);
+
+ return err;
+}
+
+static int double_chain_error(struct i915_active *ref,
+ struct intel_engine_cs *engine,
+ int expect, int next)
+{
+ struct dma_fence_work *chain[2];
+ struct dma_fence *prev;
+ int err;
+
+ /*
+ * Check that an error is propagated along multiple external work
+ * items and recorded back on the active.
+ */
+
+ chain[0] = chain_create();
+ if (!chain[0])
+ return -ENOMEM;
+
+ prev = i915_active_set_exclusive(ref, &chain[0]->dma);
+ if (PTR_ERR_OR_ZERO(prev) != expect) {
+ pr_err("Previous error fence was not propagated!\n");
+ err = -EINVAL;
+ goto out_chain0;
+ }
+
+ i915_sw_fence_set_error_once(&chain[0]->chain, next);
+
+ chain[1] = chain_create();
+ if (!chain[1]) {
+ err = -ENOMEM;
+ goto out_chain0;
+ }
+
+ prev = i915_active_set_exclusive(ref, &chain[1]->dma);
+ if (prev != &chain[0]->dma) {
+ pr_err("Exclusive fence not chained!\n");
+ err = -EINVAL;
+ goto out_prev;
+ }
+
+ err = i915_sw_fence_await_dma_fence(&chain[1]->chain,
+ prev, 0, GFP_KERNEL);
+out_prev:
+ if (!IS_ERR_OR_NULL(prev))
+ dma_fence_put(prev);
+ prev = dma_fence_get(&chain[1]->dma);
+ dma_fence_work_commit_imm(chain[1]);
+out_chain0:
+ dma_fence_work_commit_imm(chain[0]);
+ if (!IS_ERR_OR_NULL(prev)) {
+ if (!dma_fence_wait_timeout(prev, false, HZ / 5))
+ err = -ETIME;
+ dma_fence_put(prev);
+ active_excl_spin_wait(ref);
+ }
+
+ return err < 0 ? err : 0;
+}
+
+static int live_active_error(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *engine;
+ struct live_active *live;
+ const int errors[] = {
+ -EFAULT,
+ -EIO,
+ -EINTR
+ };
+ int expect;
+ int err;
+
+ /*
+ * We track the last fence error along active->excl. Let's
+ * check that we can indeed retrieve an old error and that
+ * error does not cause problems...
+ */
+
+ live = __live_alloc(i915);
+ if (!live)
+ return -ENOMEM;
+
+ i915_active_acquire(&live->base);
+
+ pr_info("Testing simple error capture from i915_request\n");
+ expect = 0;
+ for_each_uabi_engine(engine, i915) {
+ int next = errors[engine->id % ARRAY_SIZE(errors)];
+
+ err = single_error(&live->base, engine, expect, next);
+ if (err)
+ goto out;
+
+ expect = next;
+ }
+ if (rcu_access_pointer(live->base.excl.fence) != ERR_PTR(expect)) {
+ pr_err("Exclusive error not preserved!\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ pr_info("Testing error propagation along work/request chain\n");
+ for_each_uabi_engine(engine, i915) {
+ int next = errors[engine->id % ARRAY_SIZE(errors)];
+
+ err = chain_request_error(&live->base, engine, expect, next);
+ if (err)
+ goto out;
+
+ expect = next;
+ }
+ if (rcu_access_pointer(live->base.excl.fence) != ERR_PTR(expect)) {
+ pr_err("Exclusive error not preserved!\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ pr_info("Testing error propagation along work/work chain\n");
+ for_each_uabi_engine(engine, i915) {
+ int next = errors[engine->id % ARRAY_SIZE(errors)];
+
+ err = double_chain_error(&live->base, engine, expect, next);
+ if (err)
+ goto out;
+
+ expect = next;
+ }
+ if (rcu_access_pointer(live->base.excl.fence) != ERR_PTR(expect)) {
+ pr_err("Exclusive error not preserved!\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_active_release(&live->base);
+
+ if (igt_flush_test(i915))
+ err = -EIO;
+
+ if (err == 0 &&
+ rcu_access_pointer(live->base.excl.fence) != ERR_PTR(expect)) {
+ pr_err("Exclusive error not retained on idling!\n");
+ err = -EINVAL;
+ }
+
+ __live_put(live);
+
+ return err;
+}
+
int i915_active_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_active_wait),
SUBTEST(live_active_retire),
SUBTEST(live_active_barrier),
+ SUBTEST(live_active_error),
};
if (intel_gt_is_wedged(&i915->gt))
--
2.31.1