[PATCH 64/86] drm/i915/execlists: Try preempt-reset from hardirq timer context

Chris Wilson chris at chris-wilson.co.uk
Fri Sep 28 15:22:39 UTC 2018


When circumstances allow, try resetting the engine directly from the
preemption timeout handler. As this is softirq context, we have to be
careful both not to sleep and not to spin on anything we may be
interrupting (e.g. the submission tasklet).

v2: Ignore trying to fast reset the guc, there's still a bunch of
potential sleeps inside its reset-preparation phase.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Cc: Michał Winiarski <michal.winiarski at intel.com>
CC: Michel Thierry <michel.thierry at intel.com>
Cc: Jeff McGee <jeff.mcgee at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c           |  35 +++++-
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 123 +++++++++++++++++++++
 2 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index aaf246004cc6..dc11eeece5e0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -562,6 +562,38 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static int try_preempt_reset(struct intel_engine_cs *engine)
+{
+	struct tasklet_struct * const t = &engine->execlists.tasklet;
+	int err = -EBUSY; /* nonzero: caller queues the reset worker */
+
+	if (tasklet_trylock(t)) { /* trylock only, never wait here */
+		const unsigned int bit = I915_RESET_ENGINE + engine->id;
+		unsigned long *lock = &engine->i915->gpu_error.flags;
+
+		t->func(t->data); /* run the tasklet handler directly */
+		if (!execlists_is_active(&engine->execlists,
+					 EXECLISTS_ACTIVE_PREEMPT_TIMEOUT)) {
+			/* Nothing to do; the tasklet was just delayed. */
+			err = 0;
+		} else if (!test_and_set_bit(bit, lock)) {
+			tasklet_disable_nosync(t);
+			if (!USES_GUC_SUBMISSION(engine)) {
+				err = i915_reset_engine(engine,
+							"preemption time out");
+			} /* with GuC submission err stays -EBUSY */
+			tasklet_enable(t);
+
+			clear_bit(bit, lock);
+			wake_up_bit(lock, bit); /* wake waiters on the bit */
+		}
+
+		tasklet_unlock(t);
+	}
+
+	return err;
+}
+
 static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
 {
 	struct intel_engine_execlists *execlists =
@@ -581,7 +613,8 @@ static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
 		intel_engine_dump(engine, &p, "%s\n", engine->name);
 	}
 
-	queue_work(system_highpri_wq, &execlists->preempt_reset);
+	if (try_preempt_reset(engine))
+		queue_work(system_highpri_wq, &execlists->preempt_reset);
 
 	return HRTIMER_NORESTART;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 692fcbfb9b58..11490a8f9590 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -649,6 +649,128 @@ static int live_preempt_timeout(void *arg)
 	return err;
 }
 
+static void __preempt_begin(void)
+{
+	preempt_disable(); /* phases[] "preempt": begin */
+}
+
+static void __preempt_end(void)
+{
+	preempt_enable(); /* phases[] "preempt": end */
+}
+
+static void __softirq_begin(void)
+{
+	local_bh_disable(); /* phases[] "softirq": begin */
+}
+
+static void __softirq_end(void)
+{
+	local_bh_enable(); /* phases[] "softirq": end */
+}
+
+static void __hardirq_begin(void)
+{
+	local_irq_disable(); /* phases[] "hardirq": begin */
+}
+
+static void __hardirq_end(void)
+{
+	local_irq_enable(); /* phases[] "hardirq": end */
+}
+
+static int live_preempt_reset(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id id;
+	struct spinner spin;
+	int err = -ENOMEM; /* reported if spinner/context setup fails */
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+		return 0; /* no preemption: nothing to exercise */
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	if (spinner_init(&spin, i915))
+		goto err_unlock;
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		goto err_spin;
+
+	for_each_engine(engine, i915, id) {
+		static const struct {
+			const char *name;
+			void (*critical_section_begin)(void);
+			void (*critical_section_end)(void);
+		} phases[] = {
+			{ "preempt", __preempt_begin, __preempt_end },
+			{ "softirq", __softirq_begin, __softirq_end },
+			{ "hardirq", __hardirq_begin, __hardirq_end },
+			{ } /* sentinel: NULL name ends the phase loop */
+		};
+		struct tasklet_struct *t = &engine->execlists.tasklet;
+		const typeof(*phases) *p;
+
+		for (p = phases; p->name; p++) {
+			struct i915_request *rq;
+
+			rq = spinner_create_request(&spin, ctx, engine,
+						    MI_NOOP);
+			if (IS_ERR(rq)) {
+				err = PTR_ERR(rq);
+				goto err_ctx;
+			}
+
+			i915_request_add(rq);
+
+			if (!wait_for_spinner(&spin, rq)) {
+				i915_gem_set_wedged(i915);
+				err = -EIO;
+				goto err_ctx;
+			}
+
+			/* Flush the tasklet to give try_preempt_reset() a chance */
+			tasklet_schedule(t);
+			tasklet_kill(t);
+			GEM_BUG_ON(i915_request_completed(rq));
+
+			GEM_TRACE("%s triggering %s reset\n",
+				  engine->name, p->name);
+			p->critical_section_begin();
+
+			mark_preemption_hang(&engine->execlists);
+			err = try_preempt_reset(engine); /* call under test */
+
+			p->critical_section_end();
+			if (err) {
+				pr_err("Preempt %s reset failed on %s, tasklet state %lx\n",
+				       p->name, engine->name, t->state);
+				spinner_end(&spin);
+				i915_gem_set_wedged(i915);
+				goto err_ctx;
+			}
+
+			if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+				err = -EIO;
+				goto err_ctx;
+			}
+		}
+	}
+
+	err = 0; /* every phase passed on every engine */
+err_ctx:
+	kernel_context_close(ctx);
+err_spin:
+	spinner_fini(&spin);
+err_unlock:
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 static int random_range(struct rnd_state *rnd, int min, int max)
 {
 	return i915_prandom_u32_max_state(max - min, rnd) + min;
@@ -1288,6 +1410,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_late_preempt),
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_timeout),
+		SUBTEST(live_preempt_reset),
 		SUBTEST(live_preempt_smoke),
 		SUBTEST(live_virtual_engine),
 		SUBTEST(live_virtual_bond),
-- 
2.19.0



More information about the Intel-gfx-trybot mailing list