[PATCH 13/17] drm/i915/execlists: Force preemption via reset on timeout

Chris Wilson chris at chris-wilson.co.uk
Wed Mar 28 19:34:31 UTC 2018


Install a timer when trying to preempt on behalf of an important
context such that if the active context does not honour the preemption
request within the desired timeout, then we reset the GPU to allow the
important context to run.

v2: Install the timer on scheduling the preempt request; long before we
even try to inject preemption into the ELSP, as the tasklet/injection
may itself be blocked.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c           | 85 +++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  8 ++-
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 65 +++++++++++++++++++++++
 3 files changed, 149 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 786b36576236..cba51cceb40d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -536,6 +536,43 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
+{
+	struct intel_engine_execlists *execlists =
+		container_of(hrtimer, typeof(*execlists), preempt_timer);
+
+	GEM_TRACE("%s\n",
+		  container_of(execlists,
+			       struct intel_engine_cs,
+			       execlists)->name);
+
+	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT))
+		return HRTIMER_NORESTART;
+
+	queue_work(system_highpri_wq, &execlists->preempt_reset);
+	return HRTIMER_NORESTART;
+}
+
+static void preempt_reset(struct work_struct *work)
+{
+	struct intel_engine_execlists *execlists =
+		container_of(work, typeof(*execlists), preempt_reset);
+	struct intel_engine_cs *engine =
+		  container_of(execlists, struct intel_engine_cs, execlists);
+
+	GEM_TRACE("%s\n", engine->name);
+
+	tasklet_disable(&execlists->tasklet);
+
+	execlists->tasklet.func(execlists->tasklet.data);
+
+	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT))
+		i915_handle_error(engine->i915, BIT(engine->id), 0,
+				  "preemption time out on %s", engine->name);
+
+	tasklet_enable(&execlists->tasklet);
+}
+
 static void complete_preempt_context(struct intel_engine_execlists *execlists)
 {
 	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
@@ -544,6 +581,12 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 	execlists_unwind_incomplete_requests(execlists);
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+
+	/* If the timer already fired, complete the reset */
+	if (hrtimer_try_to_cancel(&execlists->preempt_timer) < 0)
+		return;
+
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -709,6 +752,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
 done:
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
 	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
 	execlists->first = rb;
 	if (submit)
@@ -1078,16 +1122,38 @@ static void queue_request(struct intel_engine_cs *engine,
 	list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
 }
 
-static void __submit_queue(struct intel_engine_cs *engine, int prio)
+static void __submit_queue(struct intel_engine_cs *engine,
+			   int prio, unsigned int timeout)
 {
-	engine->execlists.queue_priority = prio;
-	tasklet_hi_schedule(&engine->execlists.tasklet);
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+
+	GEM_TRACE("%s prio=%d (previous=%d)\n",
+		  engine->name, prio, execlists->queue_priority);
+
+	if (unlikely(execlists_is_active(execlists,
+					 EXECLISTS_ACTIVE_PREEMPT_TIMEOUT)))
+		hrtimer_cancel(&execlists->preempt_timer);
+
+	execlists->queue_priority = prio;
+	tasklet_hi_schedule(&execlists->tasklet);
+
+	/* Set a timer to force preemption vs hostile userspace */
+	if (timeout && prio > 0) {
+		GEM_TRACE("%s preempt timeout=%uns\n", engine->name, timeout);
+
+		execlists_set_active(execlists,
+				     EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
+		hrtimer_start(&execlists->preempt_timer,
+			      ns_to_ktime(timeout),
+			      HRTIMER_MODE_REL);
+	}
 }
 
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static void submit_queue(struct intel_engine_cs *engine,
+			 int prio, unsigned int timeout)
 {
 	if (prio > engine->execlists.queue_priority)
-		__submit_queue(engine, prio);
+		__submit_queue(engine, prio, timeout);
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1099,7 +1165,7 @@ static void execlists_submit_request(struct i915_request *request)
 	spin_lock_irqsave(&engine->timeline->lock, flags);
 
 	queue_request(engine, &request->priotree, rq_prio(request));
-	submit_queue(engine, rq_prio(request));
+	submit_queue(engine, rq_prio(request), 0);
 
 	GEM_BUG_ON(!engine->execlists.first);
 	GEM_BUG_ON(list_empty(&request->priotree.link));
@@ -1223,7 +1289,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
 
 		if (prio > engine->execlists.queue_priority &&
 		    i915_sw_fence_done(&pt_to_request(pt)->submit))
-			__submit_queue(engine, prio);
+			__submit_queue(engine, prio, 0);
 	}
 
 	spin_unlock_irq(&engine->timeline->lock);
@@ -2309,6 +2375,11 @@ logical_ring_setup(struct intel_engine_cs *engine)
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
 
+	INIT_WORK(&engine->execlists.preempt_reset, preempt_reset);
+	hrtimer_init(&engine->execlists.preempt_timer,
+		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	engine->execlists.preempt_timer.function = preempt_timeout;
+
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4c71dcdc722b..d6a5678dcf3c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -266,8 +266,9 @@ struct intel_engine_execlists {
 	 */
 	unsigned int active;
 #define EXECLISTS_ACTIVE_USER 0
-#define EXECLISTS_ACTIVE_PREEMPT 1
-#define EXECLISTS_ACTIVE_HWACK 2
+#define EXECLISTS_ACTIVE_HWACK 1
+#define EXECLISTS_ACTIVE_PREEMPT 2
+#define EXECLISTS_ACTIVE_PREEMPT_TIMEOUT 2
 
 	/**
 	 * @port_mask: number of execlist ports - 1
@@ -313,6 +314,9 @@ struct intel_engine_execlists {
 	 * @preempt_complete_status: expected CSB upon completing preemption
 	 */
 	u32 preempt_complete_status;
+
+	struct hrtimer preempt_timer;
+	struct work_struct preempt_reset;
 };
 
 #define INTEL_ENGINE_CS_MAX_NAME 8
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 0e52107bbe10..af16b0a40653 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -482,12 +482,77 @@ static int live_late_preempt(void *arg)
 	goto err_ctx_lo;
 }
 
+static int live_preempt_timeout(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id id;
+	struct spinner spin;
+	int err = -ENOMEM;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	if (spinner_init(&spin, i915))
+		goto err_unlock;
+
+	ctx= kernel_context(i915);
+	if (!ctx)
+		goto err_spin;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+
+		rq = spinner_create_request(&spin, ctx, engine, MI_NOOP);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_ctx;
+		}
+
+		i915_request_add(rq);
+		if (!wait_for_spinner(&spin, rq)) {
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto err_ctx;
+		}
+
+		execlists_set_active(&engine->execlists,
+				     EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
+		preempt_reset(&engine->execlists.preempt_reset);
+
+		/*
+		 * In such a simple case as above; triggering a reset allows
+		 * us to save and restore the hog perfectly...
+		 */
+		spinner_end(&spin);
+
+		if (flush_test(i915, I915_WAIT_LOCKED)) {
+			err = -EIO;
+			goto err_ctx;
+		}
+	}
+
+	err = 0;
+err_ctx:
+	kernel_context_close(ctx);
+err_spin:
+	spinner_fini(&spin);
+err_unlock:
+	flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_sanitycheck),
 		SUBTEST(live_preempt),
 		SUBTEST(live_late_preempt),
+		SUBTEST(live_preempt_timeout),
 	};
 	return i915_subtests(tests, i915);
 }
-- 
2.16.3



More information about the Intel-gfx-trybot mailing list