[Intel-gfx] [PATCH 08/11] drm/i915/execlists: Force preemption via reset on timeout

Tue Mar 27 15:40:36 UTC 2018

On Tue, Mar 27, 2018 at 09:51:20AM +0100, Tvrtko Ursulin wrote:
> 
> On 26/03/2018 12:50, Chris Wilson wrote:
> >Install a timer when trying to preempt on behalf of an important
> >context such that if the active context does not honour the preemption
> >request within the desired timeout, then we reset the GPU to allow the
> >important context to run.
> 
> I suggest renaming patch title to "Implement optional preemption
> delay timeout", or "upper bound", or something, as long as it is not
> "force preemption". :)
> 
> >(Open: should not the timer be started from receiving the high priority
> >request...)
> 
> If you think receiving as in execbuf I think not - that would be
> something else and not preempt timeout.
> 
> >Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/intel_lrc.c        | 53 +++++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |  8 +++++
> >  2 files changed, 61 insertions(+)
> >
> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index 50688fc889d9..6da816d23cb3 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -533,6 +533,47 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> >  	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
> >  	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> >+
> >+	/* Set a timer to force preemption vs hostile userspace */
> >+	if (execlists->queue_preempt_timeout) {
> >+		GEM_TRACE("%s timeout=%uns\n",
> 
> preempt-timeout ?
> 
> >+			  engine->name, execlists->queue_preempt_timeout);
> >+		hrtimer_start(&execlists->preempt_timer,
> >+			      ktime_set(0, execlists->queue_preempt_timeout),
> >+			      HRTIMER_MODE_REL);
> >+	}
> >+}
> >+
> >+static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
> >+{
> >+	struct intel_engine_execlists *execlists =
> >+		container_of(hrtimer, typeof(*execlists), preempt_timer);
> >+
> >+	GEM_TRACE("%s\n",
> >+		  container_of(execlists,
> >+			       struct intel_engine_cs,
> >+			       execlists)->name);
> >+
> >+	queue_work(system_highpri_wq, &execlists->preempt_reset);
> 
> I suppose indirection from hrtimer to worker is for better than
> jiffie timeout granularity? But then queue_work might introduce some
> delay to defeat that.
> 
> I am wondering if simply schedule_delayed_work directly wouldn't be
> good enough. I suppose it is a question for the product group. But
> it is also implementation detail.
> 
I started with schedule_delayed_work in my implementation hoping for
at least consistent msec accuracy, but it was all over the place.
We need msec granularity for the automotive use cases.
-Jeff

> >+	return HRTIMER_NORESTART;
> >+}
> >+
> >+static void preempt_reset(struct work_struct *work)
> >+{
> >+	struct intel_engine_cs *engine =
> >+		container_of(work, typeof(*engine), execlists.preempt_reset);
> >+
> >+	GEM_TRACE("%s\n", engine->name);
> >+
> >+	tasklet_disable(&engine->execlists.tasklet);
> >+
> >+	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
> 
> Comment on why calling the tasklet directly.
> 
> >+
> >+	if (execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> >+		i915_handle_error(engine->i915, BIT(engine->id), 0,
> >+				  "preemption timed out on %s", engine->name);
> 
> Can this race with the normal reset and we end up wit
> i915_handle_error twice simultaneously?
> 
> >+
> >+	tasklet_enable(&engine->execlists.tasklet);
> >  }
> >  static void complete_preempt_context(struct intel_engine_execlists *execlists)
> >@@ -542,6 +583,10 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
> >  	execlists_cancel_port_requests(execlists);
> >  	execlists_unwind_incomplete_requests(execlists);
> >+	/* If the timer already fired, complete the reset */
> >+	if (hrtimer_try_to_cancel(&execlists->preempt_timer) < 0)
> >+		return;
> 
> What about timer which had already fired and queued the worker?
> hrtimer_try_to_cancel will return zero for that case I think.
> 
> >+
> >  	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> >  }
> >@@ -708,6 +753,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >  			kmem_cache_free(engine->i915->priorities, p);
> >  	}
> >  done:
> >+	execlists->queue_preempt_timeout = 0; /* preemption point passed */
> >  	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
> >  	execlists->first = rb;
> >  	if (submit)
> >@@ -864,6 +910,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> >  	/* Remaining _unready_ requests will be nop'ed when submitted */
> >+	execlists->queue_preempt_timeout = 0;
> >  	execlists->queue_priority = INT_MIN;
> >  	execlists->queue = RB_ROOT;
> >  	execlists->first = NULL;
> >@@ -1080,6 +1127,7 @@ static void queue_request(struct intel_engine_cs *engine,
> >  static void __submit_queue(struct intel_engine_cs *engine, int prio)
> >  {
> >  		engine->execlists.queue_priority = prio;
> >+		engine->execlists.queue_preempt_timeout = 0;
> >  		tasklet_hi_schedule(&engine->execlists.tasklet);
> >  }
> >@@ -2270,6 +2318,11 @@ logical_ring_setup(struct intel_engine_cs *engine)
> >  	tasklet_init(&engine->execlists.tasklet,
> >  		     execlists_submission_tasklet, (unsigned long)engine);
> >+	INIT_WORK(&engine->execlists.preempt_reset, preempt_reset);
> >+	hrtimer_init(&engine->execlists.preempt_timer,
> >+		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> >+	engine->execlists.preempt_timer.function = preempt_timeout;
> >+
> >  	logical_ring_default_vfuncs(engine);
> >  	logical_ring_default_irqs(engine);
> >  }
> >diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >index 4c71dcdc722b..7166f47c8489 100644
> >--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> >+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >@@ -284,6 +284,11 @@ struct intel_engine_execlists {
> >  	 */
> >  	int queue_priority;
> >+	/**
> >+	 * @queue_preempt_timeout: Timeout in ns before forcing preemption.
> >+	 */
> >+	unsigned int queue_preempt_timeout;
> >+
> >  	/**
> >  	 * @queue: queue of requests, in priority lists
> >  	 */
> >@@ -313,6 +318,9 @@ struct intel_engine_execlists {
> >  	 * @preempt_complete_status: expected CSB upon completing preemption
> >  	 */
> >  	u32 preempt_complete_status;
> >+
> >+	struct hrtimer preempt_timer;
> >+	struct work_struct preempt_reset;
> >  };
> >  #define INTEL_ENGINE_CS_MAX_NAME 8
> >
> 
> Regards,
> 
> Tvrtko
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx