[PATCH 02/36] drm/i915/execlists: Suppress preempting self

Chris Wilson chris at chris-wilson.co.uk
Sun Jan 20 23:56:16 UTC 2019


In order to avoid preempting ourselves, we currently refuse to schedule
the tasklet if we reschedule an inflight context. However, this glosses
over a few issues, such as what happens after a CS completion event when
we then preempt the newly executing context with itself, or when
something else causes a tasklet_schedule that triggers the same
evaluation and preempts the active context with itself.

To avoid the extra complications, after deciding that we have
potentially queued a request with higher priority than the currently
executing request, inspect the head of the queue to see if it is indeed
of higher priority and from another context (a simplified sketch of this
check follows the diffstat below).

References: a2bf92e8cc16 ("drm/i915/execlists: Avoid kicking priority on the current context")
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/i915_scheduler.c | 20 ++++++++++++----
 drivers/gpu/drm/i915/intel_lrc.c      | 33 ++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 5 deletions(-)
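
As a reference, below is a minimal user-space model of the decision being
made. The struct and helper names are simplified stand-ins rather than the
driver's real i915_request/intel_context types, and only the head of the
priority queue is modelled (the real check_preempt() also walks the requests
already submitted behind the inflight one):

/*
 * Simplified, self-contained model of the self-preemption suppression.
 * All types and helpers here are illustrative stand-ins, not the actual
 * i915 structures.
 */
#include <stdbool.h>
#include <stdio.h>

struct model_request {
	int ctx;	/* stand-in for rq->hw_context */
	int prio;	/* stand-in for rq_prio(rq) */
};

/*
 * Scheduler side (mirrors inflight()): suppress the tasklet kick when the
 * request we just bumped shares its context with the request on the port.
 */
static bool model_inflight(const struct model_request *rq,
			   const struct model_request *active)
{
	if (!active) /* nothing executing, so we cannot preempt ourselves */
		return false;

	return rq->ctx == active->ctx;
}

/*
 * Dequeue side (mirrors check_preempt()): only inject a preemption if the
 * head of the queue is higher priority than the inflight request and comes
 * from a different context.
 */
static bool model_check_preempt(const struct model_request *last,
				const struct model_request *queue_head)
{
	if (!queue_head)
		return false;

	return queue_head->ctx != last->ctx && queue_head->prio > last->prio;
}

int main(void)
{
	struct model_request active    = { .ctx = 1, .prio = 0 };
	struct model_request same_ctx  = { .ctx = 1, .prio = 10 };
	struct model_request other_ctx = { .ctx = 2, .prio = 10 };

	/* Bumping a request of the already-executing context: no kick. */
	printf("suppress kick:         %d\n",
	       model_inflight(&same_ctx, &active));

	/* Only a higher-priority request from another context preempts. */
	printf("preempt for same ctx:  %d\n",
	       model_check_preempt(&active, &same_ctx));
	printf("preempt for other ctx: %d\n",
	       model_check_preempt(&active, &other_ctx));

	return 0;
}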

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 340faea6c08a..fb5d953430e5 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -239,6 +239,18 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 	return engine;
 }
 
+static bool inflight(const struct i915_request *rq,
+		     const struct intel_engine_cs *engine)
+{
+	const struct i915_request *active;
+
+	if (!rq->global_seqno)
+		return false;
+
+	active = port_request(engine->execlists.port);
+	return active->hw_context == rq->hw_context;
+}
+
 static void __i915_schedule(struct i915_request *rq,
 			    const struct i915_sched_attr *attr)
 {
@@ -328,6 +340,7 @@ static void __i915_schedule(struct i915_request *rq,
 		INIT_LIST_HEAD(&dep->dfs_link);
 
 		engine = sched_lock_engine(node, engine);
+		lockdep_assert_held(&engine->timeline.lock);
 
 		/* Recheck after acquiring the engine->timeline.lock */
 		if (prio <= node->attr.priority || node_signaled(node))
@@ -356,17 +369,16 @@ static void __i915_schedule(struct i915_request *rq,
 		if (prio <= engine->execlists.queue_priority)
 			continue;
 
+		engine->execlists.queue_priority = prio;
+
 		/*
 		 * If we are already the currently executing context, don't
 		 * bother evaluating if we should preempt ourselves.
 		 */
-		if (node_to_request(node)->global_seqno &&
-		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
-				      node_to_request(node)->global_seqno))
+		if (inflight(node_to_request(node), engine))
 			continue;
 
 		/* Defer (tasklet) submission until after all of our updates. */
-		engine->execlists.queue_priority = prio;
 		tasklet_hi_schedule(&engine->execlists.tasklet);
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c0a42afaf177..28d183439952 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -190,6 +190,30 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 		!i915_request_completed(last));
 }
 
+static inline bool check_preempt(const struct intel_engine_cs *engine,
+				 const struct i915_request *rq)
+{
+	const struct intel_context *ctx = rq->hw_context;
+	const int prio = rq_prio(rq);
+	struct rb_node *rb;
+	int idx;
+
+	list_for_each_entry_continue(rq, &engine->timeline.requests, link) {
+		GEM_BUG_ON(rq->hw_context == ctx);
+		if (rq_prio(rq) > prio)
+			return true;
+	}
+
+	rb = rb_first_cached(&engine->execlists.queue);
+	if (!rb)
+		return false;
+
+	priolist_for_each_request(rq, to_priolist(rb), idx)
+		return rq->hw_context != ctx && rq_prio(rq) > prio;
+
+	return false;
+}
+
 /*
  * The context descriptor encodes various attributes of a context,
  * including its GTT address and some flags. Because it's fairly
@@ -302,6 +326,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	 */
 	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
 		prio |= I915_PRIORITY_NEWCLIENT;
+		active->sched.attr.priority = prio;
 		list_move_tail(&active->sched.link,
 			       i915_sched_lookup_priolist(engine, prio));
 	}
@@ -579,7 +604,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
 			return;
 
-		if (need_preempt(engine, last, execlists->queue_priority)) {
+		if (need_preempt(engine, last, execlists->queue_priority) &&
+		    check_preempt(engine, last)) {
 			inject_preempt_context(engine);
 			return;
 		}
@@ -625,6 +651,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		int i;
 
 		priolist_for_each_request_consume(rq, rn, p, i) {
+			GEM_BUG_ON(last &&
+				   need_preempt(engine, last, rq_prio(rq)));
+
 			/*
 			 * Can we combine this request with the current port?
 			 * It has to be the same context/ringbuffer and not
@@ -868,6 +897,8 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u32 * const buf = execlists->csb_status;
 	u8 head, tail;
 
+	lockdep_assert_held(&engine->timeline.lock);
+
 	/*
 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
 	 * When reading from the csb_write mmio register, we have to be
-- 
2.20.1


