[Intel-gfx] [PATCH 43/57] drm/i915/gt: Delay taking irqoff for execlists submission

Mon Feb 1 08:57:01 UTC 2021

Before we take the irqsafe spinlock to dequeue requests and submit them
to HW, first check, without taking the lock, whether we need to take any
action at all (i.e. whether the HW is ready for more work, or whether we
need to preempt the currently executing context). When no action is
required, we return before disabling interrupts or taking the spinlock.
We will then likely skip taking the spinlock, and so reduce contention.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 88 ++++++++-----------
 1 file changed, 39 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index e8f192984e88..d4ae65af7dc1 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1014,24 +1014,6 @@ static void virtual_xfer_context(struct virtual_engine *ve,
 	}
 }
 
-static void defer_active(struct intel_engine_cs *engine)
-{
-	struct i915_request *rq;
-
-	rq = __i915_sched_rewind_requests(engine);
-	if (!rq)
-		return;
-
-	/*
-	 * We want to move the interrupted request to the back of
-	 * the round-robin list (i.e. its priority level), but
-	 * in doing so, we must then move all requests that were in
-	 * flight and were waiting for the interrupted request to
-	 * be run after it again.
-	 */
-	__i915_sched_defer_request(engine, rq);
-}
-
 static bool
 timeslice_yield(const struct intel_engine_execlists *el,
 		const struct i915_request *rq)
@@ -1312,8 +1294,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
-	spin_lock(&se->lock);
-
 	/*
 	 * If the queue is higher priority than the last
 	 * request in the currently active context, submit afresh.
@@ -1336,24 +1316,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				     rq_deadline(last),
 				     rq_prio(last));
 			record_preemption(execlists);
-
-			/*
-			 * Don't let the RING_HEAD advance past the breadcrumb
-			 * as we unwind (and until we resubmit) so that we do
-			 * not accidentally tell it to go backwards.
-			 */
-			ring_set_paused(engine, 1);
-
-			/*
-			 * Note that we have not stopped the GPU at this point,
-			 * so we are unwinding the incomplete requests as they
-			 * remain inflight and so by the time we do complete
-			 * the preemption, some of the unwound requests may
-			 * complete!
-			 */
-			__i915_sched_rewind_requests(engine);
-
-			last = NULL;
+			last = (void *)1;
 		} else if (timeslice_expired(engine, last)) {
 			ENGINE_TRACE(engine,
 				     "expired:%s last=%llx:%llu, deadline=%llu, now=%llu, yield?=%s\n",
@@ -1380,8 +1343,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * same context again, grant it a full timeslice.
 			 */
 			cancel_timer(&execlists->timer);
-			ring_set_paused(engine, 1);
-			defer_active(engine);
 
 			/*
 			 * Unlike for preemption, if we rewind and continue
@@ -1396,7 +1357,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * normal save/restore will preserve state and allow
 			 * us to later continue executing the same request.
 			 */
-			last = NULL;
+			last = (void *)3;
 		} else {
 			/*
 			 * Otherwise if we already have a request pending
@@ -1412,12 +1373,46 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * Even if ELSP[1] is occupied and not worthy
 				 * of timeslices, our queue might be.
 				 */
-				spin_unlock(&se->lock);
 				return;
 			}
 		}
 	}
 
+	local_irq_disable(); /* irq remains off until after ELSP write */
+	spin_lock(&se->lock);
+
+	if ((unsigned long)last & 1) {
+		bool defer = (unsigned long)last & 2;
+
+		/*
+		 * Don't let the RING_HEAD advance past the breadcrumb
+		 * as we unwind (and until we resubmit) so that we do
+		 * not accidentally tell it to go backwards.
+		 */
+		ring_set_paused(engine, (unsigned long)last);
+
+		/*
+		 * Note that we have not stopped the GPU at this point,
+		 * so we are unwinding the incomplete requests as they
+		 * remain inflight and so by the time we do complete
+		 * the preemption, some of the unwound requests may
+		 * complete!
+		 */
+		last = __i915_sched_rewind_requests(engine);
+
+		/*
+		 * We want to move the interrupted request to the back of
+		 * the round-robin list (i.e. its priority level), but
+		 * in doing so, we must then move all requests that were in
+		 * flight and were waiting for the interrupted request to
+		 * be run after it again.
+		 */
+		if (last && defer)
+			__i915_sched_defer_request(engine, last);
+
+		last = NULL;
+	}
+
 	if (!RB_EMPTY_ROOT(&execlists->virtual.rb_root))
 		virtual_requeue(engine, last);
 
@@ -1533,13 +1528,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			i915_request_put(*port);
 		*execlists->pending = NULL;
 	}
-}
 
-static void execlists_dequeue_irq(struct intel_engine_cs *engine)
-{
-	local_irq_disable(); /* Suspend interrupts across request submission */
-	execlists_dequeue(engine);
-	local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
+	local_irq_enable();
 }
 
 static void clear_ports(struct i915_request **ports, int count)
@@ -2191,7 +2181,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
 		execlists_reset(engine);
 
 	if (!engine->execlists.pending[0]) {
-		execlists_dequeue_irq(engine);
+		execlists_dequeue(engine);
 		start_timeslice(engine);
 	}
 
-- 
2.20.1