[PATCH 9/9] drm/i915/execlists: Keep the engine awake until the tasklet is idle

Chris Wilson chris at chris-wilson.co.uk
Sun Aug 11 22:53:02 UTC 2019


The execlists tasklet follows the same principle as the guc's: while
it is not completely interrupt driven, we still expect to generate
interrupts (context-switches) while the tasklet is awake. Currently,
we mark the whole GT as awake for as long as we expect the CS
interrupts to continue, but it is better to localise that tracking to
the engine, e.g. so that intel_engine_is_idle() will flush the
tasklet.
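
The bookkeeping behind this is a lockless reference count packed into
the low two bits of ce->inflight: the first schedule-in of a context
installs the engine pointer and takes the engine wakeref, further
submissions only bump the count, and the last schedule-out drops the
wakeref again. A minimal standalone sketch of that scheme, using C11
atomics in place of the kernel's try_cmpxchg() (names and layout are
illustrative only, not driver code):

/*
 * Userspace sketch of the ce->inflight scheme below: the low two
 * bits of a (4-byte aligned) engine pointer count the extra
 * in-flight submissions, and a compare-exchange loop replaces the
 * old ptr_count_inc()/ptr_count_dec() helpers.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct engine { int id; } __attribute__((aligned(4)));

#define COUNT_MASK ((uintptr_t)3)    /* cf. ptr_unmask_bits(p, 2) */

static _Atomic uintptr_t inflight;   /* stands in for ce->inflight */

static void schedule_in(struct engine *engine)
{
        uintptr_t old = atomic_load(&inflight);

        do {
                if (!old) {
                        /*
                         * First submission: install the engine
                         * pointer. Only the submission tasklet
                         * writes here, hence the plain store
                         * (WRITE_ONCE in the patch); this is where
                         * intel_engine_pm_get() would be taken.
                         */
                        atomic_store(&inflight, (uintptr_t)engine);
                        return;
                }
                /* otherwise ptr_inc(old): bump the low-bit count */
        } while (!atomic_compare_exchange_weak(&inflight, &old,
                                               old + 1));
}

static int schedule_out(void) /* true on the last reference */
{
        uintptr_t old = atomic_load(&inflight), cur;

        do {
                /* ptr_dec(old), or drop the pointer at count zero */
                cur = (old & COUNT_MASK) ? old - 1 : 0;
        } while (!atomic_compare_exchange_weak(&inflight, &old, cur));

        return !cur; /* last one out calls intel_engine_pm_put() */
}

int main(void)
{
        struct engine e = { .id = 0 };

        schedule_in(&e); /* installs pointer, takes the "wakeref" */
        schedule_in(&e); /* lightweight: count bumped to 1 */
        printf("last: %d\n", schedule_out()); /* last: 0 */
        printf("last: %d\n", schedule_out()); /* last: 1 */
        return 0;
}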

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |   4 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +-
 drivers/gpu/drm/i915/gt/intel_engine_pm.h     |   5 +
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 124 +++++++++++-------
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  23 ++--
 drivers/gpu/drm/i915/i915_utils.h             |  20 ++-
 drivers/gpu/drm/i915/intel_wakeref.h          |  17 ++-
 7 files changed, 123 insertions(+), 72 deletions(-)
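
One note on the new wakeref helper in intel_wakeref.h below:
intel_wakeref_put_if_active() is built on
atomic_add_unless(&wf->count, -1, 1), i.e. it drops a hold only when
that hold cannot be the final put, presumably so that a caller such
as the tasklet never enters the engine-parking release path. A
userspace approximation of just that semantic (a sketch with C11
atomics, not the kernel implementation):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int count; /* stands in for wf->count */

/*
 * Mirrors atomic_add_unless(&wf->count, -1, 1): drop one hold, but
 * only if it is not the last; the final put, which would have to
 * park the engine, is refused and left to a caller that can do so.
 */
static bool put_if_active(void)
{
        int c = atomic_load(&count);

        do {
                if (c == 1)
                        return false;
        } while (!atomic_compare_exchange_weak(&count, &c, c - 1));

        return true;
}

int main(void)
{
        atomic_store(&count, 2);
        printf("%d\n", put_if_active()); /* 1: dropped, count now 1 */
        printf("%d\n", put_if_active()); /* 0: refused the final put */
        return 0;
}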

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index a632b20ec4d8..d8ce266c049f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -41,9 +41,7 @@ struct intel_context {
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;
 #define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
-#define intel_context_inflight_count(ce)  ptr_unmask_bits((ce)->inflight, 2)
-#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
-#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)
+#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
 
 	struct i915_address_space *vm;
 	struct i915_gem_context *gem_context;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 7d174af30f8c..bfa4ff40169d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1463,7 +1463,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 		for (port = execlists->pending; (rq = *port); port++) {
 			/* Exclude any contexts already counted in active */
-			if (intel_context_inflight_count(rq->hw_context) == 1)
+			if (intel_context_inflight_count(rq->hw_context) == 0)
 				engine->stats.active++;
 		}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 739c50fefcef..17ead1328d4d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -26,6 +26,11 @@ static inline bool intel_engine_pm_get_if_awake(struct intel_engine_cs *engine)
 	return intel_wakeref_get_if_active(&engine->wakeref);
 }
 
+static inline bool intel_engine_pm_put_if_awake(struct intel_engine_cs *engine)
+{
+	return intel_wakeref_put_if_active(&engine->wakeref);
+}
+
 static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
 {
 	intel_wakeref_put(&engine->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c8f68f61e66f..e28367553127 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -547,27 +547,39 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
 				   status, rq);
 }
 
+static inline struct intel_engine_cs *
+__execlists_schedule_in(struct i915_request *rq)
+{
+	struct intel_engine_cs * const engine = rq->engine;
+	struct intel_context * const ce = rq->hw_context;
+
+	intel_context_get(ce);
+
+	intel_engine_pm_get(engine);
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+	intel_engine_context_in(engine);
+
+	return engine;
+}
+
 static inline struct i915_request *
 execlists_schedule_in(struct i915_request *rq, int idx)
 {
-	struct intel_context *ce = rq->hw_context;
-	int count;
+	struct intel_context * const ce = rq->hw_context;
+	struct intel_engine_cs *old;
 
+	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
 	trace_i915_request_in(rq, idx);
 
-	count = intel_context_inflight_count(ce);
-	if (!count) {
-		intel_context_get(ce);
-		ce->inflight = rq->engine;
-
-		intel_gt_pm_get(ce->inflight->gt);
-		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-		intel_engine_context_in(ce->inflight);
-	}
+	old = READ_ONCE(ce->inflight);
+	do {
+		if (!old) {
+			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
+			break;
+		}
+	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
 
-	intel_context_inflight_inc(ce);
 	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
-
 	return i915_request_get(rq);
 }
 
@@ -581,35 +593,50 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 }
 
 static inline void
-execlists_schedule_out(struct i915_request *rq)
+__execlists_schedule_out(struct i915_request *rq)
 {
-	struct intel_context *ce = rq->hw_context;
+	struct intel_engine_cs * const engine = rq->engine;
+	struct intel_context * const ce = rq->hw_context;
 
-	GEM_BUG_ON(!intel_context_inflight_count(ce));
+	intel_engine_context_out(engine);
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+	intel_engine_pm_put(engine);
 
-	trace_i915_request_out(rq);
+	/*
+	 * If this is part of a virtual engine, its next request may
+	 * have been blocked waiting for access to the active context.
+	 * We have to kick all the siblings again in case we need to
+	 * switch (e.g. the next request is not runnable on this
+	 * engine). Hopefully, we will already have submitted the next
+	 * request before the tasklet runs and do not need to rebuild
+	 * each virtual tree and kick everyone again.
+	 */
+	if (ce->engine != engine)
+		kick_siblings(rq, ce);
 
-	intel_context_inflight_dec(ce);
-	if (!intel_context_inflight_count(ce)) {
-		intel_engine_context_out(ce->inflight);
-		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
-		intel_gt_pm_put(ce->inflight->gt);
+	intel_context_put(ce);
+}
 
-		/*
-		 * If this is part of a virtual engine, its next request may
-		 * have been blocked waiting for access to the active context.
-		 * We have to kick all the siblings again in case we need to
-		 * switch (e.g. the next request is not runnable on this
-		 * engine). Hopefully, we will already have submitted the next
-		 * request before the tasklet runs and do not need to rebuild
-		 * each virtual tree and kick everyone again.
-		 */
-		ce->inflight = NULL;
-		if (rq->engine != ce->engine)
-			kick_siblings(rq, ce);
+static inline void
+execlists_schedule_out(struct i915_request *rq)
+{
+	struct intel_context * const ce = rq->hw_context;
+	struct intel_engine_cs *cur, *old;
+	bool out;
 
-		intel_context_put(ce);
-	}
+	trace_i915_request_out(rq);
+	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+
+	old = READ_ONCE(ce->inflight);
+	do {
+		out = !ptr_unmask_bits(old, 2);
+		if (out)
+			cur = NULL;
+		else
+			cur = ptr_dec(old);
+	} while (!try_cmpxchg(&ce->inflight, &old, cur));
+	if (out)
+		__execlists_schedule_out(rq);
 
 	i915_request_put(rq);
 }
@@ -684,6 +711,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 
 	trace_ports(execlists, msg, execlists->pending);
 
+	if (!execlists->pending[0])
+		return false;
+
 	if (execlists->pending[execlists_num_ports(execlists)])
 		return false;
 
@@ -1356,7 +1386,6 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
-	lockdep_assert_held(&engine->active.lock);
 	GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
 
 	/*
@@ -1427,15 +1456,14 @@ static void process_csb(struct intel_engine_cs *engine)
 				       execlists->pending,
 				       execlists_num_ports(execlists) *
 				       sizeof(*execlists->pending));
-			execlists->pending[0] = NULL;
-
-			trace_ports(execlists, "promoted", execlists->active);
 
 			if (enable_timeslice(engine))
 				mod_timer(&execlists->timer, jiffies + 1);
 
 			if (!inject_preempt_hang(execlists))
 				ring_set_paused(engine, 0);
+
+			WRITE_ONCE(execlists->pending[0], NULL);
 			break;
 
 		case CSB_COMPLETE: /* port0 completed, advanced to port1 */
@@ -1479,8 +1507,6 @@ static void process_csb(struct intel_engine_cs *engine)
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
 	lockdep_assert_held(&engine->active.lock);
-
-	process_csb(engine);
 	if (!engine->execlists.pending[0])
 		execlists_dequeue(engine);
 }
@@ -1494,9 +1520,15 @@ static void execlists_submission_tasklet(unsigned long data)
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->active.lock, flags);
-	__execlists_submission_tasklet(engine);
-	spin_unlock_irqrestore(&engine->active.lock, flags);
+	if (!intel_engine_pm_is_awake(engine))
+		return;
+
+	process_csb(engine);
+	if (!engine->execlists.pending[0]) {
+		spin_lock_irqsave(&engine->active.lock, flags);
+		__execlists_submission_tasklet(engine);
+		spin_unlock_irqrestore(&engine->active.lock, flags);
+	}
 }
 
 static void execlists_submission_timer(struct timer_list *timer)
@@ -2728,7 +2760,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 
 static void execlists_park(struct intel_engine_cs *engine)
 {
-	del_timer_sync(&engine->execlists.timer);
+	del_timer(&engine->execlists.timer);
 	GEM_BUG_ON(*engine->execlists.active);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index fb7a6cddfb44..52edfe8d1c60 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -537,11 +537,7 @@ static struct i915_request *schedule_in(struct i915_request *rq, int idx)
 	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
 	trace_i915_request_in(rq, idx);
 
-	if (!rq->hw_context->inflight)
-		rq->hw_context->inflight = rq->engine;
-	intel_context_inflight_inc(rq->hw_context);
 	intel_engine_pm_get(rq->engine);
-
 	return i915_request_get(rq);
 }
 
@@ -549,10 +545,6 @@ static void schedule_out(struct i915_request *rq)
 {
 	trace_i915_request_out(rq);
 
-	intel_context_inflight_dec(rq->hw_context);
-	if (!intel_context_inflight_count(rq->hw_context))
-		rq->hw_context->inflight = NULL;
-
 	intel_engine_pm_put(rq->engine);
 	i915_request_put(rq);
 }
@@ -654,6 +646,17 @@ static void guc_reset_prepare(struct intel_engine_cs *engine)
 	__tasklet_disable_sync_once(&execlists->tasklet);
 }
 
+static void
+cancel_port_requests(struct intel_engine_execlists * const execlists)
+{
+	struct i915_request * const *port, *rq;
+
+	for (port = execlists->active; (rq = *port); port++)
+		schedule_out(rq);
+	execlists->active =
+		memset(execlists->inflight, 0, sizeof(execlists->inflight));
+}
+
 static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -662,7 +665,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 
 	spin_lock_irqsave(&engine->active.lock, flags);
 
-	execlists_cancel_port_requests(execlists);
+	cancel_port_requests(execlists);
 
 	/* Push back any incomplete requests for replay after the reset. */
 	rq = execlists_unwind_incomplete_requests(execlists);
@@ -705,7 +708,7 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
-	execlists_cancel_port_requests(execlists);
+	cancel_port_requests(execlists);
 
 	/* Mark all executing requests as skipped. */
 	list_for_each_entry(rq, &engine->active.requests, sched.link) {
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index d652ba5d2320..562f756da421 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -161,17 +161,15 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
 	((typeof(ptr))((unsigned long)(ptr) | __bits));			\
 })
 
-#define ptr_count_dec(p_ptr) do {					\
-	typeof(p_ptr) __p = (p_ptr);					\
-	unsigned long __v = (unsigned long)(*__p);			\
-	*__p = (typeof(*p_ptr))(--__v);					\
-} while (0)
-
-#define ptr_count_inc(p_ptr) do {					\
-	typeof(p_ptr) __p = (p_ptr);					\
-	unsigned long __v = (unsigned long)(*__p);			\
-	*__p = (typeof(*p_ptr))(++__v);					\
-} while (0)
+#define ptr_dec(ptr) ({							\
+	unsigned long __v = (unsigned long)(ptr);			\
+	(typeof(ptr))(__v - 1);						\
+})
+
+#define ptr_inc(ptr) ({							\
+	unsigned long __v = (unsigned long)(ptr);			\
+	(typeof(ptr))(__v + 1);						\
+})
 
 #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
 #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 5f0c972a80fb..ef6239f323fe 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -85,7 +85,7 @@ intel_wakeref_get(struct intel_wakeref *wf)
 }
 
 /**
- * intel_wakeref_get_if_in_use: Acquire the wakeref
+ * intel_wakeref_get_if_active: Increase the active wakeref counter
  * @wf: the wakeref
  *
  * Acquire a hold on the wakeref, but only if the wakeref is already
@@ -99,6 +99,21 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
 	return atomic_inc_not_zero(&wf->count);
 }
 
+/**
+ * intel_wakeref_put_if_active: Decrease the active wakeref counter
+ * @wf: the wakeref
+ *
+ * Release a hold on the wakeref, but only if in doing so the
+ * wakeref will remain active, i.e. this is not the final hold.
+ *
+ * Returns: true if the wakeref was released, false otherwise.
+ */
+static inline bool
+intel_wakeref_put_if_active(struct intel_wakeref *wf)
+{
+	return atomic_add_unless(&wf->count, -1, 1);
+}
+
 /**
  * intel_wakeref_put: Release the wakeref
  * @i915: the drm_i915_private device
-- 
2.23.0.rc1


