[Intel-gfx] [PATCH 4/8] drm/i915: Push the wakeref->count deferral to the backend

Mon Aug 12 09:10:41 UTC 2019

If the backend wishes to defer the wakeref parking, make it responsible
for unlocking the wakeref (i.e. bumping the counter). This allows it to
time the unlock much more carefully in case it happens to needs the
wakeref to be active during its deferral.

For instance, during engine parking we may choose to emit an idle
barrier (a request). To do so, we borrow the engine->kernel_context
timeline and to ensure exclusive access we keep the
engine->wakeref.count as 0. However, to submit that request to HW may
require a intel_engine_pm_get() (e.g. to keep the submission tasklet
alive) and before we allow that we have to rewake our wakeref to avoid a
recursive deadlock.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_pm.c |  8 ++-
 drivers/gpu/drm/i915/i915_request.c       | 66 ++++++++++++-----------
 drivers/gpu/drm/i915/i915_request.h       |  2 +
 drivers/gpu/drm/i915/i915_scheduler.c     |  3 +-
 drivers/gpu/drm/i915/intel_wakeref.c      |  4 +-
 drivers/gpu/drm/i915/intel_wakeref.h      | 11 ++++
 6 files changed, 56 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 6b15e3335dd6..ad37c9808c1f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -68,9 +68,13 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 
 	/* Check again on the next retirement. */
 	engine->wakeref_serial = engine->serial + 1;
-
 	i915_request_add_active_barriers(rq);
+
+	rq->sched.attr.priority = INT_MAX; /* Preemption barrier */
+
 	__i915_request_commit(rq);
+	__intel_wakeref_defer_park(&engine->wakeref);
+	__i915_request_queue(rq, NULL);
 
 	return false;
 }
@@ -98,7 +102,7 @@ static int __engine_park(struct intel_wakeref *wf)
 	intel_engine_pool_park(&engine->pool);
 
 	/* Must be reset upon idling, or we may miss the busy wakeup. */
-	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
+	engine->execlists.queue_priority_hint = INT_MIN;
 
 	if (engine->park)
 		engine->park(engine);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 43175bada09e..4703aab3ae21 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1186,6 +1186,12 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 		list_add(&ring->active_link, &rq->i915->gt.active_rings);
 	rq->emitted_jiffies = jiffies;
 
+	return prev;
+}
+
+void __i915_request_queue(struct i915_request *rq,
+			  const struct i915_sched_attr *attr)
+{
 	/*
 	 * Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
@@ -1199,43 +1205,15 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	 */
 	local_bh_disable();
 	i915_sw_fence_commit(&rq->semaphore);
-	if (engine->schedule) {
-		struct i915_sched_attr attr = rq->gem_context->sched;
-
-		/*
-		 * Boost actual workloads past semaphores!
-		 *
-		 * With semaphores we spin on one engine waiting for another,
-		 * simply to reduce the latency of starting our work when
-		 * the signaler completes. However, if there is any other
-		 * work that we could be doing on this engine instead, that
-		 * is better utilisation and will reduce the overall duration
-		 * of the current work. To avoid PI boosting a semaphore
-		 * far in the distance past over useful work, we keep a history
-		 * of any semaphore use along our dependency chain.
-		 */
-		if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
-			attr.priority |= I915_PRIORITY_NOSEMAPHORE;
-
-		/*
-		 * Boost priorities to new clients (new request flows).
-		 *
-		 * Allow interactive/synchronous clients to jump ahead of
-		 * the bulk clients. (FQ_CODEL)
-		 */
-		if (list_empty(&rq->sched.signalers_list))
-			attr.priority |= I915_PRIORITY_WAIT;
-
-		engine->schedule(rq, &attr);
-	}
+	if (attr && rq->engine->schedule)
+		rq->engine->schedule(rq, attr);
 	i915_sw_fence_commit(&rq->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
-
-	return prev;
 }
 
 void i915_request_add(struct i915_request *rq)
 {
+	struct i915_sched_attr attr = rq->gem_context->sched;
 	struct i915_request *prev;
 
 	lockdep_assert_held(&rq->timeline->mutex);
@@ -1245,6 +1223,32 @@ void i915_request_add(struct i915_request *rq)
 
 	prev = __i915_request_commit(rq);
 
+	/*
+	 * Boost actual workloads past semaphores!
+	 *
+	 * With semaphores we spin on one engine waiting for another,
+	 * simply to reduce the latency of starting our work when
+	 * the signaler completes. However, if there is any other
+	 * work that we could be doing on this engine instead, that
+	 * is better utilisation and will reduce the overall duration
+	 * of the current work. To avoid PI boosting a semaphore
+	 * far in the distance past over useful work, we keep a history
+	 * of any semaphore use along our dependency chain.
+	 */
+	if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
+		attr.priority |= I915_PRIORITY_NOSEMAPHORE;
+
+	/*
+	 * Boost priorities to new clients (new request flows).
+	 *
+	 * Allow interactive/synchronous clients to jump ahead of
+	 * the bulk clients. (FQ_CODEL)
+	 */
+	if (list_empty(&rq->sched.signalers_list))
+		attr.priority |= I915_PRIORITY_WAIT;
+
+	__i915_request_queue(rq, &attr);
+
 	/*
 	 * In typical scenarios, we do not expect the previous request on
 	 * the timeline to be still tracked by timeline->last_request if it
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 313df3c37158..fec1d5f17c94 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -251,6 +251,8 @@ struct i915_request * __must_check
 i915_request_create(struct intel_context *ce);
 
 struct i915_request *__i915_request_commit(struct i915_request *request);
+void __i915_request_queue(struct i915_request *rq,
+			  const struct i915_sched_attr *attr);
 
 void i915_request_retire_upto(struct i915_request *rq);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 0bd452e851d8..7b84ebca2901 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -349,8 +349,7 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 	unsigned long flags;
 
 	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
-
-	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
+	if (READ_ONCE(rq->sched.attr.priority) & bump)
 		return;
 
 	spin_lock_irqsave(&schedule_lock, flags);
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index d4443e81c1c8..868cc78048d0 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -57,12 +57,10 @@ static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
 	if (!atomic_dec_and_test(&wf->count))
 		goto unlock;
 
+	/* ops->put() must reschedule its own release on error/deferral */
 	if (likely(!wf->ops->put(wf))) {
 		rpm_put(wf);
 		wake_up_var(&wf->wakeref);
-	} else {
-		/* ops->put() must schedule its own release on deferral */
-		atomic_set_release(&wf->count, 1);
 	}
 
 unlock:
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 535a3a12864b..5f0c972a80fb 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -163,6 +163,17 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
 	return READ_ONCE(wf->wakeref);
 }
 
+/**
+ * __intel_wakeref_defer_park: Defer the current park callback
+ * @wf: the wakeref
+ */
+static inline void
+__intel_wakeref_defer_park(struct intel_wakeref *wf)
+{
+	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
+	atomic_set_release(&wf->count, 1);
+}
+
 /**
  * intel_wakeref_wait_for_idle: Wait until the wakeref is idle
  * @wf: the wakeref
-- 
2.23.0.rc1