[PATCH 2/2] drm/i915/scheduler: emulate a scheduler for guc

Chris Wilson chris at chris-wilson.co.uk
Tue Feb 28 15:26:49 UTC 2017


This emulates execlists on top of the GuC in order to defer submission of
requests to the hardware. This deferral allows time for high priority
requests to gazump their way to the head of the queue, however it nerfs
the GuC by converting it back into a simple execlist (where the CPU has
to wake up after every request to feed new commands into the GuC).

v2: Drop hack status - though iirc there is still a lockdep inversion
between fence and engine->timeline->lock (which is impossible as the
nesting only occurs on different fences - hopefully just requires some
judicious lockdep annotation)
v3: Apply lockdep nesting to enabling signaling on the request, using
the pattern we already have in __i915_gem_request_submit();
v4: Replaying requests after a hang also now needs the timeline
spinlock, to disable the interrupts at least
v5: Hold wq lock for completeness, and emit a tracepoint for enabling signal
v6: Reorder interrupt checking for a happier gcc.
v7: Only signal the tasklet after a user-interrupt if using guc scheduling
v8: Restore lost update of rq through the i915_guc_irq_handler (Tvrtko)
v9: Avoid re-initialising the engine->irq_tasklet from inside a reset
v10: Hook up the execlists-style tracepoints

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 103 +++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_irq.c            |  13 +++-
 drivers/gpu/drm/i915/intel_lrc.c           |   5 +-
 3 files changed, 109 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 7b535a32fc27..915091296145 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -25,6 +25,8 @@
 #include "i915_drv.h"
 #include "intel_uc.h"
 
+#include <trace/events/dma_fence.h>
+
 /**
  * DOC: GuC-based command submission
  *
@@ -521,8 +523,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
 	if (i915_vma_is_map_and_fenceable(rq->ring->vma))
 		POSTING_READ_FW(GUC_STATUS);
 
-	trace_i915_gem_request_in(rq, 0);
-
 	/* We are always called with irqs disabled */
 	GEM_BUG_ON(!irqs_disabled());
 
@@ -544,10 +544,99 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
 
 static void i915_guc_submit(struct drm_i915_gem_request *rq)
 {
-	i915_gem_request_submit(rq);
+	__i915_gem_request_submit(rq);
 	__i915_guc_submit(rq);
 }
 
+static void nested_enable_signaling(struct drm_i915_gem_request *rq)
+{
+	/* If we use dma_fence_enable_sw_signaling() directly, lockdep
+	 * detects an ordering issue between the fence lockclass and the
+	 * global_timeline. This circular dependency can only occur via 2
+	 * different fences (but same fence lockclass), so we use the nesting
+	 * annotation here to prevent the warn, equivalent to the nesting
+	 * inside i915_gem_request_submit() for when we also enable the
+	 * signaler.
+	 */
+
+	if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+			     &rq->fence.flags))
+		return;
+
+	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
+	trace_dma_fence_enable_signal(&rq->fence);
+
+	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
+	intel_engine_enable_signaling(rq);
+	spin_unlock(&rq->lock);
+}
+
+static bool i915_guc_dequeue(struct intel_engine_cs *engine)
+{
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *last = port[0].request;
+	unsigned long flags;
+	struct rb_node *rb;
+	bool submit = false;
+
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+	rb = engine->execlist_first;
+	while (rb) {
+		struct drm_i915_gem_request *rq =
+			rb_entry(rb, typeof(*rq), priotree.node);
+
+		if (last && rq->ctx != last->ctx) {
+			if (port != engine->execlist_port)
+				break;
+
+			i915_gem_request_assign(&port->request, last);
+			nested_enable_signaling(last);
+			port++;
+		}
+
+		rb = rb_next(rb);
+		rb_erase(&rq->priotree.node, &engine->execlist_queue);
+		RB_CLEAR_NODE(&rq->priotree.node);
+		rq->priotree.priority = INT_MAX;
+
+		trace_i915_gem_request_in(rq, port - engine->execlist_port);
+		i915_guc_submit(rq);
+		last = rq;
+		submit = true;
+	}
+	if (submit) {
+		i915_gem_request_assign(&port->request, last);
+		nested_enable_signaling(last);
+		engine->execlist_first = rb;
+	}
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+
+	return submit;
+}
+
+static void i915_guc_irq_handler(unsigned long data)
+{
+	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *rq;
+	bool submit;
+
+	do {
+		rq = port[0].request;
+		while (rq && i915_gem_request_completed(rq)) {
+			trace_i915_gem_request_out(rq);
+			i915_gem_request_put(rq);
+			port[0].request = port[1].request;
+			port[1].request = NULL;
+			rq = port[0].request;
+		}
+
+		submit = false;
+		if (!port[1].request)
+			submit = i915_guc_dequeue(engine);
+	} while (submit);
+}
+
 /*
  * Everything below here is concerned with setup & teardown, and is
  * therefore not part of the somewhat time-critical batch-submission
@@ -958,8 +1047,12 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 		const int wqi_size = sizeof(struct guc_wq_item);
 		struct drm_i915_gem_request *rq;
 
-		engine->submit_request = i915_guc_submit;
-		engine->schedule = NULL;
+		/* The tasklet was initialised by execlists, and may be in
+		 * a state of flux (across a reset) and so we just want to
+		 * take over the callback without changing any other state
+		 * in the tasklet.
+		 */
+		engine->irq_tasklet.func = i915_guc_irq_handler;
 
 		/* Replay the current set of previously submitted requests */
 		spin_lock_irq(&engine->timeline->lock);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index ca8c7b22748e..82b8abed4460 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1377,13 +1377,20 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static __always_inline void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 {
-	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
-		notify_ring(engine);
+	bool tasklet = false;
 
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
 		set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-		tasklet_hi_schedule(&engine->irq_tasklet);
+		tasklet = true;
 	}
+
+	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
+		notify_ring(engine);
+		tasklet |= i915.enable_guc_submission;
+	}
+
+	if (tasklet)
+		tasklet_hi_schedule(&engine->irq_tasklet);
 }
 
 static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f9a8545474bc..71d4e86380ac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1187,7 +1187,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 
 	/* After a GPU reset, we may have requests to replay */
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-	if (!execlists_elsp_idle(engine)) {
+	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
 		DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n",
 				 engine->name,
 				 port_seqno(&engine->execlist_port[0]),
@@ -1272,9 +1272,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	request->ring->last_retired_head = -1;
 	intel_ring_update_space(request->ring);
 
-	if (i915.enable_guc_submission)
-		return;
-
 	/* Catch up with any missed context-switch interrupts */
 	if (request->ctx != port[0].request->ctx) {
 		i915_gem_request_put(port[0].request);
-- 
2.11.0



More information about the Intel-gfx-trybot mailing list