[Intel-gfx] [RFC 3/4] drm/i915/scheduler: Use priorities when resubmitting after reset

Tue Mar 28 18:00:28 UTC 2017

Now that we're able to unsubmit requests, we can take advantage of it
during reset. Rather than resubmitting the previous workload directly to
GuC/ELSP, we can simply move the requests back to priority queue,
submitting from the tasklet instead.

Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Michel Thierry <michel.thierry at intel.com>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c            |   1 +
 drivers/gpu/drm/i915/i915_guc_submission.c |  12 ---
 drivers/gpu/drm/i915/intel_lrc.c           | 127 ++++++++++++++++-------------
 3 files changed, 71 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 84ea249..747ff37 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2924,6 +2924,7 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
 	for_each_engine(engine, dev_priv, id) {
 		tasklet_enable(&engine->irq_tasklet);
 		kthread_unpark(engine->breadcrumbs.signaler);
+		tasklet_hi_schedule(&engine->irq_tasklet);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 082f8ae..9975244 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -1245,24 +1245,12 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	guc_interrupts_capture(dev_priv);
 
 	for_each_engine(engine, dev_priv, id) {
-		const int wqi_size = sizeof(struct guc_wq_item);
-		struct drm_i915_gem_request *rq;
-
 		/* The tasklet was initialised by execlists, and may be in
 		 * a state of flux (across a reset) and so we just want to
 		 * take over the callback without changing any other state
 		 * in the tasklet.
 		 */
 		engine->irq_tasklet.func = i915_guc_irq_handler;
-		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
-		/* Replay the current set of previously submitted requests */
-		spin_lock_irq(&engine->timeline->lock);
-		list_for_each_entry(rq, &engine->timeline->requests, link) {
-			guc_client_update_wq_rsvd(client, wqi_size);
-			__i915_guc_submit(rq);
-		}
-		spin_unlock_irq(&engine->timeline->lock);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 107cf91..ff34aba 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -377,6 +377,22 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	writel(lower_32_bits(desc[0]), elsp);
 }
 
+static void execlists_clear_ports(struct intel_engine_cs *engine)
+{
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *rq;
+
+	rq = port->request;
+	while (rq) {
+		i915_gem_request_put(rq);
+		memset(port, 0, sizeof(*port));
+		if (port != engine->execlist_port)
+			break;
+		port++;
+		rq = port->request;
+	}
+}
+
 static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
 {
 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
@@ -504,11 +520,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		execlists_submit_ports(engine);
 }
 
-static bool execlists_elsp_idle(struct intel_engine_cs *engine)
-{
-	return !engine->execlist_port[0].request;
-}
-
 static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
 {
 	const struct execlist_port *port = engine->execlist_port;
@@ -895,6 +906,25 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
 	return ret;
 }
 
+static void intel_lr_resubmit_requests(struct intel_engine_cs *engine)
+{
+	struct drm_i915_gem_request *rq, *prev;
+
+	lockdep_assert_held(&engine->timeline->lock);
+
+	list_for_each_entry_safe_reverse(rq, prev,
+					 &engine->timeline->requests, link) {
+		if (!i915_gem_request_completed(rq)) {
+			__i915_gem_request_unsubmit(rq);
+			trace_i915_gem_request_out(rq);
+			if (insert_request(&rq->priotree,
+					   &engine->execlist_queue, true))
+				engine->execlist_first = &rq->priotree.node;
+		} else
+			break;
+	}
+}
+
 /*
  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
@@ -1160,11 +1190,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	return ret;
 }
 
-static u32 port_seqno(struct execlist_port *port)
-{
-	return port->request ? port->request->global_seqno : 0;
-}
-
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -1186,18 +1211,6 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 
 	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
 
-	/* After a GPU reset, we may have requests to replay */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
-		DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n",
-				 engine->name,
-				 port_seqno(&engine->execlist_port[0]),
-				 port_seqno(&engine->execlist_port[1]));
-		engine->execlist_port[0].count = 0;
-		engine->execlist_port[1].count = 0;
-		execlists_submit_ports(engine);
-	}
-
 	return 0;
 }
 
@@ -1237,10 +1250,10 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
 static void reset_common_ring(struct intel_engine_cs *engine,
 			      struct drm_i915_gem_request *request)
 {
-	struct execlist_port *port = engine->execlist_port;
 	struct intel_context *ce;
+	unsigned long flags;
 
-	/* If the request was innocent, we leave the request in the ELSP
+	/* If the request was innocent, we leave the request intact
 	 * and will try to replay it on restarting. The context image may
 	 * have been corrupted by the reset, in which case we may have
 	 * to service a new GPU hang, but more likely we can continue on
@@ -1250,42 +1263,42 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	 * and have to at least restore the RING register in the context
 	 * image back to the expected values to skip over the guilty request.
 	 */
-	if (!request || request->fence.error != -EIO)
-		return;
-
-	/* We want a simple context + ring to execute the breadcrumb update.
-	 * We cannot rely on the context being intact across the GPU hang,
-	 * so clear it and rebuild just what we need for the breadcrumb.
-	 * All pending requests for this context will be zapped, and any
-	 * future request will be after userspace has had the opportunity
-	 * to recreate its own state.
-	 */
-	ce = &request->ctx->engine[engine->id];
-	execlists_init_reg_state(ce->lrc_reg_state,
-				 request->ctx, engine, ce->ring);
-
-	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
-		i915_ggtt_offset(ce->ring->vma);
-	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
-
-	request->ring->head = request->postfix;
-	intel_ring_update_space(request->ring);
-
-	/* Catch up with any missed context-switch interrupts */
-	if (request->ctx != port[0].request->ctx) {
-		i915_gem_request_put(port[0].request);
-		port[0] = port[1];
-		memset(&port[1], 0, sizeof(port[1]));
+	if (request && request->fence.error == -EIO) {
+		/* We want a simple context + ring to execute the breadcrumb
+		 * update. We cannot rely on the context being intact across
+		 * the GPU hang, so clear it and rebuild just what we need for
+		 * the breadcrumb. All pending requests for this context will
+		 * be zapped, and any future request will be after userspace
+		 * has had the opportunity to recreate its own state.
+		 */
+		ce = &request->ctx->engine[engine->id];
+		execlists_init_reg_state(ce->lrc_reg_state,
+					 request->ctx, engine, ce->ring);
+
+
+		/* Move the RING_HEAD onto the breadcrumb,
+		 * past the hanging batch */
+		ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
+			i915_ggtt_offset(ce->ring->vma);
+		ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+
+		request->ring->head = request->postfix;
+		intel_ring_update_space(request->ring);
+
+		/* Reset WaIdleLiteRestore:bdw,skl as well */
+		request->tail =
+			intel_ring_wrap(request->ring,
+					request->wa_tail -
+					WA_TAIL_DWORDS * sizeof(u32));
+		assert_ring_tail_valid(request->ring, request->tail);
 	}
 
-	GEM_BUG_ON(request->ctx != port[0].request->ctx);
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+	intel_lr_resubmit_requests(engine);
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 
-	/* Reset WaIdleLiteRestore:bdw,skl as well */
-	request->tail =
-		intel_ring_wrap(request->ring,
-				request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
-	assert_ring_tail_valid(request->ring, request->tail);
+	execlists_clear_ports(engine);
+	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
-- 
2.9.3