[PATCH 5/5] mutexless

Chris Wilson chris at chris-wilson.co.uk
Wed Jun 20 21:24:30 UTC 2018


---
 drivers/gpu/drm/i915/i915_gem.c         | 107 ++----------------------
 drivers/gpu/drm/i915/intel_engine_cs.c  |   2 +-
 drivers/gpu/drm/i915/intel_lrc.c        |  35 +++++---
 drivers/gpu/drm/i915/intel_ringbuffer.c |  68 ++++++++++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |   3 +-
 5 files changed, 96 insertions(+), 119 deletions(-)
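
For reference, the heart of the change: the backend reset hook now receives
the per-engine stalled verdict directly, so each backend decides guilt or
innocence for itself instead of going through i915_gem_reset_request() under
struct_mutex. A condensed sketch of the new contract (taken from the
intel_ringbuffer.h and i915_gem.c hunks below; abbreviated, not verbatim):

	/* engine->reset.reset() gains the stalled flag */
	void (*reset)(struct intel_engine_cs *engine,
		      struct i915_request *rq,
		      bool stalled);

	/* i915_gem_reset_engine() reduces to forwarding that verdict */
	void i915_gem_reset_engine(struct intel_engine_cs *engine,
				   struct i915_request *request,
				   bool stalled)
	{
		/* Setup the CS to resume from the breadcrumb of the hung request */
		engine->reset.reset(engine, request, stalled);
	}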

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d5d68238daef..a9cfa973971a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2936,6 +2936,7 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
+#if 0
 static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv,
 					const struct i915_gem_context *ctx)
 {
@@ -2990,6 +2991,7 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
 {
 	atomic_inc(&ctx->active_count);
 }
+#endif
 
 struct i915_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine)
@@ -3069,6 +3071,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 	return err;
 }
 
+#if 0
 static void skip_request(struct i915_request *request)
 {
 	void *vaddr = request->ring->vaddr;
@@ -3110,90 +3113,14 @@ static void engine_skip_context(struct i915_request *request)
 	spin_unlock(&timeline->lock);
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
-
-/* Returns the request if it was guilty of the hang */
-static struct i915_request *
-i915_gem_reset_request(struct intel_engine_cs *engine,
-		       struct i915_request *request,
-		       bool stalled)
-{
-	/* The guilty request will get skipped on a hung engine.
-	 *
-	 * Users of client default contexts do not rely on logical
-	 * state preserved between batches so it is safe to execute
-	 * queued requests following the hang. Non default contexts
-	 * rely on preserved state, so skipping a batch loses the
-	 * evolution of the state and it needs to be considered corrupted.
-	 * Executing more queued batches on top of corrupted state is
-	 * risky. But we take the risk by trying to advance through
-	 * the queued requests in order to make the client behaviour
-	 * more predictable around resets, by not throwing away random
-	 * amount of batches it has prepared for execution. Sophisticated
-	 * clients can use gem_reset_stats_ioctl and dma fence status
-	 * (exported via sync_file info ioctl on explicit fences) to observe
-	 * when it loses the context state and should rebuild accordingly.
-	 *
-	 * The context ban, and ultimately the client ban, mechanism are safety
-	 * valves if client submission ends up resulting in nothing more than
-	 * subsequent hangs.
-	 */
-
-	if (i915_request_completed(request)) {
-		GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n",
-			  engine->name, request->global_seqno,
-			  request->fence.context, request->fence.seqno,
-			  intel_engine_get_seqno(engine));
-		stalled = false;
-	}
-
-	if (stalled) {
-		i915_gem_context_mark_guilty(request->gem_context);
-		skip_request(request);
-
-		/* If this context is now banned, skip all pending requests. */
-		if (i915_gem_context_is_banned(request->gem_context))
-			engine_skip_context(request);
-	} else {
-		/*
-		 * Since this is not the hung engine, it may have advanced
-		 * since the hang declaration. Double check by refinding
-		 * the active request at the time of the reset.
-		 */
-		request = i915_gem_find_active_request(engine);
-		if (request) {
-			unsigned long flags;
-
-			i915_gem_context_mark_innocent(request->gem_context);
-			dma_fence_set_error(&request->fence, -EAGAIN);
-
-			/* Rewind the engine to replay the incomplete rq */
-			spin_lock_irqsave(&engine->timeline.lock, flags);
-			request = list_prev_entry(request, link);
-			if (&request->link == &engine->timeline.requests)
-				request = NULL;
-			spin_unlock_irqrestore(&engine->timeline.lock, flags);
-		}
-	}
-
-	return request;
-}
+#endif
 
 void i915_gem_reset_engine(struct intel_engine_cs *engine,
 			   struct i915_request *request,
 			   bool stalled)
 {
-	/*
-	 * Make sure this write is visible before we re-enable the interrupt
-	 * handlers on another CPU, as tasklet_enable() resolves to just
-	 * a compiler barrier which is insufficient for our purpose here.
-	 */
-	smp_store_mb(engine->irq_posted, 0);
-
-	if (request)
-		request = i915_gem_reset_request(engine, request, stalled);
-
 	/* Setup the CS to resume from the breadcrumb of the hung request */
-	engine->reset.reset(engine, request);
+	engine->reset.reset(engine, request, stalled);
 }
 
 void i915_gem_reset(struct drm_i915_private *dev_priv,
@@ -3202,33 +3129,11 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	for_each_engine(engine, dev_priv, id) {
+	for_each_engine(engine, dev_priv, id)
 		i915_gem_reset_engine(engine,
 				      engine->hangcheck.active_request,
 				      stalled_mask & ENGINE_MASK(id));
 
-		//__retire_engine_upto(engine->hangcheck.active_request);
-
-		/*
-		 * Ostensibily, we always want a context loaded for powersaving,
-		 * so if the engine is idle after the reset, send a request
-		 * to load our scratch kernel_context.
-		 *
-		 * More mysteriously, if we leave the engine idle after a reset,
-		 * the next userspace batch may hang, with what appears to be
-		 * an incoherent read by the CS (presumably stale TLB). An
-		 * empty request appears sufficient to paper over the glitch.
-		 */
-		if (intel_engine_is_idle(engine) && 0) {
-			struct i915_request *rq;
-
-			rq = i915_request_alloc(engine,
-						dev_priv->kernel_context);
-			if (!IS_ERR(rq))
-				i915_request_add(rq);
-		}
-	}
-
 	i915_gem_restore_fences(dev_priv);
 }
 
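After the hunks above, the guilty/innocent bookkeeping and the post-reset
idle-context workaround are gone from i915_gem.c (the helpers are merely
stubbed out under #if 0 for now), and i915_gem_reset() reduces to the engine
loop plus fence restore. Roughly (reconstructed from the hunks, not a verbatim
quote; the stalled_mask parameter name is inferred from its use in the loop):

	void i915_gem_reset(struct drm_i915_private *dev_priv,
			    unsigned int stalled_mask)
	{
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine(engine, dev_priv, id)
			i915_gem_reset_engine(engine,
					      engine->hangcheck.active_request,
					      stalled_mask & ENGINE_MASK(id));

		i915_gem_restore_fences(dev_priv);
	}
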
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 32bf3a408d46..2e49e266f563 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1095,7 +1095,7 @@ void intel_engines_sanitize(struct drm_i915_private *i915)
 
 	for_each_engine(engine, i915, id) {
 		if (engine->reset.reset)
-			engine->reset.reset(engine, NULL);
+			engine->reset.reset(engine, NULL, false);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 33bc914c2ef5..75d09e4419e7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1963,14 +1963,15 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 }
 
 static void execlists_reset(struct intel_engine_cs *engine,
-			    struct i915_request *request)
+			    struct i915_request *rq,
+			    bool stalled)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	unsigned long flags;
 	u32 *regs;
 
 	GEM_TRACE("%s request global=%x, current=%d\n",
-		  engine->name, request ? request->global_seqno : 0,
+		  engine->name, rq ? rq->global_seqno : 0,
 		  intel_engine_get_seqno(engine));
 
 	/* See execlists_cancel_requests() for the irq/spinlock split. */
@@ -1998,6 +1999,9 @@ static void execlists_reset(struct intel_engine_cs *engine,
 
 	local_irq_restore(flags);
 
+	if (!rq || i915_request_completed(rq))
+		return;
+
 	/*
 	 * If the request was innocent, we leave the request in the ELSP
 	 * and will try to replay it on restarting. The context image may
@@ -2009,8 +2013,13 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	 * and have to at least restore the RING register in the context
 	 * image back to the expected values to skip over the guilty request.
 	 */
-	if (!request || request->fence.error != -EIO)
+	if (!stalled) {
+		//i915_gem_context_mark_innocent(rq->gem_context);
+		dma_fence_set_error(&rq->fence, -EAGAIN);
 		return;
+	}
+
+	//i915_gem_context_mark_guilty(rq->gem_context);
 
 	/*
 	 * We want a simple context + ring to execute the breadcrumb update.
@@ -2020,25 +2029,25 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
-	regs = request->hw_context->lrc_reg_state;
-	if (engine->pinned_default_state) {
+	regs = rq->hw_context->lrc_reg_state;
+
+	if (engine->pinned_default_state)
 		memcpy(regs, /* skip restoring the vanilla PPHWSP */
 		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
 		       engine->context_size - PAGE_SIZE);
-	}
-	execlists_init_reg_state(regs,
-				 request->gem_context, engine, request->ring);
+
+	execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring);
 
 	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
+	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(rq->ring->vma);
 
-	request->ring->head = intel_ring_wrap(request->ring, request->postfix);
-	regs[CTX_RING_HEAD + 1] = request->ring->head;
+	rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix);
+	regs[CTX_RING_HEAD + 1] = rq->ring->head;
 
-	intel_ring_update_space(request->ring);
+	intel_ring_update_space(rq->ring);
 
 	/* Reset WaIdleLiteRestore:bdw,skl as well */
-	unwind_wa_tail(request);
+	unwind_wa_tail(rq);
 }
 
 static void execlists_reset_finish(struct intel_engine_cs *engine)
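
With the stalled flag available in the backend, execlists_reset() now carries
the whole decision itself: bail out early if there is no request or it has
already completed, flag an innocent request with -EAGAIN for replay, and only
for a guilty request rebuild the context image and move the ring head past the
hanging batch. In outline (condensed from the hunks above; the mark_guilty and
mark_innocent calls remain commented out at this stage):

	if (!rq || i915_request_completed(rq))
		return;

	if (!stalled) {
		/* innocent: flag for replay, leave the context image intact */
		dma_fence_set_error(&rq->fence, -EAGAIN);
		return;
	}

	/* guilty: restore a default context image, skip to the breadcrumb */
	regs = rq->hw_context->lrc_reg_state;
	execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring);
	rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix);
	regs[CTX_RING_HEAD + 1] = rq->ring->head;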
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e0448eff12bd..a632bb2b18c7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -559,10 +559,49 @@ static void skip_request(struct i915_request *rq)
 	memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32));
 }
 
-static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq)
+static void reset_ring(struct intel_engine_cs *engine,
+		       struct i915_request *rq,
+		       bool stalled)
 {
 	GEM_TRACE("%s seqno=%x\n", engine->name, rq ? rq->global_seqno : 0);
 
+	/*
+	 * Make sure this write is visible before we re-enable the interrupt
+	 * handlers on another CPU, as tasklet_enable() resolves to just
+	 * a compiler barrier which is insufficient for our purpose here.
+	 */
+	smp_store_mb(engine->irq_posted, 0);
+
+	/*
+	 * The guilty request will get skipped on a hung engine.
+	 *
+	 * Users of client default contexts do not rely on logical
+	 * state preserved between batches so it is safe to execute
+	 * queued requests following the hang. Non default contexts
+	 * rely on preserved state, so skipping a batch loses the
+	 * evolution of the state and it needs to be considered corrupted.
+	 * Executing more queued batches on top of corrupted state is
+	 * risky. But we take the risk by trying to advance through
+	 * the queued requests in order to make the client behaviour
+	 * more predictable around resets, by not throwing away random
+	 * amount of batches it has prepared for execution. Sophisticated
+	 * clients can use gem_reset_stats_ioctl and dma fence status
+	 * (exported via sync_file info ioctl on explicit fences) to observe
+	 * when it loses the context state and should rebuild accordingly.
+	 *
+	 * The context ban, and ultimately the client ban, mechanism are safety
+	 * valves if client submission ends up resulting in nothing more than
+	 * subsequent hangs.
+	 */
+
+	if (stalled && i915_request_completed(rq)) {
+		GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n",
+			  engine->name, rq->global_seqno,
+			  rq->fence.context, rq->fence.seqno,
+			  intel_engine_get_seqno(engine));
+		stalled = false;
+	}
+
 	/*
 	 * Try to restore the logical GPU state to match the continuation
 	 * of the request queue. If we skip the context/PD restore, then
@@ -577,11 +616,34 @@ static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq)
 	 * If the request was innocent, we try to replay the request with
 	 * the restored context.
 	 */
+
+	if (stalled && !i915_request_completed(rq)) {
+		//i915_gem_context_mark_guilty(rq->gem_context);
+		dma_fence_set_error(&rq->fence, -EAGAIN);
+		skip_request(rq);
+	} else {
+		struct i915_timeline *tl = &engine->timeline;
+		struct i915_request *pos;
+		unsigned long flags;
+
+		/* Rewind the engine to replay the incomplete rq */
+		spin_lock_irqsave(&tl->lock, flags);
+		list_for_each_entry(pos, &tl->requests, link) {
+			if (!__i915_request_completed(pos, pos->global_seqno)) {
+				//i915_gem_context_mark_innocent(pos->gem_context);
+				dma_fence_set_error(&pos->fence, -EAGAIN);
+				break;
+			}
+
+			skip_request(pos);
+			rq = pos;
+		}
+		spin_unlock_irqrestore(&tl->lock, flags);
+	}
+
 	if (rq) {
 		/* If the rq hung, jump to its breadcrumb and skip the batch */
 		rq->ring->head = intel_ring_wrap(rq->ring, rq->head);
-		if (rq->fence.error == -EIO)
-			skip_request(rq);
 	}
 }
 
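The legacy ringbuffer backend makes the same call inline: on a stalled engine
whose hung request has not completed, the request is flagged with -EAGAIN and
its batch noop-ed out; otherwise the engine timeline is walked under its lock,
already-completed requests are noop-ed and the first incomplete one is flagged
with -EAGAIN so it can be replayed from the restored ring head. In outline
(condensed from the hunk above):

	if (stalled && !i915_request_completed(rq)) {
		dma_fence_set_error(&rq->fence, -EAGAIN);
		skip_request(rq);	/* noop out the guilty batch */
	} else {
		/* rewind: noop what already completed, replay the rest */
		list_for_each_entry(pos, &tl->requests, link) {
			if (!__i915_request_completed(pos, pos->global_seqno)) {
				dma_fence_set_error(&pos->fence, -EAGAIN);
				break;
			}
			skip_request(pos);
			rq = pos;
		}
	}

	if (rq)
		rq->ring->head = intel_ring_wrap(rq->ring, rq->head);
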
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a0bc7a8222b4..b75db1ca9f07 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -429,7 +429,8 @@ struct intel_engine_cs {
 	struct {
 		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
 		void (*reset)(struct intel_engine_cs *engine,
-			      struct i915_request *rq);
+			      struct i915_request *rq,
+			      bool stalled);
 		void (*finish)(struct intel_engine_cs *engine);
 	} reset;
 
-- 
2.18.0.rc2


