[PATCH 6/6] drm/i915: Drop active-request tracking for mutexless reset

Chris Wilson <chris at chris-wilson.co.uk>
Thu Jun 21 16:50:10 UTC 2018
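
Stop finding the active request during reset preparation and stashing
it in engine->hangcheck.active_request. Instead, let each backend
locate the guilty request itself while performing the reset: execlists
takes it straight from ELSP[0], and the legacy ringbuffer path walks
engine->timeline.requests under the timeline lock for the first
incomplete request. With no search to perform up front, reset.prepare()
and i915_gem_reset_prepare() can no longer fail, and i915_reset_engine()
simply returns early for an engine that is already idle.

The per-engine entry points shrink accordingly; a sketch of the
resulting flow for a single engine, as wired up by this series:

	i915_gem_reset_prepare_engine(engine);	/* forcewake + engine->reset.prepare() */
	/* the hardware reset itself (e.g. intel_gt_reset_engine()) happens here */
	i915_gem_reset_engine(engine, stalled);	/* engine->reset.reset(engine, stalled) */
	i915_gem_reset_finish_engine(engine);	/* engine->reset.finish() + forcewake put */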


---
 drivers/gpu/drm/i915/i915_drv.c             | 20 ++--
 drivers/gpu/drm/i915/i915_drv.h             |  9 +-
 drivers/gpu/drm/i915/i915_gem.c             | 56 +++----------
 drivers/gpu/drm/i915/intel_engine_cs.c      |  2 +-
 drivers/gpu/drm/i915/intel_guc_submission.c |  5 +-
 drivers/gpu/drm/i915/intel_lrc.c            | 33 ++-------
 drivers/gpu/drm/i915/intel_ringbuffer.c     | 100 +++++++++++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.h     |  7 +-
 8 files changed, 79 insertions(+), 153 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 2e3f3b208181..7def56a79880 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1910,11 +1910,7 @@ void i915_reset(struct drm_i915_private *i915,
 	error->reset_count++;
 
 	disable_irq(i915->drm.irq);
-	ret = i915_gem_reset_prepare(i915);
-	if (ret) {
-		dev_err(i915->drm.dev, "GPU recovery failed\n");
-		goto taint;
-	}
+	i915_gem_reset_prepare(i915);
 
 	if (!intel_has_gpu_reset(i915)) {
 		if (i915_modparams.reset)
@@ -2015,18 +2011,16 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv,
 int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 {
 	struct i915_gpu_error *error = &engine->i915->gpu_error;
-	struct i915_request *active_request;
 	int ret;
 
 	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
 	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
 
-	active_request = i915_gem_reset_prepare_engine(engine);
-	if (IS_ERR_OR_NULL(active_request)) {
-		/* Either the previous reset failed, or we pardon the reset. */
-		ret = PTR_ERR(active_request);
-		goto out;
-	}
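+	/* The engine is already idle; the hang cleared itself, pardon the reset */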
+	if (intel_engine_is_idle(engine))
+		return 0;
+
+	i915_gem_reset_prepare_engine(engine);
 
 	if (msg)
 		dev_notice(engine->i915->drm.dev,
@@ -2050,7 +2044,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 	 * active request and can drop it, adjust head to skip the offending
 	 * request to resume executing remaining requests in the queue.
 	 */
-	i915_gem_reset_engine(engine, active_request, true);
+	i915_gem_reset_engine(engine, true);
 
 	/*
 	 * The engine and its registers (and workarounds in case of render)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7dcdc41ac3b6..2fba3929ba95 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3150,18 +3150,15 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
 	return READ_ONCE(error->reset_engine_count[engine->id]);
 }
 
-struct i915_request *
-i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
+void i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
+void i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset(struct drm_i915_private *dev_priv,
 		    unsigned int stalled_mask);
 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
-void i915_gem_reset_engine(struct intel_engine_cs *engine,
-			   struct i915_request *request,
-			   bool stalled);
+void i915_gem_reset_engine(struct intel_engine_cs *engine, bool stalled);
 
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 65ce4bbfefb9..9a4af12fffce 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3027,11 +3027,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
  * Ensure irq handler finishes, and not run again.
- * Also return the active request so that we only search for it once.
  */
-struct i915_request *
-i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
+void i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 {
-	struct i915_request *request;
-
 	/*
 	 * During the reset sequence, we must prevent the engine from
 	 * entering RC6. As the context state is undefined until we restart
@@ -3040,35 +3036,19 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 	 * GPU state upon resume, i.e. fail to restart after a reset.
 	 */
 	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
-
-	request = engine->reset.prepare(engine);
-	if (request && request->fence.error == -EIO)
-		request = ERR_PTR(-EIO); /* Previous reset failed! */
-
-	return request;
+	engine->reset.prepare(engine);
 }
 
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
+void i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
-	struct i915_request *request;
 	enum intel_engine_id id;
-	int err = 0;
 
-	for_each_engine(engine, dev_priv, id) {
-		request = i915_gem_reset_prepare_engine(engine);
-		if (IS_ERR(request)) {
-			err = PTR_ERR(request);
-			continue;
-		}
-
-		engine->hangcheck.active_request = request;
-	}
+	for_each_engine(engine, dev_priv, id)
+		i915_gem_reset_prepare_engine(engine);
 
 	i915_gem_revoke_fences(dev_priv);
 	intel_uc_sanitize(dev_priv);
-
-	return err;
 }
 
 #if 0
@@ -3115,35 +3095,25 @@ static void engine_skip_context(struct i915_request *request)
 }
 #endif
 
-void i915_gem_reset_engine(struct intel_engine_cs *engine,
-			   struct i915_request *request,
-			   bool stalled)
+void i915_gem_reset_engine(struct intel_engine_cs *engine, bool stalled)
 {
-	if (!request)
-		stalled = false;
-
-	/* Setup the CS to resume from the breadcrumb of the hung request */
-	engine->reset.reset(engine, request, stalled);
+	engine->reset.reset(engine, stalled);
 }
 
-void i915_gem_reset(struct drm_i915_private *dev_priv,
-		    unsigned int stalled_mask)
+void i915_gem_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	for_each_engine(engine, dev_priv, id)
-		i915_gem_reset_engine(engine,
-				      engine->hangcheck.active_request,
-				      stalled_mask & ENGINE_MASK(id));
+	for_each_engine(engine, i915, id)
+		i915_gem_reset_engine(engine, stalled_mask & ENGINE_MASK(id));
 
-	i915_gem_restore_fences(dev_priv);
+	i915_gem_restore_fences(i915);
 }
 
 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
 {
 	engine->reset.finish(engine);
-
 	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
 }
 
@@ -3152,10 +3122,8 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	for_each_engine(engine, dev_priv, id) {
-		engine->hangcheck.active_request = NULL;
+	for_each_engine(engine, dev_priv, id)
 		i915_gem_reset_finish_engine(engine);
-	}
 }
 
 static void nop_submit_request(struct i915_request *request)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index f753c88af7ce..aa3361a49d19 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1097,7 +1097,7 @@ void intel_engines_sanitize(struct drm_i915_private *i915)
 
 	for_each_engine(engine, i915, id) {
 		if (engine->reset.reset)
-			engine->reset.reset(engine, NULL, false);
+			engine->reset.reset(engine, false);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index f3945258fe1b..326de158dae6 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -814,8 +814,7 @@ static void guc_submission_tasklet(unsigned long data)
 		guc_dequeue(engine);
 }
 
-static struct i915_request *
-guc_reset_prepare(struct intel_engine_cs *engine)
+static void guc_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
@@ -841,8 +840,6 @@ guc_reset_prepare(struct intel_engine_cs *engine)
 	 */
 	if (engine->i915->guc.preempt_wq)
 		flush_workqueue(engine->i915->guc.preempt_wq);
-
-	return i915_gem_find_active_request(engine);
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 75d09e4419e7..3676ff924460 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1901,11 +1901,9 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
 	return 0;
 }
 
-static struct i915_request *
-execlists_reset_prepare(struct intel_engine_cs *engine)
+static void execlists_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct i915_request *request, *active;
 
 	GEM_TRACE("%s\n", engine->name);
 
@@ -1935,38 +1933,17 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
-	 * now in ELSP[0]. So search backwards from there, so that if the GPU
-	 * has advanced beyond the last CSB update, it will be pardoned.
+	 * now in ELSP[0], which execlists_reset() will inspect directly.
 	 */
-	active = NULL;
-	request = port_request(execlists->port);
-	if (request) {
-		unsigned long flags;
-
+	if (port_request(execlists->port))
 		/*
 		 * Prevent the breadcrumb from advancing before we decide
 		 * which request is currently active.
 		 */
 		intel_engine_stop_cs(engine);
-
-		spin_lock_irqsave(&engine->timeline.lock, flags);
-		list_for_each_entry_from_reverse(request,
-						 &engine->timeline.requests,
-						 link) {
-			if (__i915_request_completed(request,
-						     request->global_seqno))
-				break;
-
-			active = request;
-		}
-		spin_unlock_irqrestore(&engine->timeline.lock, flags);
-	}
-
-	return active;
 }
 
-static void execlists_reset(struct intel_engine_cs *engine,
-			    struct i915_request *rq,
-			    bool stalled)
+static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct i915_request *rq = port_request(execlists->port);
 	unsigned long flags;
 	u32 *regs;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index a632bb2b18c7..dae87b87fa58 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -535,14 +535,12 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	return ret;
 }
 
-static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
+static void reset_prepare(struct intel_engine_cs *engine)
 {
 	intel_engine_stop_cs(engine);
 
 	if (engine->irq_seqno_barrier)
 		engine->irq_seqno_barrier(engine);
-
-	return i915_gem_find_active_request(engine);
 }
 
 static void skip_request(struct i915_request *rq)
@@ -559,11 +557,31 @@ static void skip_request(struct i915_request *rq)
 	memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32));
 }
 
-static void reset_ring(struct intel_engine_cs *engine,
-		       struct i915_request *rq,
-		       bool stalled)
+static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 {
-	GEM_TRACE("%s seqno=%x\n", engine->name, rq ? rq->global_seqno : 0);
+	struct i915_timeline *tl = &engine->timeline;
+	struct i915_request *pos, *rq;
+	unsigned long flags;
+
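+	/* Find the first incomplete request; that is the one that hung */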
+	rq = NULL;
+	spin_lock_irqsave(&tl->lock, flags);
+	list_for_each_entry(pos, &tl->requests, link) {
+		rq = pos;
+		if (!__i915_request_completed(rq, rq->global_seqno))
+			break;
+	}
+	if (&pos->link == &tl->requests) {
+		/* Every request completed; nothing to blame, nothing to replay */
+		rq = NULL;
+		stalled = false;
+	}
+	spin_unlock_irqrestore(&tl->lock, flags);
+
+	GEM_TRACE("%s seqno=%x, stalled? %s\n",
+		  engine->name,
+		  rq ? rq->global_seqno : 0,
+		  yesno(stalled));
 
 	/*
 	 * Make sure this write is visible before we re-enable the interrupt
@@ -594,54 +612,32 @@ static void reset_ring(struct intel_engine_cs *engine,
 	 * subsequent hangs.
 	 */
 
-	if (stalled && i915_request_completed(rq)) {
-		GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n",
-			  engine->name, rq->global_seqno,
-			  rq->fence.context, rq->fence.seqno,
-			  intel_engine_get_seqno(engine));
-		stalled = false;
-	}
-
-	/*
-	 * Try to restore the logical GPU state to match the continuation
-	 * of the request queue. If we skip the context/PD restore, then
-	 * the next request may try to execute assuming that its context
-	 * is valid and loaded on the GPU and so may try to access invalid
-	 * memory, prompting repeated GPU hangs.
-	 *
-	 * If the request was guilty, we still restore the logical state
-	 * in case the next request requires it (e.g. the aliasing ppgtt),
-	 * but skip over the hung batch.
-	 *
-	 * If the request was innocent, we try to replay the request with
-	 * the restored context.
-	 */
-
-	if (stalled && !i915_request_completed(rq)) {
-		//i915_gem_context_mark_guilty(rq->gem_context);
-		dma_fence_set_error(&rq->fence, -EAGAIN);
-		skip_request(rq);
-	} else {
-		struct i915_timeline *tl = &engine->timeline;
-		struct i915_request *pos;
-		unsigned long flags;
-
-		/* Rewind the engine to replay the incomplete rq */
-		spin_lock_irqsave(&tl->lock, flags);
-		list_for_each_entry(pos, &tl->requests, link) {
-			if (!__i915_request_completed(pos, pos->global_seqno)) {
-				//i915_gem_context_mark_innocent(request->gem_context);
-				dma_fence_set_error(&pos->fence, -EAGAIN);
-				break;
-			}
+	if (rq) {
+		/*
+		 * Try to restore the logical GPU state to match the
+		 * continuation of the request queue. If we skip the
+		 * context/PD restore, then the next request may try to execute
+		 * assuming that its context is valid and loaded on the GPU and
+		 * so may try to access invalid memory, prompting repeated GPU
+		 * hangs.
+		 *
+		 * If the request was guilty, we still restore the logical
+		 * state in case the next request requires it (e.g. the
+		 * aliasing ppgtt), but skip over the hung batch.
+		 *
+		 * If the request was innocent, we try to replay the request
+		 * with the restored context.
+		 */
 
-			skip_request(pos);
-			rq = pos;
+		if (stalled) {
+			//i915_gem_context_mark_guilty(rq->gem_context);
+			dma_fence_set_error(&rq->fence, -EIO);
+			skip_request(rq);
+		} else {
+			//i915_gem_context_mark_innocent(rq->gem_context);
+			dma_fence_set_error(&rq->fence, -EAGAIN);
 		}
-		spin_unlock_irqrestore(&tl->lock, flags);
-	}
 
-	if (rq) {
 		/* If the rq hung, jump to its breadcrumb and skip the batch */
 		rq->ring->head = intel_ring_wrap(rq->ring, rq->head);
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b75db1ca9f07..b8cde58bcbea 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -121,7 +121,6 @@ struct intel_engine_hangcheck {
 	unsigned long action_timestamp;
 	int deadlock;
 	struct intel_instdone instdone;
-	struct i915_request *active_request;
 	bool stalled:1;
 	bool wedged:1;
 };
@@ -427,10 +426,8 @@ struct intel_engine_cs {
 	int		(*init_hw)(struct intel_engine_cs *engine);
 
 	struct {
-		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
-		void (*reset)(struct intel_engine_cs *engine,
-			      struct i915_request *rq,
-			      bool stalled);
+		void (*prepare)(struct intel_engine_cs *engine);
+		void (*reset)(struct intel_engine_cs *engine, bool stalled);
 		void (*finish)(struct intel_engine_cs *engine);
 	} reset;
 
-- 
2.18.0.rc2


