[Intel-gfx] [PATCH v2] drm/i915: Restore engine->submit_request before unwedging

Chris Wilson chris at chris-wilson.co.uk
Fri Mar 10 19:01:25 UTC 2017


When we wedge the device, we override engine->submit_request with a nop
to ensure that all in-flight requests are marked in error. However, igt
would like to unwedge the device to test -EIO handling. This requires us
to flush those in-flight requests and restore the original
engine->submit_request.

v2: Use a vfunc to unify enabling request submission to engines

Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
Testcase: igt/gem_eio
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |  2 +-
 drivers/gpu/drm/i915/i915_drv.h            |  1 +
 drivers/gpu/drm/i915/i915_gem.c            | 45 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_guc_submission.c |  2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     | 10 +++++++
 drivers/gpu/drm/i915/intel_lrc.c           | 15 ++++------
 drivers/gpu/drm/i915/intel_lrc.h           |  1 -
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 15 ++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  4 +++
 9 files changed, 80 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index b1e9027a4f80..576b03b0048c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1825,7 +1825,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
 		return;
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
-	__clear_bit(I915_WEDGED, &error->flags);
+	i915_gem_unset_wedged(dev_priv);
 	error->reset_count++;
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eef096566b81..71f07f45d5fe 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3409,6 +3409,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
+void i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
 
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 202bb850f260..aae491076390 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3000,6 +3000,51 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 }
 
+void i915_gem_unset_wedged(struct drm_i915_private *i915)
+{
+	struct i915_gem_timeline *tl;
+	int i;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+		return;
+
+	/* Before unwedging, make sure that all pending operations
+	 * are flushed and errored out. No more can be submitted until
+	 * we reset the wedged bit.
+	 */
+	list_for_each_entry(tl, &i915->gt.timelines, link) {
+		for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
+			struct drm_i915_gem_request *rq;
+
+			rq = i915_gem_active_peek(&tl->engine[i].last_request,
+						  &i915->drm.struct_mutex);
+			if (!rq)
+				continue;
+
+			/* We can't use our normal waiter as we want to
+			 * avoid recursively trying to handle the current
+			 * reset.
+			 */
+			dma_fence_default_wait(&rq->fence, false,
+					       MAX_SCHEDULE_TIMEOUT);
+		}
+	}
+
+	/* Undo nop_submit_request. We prevent all new i915 requests from
+	 * being queued (by disallowing execbuf whilst wedged) so having
+	 * waited for all active requests above, we know the system is idle
+	 * and do not have to worry about a thread being inside
+	 * engine->submit_request() as we swap over. So unlike installing
+	 * the nop_submit_request on reset, we can do this from normal
+	 * context and do not require stop_machine().
+	 */
+	intel_engines_enable_submission(i915);
+
+	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
+	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+}
+
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 41f2dd87b413..28289627a4ca 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -1003,7 +1003,7 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
 		return;
 
 	/* Revert back to manual ELSP submission */
-	intel_execlists_enable_submission(dev_priv);
+	intel_engines_enable_submission(dev_priv);
 }
 
 void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 73fe718516a5..5663ebab851f 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -191,6 +191,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
 			goto cleanup;
 		}
 
+		engine->enable_submission(engine);
 		mask |= ENGINE_MASK(id);
 	}
 
@@ -1115,6 +1116,15 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
 	return true;
 }
 
+void intel_engines_enable_submission(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id)
+		engine->enable_submission(engine);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_engine.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 89f38e7def9f..f79df7a51e60 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1560,15 +1560,10 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 	kfree(engine);
 }
 
-void intel_execlists_enable_submission(struct drm_i915_private *dev_priv)
+static void logical_ring_enable_submission(struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, dev_priv, id) {
-		engine->submit_request = execlists_submit_request;
-		engine->schedule = execlists_schedule;
-	}
+	engine->submit_request = execlists_submit_request;
+	engine->schedule = execlists_schedule;
 }
 
 static void
@@ -1586,8 +1581,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->emit_flush = gen8_emit_flush;
 	engine->emit_breadcrumb = gen8_emit_breadcrumb;
 	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
-	engine->submit_request = execlists_submit_request;
-	engine->schedule = execlists_schedule;
+
+	engine->enable_submission = logical_ring_enable_submission;
 
 	engine->irq_enable = gen8_logical_ring_enable_irq;
 	engine->irq_disable = gen8_logical_ring_disable_irq;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 5fc07761caff..e8015e7bf4e9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -87,6 +87,5 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
 				    int enable_execlists);
-void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
 
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 4a864f8c9387..5b141f6639b6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2050,6 +2050,16 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
 	}
 }
 
+static void i9xx_enable_submission(struct intel_engine_cs *engine)
+{
+	engine->submit_request = i9xx_submit_request;
+}
+
+static void gen6_bsd_enable_submission(struct intel_engine_cs *engine)
+{
+	engine->submit_request = gen6_bsd_submit_request;
+}
+
 static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 				      struct intel_engine_cs *engine)
 {
@@ -2080,7 +2090,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 				engine->emit_breadcrumb_sz++;
 		}
 	}
-	engine->submit_request = i9xx_submit_request;
+
+	engine->enable_submission = i9xx_enable_submission;
 
 	if (INTEL_GEN(dev_priv) >= 8)
 		engine->emit_bb_start = gen8_emit_bb_start;
@@ -2165,7 +2176,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
 	if (INTEL_GEN(dev_priv) >= 6) {
 		/* gen6 bsd needs a special wa for tail updates */
 		if (IS_GEN6(dev_priv))
-			engine->submit_request = gen6_bsd_submit_request;
+			engine->enable_submission = gen6_bsd_enable_submission;
 		engine->emit_flush = gen6_bsd_ring_flush;
 		if (INTEL_GEN(dev_priv) < 8)
 			engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0ef491df5b4e..30d9820d978c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -273,6 +273,8 @@ struct intel_engine_cs {
 	void		(*reset_hw)(struct intel_engine_cs *engine,
 				    struct drm_i915_gem_request *req);
 
+	void		(*enable_submission)(struct intel_engine_cs *engine);
+
 	int		(*context_pin)(struct intel_engine_cs *engine,
 				       struct i915_gem_context *ctx);
 	void		(*context_unpin)(struct intel_engine_cs *engine,
@@ -669,4 +671,6 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
+void intel_engines_enable_submission(struct drm_i915_private *i915);
+
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.11.0



More information about the Intel-gfx mailing list