[Intel-gfx] [PATCH 6/9] drm/i915/tdr: Add engine reset count to error state

Michel Thierry michel.thierry at intel.com
Fri Dec 16 20:20:07 UTC 2016


From: Arun Siluvery <arun.siluvery at linux.intel.com>

Driver maintains count of how many times a given engine is reset, useful to
capture this in error state also. It gives an idea of how engine is coping
up with the workloads it is executing before this error state.

Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
Signed-off-by: Michel Thierry <michel.thierry at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c       | 1 +
 drivers/gpu/drm/i915/i915_drv.h       | 9 +++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++
 3 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a034793bc246..1c706a082d60 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1877,6 +1877,7 @@ int i915_reset_engine(struct intel_engine_cs *engine)
 	intel_engine_reset_cancel(engine);
 	intel_execlists_restart_submission(engine);
 
+	dev_priv->gpu_error.engine_reset_count[engine->id]++;
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a97cc8f50ade..25183762ed94 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -937,6 +937,7 @@ struct drm_i915_error_state {
 		enum intel_engine_hangcheck_action hangcheck_action;
 		struct i915_address_space *vm;
 		int num_requests;
+		u32 reset_count;
 
 		/* position of active request inside the ring */
 		u32 rq_head, rq_post, rq_tail;
@@ -1629,6 +1630,8 @@ struct i915_gpu_error {
 #define I915_RESET_IN_PROGRESS	0
 #define I915_WEDGED		(BITS_PER_LONG - 1)
 
+	unsigned long engine_reset_count[I915_NUM_ENGINES];
+
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
 	 * to release the struct_mutex for the reset to procede.
@@ -3397,6 +3400,12 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 	return READ_ONCE(error->reset_count);
 }
 
+static inline u32 i915_engine_reset_count(struct i915_gpu_error *error,
+					  struct intel_engine_cs *engine)
+{
+	return READ_ONCE(error->engine_reset_count[engine->id]);
+}
+
 void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 void i915_gem_reset_engine(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index e16037d1b0ba..f168ad873521 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -453,6 +453,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  hangcheck action timestamp: %lu, %u ms ago\n",
 		   ee->hangcheck_timestamp,
 		   jiffies_to_msecs(jiffies - ee->hangcheck_timestamp));
+	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	error_print_request(m, "  ELSP[0]: ", &ee->execlist[0]);
 	error_print_request(m, "  ELSP[1]: ", &ee->execlist[1]);
@@ -1170,6 +1171,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
 	ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
 	ee->hangcheck_action = engine->hangcheck.action;
 	ee->hangcheck_stalled = engine->hangcheck.stalled;
+	ee->reset_count = i915_engine_reset_count(&dev_priv->gpu_error,
+						  engine);
 
 	if (USES_PPGTT(dev_priv)) {
 		int i;
-- 
2.11.0



More information about the Intel-gfx mailing list