[Intel-gfx] [PATCH 1/3] drm/i915: Add error_recovery option to i915_handle_error

Mika Kuoppala mika.kuoppala at linux.intel.com
Thu Oct 23 19:35:22 CEST 2014


So that we gain more fine-grained control over how we want
to do the recovery. The aim is to be able to grab the GPU state
and skip everything related to recovery.

Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  9 ++++++++-
 drivers/gpu/drm/i915/i915_irq.c | 28 +++++++++++++++-------------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3212d62..23e7f20 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2288,8 +2288,15 @@ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
 
 /* i915_irq.c */
 void i915_queue_hangcheck(struct drm_device *dev);
+
+enum recovery_type {
+	NO_RESET,
+	RESET,
+	DEBUGFS,
+};
+
 __printf(3, 4)
-void i915_handle_error(struct drm_device *dev, bool wedged,
+void i915_handle_error(struct drm_device *dev, enum recovery_type recovery,
 		       const char *fmt, ...);
 
 void gen6_set_pm_mask(struct drm_i915_private *dev_priv, u32 pm_iir,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a2b013d..21008a2 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1320,7 +1320,7 @@ static void snb_gt_irq_handler(struct drm_device *dev,
 	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
 		      GT_BSD_CS_ERROR_INTERRUPT |
 		      GT_RENDER_CS_MASTER_ERROR_INTERRUPT)) {
-		i915_handle_error(dev, false, "GT error interrupt 0x%08x",
+		i915_handle_error(dev, NO_RESET, "GT error interrupt 0x%08x",
 				  gt_iir);
 	}
 
@@ -1716,7 +1716,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
 
 		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) {
-			i915_handle_error(dev_priv->dev, false,
+			i915_handle_error(dev_priv->dev, NO_RESET,
 					  "VEBOX CS error interrupt 0x%08x",
 					  pm_iir);
 		}
@@ -2548,7 +2548,7 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
  * so userspace knows something bad happened (should trigger collection
  * of a ring dump etc.).
  */
-void i915_handle_error(struct drm_device *dev, bool wedged,
+void i915_handle_error(struct drm_device *dev, enum recovery_type recovery,
 		       const char *fmt, ...)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2559,10 +2559,11 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
 	vscnprintf(error_msg, sizeof(error_msg), fmt, args);
 	va_end(args);
 
-	i915_capture_error_state(dev, wedged, error_msg);
-	i915_report_and_clear_eir(dev);
+	i915_capture_error_state(dev, recovery == RESET, error_msg);
+	if (recovery != DEBUGFS)
+		i915_report_and_clear_eir(dev);
 
-	if (wedged) {
+	if (recovery == RESET) {
 		atomic_set_mask(I915_RESET_IN_PROGRESS_FLAG,
 				&dev_priv->gpu_error.reset_counter);
 
@@ -2588,7 +2589,8 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
 	 * dev-priv->wq work queue for otherwise the flush_work in the pageflip
 	 * code will deadlock.
 	 */
-	schedule_work(&dev_priv->gpu_error.work);
+	if (recovery != DEBUGFS)
+		schedule_work(&dev_priv->gpu_error.work);
 }
 
 /* Called from drm generic code, passed 'crtc' which
@@ -2888,7 +2890,7 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
 	 */
 	tmp = I915_READ_CTL(ring);
 	if (tmp & RING_WAIT) {
-		i915_handle_error(dev, false,
+		i915_handle_error(dev, NO_RESET,
 				  "Kicking stuck wait on %s",
 				  ring->name);
 		I915_WRITE_CTL(ring, tmp);
@@ -2900,7 +2902,7 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
 		default:
 			return HANGCHECK_HUNG;
 		case 1:
-			i915_handle_error(dev, false,
+			i915_handle_error(dev, NO_RESET,
 					  "Kicking stuck semaphore on %s",
 					  ring->name);
 			I915_WRITE_CTL(ring, tmp);
@@ -3028,7 +3030,7 @@ static void i915_hangcheck_elapsed(unsigned long data)
 	}
 
 	if (rings_hung)
-		return i915_handle_error(dev, true, "Ring hung");
+		return i915_handle_error(dev, RESET, "Ring hung");
 
 	if (busy_count)
 		/* Reset timer case chip hangs without another request
@@ -3777,7 +3779,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 		 */
 		spin_lock(&dev_priv->irq_lock);
 		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
-			i915_handle_error(dev, false,
+			i915_handle_error(dev, NO_RESET,
 					  "Command parser error, iir 0x%08x",
 					  iir);
 
@@ -3962,7 +3964,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
 		 */
 		spin_lock(&dev_priv->irq_lock);
 		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
-			i915_handle_error(dev, false,
+			i915_handle_error(dev, NO_RESET,
 					  "Command parser error, iir 0x%08x",
 					  iir);
 
@@ -4189,7 +4191,7 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 		 */
 		spin_lock(&dev_priv->irq_lock);
 		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
-			i915_handle_error(dev, false,
+			i915_handle_error(dev, NO_RESET,
 					  "Command parser error, iir 0x%08x",
 					  iir);
 
-- 
1.9.1




More information about the Intel-gfx mailing list