[Intel-gfx] [PATCH 2/2] drm/i915: Move error state capture code out from i915_irq.c

Tue Nov 1 16:03:22 UTC 2016

We have a place already for error handling and error
state capture, i915_gpu_error.c. Move code to more
appropriate file.

No functional changes.

Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |   4 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 157 +++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_irq.c       | 151 --------------------------------
 3 files changed, 155 insertions(+), 157 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6c0b0a6..88301fa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3686,9 +3686,7 @@ static inline void i915_error_state_buf_release(
 {
 	kfree(eb->buf);
 }
-void i915_capture_error_state(struct drm_i915_private *dev_priv,
-			      u32 engine_mask,
-			      const char *error_msg);
+
 void i915_error_state_get(struct drm_device *dev,
 			  struct i915_error_state_file_priv *error_priv);
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 204093f..e307841 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1585,9 +1585,9 @@ static int capture(void *data)
  * out a structure which becomes available in debugfs for user level tools
  * to pick up.
  */
-void i915_capture_error_state(struct drm_i915_private *dev_priv,
-			      u32 engine_mask,
-			      const char *error_msg)
+static void i915_capture_error_state(struct drm_i915_private *dev_priv,
+				     u32 engine_mask,
+				     const char *error_msg)
 {
 	static bool warned;
 	struct drm_i915_error_state *error;
@@ -1640,6 +1640,108 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
 	}
 }
 
+static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
+{
+	u32 eir;
+
+	if (!IS_GEN2(dev_priv))
+		I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
+
+	if (INTEL_GEN(dev_priv) < 4)
+		I915_WRITE(IPEIR, I915_READ(IPEIR));
+	else
+		I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
+
+	I915_WRITE(EIR, I915_READ(EIR));
+	eir = I915_READ(EIR);
+	if (eir) {
+		/*
+		 * some errors might have become stuck,
+		 * mask them.
+		 */
+		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
+		I915_WRITE(EMR, I915_READ(EMR) | eir);
+		I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
+	}
+}
+
+static void i915_error_wake_up(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * Notify all waiters for GPU completion events that reset state has
+	 * been changed, and that they need to restart their wait after
+	 * checking for potential errors (and bail out to drop locks if there is
+	 * a gpu reset pending so that i915_error_work_func can acquire them).
+	 */
+
+	/* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
+	wake_up_all(&dev_priv->gpu_error.wait_queue);
+
+	/* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
+	wake_up_all(&dev_priv->pending_flip_queue);
+}
+
+/**
+ * i915_reset_and_wakeup - do process context error handling work
+ * @dev_priv: i915 device private
+ *
+ * Fire an error uevent so userspace can see that a hang or error
+ * was detected.
+ */
+static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
+{
+	struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
+	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
+	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
+	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
+
+	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
+
+	DRM_DEBUG_DRIVER("resetting chip\n");
+	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
+	/*
+	 * In most cases it's guaranteed that we get here with an RPM
+	 * reference held, for example because there is a pending GPU
+	 * request that won't finish until the reset is done. This
+	 * isn't the case at least when we get here by doing a
+	 * simulated reset via debugs, so get an RPM reference.
+	 */
+	intel_runtime_pm_get(dev_priv);
+	intel_prepare_reset(dev_priv);
+
+	do {
+		/*
+		 * All state reset _must_ be completed before we update the
+		 * reset counter, for otherwise waiters might miss the reset
+		 * pending state and not properly drop locks, resulting in
+		 * deadlocks with the reset work.
+		 */
+		if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
+			i915_reset(dev_priv);
+			mutex_unlock(&dev_priv->drm.struct_mutex);
+		}
+
+		/* We need to wait for anyone holding the lock to wakeup */
+	} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
+				     I915_RESET_IN_PROGRESS,
+				     TASK_UNINTERRUPTIBLE,
+				     HZ));
+
+	intel_finish_reset(dev_priv);
+	intel_runtime_pm_put(dev_priv);
+
+	if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
+		kobject_uevent_env(kobj,
+				   KOBJ_CHANGE, reset_done_event);
+
+	/*
+	 * Note: The wake_up also serves as a memory barrier so that
+	 * waiters see the updated value of the dev_priv->gpu_error.
+	 */
+	wake_up_all(&dev_priv->gpu_error.reset_queue);
+}
+
 void i915_error_state_get(struct drm_device *dev,
 			  struct i915_error_state_file_priv *error_priv)
 {
@@ -1671,3 +1773,52 @@ void i915_destroy_error_state(struct drm_device *dev)
 	if (error)
 		kref_put(&error->ref, i915_error_state_free);
 }
+
+/**
+ * i915_handle_error - handle a gpu error
+ * @dev_priv: i915 device private
+ * @engine_mask: mask representing engines that are hung
+ * Do some basic checking of register state at error time and
+ * dump it to the syslog.  Also call i915_capture_error_state() to make
+ * sure we get a record and make it available in debugfs.  Fire a uevent
+ * so userspace knows something bad happened (should trigger collection
+ * of a ring dump etc.).
+ * @fmt: Error message format string
+ */
+void i915_handle_error(struct drm_i915_private *dev_priv,
+		       u32 engine_mask,
+		       const char *fmt, ...)
+{
+	va_list args;
+	char error_msg[80];
+
+	va_start(args, fmt);
+	vscnprintf(error_msg, sizeof(error_msg), fmt, args);
+	va_end(args);
+
+	i915_capture_error_state(dev_priv, engine_mask, error_msg);
+	i915_clear_error_registers(dev_priv);
+
+	if (!engine_mask)
+		return;
+
+	if (test_and_set_bit(I915_RESET_IN_PROGRESS,
+			     &dev_priv->gpu_error.flags))
+		return;
+
+	/*
+	 * Wakeup waiting processes so that the reset function
+	 * i915_reset_and_wakeup doesn't deadlock trying to grab
+	 * various locks. By bumping the reset counter first, the woken
+	 * processes will see a reset in progress and back off,
+	 * releasing their locks and then wait for the reset completion.
+	 * We must do this for _all_ gpu waiters that might hold locks
+	 * that the reset work needs to acquire.
+	 *
+	 * Note: The wake_up also provides a memory barrier to ensure that the
+	 * waiters see the updated value of the reset flags.
+	 */
+	i915_error_wake_up(dev_priv);
+
+	i915_reset_and_wakeup(dev_priv);
+}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6d7505b..f6869f0 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2571,83 +2571,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 	return ret;
 }
 
-static void i915_error_wake_up(struct drm_i915_private *dev_priv)
-{
-	/*
-	 * Notify all waiters for GPU completion events that reset state has
-	 * been changed, and that they need to restart their wait after
-	 * checking for potential errors (and bail out to drop locks if there is
-	 * a gpu reset pending so that i915_error_work_func can acquire them).
-	 */
-
-	/* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
-	wake_up_all(&dev_priv->gpu_error.wait_queue);
-
-	/* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
-	wake_up_all(&dev_priv->pending_flip_queue);
-}
-
-/**
- * i915_reset_and_wakeup - do process context error handling work
- * @dev_priv: i915 device private
- *
- * Fire an error uevent so userspace can see that a hang or error
- * was detected.
- */
-static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
-{
-	struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
-	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
-	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
-	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
-
-	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
-
-	DRM_DEBUG_DRIVER("resetting chip\n");
-	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
-	/*
-	 * In most cases it's guaranteed that we get here with an RPM
-	 * reference held, for example because there is a pending GPU
-	 * request that won't finish until the reset is done. This
-	 * isn't the case at least when we get here by doing a
-	 * simulated reset via debugs, so get an RPM reference.
-	 */
-	intel_runtime_pm_get(dev_priv);
-	intel_prepare_reset(dev_priv);
-
-	do {
-		/*
-		 * All state reset _must_ be completed before we update the
-		 * reset counter, for otherwise waiters might miss the reset
-		 * pending state and not properly drop locks, resulting in
-		 * deadlocks with the reset work.
-		 */
-		if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
-			i915_reset(dev_priv);
-			mutex_unlock(&dev_priv->drm.struct_mutex);
-		}
-
-		/* We need to wait for anyone holding the lock to wakeup */
-	} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
-				     I915_RESET_IN_PROGRESS,
-				     TASK_UNINTERRUPTIBLE,
-				     HZ));
-
-	intel_finish_reset(dev_priv);
-	intel_runtime_pm_put(dev_priv);
-
-	if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
-		kobject_uevent_env(kobj,
-				   KOBJ_CHANGE, reset_done_event);
-
-	/*
-	 * Note: The wake_up also serves as a memory barrier so that
-	 * waiters see the updated value of the dev_priv->gpu_error.
-	 */
-	wake_up_all(&dev_priv->gpu_error.reset_queue);
-}
-
 static inline void
 i915_err_print_instdone(struct drm_i915_private *dev_priv,
 			struct intel_instdone *instdone)
@@ -2674,80 +2597,6 @@ i915_err_print_instdone(struct drm_i915_private *dev_priv,
 		       slice, subslice, instdone->row[slice][subslice]);
 }
 
-static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
-{
-	u32 eir;
-
-	if (!IS_GEN2(dev_priv))
-		I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
-
-	if (INTEL_GEN(dev_priv) < 4)
-		I915_WRITE(IPEIR, I915_READ(IPEIR));
-	else
-		I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
-
-	I915_WRITE(EIR, I915_READ(EIR));
-	eir = I915_READ(EIR);
-	if (eir) {
-		/*
-		 * some errors might have become stuck,
-		 * mask them.
-		 */
-		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
-		I915_WRITE(EMR, I915_READ(EMR) | eir);
-		I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
-	}
-}
-
-/**
- * i915_handle_error - handle a gpu error
- * @dev_priv: i915 device private
- * @engine_mask: mask representing engines that are hung
- * Do some basic checking of register state at error time and
- * dump it to the syslog.  Also call i915_capture_error_state() to make
- * sure we get a record and make it available in debugfs.  Fire a uevent
- * so userspace knows something bad happened (should trigger collection
- * of a ring dump etc.).
- * @fmt: Error message format string
- */
-void i915_handle_error(struct drm_i915_private *dev_priv,
-		       u32 engine_mask,
-		       const char *fmt, ...)
-{
-	va_list args;
-	char error_msg[80];
-
-	va_start(args, fmt);
-	vscnprintf(error_msg, sizeof(error_msg), fmt, args);
-	va_end(args);
-
-	i915_capture_error_state(dev_priv, engine_mask, error_msg);
-	i915_clear_error_registers(dev_priv);
-
-	if (!engine_mask)
-		return;
-
-	if (test_and_set_bit(I915_RESET_IN_PROGRESS,
-			     &dev_priv->gpu_error.flags))
-		return;
-
-	/*
-	 * Wakeup waiting processes so that the reset function
-	 * i915_reset_and_wakeup doesn't deadlock trying to grab
-	 * various locks. By bumping the reset counter first, the woken
-	 * processes will see a reset in progress and back off,
-	 * releasing their locks and then wait for the reset completion.
-	 * We must do this for _all_ gpu waiters that might hold locks
-	 * that the reset work needs to acquire.
-	 *
-	 * Note: The wake_up also provides a memory barrier to ensure that the
-	 * waiters see the updated value of the reset flags.
-	 */
-	i915_error_wake_up(dev_priv);
-
-	i915_reset_and_wakeup(dev_priv);
-}
-
 /* Called from drm generic code, passed 'crtc' which
  * we use as a pipe index
  */
-- 
2.7.4