[Intel-gfx] [PATCH 2/2] drm/i915: Move error state capture code out from i915_irq.c
Mika Kuoppala
mika.kuoppala at linux.intel.com
Tue Nov 1 16:03:22 UTC 2016
We already have a place for error handling and error
state capture: i915_gpu_error.c. Move the code to this
more appropriate file.
No functional changes.
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 4 +-
drivers/gpu/drm/i915/i915_gpu_error.c | 157 +++++++++++++++++++++++++++++++++-
drivers/gpu/drm/i915/i915_irq.c | 151 --------------------------------
3 files changed, 155 insertions(+), 157 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6c0b0a6..88301fa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3686,9 +3686,7 @@ static inline void i915_error_state_buf_release(
{
kfree(eb->buf);
}
-void i915_capture_error_state(struct drm_i915_private *dev_priv,
- u32 engine_mask,
- const char *error_msg);
+
void i915_error_state_get(struct drm_device *dev,
struct i915_error_state_file_priv *error_priv);
void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 204093f..e307841 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1585,9 +1585,9 @@ static int capture(void *data)
* out a structure which becomes available in debugfs for user level tools
* to pick up.
*/
-void i915_capture_error_state(struct drm_i915_private *dev_priv,
- u32 engine_mask,
- const char *error_msg)
+static void i915_capture_error_state(struct drm_i915_private *dev_priv,
+ u32 engine_mask,
+ const char *error_msg)
{
static bool warned;
struct drm_i915_error_state *error;
@@ -1640,6 +1640,108 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
}
}
+static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
+{
+ u32 eir;
+
+ if (!IS_GEN2(dev_priv))
+ I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
+
+ if (INTEL_GEN(dev_priv) < 4)
+ I915_WRITE(IPEIR, I915_READ(IPEIR));
+ else
+ I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
+
+ I915_WRITE(EIR, I915_READ(EIR));
+ eir = I915_READ(EIR);
+ if (eir) {
+ /*
+ * some errors might have become stuck,
+ * mask them.
+ */
+ DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
+ I915_WRITE(EMR, I915_READ(EMR) | eir);
+ I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
+ }
+}
+
+static void i915_error_wake_up(struct drm_i915_private *dev_priv)
+{
+ /*
+ * Notify all waiters for GPU completion events that reset state has
+ * been changed, and that they need to restart their wait after
+ * checking for potential errors (and bail out to drop locks if there is
+ * a gpu reset pending so that i915_reset_and_wakeup can acquire them).
+ */
+
+ /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
+ wake_up_all(&dev_priv->gpu_error.wait_queue);
+
+ /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
+ wake_up_all(&dev_priv->pending_flip_queue);
+}
+
+/**
+ * i915_reset_and_wakeup - do process context error handling work
+ * @dev_priv: i915 device private
+ *
+ * Fire an error uevent so userspace can see that a hang or error
+ * was detected.
+ */
+static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
+{
+ struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
+ char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
+ char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
+ char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
+
+ kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
+
+ DRM_DEBUG_DRIVER("resetting chip\n");
+ kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
+ /*
+ * In most cases it's guaranteed that we get here with an RPM
+ * reference held, for example because there is a pending GPU
+ * request that won't finish until the reset is done. This
+ * isn't the case at least when we get here by doing a
+ * simulated reset via debugfs, so get an RPM reference.
+ */
+ intel_runtime_pm_get(dev_priv);
+ intel_prepare_reset(dev_priv);
+
+ do {
+ /*
+ * All state reset _must_ be completed before we update the
+ * reset counter, for otherwise waiters might miss the reset
+ * pending state and not properly drop locks, resulting in
+ * deadlocks with the reset work.
+ */
+ if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
+ i915_reset(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+ }
+
+ /* We need to wait for anyone holding the lock to wakeup */
+ } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
+ I915_RESET_IN_PROGRESS,
+ TASK_UNINTERRUPTIBLE,
+ HZ));
+
+ intel_finish_reset(dev_priv);
+ intel_runtime_pm_put(dev_priv);
+
+ if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
+ kobject_uevent_env(kobj,
+ KOBJ_CHANGE, reset_done_event);
+
+ /*
+ * Note: The wake_up also serves as a memory barrier so that
+ * waiters see the updated value of the dev_priv->gpu_error.
+ */
+ wake_up_all(&dev_priv->gpu_error.reset_queue);
+}
+
void i915_error_state_get(struct drm_device *dev,
struct i915_error_state_file_priv *error_priv)
{
@@ -1671,3 +1773,52 @@ void i915_destroy_error_state(struct drm_device *dev)
if (error)
kref_put(&error->ref, i915_error_state_free);
}
+
+/**
+ * i915_handle_error - handle a gpu error
+ * @dev_priv: i915 device private
+ * @engine_mask: mask representing engines that are hung
+ * Do some basic checking of register state at error time and
+ * dump it to the syslog. Also call i915_capture_error_state() to make
+ * sure we get a record and make it available in debugfs. Fire a uevent
+ * so userspace knows something bad happened (should trigger collection
+ * of a ring dump etc.).
+ * @fmt: Error message format string
+ */
+void i915_handle_error(struct drm_i915_private *dev_priv,
+ u32 engine_mask,
+ const char *fmt, ...)
+{
+ va_list args;
+ char error_msg[80];
+
+ va_start(args, fmt);
+ vscnprintf(error_msg, sizeof(error_msg), fmt, args);
+ va_end(args);
+
+ i915_capture_error_state(dev_priv, engine_mask, error_msg);
+ i915_clear_error_registers(dev_priv);
+
+ if (!engine_mask)
+ return;
+
+ if (test_and_set_bit(I915_RESET_IN_PROGRESS,
+ &dev_priv->gpu_error.flags))
+ return;
+
+ /*
+ * Wakeup waiting processes so that the reset function
+ * i915_reset_and_wakeup doesn't deadlock trying to grab
+ * various locks. By setting I915_RESET_IN_PROGRESS first, the woken
+ * processes will see a reset in progress and back off,
+ * releasing their locks and then wait for the reset completion.
+ * We must do this for _all_ gpu waiters that might hold locks
+ * that the reset work needs to acquire.
+ *
+ * Note: The wake_up also provides a memory barrier to ensure that the
+ * waiters see the updated value of the reset flags.
+ */
+ i915_error_wake_up(dev_priv);
+
+ i915_reset_and_wakeup(dev_priv);
+}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6d7505b..f6869f0 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2571,83 +2571,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
return ret;
}
-static void i915_error_wake_up(struct drm_i915_private *dev_priv)
-{
- /*
- * Notify all waiters for GPU completion events that reset state has
- * been changed, and that they need to restart their wait after
- * checking for potential errors (and bail out to drop locks if there is
- * a gpu reset pending so that i915_error_work_func can acquire them).
- */
-
- /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
- wake_up_all(&dev_priv->gpu_error.wait_queue);
-
- /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
- wake_up_all(&dev_priv->pending_flip_queue);
-}
-
-/**
- * i915_reset_and_wakeup - do process context error handling work
- * @dev_priv: i915 device private
- *
- * Fire an error uevent so userspace can see that a hang or error
- * was detected.
- */
-static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
-{
- struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
- char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
- char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
- char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
-
- kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
-
- DRM_DEBUG_DRIVER("resetting chip\n");
- kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
- /*
- * In most cases it's guaranteed that we get here with an RPM
- * reference held, for example because there is a pending GPU
- * request that won't finish until the reset is done. This
- * isn't the case at least when we get here by doing a
- * simulated reset via debugs, so get an RPM reference.
- */
- intel_runtime_pm_get(dev_priv);
- intel_prepare_reset(dev_priv);
-
- do {
- /*
- * All state reset _must_ be completed before we update the
- * reset counter, for otherwise waiters might miss the reset
- * pending state and not properly drop locks, resulting in
- * deadlocks with the reset work.
- */
- if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
- i915_reset(dev_priv);
- mutex_unlock(&dev_priv->drm.struct_mutex);
- }
-
- /* We need to wait for anyone holding the lock to wakeup */
- } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
- I915_RESET_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE,
- HZ));
-
- intel_finish_reset(dev_priv);
- intel_runtime_pm_put(dev_priv);
-
- if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
- kobject_uevent_env(kobj,
- KOBJ_CHANGE, reset_done_event);
-
- /*
- * Note: The wake_up also serves as a memory barrier so that
- * waiters see the updated value of the dev_priv->gpu_error.
- */
- wake_up_all(&dev_priv->gpu_error.reset_queue);
-}
-
static inline void
i915_err_print_instdone(struct drm_i915_private *dev_priv,
struct intel_instdone *instdone)
@@ -2674,80 +2597,6 @@ i915_err_print_instdone(struct drm_i915_private *dev_priv,
slice, subslice, instdone->row[slice][subslice]);
}
-static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
-{
- u32 eir;
-
- if (!IS_GEN2(dev_priv))
- I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
-
- if (INTEL_GEN(dev_priv) < 4)
- I915_WRITE(IPEIR, I915_READ(IPEIR));
- else
- I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
-
- I915_WRITE(EIR, I915_READ(EIR));
- eir = I915_READ(EIR);
- if (eir) {
- /*
- * some errors might have become stuck,
- * mask them.
- */
- DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
- I915_WRITE(EMR, I915_READ(EMR) | eir);
- I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
- }
-}
-
-/**
- * i915_handle_error - handle a gpu error
- * @dev_priv: i915 device private
- * @engine_mask: mask representing engines that are hung
- * Do some basic checking of register state at error time and
- * dump it to the syslog. Also call i915_capture_error_state() to make
- * sure we get a record and make it available in debugfs. Fire a uevent
- * so userspace knows something bad happened (should trigger collection
- * of a ring dump etc.).
- * @fmt: Error message format string
- */
-void i915_handle_error(struct drm_i915_private *dev_priv,
- u32 engine_mask,
- const char *fmt, ...)
-{
- va_list args;
- char error_msg[80];
-
- va_start(args, fmt);
- vscnprintf(error_msg, sizeof(error_msg), fmt, args);
- va_end(args);
-
- i915_capture_error_state(dev_priv, engine_mask, error_msg);
- i915_clear_error_registers(dev_priv);
-
- if (!engine_mask)
- return;
-
- if (test_and_set_bit(I915_RESET_IN_PROGRESS,
- &dev_priv->gpu_error.flags))
- return;
-
- /*
- * Wakeup waiting processes so that the reset function
- * i915_reset_and_wakeup doesn't deadlock trying to grab
- * various locks. By bumping the reset counter first, the woken
- * processes will see a reset in progress and back off,
- * releasing their locks and then wait for the reset completion.
- * We must do this for _all_ gpu waiters that might hold locks
- * that the reset work needs to acquire.
- *
- * Note: The wake_up also provides a memory barrier to ensure that the
- * waiters see the updated value of the reset flags.
- */
- i915_error_wake_up(dev_priv);
-
- i915_reset_and_wakeup(dev_priv);
-}
-
/* Called from drm generic code, passed 'crtc' which
* we use as a pipe index
*/
--
2.7.4
More information about the Intel-gfx
mailing list