[Intel-gfx] [PATCH 16/16] drm/i915: Get the error state over the wire (HACKish)

Ben Widawsky benjamin.widawsky at intel.com
Tue Jul 1 20:17:51 CEST 2014


I was recently dealing with a bug where the system would hard hang
somewhere between hangcheck and reset. There was enough time after error
collection to get the error state out, but I couldn't get the reads of
the error state to work.

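This patch is also useful when the reset itself kills the machine and you
want to keep reset enabled but still get the error state.

When built with PUSH_TO_WIRE defined, the first captured error state is
also dumped through printk at capture time, so no read from userspace is
needed to get it out.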

Since I found the patch pretty useful, I decided to clean it up and
submit it. It was mostly meant as a one-off hack originally though.

If a maintainer decides it's useful, then here it is.

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c   |  2 +-
 drivers/gpu/drm/i915/i915_drv.h       |  3 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c | 31 +++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_sysfs.c     |  2 +-
 4 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6b7b32b..2daad46 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -929,7 +929,7 @@ static ssize_t i915_error_state_read(struct file *file, char __user *userbuf,
 	if (ret)
 		return ret;
 
-	ret = i915_error_state_to_str(&error_str, error_priv);
+	ret = i915_error_state_to_str(&error_str, error_priv->dev, error_priv->error);
 	if (ret)
 		goto out;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1045006..b6a4f1e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2544,7 +2544,8 @@ static inline void intel_display_crc_init(struct drm_device *dev) {}
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
-			    const struct i915_error_state_file_priv *error);
+			    struct drm_device *dev,
+			    const struct drm_i915_error_state *error);
 int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
 			      size_t count, loff_t pos);
 static inline void i915_error_state_buf_release(
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index e82e590..1540bf6 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -184,8 +184,22 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e,
 	__i915_error_advance(e, len);
 }
 
-#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
-#define err_puts(e, s) i915_error_puts(e, s)
+
+static bool wire = false;
+#define err_printf(e, ...) do {				\
+	if (wire) {					\
+		printk(__VA_ARGS__);			\
+	} else {					\
+		i915_error_printf(e, __VA_ARGS__);	\
+	}						\
+} while (0)
+#define err_puts(e, s) do {				\
+	if (wire) {					\
+		printk(s);				\
+	} else {					\
+		i915_error_puts(e, s);			\
+	}						\
+} while (0)
 
 static void print_error_buffers(struct drm_i915_error_state_buf *m,
 				const char *name,
@@ -240,7 +254,7 @@ static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a)
 
 static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 				  struct drm_device *dev,
-				  struct drm_i915_error_ring *ring)
+				  const struct drm_i915_error_ring *ring)
 {
 	if (!ring->valid)
 		return;
@@ -322,11 +336,10 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
 }
 
 int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
-			    const struct i915_error_state_file_priv *error_priv)
+			    struct drm_device *dev,
+			    const struct drm_i915_error_state *error)
 {
-	struct drm_device *dev = error_priv->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_error_state *error = error_priv->error;
 	int i, j, offset, elt;
 	int max_hangcheck_score;
 
@@ -1197,6 +1210,12 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged,
 	spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
 	if (dev_priv->gpu_error.first_error == NULL) {
 		dev_priv->gpu_error.first_error = error;
+#ifdef PUSH_TO_WIRE
+		/* Probably racy, but this is emergency debug */
+		wire = true;
+		i915_error_state_to_str(NULL, dev, error);
+		wire = false;
+#endif
 		error = NULL;
 	}
 	spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 86ce39a..6f4be9d 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -512,7 +512,7 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 	error_priv.dev = dev;
 	i915_error_state_get(dev, &error_priv);
 
-	ret = i915_error_state_to_str(&error_str, &error_priv);
+	ret = i915_error_state_to_str(&error_str, dev, error_priv.error);
 	if (ret)
 		goto out;
 
-- 
2.0.1




More information about the Intel-gfx mailing list