[Intel-gfx] [PATCH 14/68] drm/i915: Get the error state over the wire (HACKish)
Ben Widawsky
benjamin.widawsky at intel.com
Fri Aug 22 05:11:37 CEST 2014
I was dealing with a bug recently where the system would hard hang
somewhere between hangcheck and reset. There was time after error
collection to actually get my error state out, but I couldn't get the
reads to work.
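This patch is also useful when the reset itself kills the machine and you
want to keep reset enabled but still get the error state. When built with
PUSH_TO_WIRE defined, the first captured error state is dumped through
printk() at capture time, so it can be read from the kernel log (e.g. over a
serial or network console) instead of from the debugfs/sysfs error state file.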
Since I found the patch pretty useful, I decided to clean it up and
submit it. It was mostly meant as a one-off hack originally though.
If a maintainer decides it's useful, then here it is.
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
drivers/gpu/drm/i915/i915_drv.h | 3 ++-
drivers/gpu/drm/i915/i915_gpu_error.c | 31 +++++++++++++++++++++++++------
drivers/gpu/drm/i915/i915_sysfs.c | 2 +-
4 files changed, 29 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 330caa1..16ae700 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -931,7 +931,7 @@ static ssize_t i915_error_state_read(struct file *file, char __user *userbuf,
if (ret)
return ret;
- ret = i915_error_state_to_str(&error_str, error_priv);
+ ret = i915_error_state_to_str(&error_str, error_priv->dev, error_priv->error);
if (ret)
goto out;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 02d81b03..04c9e2c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2619,7 +2619,8 @@ static inline void intel_display_crc_init(struct drm_device *dev) {}
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
- const struct i915_error_state_file_priv *error);
+ struct drm_device *dev,
+ const struct drm_i915_error_state *error);
int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
size_t count, loff_t pos);
static inline void i915_error_state_buf_release(
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 82508dd..c391268 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -184,8 +184,22 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e,
__i915_error_advance(e, len);
}
-#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
-#define err_puts(e, s) i915_error_puts(e, s)
+/* Emergency debug: while 'wire' is set, err_printf()/err_puts() go to printk() and 'e' is unused. */
+static bool wire; /* toggled without locking around the PUSH_TO_WIRE dump */
+#define err_printf(e, ...) do { \
+ if (wire) { \
+ printk(__VA_ARGS__); \
+ } else { \
+ i915_error_printf(e, __VA_ARGS__); \
+ } \
+} while (0)
+#define err_puts(e, s) do { \
+ if (wire) { \
+ printk("%s", s); /* s is data, never a format string */ \
+ } else { \
+ i915_error_puts(e, s); \
+ } \
+} while (0)
static void print_error_buffers(struct drm_i915_error_state_buf *m,
const char *name,
@@ -242,7 +256,7 @@ static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a)
static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
struct drm_device *dev,
- struct drm_i915_error_ring *ring)
+ const struct drm_i915_error_ring *ring)
{
if (!ring->valid)
return;
@@ -324,11 +338,10 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
}
int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
- const struct i915_error_state_file_priv *error_priv)
+ struct drm_device *dev,
+ const struct drm_i915_error_state *error)
{
- struct drm_device *dev = error_priv->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- struct drm_i915_error_state *error = error_priv->error;
struct drm_i915_error_object *obj;
int i, j, offset, elt;
int max_hangcheck_score;
@@ -1266,6 +1279,12 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged,
spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
if (dev_priv->gpu_error.first_error == NULL) {
dev_priv->gpu_error.first_error = error;
+#ifdef PUSH_TO_WIRE
+ /* Probably racy, but this is emergency debug */
+ wire = true;
+ i915_error_state_to_str(NULL, dev, error);
+ wire = false;
+#endif
error = NULL;
}
spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index ae7fd8f..b559781 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -547,7 +547,7 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
error_priv.dev = dev;
i915_error_state_get(dev, &error_priv);
- ret = i915_error_state_to_str(&error_str, &error_priv);
+ ret = i915_error_state_to_str(&error_str, dev, error_priv.error);
if (ret)
goto out;
--
2.0.4
More information about the Intel-gfx
mailing list