[Intel-gfx] [PATCH 3/4] drm/i915: Record batch buffer following GPU error
Chris Wilson
chris at chris-wilson.co.uk
Mon Jan 4 19:57:58 CET 2010
In order to improve our diagnostic capabilities following a GPU hang
and subsequent reset, we need to record the batch buffer that triggered
the error. We assume that the current batch buffer, plus a few details
about what else is on the active list, will be sufficient -- at the very
least an improvement over nothing.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_debugfs.c | 32 ++++++++++
drivers/gpu/drm/i915/i915_drv.h | 11 +++
drivers/gpu/drm/i915/i915_irq.c | 118 +++++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_reg.h | 1 +
4 files changed, 162 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 463e8d0..6521c83 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -380,6 +380,38 @@ static int i915_error_state(struct seq_file *m, void *unused)
seq_printf(m, " INSTDONE1: 0x%08x\n", error->instdone1);
}
+ if (error->active_bo_count) {
+ int i;
+
+ seq_printf(m, "Buffers [%d]:\n", error->active_bo_count);
+
+ for (i = 0; i < error->active_bo_count; i++) {
+ seq_printf(m, " %08x %8zd %08x %08x",
+ error->active_bo[i].gtt_offset,
+ error->active_bo[i].size,
+ error->active_bo[i].read_domains,
+ error->active_bo[i].write_domain);
+
+ if (error->active_bo[i].name)
+ seq_printf(m, " (name: %d)", error->active_bo[i].name);
+ if (error->active_bo[i].fence_reg != I915_FENCE_REG_NONE)
+ seq_printf(m, " (fence: %d)", error->active_bo[i].fence_reg);
+
+ seq_printf(m, "\n");
+ }
+ }
+
+ if (error->batchbuffer &&
+ i915_gem_object_get_pages(error->batchbuffer) == 0) {
+ struct drm_gem_object *obj = error->batchbuffer;
+ struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
+ seq_printf(m, "--- gtt_offset = 0x%08x\n", obj_priv->gtt_offset);
+ i915_dump_pages(m, obj_priv->pages, obj->size / PAGE_SIZE);
+
+ i915_gem_object_put_pages(obj);
+ }
+
out:
spin_unlock_irqrestore(&dev_priv->error_lock, flags);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 29dd676..7b7ea9e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -150,7 +150,18 @@ struct drm_i915_error_state {
u32 instps;
u32 instdone1;
u32 seqno;
+ u64 bbaddr;
struct timeval time;
+ struct drm_gem_object *batchbuffer;
+ struct drm_i915_error_buffer {
+ size_t size;
+ u32 name;
+ u32 gtt_offset;
+ u32 read_domains;
+ u32 write_domain;
+ u32 fence_reg;
+ } *active_bo;
+ u32 active_bo_count;
};
struct drm_i915_display_funcs {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7cd8110..86b2f53 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -370,6 +370,51 @@ static void i915_error_work_func(struct work_struct *work)
}
}
+/* Snapshot @src into a newly allocated GEM object; returns NULL on failure.
+ * The caller runs with interrupts disabled, so the KM_IRQ slots are used,
+ * and kunmap_atomic() is given the mapped address rather than the page.
+ * NOTE(review): alloc/get_pages may sleep, yet the caller holds spinlocks.
+ */
+static struct drm_gem_object *
+clone_obj(struct drm_device *dev, struct drm_gem_object *src)
+{
+	struct drm_i915_gem_object *src_priv, *dst_priv;
+	struct drm_gem_object *dst;
+	int page, page_count;
+
+	dst = drm_gem_object_alloc(dev, src->size);
+	if (dst == NULL)
+		return NULL;
+	if (i915_gem_object_get_pages(src))
+		goto error_unref;
+	if (i915_gem_object_get_pages(dst))
+		goto error_src;
+
+	src_priv = src->driver_private;
+	dst_priv = dst->driver_private;
+	page_count = src->size / PAGE_SIZE;
+	for (page = 0; page < page_count; page++) {
+		void *d = kmap_atomic(dst_priv->pages[page], KM_IRQ1);
+		void *s = kmap_atomic(src_priv->pages[page], KM_IRQ0);
+
+		memcpy(d, s, PAGE_SIZE);
+		kunmap_atomic(s, KM_IRQ0);
+		kunmap_atomic(d, KM_IRQ1);
+	}
+	i915_gem_object_put_pages(dst);
+	i915_gem_object_put_pages(src);
+
+	/* We lie here, but it makes later analysis easier. */
+	dst_priv->gtt_offset = src_priv->gtt_offset;
+	return dst;
+
+error_src:
+	i915_gem_object_put_pages(src);
+error_unref:
+	drm_gem_object_unreference(dst);
+	return NULL;
+}
+
/**
* i915_capture_error_state - capture an error record for later analysis
* @dev: drm device
@@ -382,8 +427,10 @@ static void i915_error_work_func(struct work_struct *work)
static void i915_capture_error_state(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_i915_gem_object *obj_priv;
struct drm_i915_error_state *error;
unsigned long flags;
+ int count;
spin_lock_irqsave(&dev_priv->error_lock, flags);
if (dev_priv->first_error)
@@ -405,6 +452,7 @@ static void i915_capture_error_state(struct drm_device *dev)
error->ipehr = I915_READ(IPEHR);
error->instdone = I915_READ(INSTDONE);
error->acthd = I915_READ(ACTHD);
+ error->bbaddr = 0; /* XXX ? */
} else {
error->ipeir = I915_READ(IPEIR_I965);
error->ipehr = I915_READ(IPEHR_I965);
@@ -412,7 +460,53 @@ static void i915_capture_error_state(struct drm_device *dev)
error->instps = I915_READ(INSTPS);
error->instdone1 = I915_READ(INSTDONE1);
error->acthd = I915_READ(ACTHD_I965);
+ error->bbaddr = I915_READ64(BB_ADDR);
+ }
+
+ /* Grab the current batchbuffer, most likely to have crashed. */
+ error->batchbuffer = NULL;
+ spin_lock(&dev_priv->mm.active_list_lock);
+ list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+ struct drm_gem_object *obj = obj_priv->obj;
+
+ if (error->bbaddr >= obj_priv->gtt_offset &&
+ error->bbaddr < obj_priv->gtt_offset + obj->size) {
+ /* We need to copy this to an anonymous buffer as
+ * the simplest method to avoid being overwritten
+ * by userspace.
+ */
+ error->batchbuffer = clone_obj(dev, obj);
+ break;
+ }
+ }
+
+ /* Record buffers on the active list. */
+ error->active_bo = NULL;
+ error->active_bo_count = 0;
+
+ count = 0;
+ list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list)
+ count++;
+
+ if (count)
+ error->active_bo = kmalloc(sizeof(*error->active_bo)*count, GFP_ATOMIC);
+
+ if (error->active_bo) {
+ count = 0;
+ list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+ struct drm_gem_object *obj = obj_priv->obj;
+
+ error->active_bo[count].size = obj->size;
+ error->active_bo[count].name = obj->name;
+ error->active_bo[count].gtt_offset = obj_priv->gtt_offset;
+ error->active_bo[count].read_domains = obj->read_domains;
+ error->active_bo[count].write_domain = obj->write_domain;
+ error->active_bo[count].fence_reg = obj_priv->fence_reg;
+ count++;
+ }
+ error->active_bo_count = count;
}
+ spin_unlock(&dev_priv->mm.active_list_lock);
do_gettimeofday(&error->time);
@@ -422,6 +516,30 @@ out:
spin_unlock_irqrestore(&dev_priv->error_lock, flags);
}
+/* Release the recorded error state, if any. The record is detached under
+ * error_lock, then freed after dropping the batchbuffer clone's reference.
+ */
+void i915_destroy_error_state(struct drm_device *dev)
+{
+	struct drm_i915_private *i915 = dev->dev_private;
+	struct drm_i915_error_state *state;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&i915->error_lock, irqflags);
+	state = i915->first_error;
+	i915->first_error = NULL;
+	spin_unlock_irqrestore(&i915->error_lock, irqflags);
+	if (!state)
+		return;
+	if (state->batchbuffer != NULL) {
+		mutex_lock(&dev->struct_mutex);
+		drm_gem_object_unreference(state->batchbuffer);
+		mutex_unlock(&dev->struct_mutex);
+	}
+	kfree(state->active_bo);
+	kfree(state);
+}
+
/**
* i915_handle_error - handle an error interrupt
* @dev: drm device
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f79b133..e0ee576 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -324,6 +324,7 @@
#define CM0_COLOR_EVICT_DISABLE (1<<3)
#define CM0_DEPTH_WRITE_DISABLE (1<<1)
#define CM0_RC_OP_FLUSH_DISABLE (1<<0)
+#define BB_ADDR 0x02140 /* 8 bytes */
#define GFX_FLSH_CNTL 0x02170 /* 915+ only */
--
1.6.5.7
More information about the Intel-gfx
mailing list