[Intel-gfx] [PATCH 3/4] drm/i915: Record batch buffer following GPU error
Eric Anholt
eric at anholt.net
Tue Jan 5 18:55:07 CET 2010
On Mon, 4 Jan 2010 18:57:58 +0000, Chris Wilson <chris at chris-wilson.co.uk> wrote:
> In order to improve our diagnostic capabilities following a GPU hang
> and subsequent reset, we need to record the batch buffer that triggered
> the error. We assume that the current batch buffer, plus a few details
> about what else is on the active list, will be sufficient -- at the very
> least an improvement over nothing.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 32 ++++++++++
> drivers/gpu/drm/i915/i915_drv.h | 11 +++
> drivers/gpu/drm/i915/i915_irq.c | 118 +++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_reg.h | 1 +
> 4 files changed, 162 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 463e8d0..6521c83 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -380,6 +380,38 @@ static int i915_error_state(struct seq_file *m, void *unused)
> seq_printf(m, " INSTDONE1: 0x%08x\n", error->instdone1);
> }
>
> + if (error->active_bo_count) {
> + int i;
> +
> + seq_printf(m, "Buffers [%d]:\n", error->active_bo_count);
> +
> + for (i = 0; i < error->active_bo_count; i++) {
> + seq_printf(m, " %08x %8zd %08x %08x",
> + error->active_bo[i].gtt_offset,
> + error->active_bo[i].size,
> + error->active_bo[i].read_domains,
> + error->active_bo[i].write_domain);
> +
> + if (error->active_bo[i].name)
> + seq_printf(m, " (name: %d)", error->active_bo[i].name);
> + if (error->active_bo[i].fence_reg != I915_FENCE_REG_NONE)
> + seq_printf(m, " (fence: %d)", error->active_bo[i].fence_reg);
> +
> + seq_printf(m, "\n");
> + }
> + }
> +
> + if (error->batchbuffer &&
> + i915_gem_object_get_pages(error->batchbuffer) == 0) {
> + struct drm_gem_object *obj = error->batchbuffer;
> + struct drm_i915_gem_object *obj_priv = obj->driver_private;
> +
> + seq_printf(m, "--- gtt_offset = 0x%08x\n", obj_priv->gtt_offset);
> + i915_dump_pages(m, obj_priv->pages, obj->size / PAGE_SIZE);
> +
> + i915_gem_object_put_pages(obj);
> + }
> +
> out:
> spin_unlock_irqrestore(&dev_priv->error_lock, flags);
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 29dd676..7b7ea9e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -150,7 +150,18 @@ struct drm_i915_error_state {
> u32 instps;
> u32 instdone1;
> u32 seqno;
> + u64 bbaddr;
> struct timeval time;
> + struct drm_gem_object *batchbuffer;
> + struct drm_i915_error_buffer {
> + size_t size;
> + u32 name;
> + u32 gtt_offset;
> + u32 read_domains;
> + u32 write_domain;
> + u32 fence_reg;
> + } *active_bo;
> + u32 active_bo_count;
> };
>
> struct drm_i915_display_funcs {
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 7cd8110..86b2f53 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -370,6 +370,51 @@ static void i915_error_work_func(struct work_struct *work)
> }
> }
>
> +static struct drm_gem_object
> +*clone_obj(struct drm_device *dev,
> + struct drm_gem_object *src)
> +{
> + struct drm_gem_object *dst;
> + struct drm_i915_gem_object *src_priv, *dst_priv;
> + int page, page_count;
> +
> + dst = drm_gem_object_alloc(dev, src->size);
> + if (dst == NULL)
> + return NULL;
> +
> + if (i915_gem_object_get_pages(src))
> + goto error_unref;
> +
> + if (i915_gem_object_get_pages(dst))
> + goto error_src;
> +
> + src_priv = src->driver_private;
> + dst_priv = dst->driver_private;
> +
> + page_count = src->size / PAGE_SIZE;
> + for (page = 0; page < page_count; page++) {
> + memcpy(kmap_atomic(dst_priv->pages[page], KM_USER1),
> + kmap_atomic(src_priv->pages[page], KM_USER0),
> + PAGE_SIZE);
> + kunmap_atomic(dst_priv->pages[page], KM_USER1);
> + kunmap_atomic(src_priv->pages[page], KM_USER0);
> + }
> +
> + i915_gem_object_put_pages(dst);
> + i915_gem_object_put_pages(src);
> +
> + /* We lie here, but it makes later analysis easier. */
> + dst_priv->gtt_offset = src_priv->gtt_offset;
> +
> + return dst;
> +
> +error_src:
> + i915_gem_object_put_pages(src);
> +error_unref:
> + drm_gem_object_unreference(dst);
> + return NULL;
> +}
> +
> /**
> * i915_capture_error_state - capture an error record for later analysis
> * @dev: drm device
> @@ -382,8 +427,10 @@ static void i915_error_work_func(struct work_struct *work)
> static void i915_capture_error_state(struct drm_device *dev)
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> + struct drm_i915_gem_object *obj_priv;
> struct drm_i915_error_state *error;
> unsigned long flags;
> + int count;
>
> spin_lock_irqsave(&dev_priv->error_lock, flags);
> if (dev_priv->first_error)
> @@ -405,6 +452,7 @@ static void i915_capture_error_state(struct drm_device *dev)
> error->ipehr = I915_READ(IPEHR);
> error->instdone = I915_READ(INSTDONE);
> error->acthd = I915_READ(ACTHD);
> + error->bbaddr = 0; /* XXX ? */
> } else {
> error->ipeir = I915_READ(IPEIR_I965);
> error->ipehr = I915_READ(IPEHR_I965);
> @@ -412,7 +460,53 @@ static void i915_capture_error_state(struct drm_device *dev)
> error->instps = I915_READ(INSTPS);
> error->instdone1 = I915_READ(INSTDONE1);
> error->acthd = I915_READ(ACTHD_I965);
> + error->bbaddr = I915_READ64(BB_ADDR);
> + }
> +
> + /* Grab the current batchbuffer, most likely to have crashed. */
> + error->batchbuffer = NULL;
> + spin_lock(&dev_priv->mm.active_list_lock);
> + list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
> + struct drm_gem_object *obj = obj_priv->obj;
> +
> + if (error->bbaddr >= obj_priv->gtt_offset &&
> + error->bbaddr < obj_priv->gtt_offset + obj->size) {
> + /* We need to copy this to an anonymous buffer as
> + * the simplest method to avoid being overwritten
> + * by userspace.
> + */
> + error->batchbuffer = clone_obj(dev, obj);
> + break;
> + }
> + }
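Grabbing the spinlock and then doing object allocation while holding it?
That seems unlikely to actually work: clone_obj() calls
drm_gem_object_alloc() and i915_gem_object_get_pages(), which can sleep,
and sleeping is not allowed while a spinlock is held.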
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 197 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20100105/25dc6051/attachment.sig>
More information about the Intel-gfx
mailing list