[Intel-gfx] [PATCH 5/5] drm/i915/error: Capture WA ctx batch in error state

Thu Jan 28 23:52:08 PST 2016

Arun Siluvery <arun.siluvery at linux.intel.com> writes:

> From Gen8 onwards we apply ctx workarounds using special batch buffers that
> execute during save/restore, good to have them in error state.
>
> Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h       |  2 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c | 25 +++++++++++++++++++++++++
>  2 files changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4b199a4..8440c35 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -567,7 +567,7 @@ struct drm_i915_error_state {
>  			int page_count;
>  			u64 gtt_offset;
>  			u32 *pages[0];
> -		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
> +		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page, *wa_ctx;
>  
>  		struct drm_i915_error_request {
>  			u64 ctx_desc;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 8b1a1c0..e2c32d4 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -561,6 +561,24 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>  			}
>  		}
>  
> +		if ((obj = error->ring[i].wa_ctx)) {
> +			u64 wa_ctx_offset = obj->gtt_offset;
> +			u32 *wa_ctx_page = &obj->pages[0][0];
> +
> +			err_printf(m, "\n%s --- WA Ctx batch buffer = 0x%08llx\n",
> +				   dev_priv->ring[i].name, wa_ctx_offset);
> +			offset = 0;
> +			for (elt = 0; elt < PAGE_SIZE/32; elt += 4) {

Should this be PAGE_SIZE/16?

Also, we have wa_ctx->size. Is there a reason to output past that?

The assumption is that after wa_ctx->size and BB_END, there should
be only zeros. If the concern is that something may have corrupted
that space, you could print only the nonzero values past wa_ctx->size?

Thanks,
-Mika

> +				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> +					   offset,
> +					   wa_ctx_page[elt],
> +					   wa_ctx_page[elt+1],
> +					   wa_ctx_page[elt+2],
> +					   wa_ctx_page[elt+3]);
> +				offset += 16;
> +			}
> +		}
> +
>  		if ((obj = error->ring[i].ctx)) {
>  			err_printf(m, "%s --- HW Context = 0x%08x\n",
>  				   dev_priv->ring[i].name,
> @@ -654,6 +672,8 @@ static void i915_error_state_free(struct kref *error_ref)
>  		i915_error_object_free(error->ring[i].hws_page);
>  		i915_error_object_free(error->ring[i].ctx);
>  		kfree(error->ring[i].requests);
> +		if (i == RCS)
> +			i915_error_object_free(error->ring[i].wa_ctx);
>  	}
>  
>  	i915_error_object_free(error->semaphore_obj);
> @@ -1165,6 +1185,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  		error->ring[i].hws_page =
>  			i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
>  
> +		if (INTEL_INFO(dev)->gen >= 8 && ring->id == RCS) {
> +			error->ring[i].wa_ctx =
> +				i915_error_ggtt_object_create(dev_priv, ring->wa_ctx.obj);
> +		}
> +
>  		i915_gem_record_active_context(ring, error, &error->ring[i]);
>  
>  		count = 0;
> -- 
> 1.9.1