[Intel-gfx] [PATCH 5/5] drm/i915/error: Capture WA ctx batch in error state

Arun Siluvery arun.siluvery at linux.intel.com
Fri Jan 29 02:09:08 PST 2016


On 29/01/2016 07:52, Mika Kuoppala wrote:
> Arun Siluvery <arun.siluvery at linux.intel.com> writes:
>
>>  From Gen8 onwards we apply ctx workarounds using special batch buffers that
>> execute during save/restore; it is good to have them in the error state.
>>
>> Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h       |  2 +-
>>   drivers/gpu/drm/i915/i915_gpu_error.c | 25 +++++++++++++++++++++++++
>>   2 files changed, 26 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 4b199a4..8440c35 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -567,7 +567,7 @@ struct drm_i915_error_state {
>>   			int page_count;
>>   			u64 gtt_offset;
>>   			u32 *pages[0];
>> -		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
>> +		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page, *wa_ctx;
>>
>>   		struct drm_i915_error_request {
>>   			u64 ctx_desc;
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>> index 8b1a1c0..e2c32d4 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> @@ -561,6 +561,24 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>>   			}
>>   		}
>>
>> +		if ((obj = error->ring[i].wa_ctx)) {
>> +			u64 wa_ctx_offset = obj->gtt_offset;
>> +			u32 *wa_ctx_page = &obj->pages[0][0];
>> +
>> +			err_printf(m, "\n%s --- WA Ctx batch buffer = 0x%08llx\n",
>> +				   dev_priv->ring[i].name, wa_ctx_offset);
>> +			offset = 0;
>> +			for (elt = 0; elt < PAGE_SIZE/32; elt += 4) {
>
> PAGE_SIZE/16 ?
>
> Also we have wa_ctx->size. Is there a reason to output past that?

No reason. Note that wa_ctx->size is not the total size; it is the size of one WA 
batch, although we can easily get the total size by adding up the sizes of all (two) of them.
>
> The assumption is that after wa_ctx->size and BB_END, there should
> be only zeros. If the concern is that something has corrupted
> that space, you could print only the nonzero values after ctx_size?
There is no concern about it getting corrupted. I will update the patch to use 
wa_ctx->size and print only that many values.

regards
Arun

>
> Thanks,
> -Mika
>
>
>> +				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
>> +					   offset,
>> +					   wa_ctx_page[elt],
>> +					   wa_ctx_page[elt+1],
>> +					   wa_ctx_page[elt+2],
>> +					   wa_ctx_page[elt+3]);
>> +				offset += 16;
>> +			}
>> +		}
>> +
>>   		if ((obj = error->ring[i].ctx)) {
>>   			err_printf(m, "%s --- HW Context = 0x%08x\n",
>>   				   dev_priv->ring[i].name,
>> @@ -654,6 +672,8 @@ static void i915_error_state_free(struct kref *error_ref)
>>   		i915_error_object_free(error->ring[i].hws_page);
>>   		i915_error_object_free(error->ring[i].ctx);
>>   		kfree(error->ring[i].requests);
>> +		if (i == RCS)
>> +			i915_error_object_free(error->ring[i].wa_ctx);
>>   	}
>>
>>   	i915_error_object_free(error->semaphore_obj);
>> @@ -1165,6 +1185,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
>>   		error->ring[i].hws_page =
>>   			i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
>>
>> +		if (INTEL_INFO(dev)->gen >= 8 && ring->id == RCS) {
>> +			error->ring[i].wa_ctx =
>> +				i915_error_ggtt_object_create(dev_priv, ring->wa_ctx.obj);
>> +		}
>> +
>>   		i915_gem_record_active_context(ring, error, &error->ring[i]);
>>
>>   		count = 0;
>> --
>> 1.9.1
>



More information about the Intel-gfx mailing list