[Intel-gfx] [PATCH i-g-t 2/3] aubdump: Use write_reloc for filling out the ringbuffer

Wed Aug 23 20:31:19 UTC 2017

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>

On 23/08/17 18:14, Jason Ekstrand wrote:
> ---
>   tools/aubdump.c | 66 ++++++++++++++++++++++++++++-----------------------------
>   1 file changed, 32 insertions(+), 34 deletions(-)
>
> diff --git a/tools/aubdump.c b/tools/aubdump.c
> index c14c9fa..567de3d 100644
> --- a/tools/aubdump.c
> +++ b/tools/aubdump.c
> @@ -262,9 +262,36 @@ aub_write_trace_block(uint32_t type, void *virtual, uint32_t size, uint64_t gtt_
>   }
>   
>   static void
> +write_reloc(void *p, uint64_t v)
> +{
> +	if (gen >= 8) {
> +		/* From the Broadwell PRM Vol. 2a,
> +		 * MI_LOAD_REGISTER_MEM::MemoryAddress:
> +		 *
> +		 *	"This field specifies the address of the memory
> +		 *	location where the register value specified in the
> +		 *	DWord above will read from.  The address specifies
> +		 *	the DWord location of the data. Range =
> +		 *	GraphicsVirtualAddress[63:2] for a DWord register
> +		 *	GraphicsAddress [63:48] are ignored by the HW and
> +		 *	assumed to be in correct canonical form [63:48] ==
> +		 *	[47]."
> +		 *
> +		 * In practice, this will always mean the top bits are zero
> +		 * because of the GTT size limitation of the aubdump tool.
> +		 */
> +		const int shift = 63 - 47;
> +		*(uint64_t *)p = (((int64_t)v) << shift) >> shift;
> +	} else {
> +		*(uint32_t *)p = v;
> +	}
> +}
> +
> +static void
>   aub_dump_ringbuffer(uint64_t batch_offset, uint64_t offset, int ring_flag)
>   {
>   	uint32_t ringbuffer[4096];
> +	unsigned aub_mi_bbs_len;
>   	int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */
>   	int ring_count = 0;
>   
> @@ -275,14 +302,11 @@ aub_dump_ringbuffer(uint64_t batch_offset, uint64_t offset, int ring_flag)
>   
>   	/* Make a ring buffer to execute our batchbuffer. */
>   	memset(ringbuffer, 0, sizeof(ringbuffer));
> -	if (gen >= 8) {
> -		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2);
> -		ringbuffer[ring_count++] = batch_offset;
> -		ringbuffer[ring_count++] = batch_offset >> 32;
> -	} else {
> -		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START;
> -		ringbuffer[ring_count++] = batch_offset;
> -	}
> +
> +	aub_mi_bbs_len = gen >= 8 ? 3 : 2;
> +	ringbuffer[ring_count] = AUB_MI_BATCH_BUFFER_START | (aub_mi_bbs_len - 2);
> +	write_reloc(&ringbuffer[ring_count + 1], batch_offset);
> +	ring_count += aub_mi_bbs_len;
>   
>   	/* Write out the ring.  This appears to trigger execution of
>   	 * the ring in the simulator.
> @@ -299,32 +323,6 @@ aub_dump_ringbuffer(uint64_t batch_offset, uint64_t offset, int ring_flag)
>   	data_out(ringbuffer, ring_count * 4);
>   }
>   
> -static void
> -write_reloc(void *p, uint64_t v)
> -{
> -	if (gen >= 8) {
> -		/* From the Broadwell PRM Vol. 2a,
> -		 * MI_LOAD_REGISTER_MEM::MemoryAddress:
> -		 *
> -		 *	"This field specifies the address of the memory
> -		 *	location where the register value specified in the
> -		 *	DWord above will read from.  The address specifies
> -		 *	the DWord location of the data. Range =
> -		 *	GraphicsVirtualAddress[63:2] for a DWord register
> -		 *	GraphicsAddress [63:48] are ignored by the HW and
> -		 *	assumed to be in correct canonical form [63:48] ==
> -		 *	[47]."
> -		 *
> -		 * In practice, this will always mean the top bits are zero
> -		 * because of the GTT size limitation of the aubdump tool.
> -		 */
> -		const int shift = 63 - 47;
> -		*(uint64_t *)p = (((int64_t)v) << shift) >> shift;
> -	} else {
> -		*(uint32_t *)p = v;
> -	}
> -}
> -
>   static void *
>   relocate_bo(struct bo *bo, const struct drm_i915_gem_execbuffer2 *execbuffer2,
>   	    const struct drm_i915_gem_exec_object2 *obj)