[Intel-gfx] [PATCH 2/2] intel: Add .aub file output support.

Kenneth Graunke kenneth at whitecape.org
Thu Mar 8 20:14:27 CET 2012


On 03/07/2012 06:51 PM, Yuanhan Liu wrote:
> On Wed, Mar 07, 2012 at 11:21:07AM -0800, Eric Anholt wrote:
>> From: Kenneth Graunke<kenneth at whitecape.org>
>>
>> This will allow the driver to capture all of its execution state to a
>> file for later debugging.  intel_gpu_dump is limited in that it only
>> captures batchbuffers, and Mesa's captures, while more complete, still
>> capture only a portion of the state involved in execution.
>>
>> It also enables us to load traces in our internal simulator.
>>
>> Signed-off-by: Eric Anholt<eric at anholt.net>
>> Signed-off-by: Yuanhan Liu<yuanhan.liu at linux.intel.com>
>> Signed-off-by: Kenneth Graunke<kenneth at whitecape.org>
>> ---
>>   intel/Makefile.am        |    1 +
>>   intel/intel_aub.h        |  123 ++++++++++++++++++
>>   intel/intel_bufmgr.h     |   14 ++
>>   intel/intel_bufmgr_gem.c |  315 ++++++++++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 453 insertions(+), 0 deletions(-)
>>   create mode 100644 intel/intel_aub.h
>>
>> diff --git a/intel/Makefile.am b/intel/Makefile.am
>> index 06362b6..dc01a96 100644
>> --- a/intel/Makefile.am
>> +++ b/intel/Makefile.am
>> @@ -53,6 +53,7 @@ intel_bufmgr_gem_o_CFLAGS = $(AM_CFLAGS) -c99
>>
>>   libdrm_intelincludedir = ${includedir}/libdrm
>>   libdrm_intelinclude_HEADERS = intel_bufmgr.h \
>> +			      intel_aub.h \
>>   			      intel_debug.h
>>
>>   # This may be interesting even outside of "make check", due to the -dump option.
>> diff --git a/intel/intel_aub.h b/intel/intel_aub.h
>> new file mode 100644
>> index 0000000..a36fd53
>> --- /dev/null
>> +++ b/intel/intel_aub.h
>> @@ -0,0 +1,123 @@
>> +/*
>> + * Copyright © 2010 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *    Eric Anholt<eric at anholt.net>
>> + *
>> + */
>> +
>> +/** @file intel_aub.h
>> + *
>> + * The AUB file is a file format used by Intel's internal simulation
>> + * and other validation tools.  It can be used at various levels by a
>> + * driver to input state to the simulated hardware or a replaying
>> + * debugger.
>> + *
>> + * We choose to dump AUB files using the trace block format for ease
>> + * of implementation -- dump out the blocks of memory as plain blobs
>> + * and insert ring commands to execute the batchbuffer blob.
>> + */
>> +
>> +#ifndef _INTEL_AUB_H
>> +#define _INTEL_AUB_H
>> +
>> +#define AUB_MI_NOOP			(0)
>> +#define AUB_MI_BATCH_BUFFER_START 	(0x31<<  23)
>> +#define AUB_PIPE_CONTROL		(0x7a000002)
>> +
>> +/* DW0: instruction type. */
>> +
>> +#define CMD_AUB			(7<<  29)
>> +
>> +#define CMD_AUB_HEADER		(CMD_AUB | (1<<  23) | (0x05<<  16))
>> +/* DW1 */
>> +# define AUB_HEADER_MAJOR_SHIFT		24
>> +# define AUB_HEADER_MINOR_SHIFT		16
>> +
>> +#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1<<  23) | (0x41<<  16))
>> +#define CMD_AUB_DUMP_BMP           (CMD_AUB | (1<<  23) | (0x9e<<  16))
>> +
>> +/* DW1 */
>> +#define AUB_TRACE_OPERATION_MASK	0x000000ff
>> +#define AUB_TRACE_OP_COMMENT		0x00000000
>> +#define AUB_TRACE_OP_DATA_WRITE		0x00000001
>> +#define AUB_TRACE_OP_COMMAND_WRITE	0x00000002
>> +#define AUB_TRACE_OP_MMIO_WRITE		0x00000003
>> +// operation = TRACE_DATA_WRITE, Type
>> +#define AUB_TRACE_TYPE_MASK		0x0000ff00
>> +#define AUB_TRACE_TYPE_NOTYPE		(0<<  8)
>> +#define AUB_TRACE_TYPE_BATCH		(1<<  8)
>> +#define AUB_TRACE_TYPE_VERTEX_BUFFER	(5<<  8)
>> +#define AUB_TRACE_TYPE_2D_MAP		(6<<  8)
>> +#define AUB_TRACE_TYPE_CUBE_MAP		(7<<  8)
>> +#define AUB_TRACE_TYPE_VOLUME_MAP	(9<<  8)
>> +#define AUB_TRACE_TYPE_1D_MAP		(10<<  8)
>> +#define AUB_TRACE_TYPE_CONSTANT_BUFFER	(11<<  8)
>> +#define AUB_TRACE_TYPE_CONSTANT_URB	(12<<  8)
>> +#define AUB_TRACE_TYPE_INDEX_BUFFER	(13<<  8)
>> +#define AUB_TRACE_TYPE_GENERAL		(14<<  8)
>> +#define AUB_TRACE_TYPE_SURFACE		(15<<  8)
>> +
>> +
>> +// operation = TRACE_COMMAND_WRITE, Type =
>> +#define AUB_TRACE_TYPE_RING_HWB		(1<<  8)
>> +#define AUB_TRACE_TYPE_RING_PRB0	(2<<  8)
>> +#define AUB_TRACE_TYPE_RING_PRB1	(3<<  8)
>> +#define AUB_TRACE_TYPE_RING_PRB2	(4<<  8)
>> +
>> +// Address space
>> +#define AUB_TRACE_ADDRESS_SPACE_MASK	0x00ff0000
>> +#define AUB_TRACE_MEMTYPE_GTT		(0<<  16)
>> +#define AUB_TRACE_MEMTYPE_LOCAL		(1<<  16)
>> +#define AUB_TRACE_MEMTYPE_NONLOCAL	(2<<  16)
>> +#define AUB_TRACE_MEMTYPE_PCI		(3<<  16)
>> +#define AUB_TRACE_MEMTYPE_GTT_ENTRY     (4<<  16)
>> +
>> +/* DW2 */
>> +// operation = TRACE_DATA_WRITE, Type = TRACE_DATA_WRITE_GENERAL_STATE
>> +#define AUB_TRACE_GENERAL_STATE_MASK	0x000000ff
>> +
>> +#define AUB_TRACE_VS_STATE		0x00000001
>> +#define AUB_TRACE_GS_STATE		0x00000002
>> +#define AUB_TRACE_CL_STATE		0x00000003
>> +#define AUB_TRACE_SF_STATE		0x00000004
>> +#define AUB_TRACE_WM_STATE		0x00000005
>> +#define AUB_TRACE_CC_STATE		0x00000006
>> +#define AUB_TRACE_CL_VP			0x00000007
>> +#define AUB_TRACE_SF_VP			0x00000008
>> +#define AUB_TRACE_CC_VP			0x00000009
>> +#define AUB_TRACE_SAMPLER_STATE		0x0000000a
>> +#define AUB_TRACE_KERNEL		0x0000000b
>> +#define AUB_TRACE_SCRATCH		0x0000000c
>> +#define AUB_TRACE_SDC			0x0000000d
>> +#define AUB_TRACE_BLEND_STATE		0x00000016
>> +#define AUB_TRACE_DEPTH_STENCIL_STATE	0x00000017
>> +
>> +// operation = TRACE_DATA_WRITE, Type = TRACE_DATA_WRITE_SURFACE_STATE
>> +#define AUB_TRACE_SURFACE_STATE_MASK	0x00000ff00
>> +#define AUB_TRACE_BINDING_TABLE		0x000000100
>> +#define AUB_TRACE_SURFACE_STATE		0x000000200
>> +
>> +/* DW3: address */
>> +/* DW4: len */
>> +
>> +#endif /* _INTEL_AUB_H */
>> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
>> index 8036031..fa6f2b8 100644
>> --- a/intel/intel_bufmgr.h
>> +++ b/intel/intel_bufmgr.h
>> @@ -36,6 +36,7 @@
>>
>>   #include<stdio.h>
>>   #include<stdint.h>
>> +#include<stdio.h>
>>
>>   struct drm_clip_rect;
>>
>> @@ -84,6 +85,13 @@ struct _drm_intel_bo {
>>   	int handle;
>>   };
>>
>> +enum aub_dump_bmp_format {
>> +	AUB_DUMP_BMP_FORMAT_8BIT = 1,
>> +	AUB_DUMP_BMP_FORMAT_ARGB_4444 = 4,
>> +	AUB_DUMP_BMP_FORMAT_ARGB_0888 = 6,
>> +	AUB_DUMP_BMP_FORMAT_ARGB_8888 = 7,
>> +};
>> +
>>   #define BO_ALLOC_FOR_RENDER (1<<0)
>>
>>   drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
>> @@ -154,6 +162,12 @@ int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo);
>>   void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start);
>>   void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
>>
>> +void drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable);
>> +void drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
>> +				   int x1, int y1, int width, int height,
>> +				   enum aub_dump_bmp_format format,
>> +				   int pitch, int offset);
>> +
>>   int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id);
>>
>>   int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total);
>> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
>> index ba38e50..45585f7 100644
>> --- a/intel/intel_bufmgr_gem.c
>> +++ b/intel/intel_bufmgr_gem.c
>> @@ -58,6 +58,7 @@
>>   #include "intel_bufmgr.h"
>>   #include "intel_bufmgr_priv.h"
>>   #include "intel_chipset.h"
>> +#include "intel_aub.h"
>>   #include "string.h"
>>
>>   #include "i915_drm.h"
>> @@ -121,6 +122,9 @@ typedef struct _drm_intel_bufmgr_gem {
>>   	unsigned int bo_reuse : 1;
>>   	unsigned int no_exec : 1;
>>   	bool fenced_relocs;
>> +
>> +	FILE *aub_file;
>> +	uint32_t aub_offset;
>>   } drm_intel_bufmgr_gem;
>>
>>   #define DRM_INTEL_RELOC_FENCE (1<<0)
>> @@ -215,6 +219,8 @@ struct _drm_intel_bo_gem {
>>
>>   	/** Flags that we may need to do the SW_FINSIH ioctl on unmap. */
>>   	bool mapped_cpu_write;
>> +
>> +	uint32_t aub_offset;
>>   };
>>
>>   static unsigned int
>> @@ -1715,6 +1721,247 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
>>   	}
>>   }
>>
>> +static void
>> +aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data)
>> +{
>> +	fwrite(&data, 1, 4, bufmgr_gem->aub_file);
>> +}
>> +
>> +static void
>> +aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size)
>> +{
>> +	fwrite(data, 1, size, bufmgr_gem->aub_file);
>> +}
>> +
>> +static void
>> +aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size)
>> +{
>> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>> +	uint32_t *data;
>> +	unsigned int i;
>> +
>> +	data = malloc(bo->size);
>> +	drm_intel_bo_get_subdata(bo, offset, size, data);
>> +
>> +	/* Easy mode: write out bo with no relocations */
>> +	if (!bo_gem->reloc_count) {
>> +		aub_out_data(bufmgr_gem, data, size);
>> +		free(data);
>> +		return;
>> +	}
>> +
>> +	/* Otherwise, handle the relocations while writing. */
>> +	for (i = 0; i<  size / 4; i++) {
>> +		int r;
>> +		for (r = 0; r<  bo_gem->reloc_count; r++) {
>> +			struct drm_i915_gem_relocation_entry *reloc;
>> +			drm_intel_reloc_target *info;
>> +
>> +			reloc =&bo_gem->relocs[r];
>> +			info =&bo_gem->reloc_target_info[r];
>> +
>> +			if (reloc->offset == offset + i * 4) {
>> +				drm_intel_bo_gem *target_gem;
>> +				uint32_t val;
>> +
>> +				target_gem = (drm_intel_bo_gem *)info->bo;
>> +
>> +				val = reloc->delta;
>> +				val += target_gem->aub_offset;
>> +
>> +				aub_out(bufmgr_gem, val);
>> +				data[i] = val;
>> +				break;
>> +			}
>> +		}
>> +		if (r == bo_gem->reloc_count) {
>> +			/* no relocation, just the data */
>> +			aub_out(bufmgr_gem, data[i]);
>> +		}
>> +	}
>> +
>> +	free(data);
>> +}
>> +
>> +static void
>> +aub_bo_get_address(drm_intel_bo *bo)
>> +{
>> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>> +
>> +	/* Give the object a graphics address in the AUB file.  We
>> +	 * don't just use the GEM object address because we do AUB
>> +	 * dumping before execution -- we want to successfully log
>> +	 * when the hardware might hang, and we might even want to aub
>> +	 * capture for a driver trying to execute on a different
>> +	 * generation of hardware by disabling the actual kernel exec
>> +	 * call.
>> +	 */
>> +	bo_gem->aub_offset = bufmgr_gem->aub_offset;
>> +	bufmgr_gem->aub_offset += bo->size;
>> +	/* XXX: Handle aperture overflow. */
>> +	assert(bufmgr_gem->aub_offset<  256 * 1024 * 1024);
>> +}
>> +
>> +static void
>> +aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
>> +		      uint32_t offset, uint32_t size)
>> +{
>> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>> +
>> +	aub_out(bufmgr_gem,
>> +		CMD_AUB_TRACE_HEADER_BLOCK |
>> +		(5 - 2));
>> +	aub_out(bufmgr_gem,
>> +		AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE);
>> +	aub_out(bufmgr_gem, subtype);
>> +	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
>> +	aub_out(bufmgr_gem, size);
>> +	aub_write_bo_data(bo, offset, size);
>> +}
>> +
>> +static void
>> +aub_write_bo(drm_intel_bo *bo)
>> +{
>> +	uint32_t block_size;
>> +	uint32_t offset;
>> +
>> +	aub_bo_get_address(bo);
>> +
>> +	/* Break up large objects into multiple writes.  Otherwise a
>> +	 * 128kb VBO would overflow the 16 bits of size field in the
>> +	 * packet header and everything goes badly after that.
>> +	 */
>> +	for (offset = 0; offset<  bo->size; offset += block_size) {
>> +		block_size = bo->size - offset;
>> +
>> +		if (block_size>  8 * 4096)
>> +			block_size = 8 * 4096;
>> +
>> +		aub_write_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
>> +				      offset, block_size);
>> +	}
>> +}
>> +
>> +/*
>> + * Make a ringbuffer on fly and dump it
>> + */
>> +static void
>> +aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
>> +			  uint32_t batch_buffer, int ring_flag)
>> +{
>> +	uint32_t ringbuffer[4096];
>> +	int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */
>> +	int ring_count = 0;
>> +
>> +	if (ring_flag == I915_EXEC_BSD)
>> +		ring = AUB_TRACE_TYPE_RING_PRB1;
>> +
>> +	/* Make a ring buffer to execute our batchbuffer. */
>> +	memset(ringbuffer, 0, sizeof(ringbuffer));
>> +	ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START;
>> +	ringbuffer[ring_count++] = batch_buffer;
>> +
>> +	/* Write out the ring.  This appears to trigger execution of
>> +	 * the ring in the simulator.
>> +	 */
>> +	aub_out(bufmgr_gem,
>> +		CMD_AUB_TRACE_HEADER_BLOCK |
>> +		(5 - 2));
>> +	aub_out(bufmgr_gem,
>> +		AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
>> +	aub_out(bufmgr_gem, 0); /* general/surface subtype */
>> +	aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
>> +	aub_out(bufmgr_gem, ring_count * 4);
>> +
>> +	/* FIXME: Need some flush operations here? */
>> +	aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4);
>> +
>> +	/* Update offset pointer */
>> +	bufmgr_gem->aub_offset += 4096;
>> +}
>> +
>> +void
>> +drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
>> +			      int x1, int y1, int width, int height,
>> +			      enum aub_dump_bmp_format format,
>> +			      int pitch, int offset)
>> +{
>> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
>> +	uint32_t cpp;
>> +
>> +	switch (format) {
>> +	case AUB_DUMP_BMP_FORMAT_8BIT:
>> +		cpp = 1;
>> +		break;
>> +	case AUB_DUMP_BMP_FORMAT_ARGB_4444:
>> +		cpp = 2;
>> +		break;
>> +	case AUB_DUMP_BMP_FORMAT_ARGB_0888:
>> +	case AUB_DUMP_BMP_FORMAT_ARGB_8888:
>> +		cpp = 4;
>> +		break;
>> +	default:
>> +		printf("Unknown AUB dump format %d\n", format);
>> +		return;
>> +	}
>> +
>> +	if (!bufmgr_gem->aub_file)
>> +		return;
>> +
>> +	aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
>> +	aub_out(bufmgr_gem, (y1<<  16) | x1);
>> +	aub_out(bufmgr_gem,
>> +		(format<<  24) |
>> +		(cpp<<  19) |
>> +		pitch / 4);
>> +	aub_out(bufmgr_gem, (height<<  16) | width);
>> +	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
>> +	aub_out(bufmgr_gem,
>> +		((bo_gem->tiling_mode != I915_TILING_NONE) ? (1<<  2) : 0) |
>> +		((bo_gem->tiling_mode == I915_TILING_Y) ? (1<<  3) : 0));
>> +}
>> +
>> +static void
>> +aub_exec(drm_intel_bo *bo, int ring_flag, int used)
>> +{
>> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>> +	int i;
>> +
>> +	if (!bufmgr_gem->aub_file)
>> +		return;
>> +
>> +	/* Write out all but the batchbuffer to AUB memory */
>> +	for (i = 0; i<  bufmgr_gem->exec_count - 1; i++) {
>> +		if (bufmgr_gem->exec_bos[i] != bo)
>> +			aub_write_bo(bufmgr_gem->exec_bos[i]);
>> +	}
>> +
>> +	aub_bo_get_address(bo);
>> +
>> +	/* Dump the batchbuffer. */
>> +	aub_write_trace_block(bo, AUB_TRACE_TYPE_BATCH, 0,
>> +			      0, used);
>> +	aub_write_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
>> +			      used, bo->size - used);
>> +
>> +	/* Dump ring buffer */
>> +	aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag);
>> +
>> +	fflush(bufmgr_gem->aub_file);
>> +
>> +	/*
>> +	 * One frame has been dumped. So reset the aub_offset for the next frame.
>> +	 *
>> +	 * FIXME: Can we do this?
>> +	 */
>> +	bufmgr_gem->aub_offset = 0x10000;
>> +}
>> +
>>   static int
>>   drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
>>   		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
>> @@ -1830,6 +2077,8 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
>>   	execbuf.rsvd1 = 0;
>>   	execbuf.rsvd2 = 0;
>>
>> +	aub_exec(bo, flags, used);
>> +
>>   	if (bufmgr_gem->no_exec)
>>   		goto skip_execution;
>>
>> @@ -2360,6 +2609,72 @@ drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
>>   }
>>
>>   /**
>> + * Sets up AUB dumping.
>> + *
>> + * This is a trace file format that can be used with the simulator.
>> + * Packets are emitted in a format somewhat like GPU command packets.
>> + * You can set up a GTT and upload your objects into the referenced
>> + * space, then send off batchbuffers and get BMPs out the other end.
>> + */
>> +void
>> +drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
>> +{
>> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
>> +	int entry = 0x200003;
>> +	int i;
>> +	int gtt_size = 0x10000;
>> +
>> +	if (!enable) {
>> +		if (bufmgr_gem->aub_file) {
>> +			fclose(bufmgr_gem->aub_file);
>> +			bufmgr_gem->aub_file = NULL;
>> +		}
>> +	}
>> +
>> +	if (geteuid() != getuid())
>> +		return;
>> +
>> +	bufmgr_gem->aub_file = fopen("intel.aub", "w+");
>
> I guess it would be better if we could name the aub dump file
> according to the program we are tracing. For example, if we run:
>   $ INTEL_DEBUG=aub glxgears
>
> it would be good to get a glxgears.aub rather than intel.aub.
> Otherwise, each run overwrites the file we dumped previously, which is
> a little inconvenient.

That would be a nice feature, though I'm okay with intel.aub for now.

>> +	if (!bufmgr_gem->aub_file)
>> +		return;
>> +
>> +	/* Start allocating objects from just after the GTT. */
>> +	bufmgr_gem->aub_offset = gtt_size;
>> +
>> +	/* Start with a (required) version packet. */
>> +	aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
>> +	aub_out(bufmgr_gem,
>> +		(4<<  AUB_HEADER_MAJOR_SHIFT) |
>> +		(0<<  AUB_HEADER_MINOR_SHIFT));
>> +	for (i = 0; i<  8; i++) {
>> +		aub_out(bufmgr_gem, 0); /* app name */
>> +	}
>> +	aub_out(bufmgr_gem, 0); /* timestamp */
>> +	aub_out(bufmgr_gem, 0); /* timestamp */
>> +	aub_out(bufmgr_gem, 0); /* comment len */
>> +
>> +	/* Set up the GTT. The max we can handle is 256M */
>> +	aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2));
>> +	aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE);
>> +	aub_out(bufmgr_gem, 0); /* subtype */
>> +	aub_out(bufmgr_gem, 0); /* offset */
>> +	aub_out(bufmgr_gem, gtt_size); /* size */
>> +	for (i = 0x000; i<  gtt_size; i += 4, entry += 0x1000) {
>> +		aub_out(bufmgr_gem, entry);
>> +	}
>> +
>> +	/* MI_FLUSH enable */
>> +	if (bufmgr_gem->gen>= 6) {
>> +		aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2));
>> +		aub_out(bufmgr_gem, AUB_TRACE_OP_MMIO_WRITE);
>> +		aub_out(bufmgr_gem, 0);
>> +		aub_out(bufmgr_gem, 0x209c);  /* reg addr */
>> +		aub_out(bufmgr_gem, 0x04);    /* Length in byte */
>> +		aub_out(bufmgr_gem, ((1<<  12)<<  16) | (1<<  12));
>> +	}
>
> Zhenyu and I agreed that it is the driver's job to do the MI_FLUSH
> enable setup. MI_FLUSH is deprecated, so if you still use it, it is
> your job to set the MI_FLUSH enable bit.

Oh...yeah, we should definitely drop that.

>> +}
>> +
>> +/**
>>    * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
>>    * and manage map buffer objections.
>>    *
>> --
>> 1.7.9.1

Otherwise,
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

Please set yourself as the author, though - I just squashed this and 
fixed up a few minor things.



More information about the Intel-gfx mailing list