[Intel-gfx] [PATCH] intel: Add AUB file dump support

Xiang, Haihao haihao.xiang at intel.com
Tue Feb 22 06:20:46 CET 2011


Could you add an entry for media kernels to name_to_type_mapping[], or just
use a common name for all tracked kernels?

Thanks
Haihao

> This adds AUB file dump support to generate an execution
> trace for the internal GPU simulator.
> 
> Signed-off-by: Zhenyu Wang <zhenyuw at linux.intel.com>
> ---
>  intel/Makefile.am        |    3 +-
>  intel/intel_bufmgr.h     |   38 +++++
>  intel/intel_bufmgr_gem.c |  402 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 442 insertions(+), 1 deletions(-)
> 
> diff --git a/intel/Makefile.am b/intel/Makefile.am
> index 1ae92f8..398cd2f 100644
> --- a/intel/Makefile.am
> +++ b/intel/Makefile.am
> @@ -41,7 +41,8 @@ libdrm_intel_la_SOURCES = \
>         intel_bufmgr_gem.c \
>         intel_chipset.h \
>         mm.c \
> -       mm.h
> +       mm.h \
> +       intel_aub.h
> 
>  libdrm_intelincludedir = ${includedir}/libdrm
>  libdrm_intelinclude_HEADERS = intel_bufmgr.h
> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> index daa18b4..bb4158a 100644
> --- a/intel/intel_bufmgr.h
> +++ b/intel/intel_bufmgr.h
> @@ -35,6 +35,7 @@
>  #define INTEL_BUFMGR_H
> 
>  #include <stdint.h>
> +#include <stdio.h>
> 
>  struct drm_clip_rect;
> 
> @@ -83,6 +84,39 @@ struct _drm_intel_bo {
>         int handle;
>  };
> 
> +enum drm_intel_aub_bmp_format {
> +       AUB_DUMP_BMP_LEGACY,
> +       AUB_DUMP_BMP_8BIT,
> +       AUB_DUMP_BMP_ARGB_0555,
> +       AUB_DUMP_BMP_ARGB_0565,
> +       AUB_DUMP_BMP_ARGB_4444,
> +       AUB_DUMP_BMP_ARGB_1555,
> +       AUB_DUMP_BMP_ARGB_0888,
> +       AUB_DUMP_BMP_ARGB_8888,
> +       AUB_DUMP_BMP_YCRCB_SWAPY,
> +       AUB_DUMP_BMP_YCRCB_NORMAL,
> +       AUB_DUMP_BMP_YCRCB_SWAPUV,
> +       AUB_DUMP_BMP_YCRCB_SWAPUVY,
> +       AUB_DUMP_BMP_ABGR_8888,
> +};
> +
> +/*
> + * surface info needed by aub DUMP_BMP block
> + */
> +struct drm_intel_aub_surface_bmp {
> +       uint16_t x_offset;
> +       uint16_t y_offset;
> +       uint16_t pitch;
> +       uint8_t bits_per_pixel;
> +       uint8_t format;
> +       uint16_t width;
> +       uint16_t height;
> +       uint32_t tiling_walk_y:1;
> +       uint32_t tiling:1;
> +       uint32_t pad:30;
> +};
> +
> +
>  #define BO_ALLOC_FOR_RENDER (1<<0)
> 
>  drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
> @@ -150,6 +184,10 @@ int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
>  void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
> 
>  int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id);
> +void drm_intel_bufmgr_gem_set_aubfile(drm_intel_bufmgr *bufmgr, FILE *file);
> +void drm_intel_bufmgr_gem_stop_aubfile(drm_intel_bufmgr *bufmgr);
> +int drm_intel_gem_aub_dump_bmp(drm_intel_bufmgr *bufmgr, drm_intel_bo *bo,
> +                              unsigned int offset, struct drm_intel_aub_surface_bmp *bmp);
> 
>  /* drm_intel_bufmgr_fake.c */
>  drm_intel_bufmgr *drm_intel_bufmgr_fake_init(int fd,
> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> index 3cdffce..654bc31 100644
> --- a/intel/intel_bufmgr_gem.c
> +++ b/intel/intel_bufmgr_gem.c
> @@ -57,6 +57,7 @@
>  #include "intel_bufmgr.h"
>  #include "intel_bufmgr_priv.h"
>  #include "intel_chipset.h"
> +#include "intel_aub.h"
>  #include "string.h"
> 
>  #include "i915_drm.h"
> @@ -75,6 +76,13 @@ struct drm_intel_gem_bo_bucket {
>         unsigned long size;
>  };
> 
> +struct drm_intel_aub_bmp {
> +       drm_intel_bo *bo; /* surface bo */
> +       unsigned int offset;
> +       struct drm_intel_aub_surface_bmp bmp;
> +       struct drm_intel_aub_bmp *next;
> +};
> +
>  typedef struct _drm_intel_bufmgr_gem {
>         drm_intel_bufmgr bufmgr;
> 
> @@ -106,6 +114,10 @@ typedef struct _drm_intel_bufmgr_gem {
>         unsigned int has_relaxed_fencing : 1;
>         unsigned int bo_reuse : 1;
>         char fenced_relocs;
> +
> +       FILE *aub_file;
> +       uint32_t aub_offset;
> +       struct drm_intel_aub_bmp *aub_bmp;
>  } drm_intel_bufmgr_gem;
> 
>  #define DRM_INTEL_RELOC_FENCE (1<<0)
> @@ -195,8 +207,396 @@ struct _drm_intel_bo_gem {
>          * relocations.
>          */
>         int reloc_tree_fences;
> +
> +       uint32_t aub_offset;
>  };
> 
> +/* AUB trace dump support */
> +
> +static void
> +aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data)
> +{
> +       fwrite(&data, 1, 4, bufmgr_gem->aub_file);
> +}
> +
> +static void
> +aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size)
> +{
> +       fwrite(data, 1, size, bufmgr_gem->aub_file);
> +}
> +
> +static void
> +aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size)
> +{
> +       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +       uint32_t *data;
> +       unsigned int i;
> +
> +       data = malloc(bo->size);
> +       drm_intel_bo_get_subdata(bo, offset, size, data);
> +
> +       /* Easy mode: write out bo with no relocations */
> +       if (!bo_gem->reloc_count) {
> +               aub_out_data(bufmgr_gem, data, size);
> +               free(data);
> +               return;
> +       }
> +
> +       /* Otherwise, handle the relocations while writing. */
> +       for (i = 0; i < size / 4; i++) {
> +               int r;
> +               for (r = 0; r < bo_gem->reloc_count; r++) {
> +                       struct drm_i915_gem_relocation_entry *reloc;
> +                       drm_intel_reloc_target *info;
> +
> +                       reloc = &bo_gem->relocs[r];
> +                       info = &bo_gem->reloc_target_info[r];
> +
> +                       if (reloc->offset == offset + i * 4) {
> +                               drm_intel_bo_gem *target_gem;
> +                               uint32_t val;
> +
> +                               target_gem = (drm_intel_bo_gem *)info->bo;
> +
> +                               val = reloc->delta;
> +                               val += target_gem->aub_offset;
> +
> +                               aub_out(bufmgr_gem, val);
> +                               data[i] = val;
> +                               break;
> +                       }
> +               }
> +               if (r == bo_gem->reloc_count) {
> +                       /* no relocation, just the data */
> +                       aub_out(bufmgr_gem, data[i]);
> +               }
> +       }
> +}
> +
> +static void
> +aub_bo_get_address(drm_intel_bo *bo)
> +{
> +       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +
> +       /* Give the object a graphics address in the AUB file.  We
> +        * don't just use the GEM object address because we do AUB
> +        * dumping before execution -- we want to successfully log
> +        * when the hardware might hang, and we might even want to aub
> +        * capture for a driver trying to execute on a different
> +        * generation of hardware by disabling the actual kernel exec
> +        * call.
> +        */
> +       bo_gem->aub_offset = bufmgr_gem->aub_offset;
> +       bufmgr_gem->aub_offset += bo->size;
> +       /* XXX: Handle aperture overflow. */
> +       assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024);
> +}
> +
> +static const struct {
> +       const char *name;
> +       uint32_t type;
> +       uint32_t subtype;
> +} name_to_type_mapping[] = {
> +       { "VS_UNIT",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_VS_STATE},
> +       { "GS_UNIT",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_GS_STATE},
> +       { "CLIP_UNIT",  AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CL_STATE},
> +       { "SF_UNIT",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SF_STATE},
> +       { "WM_UNIT",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_WM_STATE},
> +       { "CC_UNIT",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CC_STATE},
> +       { "CLIP_VP",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CL_VP},
> +       { "SF_VP",      AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SF_VP},
> +       { "SF_SCISSOR_UNIT",
> +         AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SF_SCISSOR_RECT},
> +       { "CC_VP",      AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CC_VP},
> +       { "SAMPLER",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SAMPLER_STATE},
> +       { "SAMPLER_DEFAULT_COLOR", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SDC},
> +       { "VS_PROG",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
> +       { "GS_PROG",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
> +       { "CLIP_PROG",  AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
> +       { "SF_PROG",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
> +       { "WM_PROG",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
> +       { "BLEND_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_BLEND_STATE},
> +       { "DEPTH_STENCIL_STATE",
> +         AUB_TRACE_TYPE_GENERAL, AUB_TRACE_DEPTH_STENCIL_STATE},
> +       { "COLOR_CALC_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CC_STATE},
> +       { "SS_SURF_BIND", AUB_TRACE_TYPE_SURFACE, AUB_TRACE_BINDING_TABLE},
> +       { "SS_SURFACE", AUB_TRACE_TYPE_SURFACE, AUB_TRACE_SURFACE_STATE},
> +       { "temporary VBO", AUB_TRACE_TYPE_VERTEX_BUFFER, 0},
> +       { "CURBE",      AUB_TRACE_TYPE_CONSTANT_URB, 0},
> +       { "VS constant_bo", AUB_TRACE_TYPE_CONSTANT_BUFFER, 0},
> +       { "WM constant_bo", AUB_TRACE_TYPE_CONSTANT_BUFFER, 0},
> +       { "INTERFACE_DESC", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_INTERFACE_DESC},
> +       { "VLD_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_VLD_STATE},
> +       { "VFE_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_VFE_STATE},
> +       { "IT_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_IT_STATE},
> +       { "DI_SAMPLE_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_DI_SAMPLE_STATE},
> +       { "IEF_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_IEF_STATE},
> +       { "AVS_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_AVS_STATE},
> +};
> +
> +static void
> +aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
> +                     uint32_t offset, uint32_t size)
> +{
> +       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +
> +       aub_out(bufmgr_gem,
> +               CMD_AUB_TRACE_HEADER_BLOCK |
> +               (5 - 2));
> +       aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE);
> +       aub_out(bufmgr_gem, subtype);
> +       aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
> +       aub_out(bufmgr_gem, size);
> +       aub_write_bo_data(bo, offset, size);
> +}
> +
> +static void
> +aub_write_bo(drm_intel_bo *bo)
> +{
> +       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +       uint32_t type = AUB_TRACE_TYPE_NOTYPE;
> +       uint32_t subtype = 0;
> +       uint32_t block_size;
> +       uint32_t offset;
> +       unsigned int i;
> +
> +       aub_bo_get_address(bo);
> +
> +       for (i = 0; i < ARRAY_SIZE(name_to_type_mapping); i++) {
> +               if (strcmp(bo_gem->name,
> +                          name_to_type_mapping[i].name) == 0) {
> +                       type = name_to_type_mapping[i].type;
> +                       subtype = name_to_type_mapping[i].subtype;
> +                       break;
> +               }
> +       }
> +
> +       if (type == 0) {
> +               DBG("Failed to find type for object %s(size: 0x%lx, aub_offset: 0x%08x)\n",
> +                      bo_gem->name, bo->size, bo_gem->aub_offset);
> +       }
> +
> +
> +       /* Break up large objects into multiple writes.  Otherwise a
> +        * 128kb VBO would overflow the 16 bits of size field in the
> +        * packet header and everything goes badly after that.
> +        */
> +       for (offset = 0; offset < bo->size; offset += block_size) {
> +               block_size = bo->size - offset;
> +
> +               if (block_size > 2 * 4096)
> +                       block_size = 2 * 4096;
> +
> +               aub_write_trace_block(bo, type, subtype,
> +                                     offset, block_size);
> +       }
> +}
> +
> +/*
> + * Make a ringbuffer on the fly and dump it
> + */
> +static void
> +aub_generate_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
> +                         uint32_t batch_buffer, unsigned int flags)
> +{
> +       uint32_t ringbuffer[1024];
> +       int ring = 0;
> +
> +       switch (flags) {
> +       case I915_EXEC_RENDER:
> +       case I915_EXEC_DEFAULT:
> +               ring = AUB_TRACE_TYPE_RING_PRB0;
> +               break;
> +       case I915_EXEC_BSD:
> +               ring = AUB_TRACE_TYPE_RING_PRB1;
> +               break;
> +       case I915_EXEC_BLT:
> +               ring = AUB_TRACE_TYPE_RING_PRB2;
> +               break;
> +       }
> +
> +       aub_out(bufmgr_gem,
> +               CMD_AUB_TRACE_HEADER_BLOCK |
> +               (5 - 2));
> +       aub_out(bufmgr_gem,
> +               AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
> +       aub_out(bufmgr_gem, 0); /* general/surface subtype */
> +       aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
> +       aub_out(bufmgr_gem, 4096);
> +
> +       /* Do make a ring buffer here */
> +       memset(ringbuffer, AUB_MI_NOOP, sizeof(ringbuffer));
> +       ringbuffer[0] = AUB_MI_BATCH_BUFFER_START;
> +       ringbuffer[1] = batch_buffer;
> +
> +       /* FIXME: Need some flush operations here? */
> +
> +       aub_out_data(bufmgr_gem, ringbuffer, 4096);
> +
> +       /* Update offset pointer */
> +       bufmgr_gem->aub_offset += 4096;
> +}
> +
> +static void
> +aub_dump_bmp(drm_intel_bufmgr_gem *bufmgr_gem)
> +{
> +       struct drm_intel_aub_bmp *p = bufmgr_gem->aub_bmp;
> +
> +       while(p) {
> +               aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
> +               aub_out(bufmgr_gem, (p->bmp.y_offset << 16) | p->bmp.x_offset);
> +               aub_out(bufmgr_gem, (p->bmp.format << 24) |
> +                                   (p->bmp.bits_per_pixel << 16) | p->bmp.pitch);
> +               aub_out(bufmgr_gem, (p->bmp.height << 16) | p->bmp.width);
> +               /* surface bo should already be written out */
> +               assert(((drm_intel_bo_gem *)p->bo)->aub_offset != 0);
> +               aub_out(bufmgr_gem, ((drm_intel_bo_gem *)p->bo)->aub_offset + p->offset);
> +               aub_out(bufmgr_gem, (p->bmp.tiling << 2) | (p->bmp.tiling_walk_y << 3));
> +
> +               bufmgr_gem->aub_bmp = p->next;
> +               free(p);
> +               p = bufmgr_gem->aub_bmp;
> +       }
> +}
> +
> +static void
> +aub_exec(drm_intel_bo *bo, unsigned int flags)
> +{
> +       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +       int i;
> +
> +       if (!bufmgr_gem->aub_file)
> +               return;
> +
> +       /* Write out all but the batchbuffer to AUB memory */
> +       for (i = 0; i < bufmgr_gem->exec_count - 1; i++) {
> +               if (bufmgr_gem->exec_bos[i] != bo)
> +                       aub_write_bo(bufmgr_gem->exec_bos[i]);
> +       }
> +
> +       aub_bo_get_address(bo);
> +
> +       /* Dump the batchbuffer. */
> +       aub_out(bufmgr_gem,
> +               CMD_AUB_TRACE_HEADER_BLOCK |
> +               (5 - 2));
> +       aub_out(bufmgr_gem,
> +               AUB_TRACE_MEMTYPE_GTT | AUB_TRACE_TYPE_BATCH | AUB_TRACE_OP_DATA_WRITE);
> +       aub_out(bufmgr_gem, 0); /* general/surface subtype */
> +       aub_out(bufmgr_gem, bo_gem->aub_offset);
> +       aub_out(bufmgr_gem, bo_gem->bo.size);
> +       aub_write_bo_data(bo, 0, bo_gem->bo.size);
> +
> +       /* Dump ring buffer */
> +       aub_generate_ringbuffer(bufmgr_gem, bo_gem->aub_offset, flags);
> +
> +       /* Dump BMP file for any requested surface */
> +       aub_dump_bmp(bufmgr_gem);
> +
> +       fflush(bufmgr_gem->aub_file);
> +
> +       /*
> +        * One frame has been dumped. So reset the aub_offset for the next frame.
> +        *
> +        * FIXME: Can we do this?
> +        */
> +       bufmgr_gem->aub_offset = 0x10000;
> +}
> +
> +/*
> + * Stop dumping data to aub file
> + */
> +void drm_intel_bufmgr_gem_stop_aubfile(drm_intel_bufmgr *bufmgr)
> +{
> +       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
> +
> +       pthread_mutex_lock(&bufmgr_gem->lock);
> +       bufmgr_gem->aub_file = NULL;
> +       pthread_mutex_unlock(&bufmgr_gem->lock);
> +}
> +
> +void drm_intel_bufmgr_gem_set_aubfile(drm_intel_bufmgr *bufmgr, FILE *file)
> +{
> +       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
> +       int entry = 0x3; /* uc/valid GTT */
> +       int i;
> +
> +       if (!file)
> +               return;
> +
> +       pthread_mutex_lock(&bufmgr_gem->lock);
> +
> +       bufmgr_gem->aub_file = file;
> +
> +       /* Start from 0x10000, since the address below is used for GTT entry building */
> +       bufmgr_gem->aub_offset = 0x10000;
> +
> +       /* Start with a (required) version packet. */
> +       aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
> +       aub_out(bufmgr_gem,
> +               (4 << AUB_HEADER_MAJOR_SHIFT) |
> +               (0 << AUB_HEADER_MINOR_SHIFT));
> +       for (i = 0; i < 8; i++) {
> +               aub_out(bufmgr_gem, 0); /* app name */
> +       }
> +       aub_out(bufmgr_gem, 0); /* timestamp */
> +       aub_out(bufmgr_gem, 0); /* timestamp */
> +       aub_out(bufmgr_gem, 0); /* comment len */
> +
> +       /* Set up the GTT. The max we can handle is 256M.
> +        * Need improvement, dynamicly alloc/write GTT entry
> +        * block for each bo, so AubList output won't contain
> +        * whole GTT entry block in the first, easier for parse.
> +        */
> +       for (i = 0x000; i < 0x10000; i += 4, entry += 0x1000) {
> +               aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2));
> +               aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE);
> +               aub_out(bufmgr_gem, 0);
> +               aub_out(bufmgr_gem, i);
> +               aub_out(bufmgr_gem, 4);
> +               aub_out(bufmgr_gem, entry);
> +       }
> +
> +       pthread_mutex_unlock(&bufmgr_gem->lock);
> +}
> +
> +int drm_intel_gem_aub_dump_bmp(drm_intel_bufmgr *bufmgr,
> +                              drm_intel_bo *bo, unsigned int offset,
> +                              struct drm_intel_aub_surface_bmp *bmp)
> +{
> +       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
> +       struct drm_intel_aub_bmp *aub_bmp, *p, *last;
> +
> +       aub_bmp = malloc(sizeof(*aub_bmp));
> +
> +       aub_bmp->bo = bo;
> +       aub_bmp->offset = offset;
> +       memcpy(&aub_bmp->bmp, bmp, sizeof(*bmp));
> +       aub_bmp->next = NULL;
> +
> +       pthread_mutex_lock(&bufmgr_gem->lock);
> +
> +       /* Insert last */
> +       p = last = bufmgr_gem->aub_bmp;
> +       while (p) {
> +               last = p;
> +               p = p->next;
> +       }
> +       if (last == bufmgr_gem->aub_bmp)
> +           bufmgr_gem->aub_bmp = aub_bmp;
> +       else
> +           last->next = aub_bmp;
> +
> +       pthread_mutex_unlock(&bufmgr_gem->lock);
> +
> +       return 0;
> +}
> +
>  static unsigned int
>  drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
> 
> @@ -1624,6 +2024,8 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
>         execbuf.rsvd1 = 0;
>         execbuf.rsvd2 = 0;
> 
> +       aub_exec(bo, flags);
> +
>         ret = drmIoctl(bufmgr_gem->fd,
>                        DRM_IOCTL_I915_GEM_EXECBUFFER2,
>                        &execbuf);
> --
> 1.7.2.3
> 





More information about the Intel-gfx mailing list