[Intel-gfx] [PATCH] libdrm/intel: execbuf2 support

Eric Anholt eric at anholt.net
Sun Aug 9 09:14:44 CEST 2009


On Tue, 2009-07-14 at 13:51 -0700, Jesse Barnes wrote:
> This patch to libdrm adds support for the new execbuf2 ioctl.  If
> detected, it will be used instead of the old ioctl.  To make using the
> new code easier, this patch also adds a new tiled allocation function.
> drm_intel_bo_alloc_tiled hides the stride and size restrictions open
> coded in current tiling aware code, and so should make tiling easier to
> use.
> 
> Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>
> 
> diff --git a/libdrm/intel/intel_bufmgr.c b/libdrm/intel/intel_bufmgr.c
> index f170e7f..ec32993 100644
> --- a/libdrm/intel/intel_bufmgr.c
> +++ b/libdrm/intel/intel_bufmgr.c
> @@ -45,6 +45,17 @@
>   */
>  
>  drm_intel_bo *
> +drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
> +			 int x, int y, int cpp, uint32_t *tiling_mode,
> +			 unsigned long *pitch, unsigned long flags)
> +{
> +   if (!bufmgr->bo_alloc_tiled)
> +      return bufmgr->bo_alloc(bufmgr, name, x * y * cpp, 0);
> +   return bufmgr->bo_alloc_tiled(bufmgr, name, x, y, cpp, tiling_mode, pitch,
> +				 flags);
> +}
> +
> +drm_intel_bo *
>  drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
>  		   unsigned long size, unsigned int alignment)
>  {
> @@ -174,6 +185,17 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>  					 read_domains, write_domain);
>  }
>  
> +/* For fence registers, not GL fences */
> +int
> +drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
> +			      drm_intel_bo *target_bo, uint32_t target_offset,
> +			      uint32_t read_domains, uint32_t write_domain)
> +{
> +	return bo->bufmgr->bo_emit_reloc_fence(bo, offset,
> +					       target_bo, target_offset,
> +					       read_domains, write_domain);
> +}
> +
>  int
>  drm_intel_bo_pin(drm_intel_bo *bo, uint32_t alignment)
>  {
> diff --git a/libdrm/intel/intel_bufmgr.h b/libdrm/intel/intel_bufmgr.h
> index 758558d..5459d13 100644
> --- a/libdrm/intel/intel_bufmgr.h
> +++ b/libdrm/intel/intel_bufmgr.h
> @@ -73,6 +73,14 @@ struct _drm_intel_bo {
>      int handle;
>  };
>  
> +#define BO_ALLOC_FOR_RENDER (1<<0)
> +
> +drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
> +				       const char *name,
> +				       int x, int y, int cpp,
> +				       uint32_t *tiling_mode,
> +				       unsigned long *pitch,
> +				       unsigned long flags);
>  drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
>  				 unsigned long size, unsigned int alignment);
>  drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
> @@ -100,6 +108,10 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo **bo_array, int count);
>  int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>  			    drm_intel_bo *target_bo, uint32_t target_offset,
>  			    uint32_t read_domains, uint32_t write_domain);
> +int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
> +				  drm_intel_bo *target_bo,
> +				  uint32_t target_offset,
> +				  uint32_t read_domains, uint32_t write_domain);
>  int drm_intel_bo_pin(drm_intel_bo *bo, uint32_t alignment);
>  int drm_intel_bo_unpin(drm_intel_bo *bo);
>  int drm_intel_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
> diff --git a/libdrm/intel/intel_bufmgr_gem.c b/libdrm/intel/intel_bufmgr_gem.c
> index 737ceae..65c84bc 100644
> --- a/libdrm/intel/intel_bufmgr_gem.c
> +++ b/libdrm/intel/intel_bufmgr_gem.c
> @@ -95,6 +95,7 @@ typedef struct _drm_intel_bufmgr_gem {
>      pthread_mutex_t lock;
>  
>      struct drm_i915_gem_exec_object *exec_objects;
> +    struct drm_i915_gem_exec_object2 *exec2_objects;
>      drm_intel_bo **exec_bos;
>      int exec_size;
>      int exec_count;
> @@ -107,6 +108,13 @@ typedef struct _drm_intel_bufmgr_gem {
>      int pci_device;
>  } drm_intel_bufmgr_gem;
>  
> +#define DRM_INTEL_RELOC_FENCE (1<<0)
> +
> +typedef struct _drm_intel_reloc_target_info {
> +	drm_intel_bo *bo;
> +	int flags;
> +} drm_intel_reloc_target;
> +
>  struct _drm_intel_bo_gem {
>      drm_intel_bo bo;
>  
> @@ -143,8 +151,8 @@ struct _drm_intel_bo_gem {
>  
>      /** Array passed to the DRM containing relocation information. */
>      struct drm_i915_gem_relocation_entry *relocs;
> -    /** Array of bos corresponding to relocs[i].target_handle */
> -    drm_intel_bo **reloc_target_bo;
> +    /** Array of info structs corresponding to relocs[i].target_handle etc */
> +    drm_intel_reloc_target *reloc_target_info;
>      /** Number of entries in relocs */
>      int reloc_count;
>      /** Mapped address for the buffer, saved across map/unmap cycles */
> @@ -206,6 +214,71 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
>  static void
>  drm_intel_gem_bo_unreference(drm_intel_bo *bo);
>  
> +#define ROUND_UP_TO(x, y)			(((x) + (y) - 1) / (y) * (y))
> +#define ROUND_UP_TO_MB(x)			ROUND_UP_TO((x), 1024*1024)
> +
> +/* Round a given size up to the nearest tileable size for the object, taking
> + * fence register mapping into account */
> +static unsigned long
> +drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
> +			   uint32_t *tiling_mode)
> +{
> +    unsigned long min_size, max_size;
> +    unsigned long i;
> +
> +    if (*tiling_mode == I915_TILING_NONE)
> +	return size;
> +
> +    /* 965+ just need multiples of page size for tiling */
> +    if (IS_I965G(bufmgr_gem))
> +	return ROUND_UP_TO(size, 4096);
> +
> +    /* Older chips need powers of two, of at least 512k or 1M */
> +    if (IS_I9XX(bufmgr_gem)) {
> +	min_size = 1024*1024;
> +	max_size = 128*1024*1024;
> +    } else {
> +	min_size = 512*1024;
> +	max_size = 64*1024*1024;
> +    }
> +
> +    if (size > max_size) {
> +	*tiling_mode = I915_TILING_NONE;
> +	return size;
> +    }
> +
> +    for (i = min_size; i < size; i <<= 1)
> +	;
> +
> +    return i;
> +}
> +
> +/*
> + * Round a given pitch up to the minimum required for X tiling on a
> + * given chip.  We use 512 as the minimum to allow for a later tiling
> + * change.
> + */
> +static unsigned long
> +drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
> +			    unsigned long pitch, uint32_t tiling_mode)
> +{
> +    unsigned long tile_width = 512;
> +    unsigned long i;
> +
> +    if (tiling_mode == I915_TILING_NONE)
> +	return ROUND_UP_TO(pitch, tile_width);
> +
> +    /* 965 is flexible */
> +    if (IS_I965G(bufmgr_gem))
> +	return ROUND_UP_TO(pitch, tile_width);
> +
> +    /* Pre-965 needs power of two tile width */
> +    for (i = tile_width; i < pitch; i <<= 1)
> +	;
> +
> +    return i;
> +}
> +
>  static struct drm_intel_gem_bo_bucket *
>  drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
>  				 unsigned long size)
> @@ -236,7 +309,7 @@ static void drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
>  	}
>  
>  	for (j = 0; j < bo_gem->reloc_count; j++) {
> -	    drm_intel_bo *target_bo = bo_gem->reloc_target_bo[j];
> +	    drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
>  	    drm_intel_bo_gem *target_gem = (drm_intel_bo_gem *)target_bo;
>  
>  	    DBG("%2d: %d (%s)@0x%08llx -> %d (%s)@0x%08lx + 0x%08x\n",
> @@ -296,6 +369,49 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo)
>      bufmgr_gem->exec_count++;
>  }
>  
> +static void
> +drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
> +{
> +    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
> +    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
> +    int index;
> +
> +    if (bo_gem->validate_index != -1)
> +	return;
> +
> +    /* Extend the array of validation entries as necessary. */
> +    if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
> +	int new_size = bufmgr_gem->exec_size * 2;
> +
> +	if (new_size == 0)
> +	    new_size = 5;
> +
> +	bufmgr_gem->exec2_objects =
> +	    realloc(bufmgr_gem->exec2_objects,
> +		    sizeof(*bufmgr_gem->exec2_objects) * new_size);
> +	bufmgr_gem->exec_bos =
> +	    realloc(bufmgr_gem->exec_bos,
> +		    sizeof(*bufmgr_gem->exec_bos) * new_size);
> +	bufmgr_gem->exec_size = new_size;
> +    }
> +
> +    index = bufmgr_gem->exec_count;
> +    bo_gem->validate_index = index;
> +    /* Fill in array entry */
> +    bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
> +    bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
> +    bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
> +    bufmgr_gem->exec2_objects[index].alignment = 0;
> +    bufmgr_gem->exec2_objects[index].offset = 0;
> +    bufmgr_gem->exec_bos[index] = bo;
> +    bufmgr_gem->exec2_objects[index].flags = 0;
> +    bufmgr_gem->exec2_objects[index].rsvd1 = 0;
> +    bufmgr_gem->exec2_objects[index].rsvd2 = 0;
> +    if (need_fence)
> +	    bufmgr_gem->exec2_objects[index].flags |= EXEC_OBJECT_NEEDS_FENCE;
> +    drm_intel_gem_bo_reference_locked(bo);
> +    bufmgr_gem->exec_count++;
> +}
>  
>  #define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
>  	sizeof(uint32_t))
> @@ -308,25 +424,27 @@ drm_intel_setup_reloc_list(drm_intel_bo *bo)
>  
>      bo_gem->relocs = malloc(bufmgr_gem->max_relocs *
>  			    sizeof(struct drm_i915_gem_relocation_entry));
> -    bo_gem->reloc_target_bo = malloc(bufmgr_gem->max_relocs *
> -				     sizeof(drm_intel_bo *));
> +    bo_gem->reloc_target_info = malloc(bufmgr_gem->max_relocs *
> +				       sizeof(drm_intel_reloc_target *));
>  
>      return 0;
>  }
>  
>  static drm_intel_bo *
>  drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
> -				unsigned long size, unsigned int alignment,
> -				int for_render)
> +				unsigned long size, unsigned long flags)
>  {
>      drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
>      drm_intel_bo_gem *bo_gem;
>      unsigned int page_size = getpagesize();
> -    int ret;
> +    int ret, for_render = 0;
>      struct drm_intel_gem_bo_bucket *bucket;
>      int alloc_from_cache = 0;
>      unsigned long bo_size;
>  
> +    if (flags & BO_ALLOC_FOR_RENDER)
> +	for_render = 1;
> +
>      /* Round the allocated size up to a power of two number of pages. */
>      bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
>  
> @@ -405,9 +523,9 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
>      bo_gem->reloc_tree_size = bo_gem->bo.size;
>      bo_gem->reloc_tree_fences = 0;
>      bo_gem->used_as_reloc_target = 0;
> +    bo_gem->reusable = 1;
>      bo_gem->tiling_mode = I915_TILING_NONE;
>      bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
> -    bo_gem->reusable = 1;
>  
>      DBG("bo_create: buf %d (%s) %ldb\n",
>  	bo_gem->gem_handle, bo_gem->name, size);
> @@ -419,14 +537,45 @@ static drm_intel_bo *
>  drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, const char *name,
>  				  unsigned long size, unsigned int alignment)
>  {
> -    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 1);
> +    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
> +					   BO_ALLOC_FOR_RENDER);
>  }
>  
>  static drm_intel_bo *
>  drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
>  		       unsigned long size, unsigned int alignment)
>  {
> -    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 0);
> +    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0);
> +}
> +
> +static drm_intel_bo *
> +drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
> +			     int x, int y, int cpp, uint32_t *tiling_mode,
> +			     unsigned long *pitch, unsigned long flags)
> +{
> +    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
> +    drm_intel_bo *bo;
> +    unsigned long size, stride;
> +    int ret;
> +
> +    stride = x * cpp;
> +    stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, *tiling_mode);
> +    size = stride * y;

This needs the same y alignment treatment here as in my commits on Friday, i.e. y should be aligned before it is used to compute size.

> +    size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
> +
> +    bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags);
> +    if (!bo)
> +	return NULL;
> +
> +    ret = drm_intel_gem_bo_set_tiling(bo, tiling_mode, stride);
> +    if (ret != 0) {
> +	drm_intel_gem_bo_unreference(bo);
> +	return NULL;
> +    }
> +
> +    *pitch = stride;
> +
> +    return bo;
>  }
>  
>  /**
> @@ -478,10 +627,6 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, const char *name,
>      }
>      bo_gem->tiling_mode = get_tiling.tiling_mode;
>      bo_gem->swizzle_mode = get_tiling.swizzle_mode;
> -    if (bo_gem->tiling_mode == I915_TILING_NONE)
> -	bo_gem->reloc_tree_fences = 0;
> -    else
> -	bo_gem->reloc_tree_fences = 1;
>  
>      DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
>  
> @@ -574,8 +719,8 @@ drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
>  
>  	    /* Unreference all the target buffers */
>  	    for (i = 0; i < bo_gem->reloc_count; i++)
> -		 drm_intel_gem_bo_unreference_locked(bo_gem->reloc_target_bo[i]);
> -	    free(bo_gem->reloc_target_bo);
> +		 drm_intel_gem_bo_unreference_locked(bo_gem->reloc_target_info[i].bo);
> +	    free(bo_gem->reloc_target_info);
>  	    free(bo_gem->relocs);
>  	}
>  
> @@ -600,7 +745,7 @@ drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
>  	    bo_gem->name = NULL;
>  	    bo_gem->validate_index = -1;
>  	    bo_gem->relocs = NULL;
> -	    bo_gem->reloc_target_bo = NULL;
> +	    bo_gem->reloc_target_info = NULL;
>  	    bo_gem->reloc_count = 0;
>  
>  	    DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
> @@ -919,6 +1064,7 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
>      int i;
>  
>      free(bufmgr_gem->exec_objects);
> +    free(bufmgr_gem->exec2_objects);
>      free(bufmgr_gem->exec_bos);
>  
>      pthread_mutex_destroy(&bufmgr_gem->lock);
> @@ -950,9 +1096,9 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
>   * last known offset in target_bo.
>   */
>  static int
> -drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
> -			    drm_intel_bo *target_bo, uint32_t target_offset,
> -			    uint32_t read_domains, uint32_t write_domain)
> +do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
> +		 drm_intel_bo *target_bo, uint32_t target_offset,
> +		 uint32_t read_domains, uint32_t write_domain, int need_fence)
>  {
>      drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
>      drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
> @@ -976,7 +1122,8 @@ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>       */
>      assert(!bo_gem->used_as_reloc_target);
>      bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
> -    bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
> +    if (need_fence)
> +	    bo_gem->reloc_tree_fences++;

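With this and the other changes in the patch, it looks like
reloc_tree_fences no longer actually counts the number of fences required
in the reloc tree rooted at bo_gem: the target's own reloc_tree_fences is
no longer added in, so only bo_gem's direct relocations get counted.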
 
>      /* Flag the target to disallow further relocations in it. */
>      target_bo_gem->used_as_reloc_target = 1;
> @@ -989,7 +1136,12 @@ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>      bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
>      bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;
>  
> -    bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo;
> +    bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
> +    if (need_fence)
> +	    bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
> +		    DRM_INTEL_RELOC_FENCE;
> +    else
> +	    bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
>      drm_intel_gem_bo_reference_locked(target_bo);
>  
>      bo_gem->reloc_count++;
> @@ -999,6 +1151,24 @@ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>      return 0;
>  }
>  
> +static int
> +drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
> +			    drm_intel_bo *target_bo, uint32_t target_offset,
> +			    uint32_t read_domains, uint32_t write_domain)
> +{
> +	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
> +				read_domains, write_domain, 0);
> +}

So, a new drm combined with old code assumes that the old code doesn't
want fences on its objects?  That seems broken -- old code only ever calls
emit_reloc, which now passes need_fence == 0, so reloc_tree_fences is never
counted for old code.  A simple boolean flag that the user sets, saying "I
understand the requirement to use emit_reloc_fence", to enable the new
behavior seems like it would work fine, though.

> +
> +static int
> +drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
> +				  drm_intel_bo *target_bo,
> +				  uint32_t target_offset,
> +				  uint32_t read_domains, uint32_t write_domain)
> +{
> +	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
> +				read_domains, write_domain, 1);
> +}
>  /**
>   * Walk the tree of relocations rooted at BO and accumulate the list of
>   * validations to be performed and update the relocation buffers with
> @@ -1014,7 +1184,7 @@ drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
>  	return;
>  
>      for (i = 0; i < bo_gem->reloc_count; i++) {
> -	drm_intel_bo *target_bo = bo_gem->reloc_target_bo[i];
> +	drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
>  
>  	/* Continue walking the tree depth-first. */
>  	drm_intel_gem_bo_process_reloc(target_bo);
> @@ -1025,6 +1195,29 @@ drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
>  }
>  
>  static void
> +drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
> +{
> +    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
> +    int i;
> +
> +    if (bo_gem->relocs == NULL)
> +	return;
> +
> +    for (i = 0; i < bo_gem->reloc_count; i++) {
> +	drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
> +	int need_fence;
> +
> +	/* Continue walking the tree depth-first. */
> +	drm_intel_gem_bo_process_reloc(target_bo);
> +
> +	need_fence = bo_gem->reloc_target_info[i].flags & DRM_INTEL_RELOC_FENCE;
> +
> +	/* Add the target to the validate list */
> +	drm_intel_add_validate_buffer2(target_bo, need_fence);
> +    }
> +}
> +
> +static void
>  drm_intel_update_buffer_offsets (drm_intel_bufmgr_gem *bufmgr_gem)
>  {
>      int i;
> @@ -1043,6 +1236,25 @@ drm_intel_update_buffer_offsets (drm_intel_bufmgr_gem *bufmgr_gem)
>      }
>  }
>  
> +static void
> +drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
> +{
> +    int i;
> +
> +    for (i = 0; i < bufmgr_gem->exec_count; i++) {
> +	drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
> +
> +	/* Update the buffer offset */
> +	if (bufmgr_gem->exec2_objects[i].offset != bo->offset) {
> +	    DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
> +		bo_gem->gem_handle, bo_gem->name, bo->offset,
> +		(unsigned long long)bufmgr_gem->exec2_objects[i].offset);
> +	    bo->offset = bufmgr_gem->exec2_objects[i].offset;
> +	}
> +    }
> +}
> +
>  static int
>  drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
>  		      drm_clip_rect_t *cliprects, int num_cliprects,
> @@ -1106,6 +1318,71 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
>  }
>  
>  static int
> +drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
> +		      drm_clip_rect_t *cliprects, int num_cliprects,
> +		      int DR4)
> +{
> +    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
> +    struct drm_i915_gem_execbuffer2 execbuf;
> +    int ret, i;
> +
> +    pthread_mutex_lock(&bufmgr_gem->lock);
> +    /* Update indices and set up the validate list. */
> +    drm_intel_gem_bo_process_reloc2(bo);
> +
> +    /* Add the batch buffer to the validation list.  There are no relocations
> +     * pointing to it.
> +     */
> +    drm_intel_add_validate_buffer2(bo, 0);
> +
> +    execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
> +    execbuf.buffer_count = bufmgr_gem->exec_count;
> +    execbuf.batch_start_offset = 0;
> +    execbuf.batch_len = used;
> +    execbuf.cliprects_ptr = (uintptr_t)cliprects;
> +    execbuf.num_cliprects = num_cliprects;
> +    execbuf.DR1 = 0;
> +    execbuf.DR4 = DR4;
> +    execbuf.flags = 0;
> +    execbuf.rsvd1 = 0;
> +    execbuf.rsvd2 = 0;
> +
> +    do {
> +	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
> +    } while (ret != 0 && errno == EAGAIN);
> +
> +    if (ret != 0 && errno == ENOMEM) {
> +	fprintf(stderr, "Execbuffer fails to pin. Estimate: %u. Actual: %u. Available: %u\n",
> +		drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
> +						   bufmgr_gem->exec_count),
> +		drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
> +						  bufmgr_gem->exec_count),
> +		(unsigned int) bufmgr_gem->gtt_size);
> +    }
> +    drm_intel_update_buffer_offsets2 (bufmgr_gem);
> +
> +    if (bufmgr_gem->bufmgr.debug)
> +	drm_intel_gem_dump_validation_list(bufmgr_gem);
> +
> +    for (i = 0; i < bufmgr_gem->exec_count; i++) {
> +	drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
> +
> +	/* Need to call swrast on next bo_map */
> +	bo_gem->swrast = 0;
> +
> +	/* Disconnect the buffer from the validate list */
> +	bo_gem->validate_index = -1;
> +	drm_intel_gem_bo_unreference_locked(bo);
> +	bufmgr_gem->exec_bos[i] = NULL;
> +    }
> +    bufmgr_gem->exec_count = 0;
> +    pthread_mutex_unlock(&bufmgr_gem->lock);
> +
> +    return 0;
> +}
> +
> +static int
>  drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
>  {
>      drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
> @@ -1158,10 +1435,6 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
>      if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
>  	return 0;
>  
> -    /* If we're going from non-tiling to tiling, bump fence count */
> -    if (bo_gem->tiling_mode == I915_TILING_NONE)
> -	bo_gem->reloc_tree_fences++;
> -
>      memset(&set_tiling, 0, sizeof(set_tiling));
>      set_tiling.handle = bo_gem->gem_handle;
>      set_tiling.tiling_mode = *tiling_mode;
> @@ -1175,10 +1448,6 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
>      bo_gem->tiling_mode = set_tiling.tiling_mode;
>      bo_gem->swizzle_mode = set_tiling.swizzle_mode;
>  
> -    /* If we're going from tiling to non-tiling, drop fence count */
> -    if (bo_gem->tiling_mode == I915_TILING_NONE)
> -	bo_gem->reloc_tree_fences--;
> -
>      *tiling_mode = bo_gem->tiling_mode;
>      return 0;
>  }
> @@ -1253,7 +1522,7 @@ drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
>      bo_gem->included_in_check_aperture = 1;
>  
>      for (i = 0; i < bo_gem->reloc_count; i++)
> -	total += drm_intel_gem_bo_get_aperture_space(bo_gem->reloc_target_bo[i]);
> +	total += drm_intel_gem_bo_get_aperture_space(bo_gem->reloc_target_info[i].bo);
>  
>      return total;
>  }
> @@ -1299,7 +1568,7 @@ drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
>      bo_gem->included_in_check_aperture = 0;
>  
>      for (i = 0; i < bo_gem->reloc_count; i++)
> -	drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->reloc_target_bo[i]);
> +	drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->reloc_target_info[i].bo);
>  }
>  
>  /**
> @@ -1426,6 +1695,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>      drm_i915_getparam_t gp;
>      int ret, i;
>      unsigned long size;
> +    int exec2 = 0;
>  
>      bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
>      bufmgr_gem->fd = fd;
> @@ -1467,6 +1737,11 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>  	}
>      }
>  
> +    gp.param = I915_PARAM_HAS_EXECBUF2;
> +    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
> +    if (!ret)
> +	    exec2 = 1;
> +
>      /* Let's go with one relocation per every 2 dwords (but round down a bit
>       * since a power of two will mean an extra page allocation for the reloc
>       * buffer).
> @@ -1475,6 +1750,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>       */
>      bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
>  
> +    bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
>      bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
>      bufmgr_gem->bufmgr.bo_alloc_for_render = drm_intel_gem_bo_alloc_for_render;
>      bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
> @@ -1485,12 +1761,17 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>      bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
>      bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
>      bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
> +    bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
>      bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
>      bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
>      bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
>      bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
>      bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
>      bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
> +    /* Use the new one if available */
> +    if (exec2)
> +	    bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
> +
>      bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
>      bufmgr_gem->bufmgr.debug = 0;
>      bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space;
> diff --git a/libdrm/intel/intel_bufmgr_priv.h b/libdrm/intel/intel_bufmgr_priv.h
> index 0098076..d832882 100644
> --- a/libdrm/intel/intel_bufmgr_priv.h
> +++ b/libdrm/intel/intel_bufmgr_priv.h
> @@ -41,6 +41,25 @@
>   */
>  struct _drm_intel_bufmgr {
>     /**
> +    * Allocate a tiled buffer object.
> +    *
> +    * Alignment for tiled objects is set automatically; the 'flags'
> +    * argument provides a hint about how the object will be used initially.
> +    *
> +    * Valid tiling formats are:
> +    *  I915_TILING_NONE
> +    *  I915_TILING_X
> +    *  I915_TILING_Y
> +    *
> +    * Note the tiling format may be rejected; callers should check the
> +    * 'tiling_mode' field on return, as well as the pitch value, which
> +    * may have been rounded up to accommodate for tiling restrictions.
> +    */
> +   drm_intel_bo *(*bo_alloc_tiled)(drm_intel_bufmgr *bufmgr, const char *name,
> +				   int x, int y, int cpp, uint32_t *tiling_mode,
> +				   unsigned long *pitch, unsigned long flags);
> +
> +   /**
>      * Allocate a buffer object.
>      *
>      * Buffer objects are not necessarily initially mapped into CPU virtual
> @@ -133,6 +152,9 @@ struct _drm_intel_bufmgr {
>      int (*bo_emit_reloc)(drm_intel_bo *bo, uint32_t offset,
>  			 drm_intel_bo *target_bo, uint32_t target_offset,
>  			 uint32_t read_domains, uint32_t write_domain);
> +    int (*bo_emit_reloc_fence)(drm_intel_bo *bo, uint32_t offset,
> +			       drm_intel_bo *target_bo, uint32_t target_offset,
> +			       uint32_t read_domains, uint32_t write_domain);
>  
>      /** Executes the command buffer pointed to by bo. */
>      int (*bo_exec)(drm_intel_bo *bo, int used,
> diff --git a/shared-core/i915_drm.h b/shared-core/i915_drm.h
> index 2539966..3ee768c 100644
> --- a/shared-core/i915_drm.h
> +++ b/shared-core/i915_drm.h
> @@ -206,6 +206,7 @@ typedef struct drm_i915_sarea {
>  #define DRM_I915_GEM_GET_APERTURE 0x23
>  #define DRM_I915_GEM_MMAP_GTT	0x24
>  #define DRM_I915_GET_PIPE_FROM_CRTC_ID	0x25
> +#define DRM_I915_GEM_EXECBUFFER2	0x26
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -227,6 +228,7 @@ typedef struct drm_i915_sarea {
>  #define DRM_IOCTL_I915_EXECBUFFER	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_EXECBUFFER, struct drm_i915_execbuffer)
>  #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
>  #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
> +#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
>  #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
>  #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
>  #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
> @@ -299,6 +301,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_CHIPSET_ID            4
>  #define I915_PARAM_HAS_GEM               5
>  #define I915_PARAM_NUM_FENCES_AVAIL      6
> +#define I915_PARAM_HAS_EXECBUF2          7
>  
>  typedef struct drm_i915_getparam {
>  	int param;
> @@ -622,6 +625,57 @@ struct drm_i915_gem_execbuffer {
>  	uint64_t cliprects_ptr;
>  };
>  
> +struct drm_i915_gem_exec_object2 {
> +	/**
> +	 * User's handle for a buffer to be bound into the GTT for this
> +	 * operation.
> +	 */
> +	uint32_t handle;
> +
> +	/** Number of relocations to be performed on this buffer */
> +	uint32_t relocation_count;
> +	/**
> +	 * Pointer to array of struct drm_i915_gem_relocation_entry containing
> +	 * the relocations to be performed in this buffer.
> +	 */
> +	uint64_t relocs_ptr;
> +
> +	/** Required alignment in graphics aperture */
> +	uint64_t alignment;
> +
> +	/**
> +	 * Returned value of the updated offset of the object, for future
> +	 * presumed_offset writes.
> +	 */
> +	uint64_t offset;
> +
> +#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
> +	uint64_t flags;
> +	uint64_t rsvd1;
> +	uint64_t rsvd2;
> +};
> +
> +struct drm_i915_gem_execbuffer2 {
> +	/**
> +	 * List of gem_exec_object2 structs
> +	 */
> +	uint64_t buffers_ptr;
> +	uint32_t buffer_count;
> +
> +	/** Offset in the batchbuffer to start execution from. */
> +	uint32_t batch_start_offset;
> +	/** Bytes used in batchbuffer from batch_start_offset */
> +	uint32_t batch_len;
> +	uint32_t DR1;
> +	uint32_t DR4;
> +	uint32_t num_cliprects;
> +	/** This is a struct drm_clip_rect *cliprects */
> +	uint64_t cliprects_ptr;
> +	uint64_t flags; /* currently unused */
> +	uint64_t rsvd1;
> +	uint64_t rsvd2;
> +};
> +
>  struct drm_i915_gem_pin {
>  	/** Handle of the buffer to be pinned. */
>  	uint32_t handle;
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
-- 
Eric Anholt
eric at anholt.net                         eric.anholt at intel.com


-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 197 bytes
Desc: This is a digitally signed message part
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20090809/40ebbb65/attachment.sig>


More information about the Intel-gfx mailing list