[Mesa-dev] [PATCH 4/4] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT

Daniel Vetter daniel at ffwll.ch
Fri Jul 7 10:31:46 UTC 2017


On Mon, Jun 19, 2017 at 11:06:50AM +0100, Chris Wilson wrote:
> Passing the index of the target buffer via the reloc.target_handle is
> marginally more efficient for the kernel (it can avoid some allocations,
> and can use a direct lookup rather than a hash or search). It is also
> useful for ourselves as we can use the index into our exec_bos for other
> tasks.
> 
> v2: Only enable HANDLE_LUT if we can use BATCH_FIRST and thereby avoid
> a post-processing loop to fixup the relocations.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Kenneth Graunke <kenneth at whitecape.org>
> Cc: Matt Turner <mattst88 at gmail.com>
> Cc: Jason Ekstrand <jason.ekstrand at intel.com>
> ---
>  src/mesa/drivers/dri/i965/brw_context.h       |  1 +
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 82 ++++++++++++++++++++-------
>  2 files changed, 61 insertions(+), 22 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 2fb2cab918..93ddd0825a 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -450,6 +450,7 @@ struct intel_batchbuffer {
>  
>     uint32_t state_batch_offset;
>     enum brw_gpu_ring ring;
> +   bool has_batch_first;
>     bool needs_sol_reset;
>     bool state_base_address_emitted;
>  
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 15aaf01e52..5fa849c5a5 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -40,6 +40,10 @@
>  
>  #define FILE_DEBUG_FLAG DEBUG_BUFMGR
>  
> +#define DBG_NO_BATCH_FIRST 0
> +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48
> +#define I915_EXEC_BATCH_FIRST (1 << 18)

Needs an #ifndef, I think, to avoid trouble when updating libdrm. Or just
properly update Mesa's copy of i915_drm.h — that would be much better.

> +
>  static void
>  intel_batchbuffer_reset(struct intel_batchbuffer *batch,
>                          struct brw_bufmgr *bufmgr,
> @@ -57,13 +61,33 @@ uint_key_hash(const void *key)
>     return (uintptr_t) key;
>  }
>  
> +static int gem_param(int fd, int name)
> +{
> +   drm_i915_getparam_t gp;
> +   int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */
> +
> +   memset(&gp, 0, sizeof(gp));
> +   gp.param = name;
> +   gp.value = &v;
> +   if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
> +      return -1;
> +
> +   return v;
> +}

Afaict this already exists as intel_get_param — why can't we use that?

> +
> +static bool test_has_batch_first(int fd)
> +{
> +   if (DBG_NO_BATCH_FIRST)
> +      return DBG_NO_BATCH_FIRST < 0;
> +
> +   return gem_param(fd, I915_PARAM_HAS_EXEC_BATCH_FIRST) > 0;
> +}
> +
>  void
>  intel_batchbuffer_init(struct intel_batchbuffer *batch,
>                         struct brw_bufmgr *bufmgr,
>                         bool has_llc)
>  {
> -   intel_batchbuffer_reset(batch, bufmgr, has_llc);
> -
>     if (!has_llc) {
>        batch->cpu_map = malloc(BATCH_SZ);
>        batch->map = batch->cpu_map;
> @@ -85,6 +109,12 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
>        batch->state_batch_sizes =
>           _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);
>     }
> +
> +   struct brw_context *brw = container_of(batch, brw, batch);
> +   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
> +   batch->has_batch_first = test_has_batch_first(dri_screen->fd);
> +
> +   intel_batchbuffer_reset(batch, bufmgr, has_llc);
>  }
>  
>  #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
> @@ -117,21 +147,12 @@ add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
>                   batch->exec_array_size * sizeof(batch->exec_objects[0]));
>     }
>  
> -   struct drm_i915_gem_exec_object2 *validation_entry =
> -      &batch->exec_objects[batch->exec_count];
> -   validation_entry->handle = bo->gem_handle;
> -   if (bo == batch->bo) {
> -      validation_entry->relocation_count = batch->reloc_count;
> -      validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
> -   } else {
> -      validation_entry->relocation_count = 0;
> -      validation_entry->relocs_ptr = 0;
> -   }
> -   validation_entry->alignment = bo->align;
> -   validation_entry->offset = bo->offset64;
> -   validation_entry->flags = bo->kflags;
> -   validation_entry->rsvd1 = 0;
> -   validation_entry->rsvd2 = 0;
> +   struct drm_i915_gem_exec_object2 *exec =
> +      memset(&batch->exec_objects[batch->exec_count], 0, sizeof(*exec));
> +   exec->handle = bo->gem_handle;
> +   exec->alignment = bo->align;
> +   exec->offset = bo->offset64;
> +   exec->flags = bo->kflags;
>  
>     bo->index = batch->exec_count;
>     batch->exec_bos[batch->exec_count] = bo;
> @@ -157,6 +178,11 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch,
>     }
>     batch->map_next = batch->map;
>  
> +   if (batch->has_batch_first) {
> +      add_exec_bo(batch, batch->bo);
> +      assert(batch->bo->index == 0);
> +   }
> +
>     batch->reserved_space = BATCH_RESERVED;
>     batch->state_batch_offset = batch->bo->size;
>     batch->needs_sol_reset = false;
> @@ -663,15 +689,25 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
>        } else {
>           flags |= I915_EXEC_RENDER;
>        }
> +
>        if (batch->needs_sol_reset)
>  	 flags |= I915_EXEC_GEN7_SOL_RESET;
>  
> +      unsigned int index;
> +      if (batch->has_batch_first) {
> +         flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT;
> +         index = 0;
> +      } else {
> +         index = add_exec_bo(batch, batch->bo);
> +      }
> +
> +      struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index];
> +      exec->relocation_count = batch->reloc_count;
> +      exec->relocs_ptr = (uintptr_t) batch->relocs;
> +
>        if (ret == 0) {
>           uint32_t hw_ctx = batch->ring == RENDER_RING ? brw->hw_ctx : 0;
>  
> -         /* Add the batch itself to the end of the validation list */
> -         add_exec_bo(batch, batch->bo);
> -
>           ret = execbuffer(dri_screen->fd, batch, hw_ctx,
>                            4 * USED_BATCH(*batch),
>                            in_fence_fd, out_fence_fd, flags);
> @@ -793,8 +829,9 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
>     assert(batch_offset <= BATCH_SZ - sizeof(uint32_t));
>     assert(_mesa_bitcount(write_domain) <= 1);
>  
> +   unsigned int index;
>     if (target != batch->bo) {
> -      unsigned int index = add_exec_bo(batch, target);
> +      index = add_exec_bo(batch, target);
>        struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index];
>  
>        if (write_domain) {
> @@ -811,6 +848,7 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
>        offset64 = exec->offset;
>     } else {
>        offset64 = target->offset64;
> +      index = target->index;

index = 0; Yes, the batch isn't ever shared, but I think it's better to
make this obviously safe.

With the nits addressed:

Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>


>     }
>  
>     struct drm_i915_gem_relocation_entry *reloc =
> @@ -820,7 +858,7 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
>  
>     reloc->offset = batch_offset;
>     reloc->delta = target_offset;
> -   reloc->target_handle = target->gem_handle;
> +   reloc->target_handle = batch->has_batch_first ? index : target->gem_handle;
>     reloc->read_domains = read_domains;
>     reloc->write_domain = write_domain;
>     reloc->presumed_offset = offset64;
> -- 
> 2.11.0
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the mesa-dev mailing list