[Mesa-dev] [PATCH 2/3] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT

Kenneth Graunke kenneth at whitecape.org
Fri May 26 17:47:18 UTC 2017


On Friday, May 26, 2017 4:25:20 AM PDT Chris Wilson wrote:
> Passing the index of the target buffer via the reloc.target_handle is
> marginally more efficient for the kernel (it can avoid some allocations,
> and can use a direct lookup rather than a hash or search). It is also
> useful for ourselves as we can use the index into our exec_bos for other
> tasks.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Kenneth Graunke <kenneth at whitecape.org>
> Cc: Jason Ekstrand <jason.ekstrand at intel.com>
> ---
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 30 ++++++++++++++++++---------
>  1 file changed, 20 insertions(+), 10 deletions(-)

I wrote this patch as well, and it turned out to hurt performance by a
small amount in every CPU-limited benchmark I tried.

I think the extra loop to rewrite -1 to the batch index was enough to
make it hurt.  That would go away with I915_EXEC_BATCH_FIRST...

> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 752e9b389b..2e10d165c7 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -518,19 +518,19 @@ throttle(struct brw_context *brw)
>  
>  #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
>  
> -static void
> +static unsigned int
>  add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
>  {
>     if (bo != batch->bo) {
>        unsigned int index = READ_ONCE(bo->index);
>  
>        if (index < batch->exec_count && batch->exec_bos[index] == bo)
> -         return;
> +         return index;
>  
>        /* May have been shared between multiple active batches */
>        for (index = 0; index < batch->exec_count; index++) {
>           if (batch->exec_bos[index] == bo)
> -            return;
> +            return index;
>        }
>  
>        brw_bo_reference(bo);
> @@ -564,8 +564,9 @@ add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
>  
>     bo->index = batch->exec_count;
>     batch->exec_bos[batch->exec_count] = bo;
> -   batch->exec_count++;
>     batch->aperture_space += bo->size;
> +
> +   return batch->exec_count++;
>  }
>  
>  static int
> @@ -643,21 +644,28 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
>     }
>  
>     if (!brw->screen->no_hw) {
> -      int flags;
> +      unsigned int flags;
>  
> +      flags = I915_EXEC_HANDLE_LUT;
>        if (brw->gen >= 6 && batch->ring == BLT_RING) {
> -         flags = I915_EXEC_BLT;
> +         flags |= I915_EXEC_BLT;
>        } else {
> -         flags = I915_EXEC_RENDER;
> +         flags |= I915_EXEC_RENDER;
>        }
>        if (batch->needs_sol_reset)
>  	 flags |= I915_EXEC_GEN7_SOL_RESET;
>  
>        if (ret == 0) {
>           uint32_t hw_ctx = batch->ring == RENDER_RING ? brw->hw_ctx : 0;
> +         unsigned int index;
>  
>           /* Add the batch itself to the end of the validation list */
> -         add_exec_bo(batch, batch->bo);
> +         index = add_exec_bo(batch, batch->bo);
> +         for (unsigned int i = 0; i < batch->reloc_count; i++) {
> +            struct drm_i915_gem_relocation_entry *reloc = &batch->relocs[i];
> +            if (reloc->target_handle == -1)
> +               reloc->target_handle = index;
> +         }
>  
>           ret = execbuffer(dri_screen->fd, batch, hw_ctx,
>                            4 * USED_BATCH(*batch),
> @@ -768,6 +776,7 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
>                 uint32_t read_domains, uint32_t write_domain)
>  {
>     uint64_t offset64;
> +   unsigned int index;
>  
>     if (batch->reloc_count == batch->reloc_array_size) {
>        batch->reloc_array_size *= 2;
> @@ -780,8 +789,9 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
>     assert(batch_offset <= BATCH_SZ - sizeof(uint32_t));
>     assert(_mesa_bitcount(write_domain) <= 1);
>  
> +   index = -1; /* to be fixed up later when the index of the batch is known */
>     if (target != batch->bo)
> -      add_exec_bo(batch, target);
> +      index = add_exec_bo(batch, target);
>  
>     struct drm_i915_gem_relocation_entry *reloc =
>        &batch->relocs[batch->reloc_count];
> @@ -792,7 +802,7 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
>     offset64 = *((volatile uint64_t *)&target->offset64);
>     reloc->offset = batch_offset;
>     reloc->delta = target_offset;
> -   reloc->target_handle = target->gem_handle;
> +   reloc->target_handle = index;
>     reloc->read_domains = read_domains;
>     reloc->write_domain = write_domain;
>     reloc->presumed_offset = offset64;
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170526/aad5010b/attachment.sig>


More information about the mesa-dev mailing list