[Mesa-dev] [PATCH 06/13] i965: Use I915_EXEC_NO_RELOC

Kenneth Graunke kenneth at whitecape.org
Wed Jul 19 22:17:49 UTC 2017


On Wednesday, July 19, 2017 3:09:14 AM PDT Chris Wilson wrote:
> If we correctly fill the batch with the right relocation value, and that
> matches the expected location of the object, we can then tell the kernel
> it can forgo checking each individual relocation by only checking
> whether the object moved.
> 
> v2: Rebase to apply ahead of I915_EXEC_HANDLE_LUT
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Kenneth Graunke <kenneth at whitecape.org>
> Cc: Matt Turner <mattst88 at gmail.com>
> Cc: Jason Ekstrand <jason.ekstrand at intel.com>
> ---
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 54 +++++++++++++++++++++------
>  1 file changed, 42 insertions(+), 12 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index a358269d9b..ac6fa080e1 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -517,19 +517,19 @@ throttle(struct brw_context *brw)
>  
>  #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
>  
> -static void
> +static unsigned int
>  add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
>  {
>     if (bo != batch->bo) {
>        unsigned int index = READ_ONCE(bo->index);
>  
>        if (index < batch->exec_count && batch->exec_bos[index] == bo)
> -         return;
> +         return index;
>  
>        /* May have been shared between multiple active batches */
>        for (index = 0; index < batch->exec_count; index++) {
>           if (batch->exec_bos[index] == bo)
> -            return;
> +            return index;
>        }
>  
>        brw_bo_reference(bo);
> @@ -563,8 +563,9 @@ add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
>  
>     bo->index = batch->exec_count;
>     batch->exec_bos[batch->exec_count] = bo;
> -   batch->exec_count++;
>     batch->aperture_space += bo->size;
> +
> +   return batch->exec_count++;
>  }
>  
>  static int
> @@ -642,12 +643,25 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
>     }
>  
>     if (!brw->screen->no_hw) {
> -      int flags;
> -
> +      unsigned int flags;
> +
> +      /* The requirement for using I915_EXEC_NO_RELOC are:
> +       *
> +       *   The addresses written in the objects must match the corresponding
> +       *   reloc.presumed_offset which in turn must match the corresponding
> +       *   execobject.offset.
> +       *
> +       *   Any render targets written to in the batch must be flagged with
> +       *   EXEC_OBJECT_WRITE.
> +       *
> +       *   To avoid stalling, execobject.offset should match the current
> +       *   address of that object within the active context.
> +       */
> +      flags = I915_EXEC_NO_RELOC;
>        if (brw->gen >= 6 && batch->ring == BLT_RING) {
> -         flags = I915_EXEC_BLT;
> +         flags |= I915_EXEC_BLT;
>        } else {
> -         flags = I915_EXEC_RENDER;
> +         flags |= I915_EXEC_RENDER;
>        }
>        if (batch->needs_sol_reset)
>  	 flags |= I915_EXEC_GEN7_SOL_RESET;
> @@ -783,16 +797,32 @@ __brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
>     assert(batch_offset <= BATCH_SZ - sizeof(uint32_t));
>     assert(_mesa_bitcount(write_domain) <= 1);
>  
> -   if (target != batch->bo)
> -      add_exec_bo(batch, target);
> +   uint64_t offset64;
> +   if (target != batch->bo) {
> +      unsigned int index = add_exec_bo(batch, target);
> +      struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index];
> +
> +      if (write_domain) {
> +         exec->flags |= EXEC_OBJECT_WRITE;
> +
> +         /* PIPECONTROL needs a w/a on gen6 */
> +         if (write_domain == I915_GEM_DOMAIN_INSTRUCTION) {
> +            struct brw_context *brw = container_of(batch, brw, batch);
> +            if (brw->gen == 6)
> +               exec->flags |= EXEC_OBJECT_NEEDS_GTT;
> +         }
> +      }
> +
> +      offset64 = exec->offset;
> +   } else {
> +      offset64 = target->offset64;
> +   }
>  
>     struct drm_i915_gem_relocation_entry *reloc =
>        &batch->relocs[batch->reloc_count];
>  
>     batch->reloc_count++;
>  
> -   /* ensure gcc doesn't reload */
> -   uint64_t offset64 = *((volatile uint64_t *)&target->offset64);
>     reloc->offset = batch_offset;
>     reloc->delta = target_offset;
>     reloc->target_handle = target->gem_handle;
> 

Looks solid - by pulling the presumed offset from the validation list,
you guarantee that the validation entry and reloc entries will always
match.  You can't do that for the batch, but there are no threading
issues with the batch BO because they're only used in one context.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170719/6d9e2045/attachment.sig>


More information about the mesa-dev mailing list