[Mesa-dev] [PATCH 2/4] i965: Use I915_EXEC_NO_RELOC

Kenneth Graunke kenneth at whitecape.org
Mon Jun 19 19:28:31 UTC 2017


On Monday, June 19, 2017 3:06:48 AM PDT Chris Wilson wrote:
> If we correctly fill the batch with the right relocation value, and that
> matches the expected location of the object, we can then tell the kernel
> it can forgo checking each individual relocation by only checking
> whether the object moved.
> 
> v2: Rebase to apply ahead of I915_EXEC_HANDLE_LUT
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Kenneth Graunke <kenneth at whitecape.org>
> Cc: Matt Turner <mattst88 at gmail.com>
> Cc: Jason Ekstrand <jason.ekstrand at intel.com>
> ---
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 53 +++++++++++++++++++++------
>  1 file changed, 41 insertions(+), 12 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index ca7d6b81b1..7129209c26 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -517,19 +517,19 @@ throttle(struct brw_context *brw)
>  
>  #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
>  
> -static void
> +static unsigned int
>  add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
>  {
>     if (bo != batch->bo) {
>        unsigned int index = READ_ONCE(bo->index);
>  
>        if (index < batch->exec_count && batch->exec_bos[index] == bo)
> -         return;
> +         return index;
>  
>        /* May have been shared between multiple active batches */
>        for (index = 0; index < batch->exec_count; index++) {
>           if (batch->exec_bos[index] == bo)
> -            return;
> +            return index;
>        }
>  
>        brw_bo_reference(bo);
> @@ -563,8 +563,9 @@ add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
>  
>     bo->index = batch->exec_count;
>     batch->exec_bos[batch->exec_count] = bo;
> -   batch->exec_count++;
>     batch->aperture_space += bo->size;
> +
> +   return batch->exec_count++;
>  }
>  
>  static int
> @@ -642,12 +643,25 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
>     }
>  
>     if (!brw->screen->no_hw) {
> -      int flags;
> -
> +      unsigned int flags;
> +
> +      /* The requirement for using I915_EXEC_NO_RELOC are:
> +       *
> +       *   The addresses written in the objects must match the corresponding
> +       *   reloc.presumed_offset which in turn must match the corresponding
> +       *   execobject.offset.
> +       *
> +       *   Any render targets written to in the batch must be flagged with
> +       *   EXEC_OBJECT_WRITE.
> +       *
> +       *   To avoid stalling, execobject.offset should match the current
> +       *   address of that object within the active context.
> +       */
> +      flags = I915_EXEC_NO_RELOC;
>        if (brw->gen >= 6 && batch->ring == BLT_RING) {
> -         flags = I915_EXEC_BLT;
> +         flags |= I915_EXEC_BLT;
>        } else {
> -         flags = I915_EXEC_RENDER;
> +         flags |= I915_EXEC_RENDER;
>        }
>        if (batch->needs_sol_reset)
>  	 flags |= I915_EXEC_GEN7_SOL_RESET;
> @@ -779,16 +793,31 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
>     assert(batch_offset <= BATCH_SZ - sizeof(uint32_t));
>     assert(_mesa_bitcount(write_domain) <= 1);
>  
> -   if (target != batch->bo)
> -      add_exec_bo(batch, target);
> +   if (target != batch->bo) {
> +      unsigned int index = add_exec_bo(batch, target);
> +      struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index];
> +
> +      if (write_domain) {
> +         exec->flags |= EXEC_OBJECT_WRITE;
> +
> +         /* PIPECONTROL needs a w/a on gen6 */
> +         if (write_domain == I915_GEM_DOMAIN_INSTRUCTION) {
> +            struct brw_context *brw = container_of(batch, brw, batch);
> +            if (brw->gen == 6)
> +               exec->flags |= EXEC_OBJECT_NEEDS_GTT;
> +         }
> +      }
> +
> +      offset64 = exec->offset;

I don't think this works.  brw_bo still has a single offset64 value that's
shared across all contexts.  So, in a multi-context/multi-threaded case, we
may put a bogus offset in the validation list.

Pulling the value out of the validation list is nice, but it's still
potentially bogus.

I think we still need per-context-bo fields.  I have a patch series in
progress to do that, but I moved it down the priority list a ways...
need to get back to that...

> +   } else {
> +      offset64 = target->offset64;
> +   }
>  
>     struct drm_i915_gem_relocation_entry *reloc =
>        &batch->relocs[batch->reloc_count];
>  
>     batch->reloc_count++;
>  
> -   /* ensure gcc doesn't reload */
> -   offset64 = *((volatile uint64_t *)&target->offset64);
>     reloc->offset = batch_offset;
>     reloc->delta = target_offset;
>     reloc->target_handle = target->gem_handle;
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170619/98a8caea/attachment.sig>


More information about the mesa-dev mailing list