[Mesa-dev] [PATCH 08/13] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT

Chris Wilson chris at chris-wilson.co.uk
Wed Jul 19 10:09:16 UTC 2017


Passing the index of the target buffer via the reloc.target_handle is
marginally more efficient for the kernel (it can avoid some allocations,
and can use a direct lookup rather than a hash or search). It is also
useful for ourselves as we can use the index into our exec_bos for other
tasks.

v2: Only enable HANDLE_LUT if we can use BATCH_FIRST and thereby avoid
a post-processing loop to fixup the relocations.
v3: Move kernel probing from context creation to screen init.
Use batch->use_exec_lut as it more descriptive of what's going on (Daniel)

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth at whitecape.org>
Cc: Matt Turner <mattst88 at gmail.com>
Cc: Jason Ekstrand <jason.ekstrand at intel.com>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
---
 src/mesa/drivers/dri/i965/brw_context.h       |  1 +
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 60 +++++++++++++++++----------
 src/mesa/drivers/dri/i965/intel_screen.c      | 20 +++++++++
 src/mesa/drivers/dri/i965/intel_screen.h      |  5 +++
 4 files changed, 64 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index ffe4792b73..62ce5e472c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -452,6 +452,7 @@ struct intel_batchbuffer {
 
    uint32_t state_batch_offset;
    enum brw_gpu_ring ring;
+   bool use_exec_lut;
    bool needs_sol_reset;
    bool state_base_address_emitted;
 
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 065a9c1c0c..5f9639cd4d 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -62,8 +62,6 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
                        struct brw_bufmgr *bufmgr,
                        bool has_llc)
 {
-   intel_batchbuffer_reset(batch, bufmgr, has_llc);
-
    if (!has_llc) {
       batch->cpu_map = malloc(BATCH_SZ);
       batch->map = batch->cpu_map;
@@ -85,6 +83,16 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
       batch->state_batch_sizes =
          _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);
    }
+
+   struct brw_context *brw = container_of(batch, brw, batch);
+   /* To use the LUT method for execbuf, we also require placing the batch
+    * first (to simplify our implementation). We require a kernel recent
+    * enough to always support EXEC_LUT_HANDLE, but we must check that
+    * the kernel supports EXEC_BATCH_FIRST.
+    */
+   batch->use_exec_lut = brw->screen->kerninfo.has_exec_batch_first;
+
+   intel_batchbuffer_reset(batch, bufmgr, has_llc);
 }
 
 #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
@@ -117,21 +125,12 @@ add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
                  batch->exec_array_size * sizeof(batch->exec_objects[0]));
    }
 
-   struct drm_i915_gem_exec_object2 *validation_entry =
-      &batch->exec_objects[batch->exec_count];
-   validation_entry->handle = bo->gem_handle;
-   if (bo == batch->bo) {
-      validation_entry->relocation_count = batch->reloc_count;
-      validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
-   } else {
-      validation_entry->relocation_count = 0;
-      validation_entry->relocs_ptr = 0;
-   }
-   validation_entry->alignment = bo->align;
-   validation_entry->offset = bo->offset64;
-   validation_entry->flags = bo->kflags;
-   validation_entry->rsvd1 = 0;
-   validation_entry->rsvd2 = 0;
+   struct drm_i915_gem_exec_object2 *exec =
+      memset(&batch->exec_objects[batch->exec_count], 0, sizeof(*exec));
+   exec->handle = bo->gem_handle;
+   exec->alignment = bo->align;
+   exec->offset = bo->offset64;
+   exec->flags = bo->kflags;
 
    bo->index = batch->exec_count;
    batch->exec_bos[batch->exec_count] = bo;
@@ -157,6 +156,11 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch,
    }
    batch->map_next = batch->map;
 
+   if (batch->use_exec_lut) {
+      add_exec_bo(batch, batch->bo);
+      assert(batch->bo->index == 0);
+   }
+
    batch->reserved_space = BATCH_RESERVED;
    batch->state_batch_offset = batch->bo->size;
    batch->needs_sol_reset = false;
@@ -663,15 +667,25 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
       } else {
          flags |= I915_EXEC_RENDER;
       }
+
       if (batch->needs_sol_reset)
 	 flags |= I915_EXEC_GEN7_SOL_RESET;
 
+      unsigned int index;
+      if (batch->use_exec_lut) {
+         flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT;
+         index = 0;
+      } else {
+         index = add_exec_bo(batch, batch->bo);
+      }
+
+      struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index];
+      exec->relocation_count = batch->reloc_count;
+      exec->relocs_ptr = (uintptr_t) batch->relocs;
+
       if (ret == 0) {
          uint32_t hw_ctx = batch->ring == RENDER_RING ? brw->hw_ctx : 0;
 
-         /* Add the batch itself to the end of the validation list */
-         add_exec_bo(batch, batch->bo);
-
          ret = execbuffer(dri_screen->fd, batch, hw_ctx,
                           4 * USED_BATCH(*batch),
                           in_fence_fd, out_fence_fd, flags);
@@ -798,8 +812,9 @@ __brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
    assert(_mesa_bitcount(write_domain) <= 1);
 
    uint64_t offset64;
+   unsigned int index;
    if (target != batch->bo) {
-      unsigned int index = add_exec_bo(batch, target);
+      index = add_exec_bo(batch, target);
       struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index];
 
       if (write_domain) {
@@ -816,6 +831,7 @@ __brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
       offset64 = exec->offset;
    } else {
       offset64 = target->offset64;
+      index = target->index;
    }
 
    struct drm_i915_gem_relocation_entry *reloc =
@@ -825,7 +841,7 @@ __brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
 
    reloc->offset = batch_offset;
    reloc->delta = target_offset;
-   reloc->target_handle = target->gem_handle;
+   reloc->target_handle = batch->use_exec_lut ? index : target->gem_handle;
    reloc->read_domains = read_domains;
    reloc->write_domain = write_domain;
    reloc->presumed_offset = offset64;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 7ccc1d34f0..99804c8329 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -51,6 +51,8 @@
 #define DRM_FORMAT_MOD_LINEAR 0
 #endif
 
+#define DBG_NO_EXEC_BATCH_FIRST 0
+
 static const __DRIconfigOptionsExtension brw_config_options = {
    .base = { __DRI_CONFIG_OPTIONS, 1 },
    .xml =
@@ -1466,6 +1468,22 @@ intelDestroyBuffer(__DRIdrawable * driDrawPriv)
     _mesa_reference_framebuffer(&fb, NULL);
 }
 
+static bool
+test_has_exec_batch_first(struct intel_screen *screen)
+{
+   if (DBG_NO_EXEC_BATCH_FIRST)
+      return DBG_NO_EXEC_BATCH_FIRST < 0;
+
+   return intel_get_integer(screen, I915_PARAM_HAS_EXEC_BATCH_FIRST) > 0;
+}
+
+static void
+intel_get_kernel_info(struct intel_screen *screen,
+                      struct intel_kernel_info *kerninfo)
+{
+   kerninfo->has_exec_batch_first = test_has_exec_batch_first(screen);
+}
+
 static void
 intel_detect_sseu(struct intel_screen *screen)
 {
@@ -2097,6 +2115,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
    if (!gen_get_device_info(screen->deviceID, &screen->devinfo))
       return NULL;
 
+   intel_get_kernel_info(screen, &screen->kerninfo);
+
    if (!intel_init_bufmgr(screen))
        return NULL;
 
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index 0980c8f561..d88703b1e6 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -44,10 +44,15 @@
 extern "C" {
 #endif
 
+struct intel_kernel_info {
+	bool has_exec_batch_first;
+};
+
 struct intel_screen
 {
    int deviceID;
    struct gen_device_info devinfo;
+   struct intel_kernel_info kerninfo;
 
    __DRIscreen *driScrnPriv;
 
-- 
2.13.3



More information about the mesa-dev mailing list