[Mesa-dev] [PATCH 08/13] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT
Kenneth Graunke
kenneth at whitecape.org
Wed Jul 19 22:36:58 UTC 2017
On Wednesday, July 19, 2017 3:09:16 AM PDT Chris Wilson wrote:
> Passing the index of the target buffer via the reloc.target_handle is
> marginally more efficient for the kernel (it can avoid some allocations,
> and can use a direct lookup rather than a hash or search). It is also
> useful for ourselves as we can use the index into our exec_bos for other
> tasks.
>
> v2: Only enable HANDLE_LUT if we can use BATCH_FIRST and thereby avoid
> a post-processing loop to fixup the relocations.
> v3: Move kernel probing from context creation to screen init.
> Use batch->use_exec_lut as it more descriptive of what's going on (Daniel)
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Kenneth Graunke <kenneth at whitecape.org>
> Cc: Matt Turner <mattst88 at gmail.com>
> Cc: Jason Ekstrand <jason.ekstrand at intel.com>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> ---
> src/mesa/drivers/dri/i965/brw_context.h | 1 +
> src/mesa/drivers/dri/i965/intel_batchbuffer.c | 60 +++++++++++++++++----------
> src/mesa/drivers/dri/i965/intel_screen.c | 20 +++++++++
> src/mesa/drivers/dri/i965/intel_screen.h | 5 +++
> 4 files changed, 64 insertions(+), 22 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index ffe4792b73..62ce5e472c 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -452,6 +452,7 @@ struct intel_batchbuffer {
>
> uint32_t state_batch_offset;
> enum brw_gpu_ring ring;
> + bool use_exec_lut;
> bool needs_sol_reset;
> bool state_base_address_emitted;
>
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 065a9c1c0c..5f9639cd4d 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -62,8 +62,6 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
> struct brw_bufmgr *bufmgr,
> bool has_llc)
> {
> - intel_batchbuffer_reset(batch, bufmgr, has_llc);
> -
> if (!has_llc) {
> batch->cpu_map = malloc(BATCH_SZ);
> batch->map = batch->cpu_map;
> @@ -85,6 +83,16 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
> batch->state_batch_sizes =
> _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);
> }
> +
> + struct brw_context *brw = container_of(batch, brw, batch);
> + /* To use the LUT method for execbuf, we also require placing the batch
> + * first (to simplify our implementation). We require a kernel recent
> + * enough to always support EXEC_LUT_HANDLE, but we must check that
> + * the kernel supports EXEC_BATCH_FIRST.
> + */
> + batch->use_exec_lut = brw->screen->kerninfo.has_exec_batch_first;
> +
> + intel_batchbuffer_reset(batch, bufmgr, has_llc);
> }
>
> #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
> @@ -117,21 +125,12 @@ add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
> batch->exec_array_size * sizeof(batch->exec_objects[0]));
> }
>
> - struct drm_i915_gem_exec_object2 *validation_entry =
> - &batch->exec_objects[batch->exec_count];
> - validation_entry->handle = bo->gem_handle;
> - if (bo == batch->bo) {
> - validation_entry->relocation_count = batch->reloc_count;
> - validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
> - } else {
> - validation_entry->relocation_count = 0;
> - validation_entry->relocs_ptr = 0;
> - }
> - validation_entry->alignment = bo->align;
> - validation_entry->offset = bo->offset64;
> - validation_entry->flags = bo->kflags;
> - validation_entry->rsvd1 = 0;
> - validation_entry->rsvd2 = 0;
> + struct drm_i915_gem_exec_object2 *exec =
> + memset(&batch->exec_objects[batch->exec_count], 0, sizeof(*exec));
> + exec->handle = bo->gem_handle;
> + exec->alignment = bo->align;
> + exec->offset = bo->offset64;
> + exec->flags = bo->kflags;
I liked the name "validation_entry" given that we call this the "validation
list"...exec matches the struct name better, but I think validation_entry
helps distinguish the two lists...
Moving the relocation count rubbish out to do_flush_locked is a good idea.
>
> bo->index = batch->exec_count;
> batch->exec_bos[batch->exec_count] = bo;
> @@ -157,6 +156,11 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch,
> }
> batch->map_next = batch->map;
>
> + if (batch->use_exec_lut) {
> + add_exec_bo(batch, batch->bo);
> + assert(batch->bo->index == 0);
> + }
> +
> batch->reserved_space = BATCH_RESERVED;
> batch->state_batch_offset = batch->bo->size;
> batch->needs_sol_reset = false;
> @@ -663,15 +667,25 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
> } else {
> flags |= I915_EXEC_RENDER;
> }
> +
> if (batch->needs_sol_reset)
> flags |= I915_EXEC_GEN7_SOL_RESET;
>
> + unsigned int index;
> + if (batch->use_exec_lut) {
> + flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT;
> + index = 0;
> + } else {
> + index = add_exec_bo(batch, batch->bo);
> + }
> +
> + struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index];
> + exec->relocation_count = batch->reloc_count;
> + exec->relocs_ptr = (uintptr_t) batch->relocs;
> +
> if (ret == 0) {
> uint32_t hw_ctx = batch->ring == RENDER_RING ? brw->hw_ctx : 0;
>
> - /* Add the batch itself to the end of the validation list */
> - add_exec_bo(batch, batch->bo);
> -
> ret = execbuffer(dri_screen->fd, batch, hw_ctx,
> 4 * USED_BATCH(*batch),
> in_fence_fd, out_fence_fd, flags);
> @@ -798,8 +812,9 @@ __brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
> assert(_mesa_bitcount(write_domain) <= 1);
>
> uint64_t offset64;
> + unsigned int index;
I'd prefer "validation_index" here.
> if (target != batch->bo) {
> - unsigned int index = add_exec_bo(batch, target);
> + index = add_exec_bo(batch, target);
> struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index];
>
> if (write_domain) {
> @@ -816,6 +831,7 @@ __brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
> offset64 = exec->offset;
> } else {
> offset64 = target->offset64;
> + index = target->index;
> }
>
> struct drm_i915_gem_relocation_entry *reloc =
> @@ -825,7 +841,7 @@ __brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
>
> reloc->offset = batch_offset;
> reloc->delta = target_offset;
> - reloc->target_handle = target->gem_handle;
> + reloc->target_handle = batch->use_exec_lut ? index : target->gem_handle;
> reloc->read_domains = read_domains;
> reloc->write_domain = write_domain;
> reloc->presumed_offset = offset64;
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
> index 7ccc1d34f0..99804c8329 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -51,6 +51,8 @@
> #define DRM_FORMAT_MOD_LINEAR 0
> #endif
>
> +#define DBG_NO_EXEC_BATCH_FIRST 0
> +
> static const __DRIconfigOptionsExtension brw_config_options = {
> .base = { __DRI_CONFIG_OPTIONS, 1 },
> .xml =
> @@ -1466,6 +1468,22 @@ intelDestroyBuffer(__DRIdrawable * driDrawPriv)
> _mesa_reference_framebuffer(&fb, NULL);
> }
>
> +static bool
> +test_has_exec_batch_first(struct intel_screen *screen)
> +{
> + if (DBG_NO_EXEC_BATCH_FIRST)
> + return DBG_NO_EXEC_BATCH_FIRST < 0;
> +
> + return intel_get_integer(screen, I915_PARAM_HAS_EXEC_BATCH_FIRST) > 0;
> +}
> +
> +static void
> +intel_get_kernel_info(struct intel_screen *screen,
> + struct intel_kernel_info *kerninfo)
> +{
> + kerninfo->has_exec_batch_first = test_has_exec_batch_first(screen);
> +}
> +
> static void
> intel_detect_sseu(struct intel_screen *screen)
> {
> @@ -2097,6 +2115,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
> if (!gen_get_device_info(screen->deviceID, &screen->devinfo))
> return NULL;
>
> + intel_get_kernel_info(screen, &screen->kerninfo);
> +
Let's just add a bit to screen->kernel_features instead of adding a whole
new struct...
if (intel_get_integer(screen, I915_PARAM_HAS_EXEC_BATCH_FIRST) > 0)
screen->kernel_features |= KERNEL_ALLOWS_EXEC_BATCH_FIRST;
no need for the DBG_* stuff either - if you want to disable it, you can
just change that line to if (0 && ...).
With those changes, this would get a:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
> if (!intel_init_bufmgr(screen))
> return NULL;
>
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
> index 0980c8f561..d88703b1e6 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.h
> +++ b/src/mesa/drivers/dri/i965/intel_screen.h
> @@ -44,10 +44,15 @@
> extern "C" {
> #endif
>
> +struct intel_kernel_info {
> + bool has_exec_batch_first;
> +};
> +
> struct intel_screen
> {
> int deviceID;
> struct gen_device_info devinfo;
> + struct intel_kernel_info kerninfo;
>
> __DRIscreen *driScrnPriv;
>
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170719/4e93429c/attachment.sig>
More information about the mesa-dev
mailing list