[Mesa-dev] [PATCH 08/10] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT
Chris Wilson
chris at chris-wilson.co.uk
Fri Jul 21 15:36:50 UTC 2017
Passing the index of the target buffer via the reloc.target_handle is
marginally more efficient for the kernel (it can avoid some allocations,
and can use a direct lookup rather than a hash or search). It is also
useful for ourselves as we can use the index into our exec_bos for other
tasks.
v2: Only enable HANDLE_LUT if we can use BATCH_FIRST and thereby avoid
a post-processing loop to fixup the relocations.
v3: Move kernel probing from context creation to screen init.
Use batch->use_exec_lut as it more descriptive of what's going on (Daniel)
v4: Kernel features already exists, use it for BATCH_FIRST
Rename locals to preserve current flavouring
v5: Squash in "always insert batch bo first"
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth at whitecape.org>
Cc: Matt Turner <mattst88 at gmail.com>
Cc: Jason Ekstrand <jason.ekstrand at intel.com>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org> #v4
---
src/mesa/drivers/dri/i965/brw_context.h | 1 +
src/mesa/drivers/dri/i965/intel_batchbuffer.c | 109 ++++++++++++++------------
src/mesa/drivers/dri/i965/intel_screen.c | 4 +
src/mesa/drivers/dri/i965/intel_screen.h | 1 +
4 files changed, 65 insertions(+), 50 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 2acebaa820..57081fb434 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -452,6 +452,7 @@ struct intel_batchbuffer {
uint32_t state_batch_offset;
enum brw_gpu_ring ring;
+ bool use_exec_lut;
bool needs_sol_reset;
bool state_base_address_emitted;
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 5e861a555a..16791de3de 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -62,8 +62,6 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
struct brw_bufmgr *bufmgr,
bool has_llc)
{
- intel_batchbuffer_reset(batch, bufmgr, has_llc);
-
if (!has_llc) {
batch->cpu_map = malloc(BATCH_SZ);
batch->map = batch->cpu_map;
@@ -85,6 +83,17 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
batch->state_batch_sizes =
_mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);
}
+
+ struct brw_context *brw = container_of(batch, brw, batch);
+ /* To use the LUT method for execbuf, we also require placing the batch
+ * first (to simplify our implementation). We require a kernel recent
+ * enough to always support EXEC_LUT_HANDLE, but we must check that
+ * the kernel supports EXEC_BATCH_FIRST.
+ */
+ batch->use_exec_lut =
+ brw->screen->kernel_features & KERNEL_ALLOWS_EXEC_BATCH_FIRST;
+
+ intel_batchbuffer_reset(batch, bufmgr, has_llc);
}
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
@@ -92,19 +101,15 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
static unsigned int
add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
{
- if (bo != batch->bo) {
- unsigned int index = READ_ONCE(bo->index);
-
- if (index < batch->exec_count && batch->exec_bos[index] == bo)
- return index;
+ unsigned int index = READ_ONCE(bo->index);
- /* May have been shared between multiple active batches */
- for (index = 0; index < batch->exec_count; index++) {
- if (batch->exec_bos[index] == bo)
- return index;
- }
+ if (index < batch->exec_count && batch->exec_bos[index] == bo)
+ return index;
- brw_bo_reference(bo);
+ /* May have been shared between multiple active batches */
+ for (index = 0; index < batch->exec_count; index++) {
+ if (batch->exec_bos[index] == bo)
+ return index;
}
if (batch->exec_count == batch->exec_array_size) {
@@ -117,26 +122,21 @@ add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
batch->exec_array_size * sizeof(batch->validation_list[0]));
}
- struct drm_i915_gem_exec_object2 *validation_entry =
- &batch->validation_list[batch->exec_count];
- validation_entry->handle = bo->gem_handle;
- if (bo == batch->bo) {
- validation_entry->relocation_count = batch->reloc_count;
- validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
- } else {
- validation_entry->relocation_count = 0;
- validation_entry->relocs_ptr = 0;
- }
- validation_entry->alignment = bo->align;
- validation_entry->offset = bo->offset64;
- validation_entry->flags = bo->kflags;
- validation_entry->rsvd1 = 0;
- validation_entry->rsvd2 = 0;
+ batch->validation_list[batch->exec_count] =
+ (struct drm_i915_gem_exec_object2) {
+ .handle = bo->gem_handle,
+ .alignment = bo->align,
+ .offset = bo->offset64,
+ .flags = bo->kflags,
+ };
bo->index = batch->exec_count;
batch->exec_bos[batch->exec_count] = bo;
batch->aperture_space += bo->size;
+ if (bo != batch->bo)
+ brw_bo_reference(bo);
+
return batch->exec_count++;
}
@@ -157,6 +157,9 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch,
}
batch->map_next = batch->map;
+ add_exec_bo(batch, batch->bo);
+ assert(batch->bo->index == 0);
+
batch->reserved_space = BATCH_RESERVED;
batch->state_batch_offset = batch->bo->size;
batch->needs_sol_reset = false;
@@ -581,9 +584,6 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
}
if (ret == 0) {
- /* Add the batch itself to the end of the validation list */
- add_exec_bo(batch, batch->bo);
-
struct drm_i915_gem_execbuffer2 execbuf = {
.buffers_ptr = (uintptr_t) batch->validation_list,
.buffer_count = batch->exec_count,
@@ -614,6 +614,22 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
if (batch->needs_sol_reset)
execbuf.flags |= I915_EXEC_GEN7_SOL_RESET;
+ struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0];
+ assert(entry->handle == batch->bo->gem_handle);
+ entry->relocation_count = batch->reloc_count;
+ entry->relocs_ptr = (uintptr_t) batch->relocs;
+
+ if (batch->use_exec_lut) {
+ execbuf.flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT;
+ } else {
+ struct drm_i915_gem_exec_object2 tmp;
+ unsigned int index = batch->exec_count - 1;
+
+ tmp = *entry;
+ *entry = batch->validation_list[index];
+ batch->validation_list[index] = tmp;
+ }
+
unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;
if (in_fence_fd != -1) {
@@ -777,40 +793,33 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
assert(batch_offset <= BATCH_SZ - sizeof(uint32_t));
assert(_mesa_bitcount(write_domain) <= 1);
- uint64_t offset64;
- if (target != batch->bo) {
- unsigned int index = add_exec_bo(batch, target);
- struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index];
+ unsigned int index = add_exec_bo(batch, target);
+ struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index];
- if (write_domain) {
- entry->flags |= EXEC_OBJECT_WRITE;
+ if (write_domain) {
+ entry->flags |= EXEC_OBJECT_WRITE;
- /* PIPECONTROL needs a w/a on gen6 */
- if (write_domain == I915_GEM_DOMAIN_INSTRUCTION) {
- struct brw_context *brw = container_of(batch, brw, batch);
- if (brw->gen == 6)
- entry->flags |= EXEC_OBJECT_NEEDS_GTT;
- }
+ /* PIPECONTROL needs a w/a on gen6 */
+ if (write_domain == I915_GEM_DOMAIN_INSTRUCTION) {
+ struct brw_context *brw = container_of(batch, brw, batch);
+ if (brw->gen == 6)
+ entry->flags |= EXEC_OBJECT_NEEDS_GTT;
}
-
- offset64 = entry->offset;
- } else {
- offset64 = target->offset64;
}
batch->relocs[batch->reloc_count++] =
(struct drm_i915_gem_relocation_entry) {
.offset = batch_offset,
.delta = target_offset,
- .target_handle = target->gem_handle,
- .presumed_offset = offset64,
+ .target_handle = batch->use_exec_lut ? index : target->gem_handle,
+ .presumed_offset = entry->offset,
};
/* Using the old buffer offset, write in what the right data would be, in
* case the buffer doesn't move and we can short-circuit the relocation
* processing in the kernel
*/
- return offset64 + target_offset;
+ return target_offset + entry->offset;
}
void
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 44ea6a4562..109ba79b67 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -2278,6 +2278,10 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
screen->kernel_features |= KERNEL_ALLOWS_EXEC_CAPTURE;
}
+ if (intel_get_boolean(screen, I915_PARAM_HAS_EXEC_BATCH_FIRST)) {
+ screen->kernel_features |= KERNEL_ALLOWS_EXEC_BATCH_FIRST;
+ }
+
if (!intel_detect_pipelined_so(screen)) {
/* We can't do anything, so the effective version is 0. */
screen->cmd_parser_version = 0;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index 0980c8f561..577058dc15 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -80,6 +80,7 @@ struct intel_screen
#define KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3 (1<<3)
#define KERNEL_ALLOWS_COMPUTE_DISPATCH (1<<4)
#define KERNEL_ALLOWS_EXEC_CAPTURE (1<<5)
+#define KERNEL_ALLOWS_EXEC_BATCH_FIRST (1<<6)
struct brw_bufmgr *bufmgr;
--
2.13.3
More information about the mesa-dev
mailing list