[Intel-gfx] [PATCH 1/2] intel: Use I915_EXEC_HANDLE_LUT when available
Kristian Høgsberg
krh at bitplanet.net
Fri Jan 16 17:45:59 PST 2015
In userspace we can track which buffer a relocation refers to in
constant time. However, the kernel has to look up the per-fd gem
handle for each relocation. Using the I915_EXEC_HANDLE_LUT flag lets
us use the the bos validation list index instead of the gem handle in
the relocation entries. This allows the kernel to look up the bo for
a reloc in constant time.
Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
---
intel/intel_bufmgr_gem.c | 33 +++++++++++++++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index cf85bb8..8a51cea 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -130,6 +130,7 @@ typedef struct _drm_intel_bufmgr_gem {
unsigned int bo_reuse : 1;
unsigned int no_exec : 1;
unsigned int has_vebox : 1;
+ unsigned int has_handle_lut : 1;
bool fenced_relocs;
char *aub_filename;
@@ -399,11 +400,12 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
(drm_intel_bo_gem *) target_bo;
DBG("%2d: %d (%s)@0x%08llx -> "
- "%d (%s)@0x%08lx + 0x%08x\n",
+ "%d (#%d) (%s)@0x%08lx + 0x%08x\n",
i,
bo_gem->gem_handle, bo_gem->name,
(unsigned long long)bo_gem->relocs[j].offset,
target_gem->gem_handle,
+ bo_gem->relocs[j].target_handle,
target_gem->name,
target_bo->offset64,
bo_gem->relocs[j].delta);
@@ -470,7 +472,7 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
- int index;
+ int i, index;
if (bo_gem->validate_index != -1) {
if (need_fence)
@@ -512,6 +514,26 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
EXEC_OBJECT_NEEDS_FENCE;
}
bufmgr_gem->exec_count++;
+
+ if (bufmgr_gem->has_handle_lut) {
+ /* If the kernel supports I915_EXEC_HANDLE_LUT, we can
+ * use validate_index instead of the gem handle in
+ * target_handle in the reloc entry. This allows the
+ * kernel to do a simple constant time lookup instead
+ * of looking up the gem handle for each reloc.
+ *
+ * We have to fix up the relocs here, since the bo_gem
+ * needs to have a valid validate_index in case there
+ * are self-relocs in the reloc list.
+ */
+ for (i = 0; i < bo_gem->reloc_count; i++) {
+ drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)
+ bo_gem->reloc_target_info[i].bo;
+
+ bo_gem->relocs[i].target_handle =
+ target_bo_gem->validate_index;
+ }
+ }
}
#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
@@ -2447,6 +2469,9 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
if (bufmgr_gem->no_exec)
goto skip_execution;
+ if (bufmgr_gem->has_handle_lut)
+ execbuf.flags |= I915_EXEC_HANDLE_LUT;
+
ret = drmIoctl(bufmgr_gem->fd,
DRM_IOCTL_I915_GEM_EXECBUFFER2,
&execbuf);
@@ -3569,6 +3594,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
}
}
+ gp.param = I915_PARAM_HAS_EXEC_HANDLE_LUT;
+ ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+ bufmgr_gem->has_handle_lut = ret == 0;
+
/* Let's go with one relocation per every 2 dwords (but round down a bit
* since a power of two will mean an extra page allocation for the reloc
* buffer).
--
2.2.1
More information about the Intel-gfx
mailing list