[Mesa-dev] [RFC 3/3] anv: Do relocations in userspace before execbuf ioctl
Jason Ekstrand
jason at jlekstrand.net
Tue Nov 1 04:48:15 UTC 2016
From: Kristian Høgsberg Kristensen <kristian.h.kristensen at intel.com>
This reduces the amount of stalling that the kernel does between batches
and improves the performance of Dota 2 on a Sky Lake GT2 desktop by around
30%. Ideally, we would have a simple execbuf2 flag that would let us tell
the kernel "Trust me, you don't need to stall to relocate" but the kernel
has no such flag at the moment.
Signed-off-by: Jason Ekstrand <jason at jlekstrand.net>
Cc: Kristian Høgsberg <krh at bitplanet.net>
Cc: Ben Widawsky <ben at bwidawsk.net>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
---
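Note (illustrative, not part of the patch): the idea is that userspace writes each relocated address itself, using the offset the kernel reported for the target buffer on the previous execbuf2 call, records that offset in presumed_offset, and then sets I915_EXEC_NO_RELOC so the kernel can skip its own relocation pass. A minimal sketch follows; example_bo and example_apply_reloc are hypothetical names, not anv code.

/* Illustrative sketch only; example_bo and example_apply_reloc are
 * hypothetical names.  Assumes the i915 uapi header (<i915_drm.h>,
 * installed by libdrm) for struct drm_i915_gem_relocation_entry and
 * I915_EXEC_NO_RELOC.
 */
#include <stdbool.h>
#include <stdint.h>
#include <i915_drm.h>

struct example_bo {
   void     *map;    /* CPU mapping of the buffer */
   uint64_t  offset; /* GPU address the kernel chose on the last execbuf2 */
};

static void
example_apply_reloc(struct example_bo *batch,
                    struct drm_i915_gem_relocation_entry *reloc,
                    const struct example_bo *target,
                    bool use_64bit_addresses)
{
   /* The relocation lives reloc->offset bytes into the batch and must point
    * at the target's GPU address plus the relocation delta. */
   void *p = (char *)batch->map + reloc->offset;
   uint64_t address = target->offset + reloc->delta;

   if (use_64bit_addresses)
      *(uint64_t *)p = address;            /* gen8+: 64-bit addresses */
   else
      *(uint32_t *)p = (uint32_t)address;  /* pre-gen8: 32-bit addresses */

   /* Record the address we assumed so it matches what the kernel has. */
   reloc->presumed_offset = target->offset;
}

/* Once every relocation has been written this way and every exec object's
 * offset field holds the address we relocated against, the batch can be
 * submitted with execbuf.flags |= I915_EXEC_NO_RELOC and the kernel never
 * has to stall on busy buffers just to patch addresses. */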
src/intel/vulkan/anv_device.c | 96 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 93 insertions(+), 3 deletions(-)
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index baa767e..ef371a7 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1068,6 +1068,89 @@ void anv_GetDeviceQueue(
    *pQueue = anv_queue_to_handle(&device->queue);
 }
 
+static void
+write_reloc(const struct anv_device *device, void *p, uint64_t v)
+{
+   if (device->info.gen >= 8)
+      *(uint64_t *)p = v;
+   else
+      *(uint32_t *)p = v;
+}
+
+static void
+anv_reloc_list_apply(struct anv_reloc_list *list,
+                     struct anv_device *device, struct anv_bo *bo)
+{
+   for (size_t i = 0; i < list->num_relocs; i++) {
+      void *p = bo->map + list->relocs[i].offset;
+
+      struct anv_bo *target_bo = list->reloc_bos[i];
+      write_reloc(device, p, target_bo->offset + list->relocs[i].delta);
+      list->relocs[i].presumed_offset = target_bo->offset;
+   }
+}
+
+/**
+ * This function applies the relocations for a command buffer and writes the
+ * actual addresses into the buffers as per what we were told by the kernel
+ * on the previous execbuf2 call. This should be safe to do because, for each
+ * relocated address, we have two cases:
+ *
+ * 1) The buffer to which the address points is not in use by the GPU.  In
+ *    this case, neither is the 32- or 64-bit chunk of GPU memory storing the
+ *    address, so it's completely safe to update the address.
+ *
+ * 2) The buffer to which the address points is currently in use by the GPU.
+ *    If it's still in use, then some execbuf2 call before this one used it
+ *    and it has been in use ever since.  In this case, it can't have moved
+ *    since the last execbuf2 call.  We then have two subcases:
+ *
+ *    a) The 32- or 64-bit chunk of GPU memory storing the address is not in
+ *       use by the GPU.  In this case, it is safe to write the relocation.
+ *
+ *    b) The 32- or 64-bit chunk of GPU memory storing the address is
+ *       currently in use by the GPU.  In this case, the relocated address
+ *       we will write is exactly the same as the last address we wrote
+ *       there.
+ *
+ * By doing relocations on the CPU, we can tell the kernel that it doesn't
+ * need to bother.  We want to do this because the surface state buffer is
+ * used by every command buffer so, if the kernel does the relocations, it
+ * will always be busy and the kernel will always stall.
+ */
+static void
+relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer)
+{
+   if (!cmd_buffer->execbuf2.need_reloc) {
+      assert(cmd_buffer->execbuf2.execbuf.flags & I915_EXEC_NO_RELOC);
+      return;
+   }
+
+   for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) {
+      if (cmd_buffer->execbuf2.bos[i]->offset == (uint64_t)-1)
+         return;
+   }
+
+   anv_reloc_list_apply(&cmd_buffer->surface_relocs,
+                        cmd_buffer->device,
+                        &cmd_buffer->device->surface_state_block_pool.bo);
+
+   struct anv_batch_bo **bbo;
+   u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
+      anv_reloc_list_apply(&(*bbo)->relocs,
+                           cmd_buffer->device, &(*bbo)->bo);
+   }
+
+   for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) {
+      struct anv_bo *bo = cmd_buffer->execbuf2.bos[i];
+
+      cmd_buffer->execbuf2.objects[i].offset = bo->offset;
+   }
+
+   cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC;
+   cmd_buffer->execbuf2.need_reloc = false;
+}
+
+
 VkResult
 anv_device_execbuf(struct anv_device *device,
                    struct drm_i915_gem_execbuffer2 *execbuf,
@@ -1097,16 +1180,20 @@ VkResult anv_QueueSubmit(
    struct anv_device *device = queue->device;
    VkResult result = VK_SUCCESS;
 
+   pthread_mutex_lock(&device->mutex);
+
    for (uint32_t i = 0; i < submitCount; i++) {
       for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
          ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
                          pSubmits[i].pCommandBuffers[j]);
          assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
 
+         relocate_cmd_buffer(cmd_buffer);
+
          result = anv_device_execbuf(device, &cmd_buffer->execbuf2.execbuf,
                                      cmd_buffer->execbuf2.bos);
          if (result != VK_SUCCESS)
-            return result;
+            goto out;
       }
    }
@@ -1114,10 +1201,13 @@ VkResult anv_QueueSubmit(
       struct anv_bo *fence_bo = &fence->bo;
       result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
       if (result != VK_SUCCESS)
-         return result;
+         goto out;
    }
 
-   return VK_SUCCESS;
+out:
+   pthread_mutex_unlock(&device->mutex);
+
+   return result;
 }
 
 VkResult anv_QueueWaitIdle(
--
2.5.0.400.gff86faf