Mesa (main): turnip: Copy command buffers to deferred submit request

Tue Jun 15 02:34:15 UTC 2021

Module: Mesa
Branch: main
Commit: 639579d1162d2bfc823047e78a1fe3e0f308d64c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=639579d1162d2bfc823047e78a1fe3e0f308d64c

Author: Hyunjun Ko <zzoon at igalia.com>
Date:   Tue Jun  8 06:49:45 2021 +0000

turnip: Copy command buffers to deferred submit request

This makes sure the index into the global bo table stored in
drm_msm_gem_submit_cmd is valid at actual submit time.

v1. Move the entry_count calculation into the submit request creation
function.

Fixes: #4877
Fixes: 3f229e34 ("turnip: Implement VK_KHR_timeline_semaphore.")

Signed-off-by: Hyunjun Ko <zzoon at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11260>

---

 src/freedreno/vulkan/tu_drm.c | 125 +++++++++++++++++++++++++++---------------
 1 file changed, 80 insertions(+), 45 deletions(-)

diff --git a/src/freedreno/vulkan/tu_drm.c b/src/freedreno/vulkan/tu_drm.c
index 839956a6eca..14bced0e2fa 100644
--- a/src/freedreno/vulkan/tu_drm.c
+++ b/src/freedreno/vulkan/tu_drm.c
@@ -83,6 +83,9 @@ struct tu_queue_submit
 {
    struct   list_head link;
 
+   VkCommandBuffer *cmd_buffers;
+   uint32_t cmd_buffer_count;
+
    struct   tu_syncobj **wait_semaphores;
    uint32_t wait_semaphore_count;
    struct   tu_syncobj **signal_semaphores;
@@ -106,6 +109,7 @@ struct tu_queue_submit
 
    bool     last_submit;
    uint32_t entry_count;
+   uint32_t counter_pass_index;
 };
 
 static int
@@ -837,10 +841,10 @@ tu_queue_submit_add_timeline_signal_locked(struct tu_queue_submit* submit,
 static VkResult
 tu_queue_submit_create_locked(struct tu_queue *queue,
                               const VkSubmitInfo *submit_info,
-                              const uint32_t entry_count,
                               const uint32_t nr_in_syncobjs,
                               const uint32_t nr_out_syncobjs,
                               const bool last_submit,
+                              const VkPerformanceQuerySubmitInfoKHR *perf_info,
                               struct tu_queue_submit **submit)
 {
    VkResult result;
@@ -862,6 +866,19 @@ tu_queue_submit_create_locked(struct tu_queue *queue,
    struct tu_queue_submit *new_submit = vk_zalloc(&queue->device->vk.alloc,
                sizeof(*new_submit), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
 
+   new_submit->cmd_buffer_count = submit_info->commandBufferCount;
+   new_submit->cmd_buffers = vk_zalloc(&queue->device->vk.alloc,
+         new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers), 8,
+         VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+
+   if (new_submit->cmd_buffers == NULL) {
+      result = vk_error(queue->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY)
+      goto fail_cmd_buffers;
+   }
+
+   memcpy(new_submit->cmd_buffers, submit_info->pCommandBuffers,
+          new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers));
+
    new_submit->wait_semaphores = vk_zalloc(&queue->device->vk.alloc,
          submit_info->waitSemaphoreCount * sizeof(*new_submit->wait_semaphores),
          8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
@@ -904,6 +921,16 @@ tu_queue_submit_create_locked(struct tu_queue *queue,
       }
    }
 
+   uint32_t entry_count = 0;
+   for (uint32_t j = 0; j < new_submit->cmd_buffer_count; ++j) {
+      TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[j]);
+
+      if (perf_info)
+         entry_count++;
+
+      entry_count += cmdbuf->cs.entry_count;
+   }
+
    new_submit->cmds = vk_zalloc(&queue->device->vk.alloc,
          entry_count * sizeof(*new_submit->cmds), 8,
          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
@@ -937,6 +964,8 @@ tu_queue_submit_create_locked(struct tu_queue *queue,
    new_submit->nr_in_syncobjs = nr_in_syncobjs;
    new_submit->nr_out_syncobjs = nr_out_syncobjs;
    new_submit->last_submit = last_submit;
+   new_submit->counter_pass_index = perf_info ? perf_info->counterPassIndex : ~0;
+
    list_inithead(&new_submit->link);
 
    *submit = new_submit;
@@ -954,6 +983,8 @@ fail_wait_timelines:
 fail_signal_semaphores:
    vk_free(&queue->device->vk.alloc, new_submit->wait_semaphores);
 fail_wait_semaphores:
+   vk_free(&queue->device->vk.alloc, new_submit->cmd_buffers);
+fail_cmd_buffers:
    return result;
 }
 
@@ -971,9 +1002,49 @@ tu_queue_submit_free(struct tu_queue *queue, struct tu_queue_submit *submit)
    vk_free(&queue->device->vk.alloc, submit->cmds);
    vk_free(&queue->device->vk.alloc, submit->in_syncobjs);
    vk_free(&queue->device->vk.alloc, submit->out_syncobjs);
+   vk_free(&queue->device->vk.alloc, submit->cmd_buffers);
    vk_free(&queue->device->vk.alloc, submit);
 }
 
+static void
+tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue,
+                                   struct tu_queue_submit *submit)
+{
+   struct drm_msm_gem_submit_cmd *cmds = submit->cmds;
+   /* Per cmd buffer: optional perf-pass cmd, then one cmd per cs entry. */
+   uint32_t entry_idx = 0; /* next slot of cmds[] to fill */
+   for (uint32_t j = 0; j < submit->cmd_buffer_count; ++j) {
+      TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->cmd_buffers[j]);
+      struct tu_cs *cs = &cmdbuf->cs;
+      struct tu_device *dev = queue->device;
+      /* ~0 is what submit creation stores when no perf_info was passed. */
+      if (submit->counter_pass_index != ~0) {
+         struct tu_cs_entry *perf_cs_entry =
+            &dev->perfcntrs_pass_cs_entries[submit->counter_pass_index];
+
+         cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
+         cmds[entry_idx].submit_idx =
+            dev->bo_idx[perf_cs_entry->bo->gem_handle];
+         cmds[entry_idx].submit_offset = perf_cs_entry->offset;
+         cmds[entry_idx].size = perf_cs_entry->size;
+         cmds[entry_idx].pad = 0;
+         cmds[entry_idx].nr_relocs = 0;
+         cmds[entry_idx++].relocs = 0; /* last field written: move to next slot */
+      }
+      /* entry_idx advances together with i, so slots stay contiguous. */
+      for (unsigned i = 0; i < cs->entry_count; ++i, ++entry_idx) {
+         cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
+         cmds[entry_idx].submit_idx =
+            dev->bo_idx[cs->entries[i].bo->gem_handle];
+         cmds[entry_idx].submit_offset = cs->entries[i].offset;
+         cmds[entry_idx].size = cs->entries[i].size;
+         cmds[entry_idx].pad = 0;
+         cmds[entry_idx].nr_relocs = 0;
+         cmds[entry_idx].relocs = 0;
+      }
+   }
+}
+
 static VkResult
 tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
 {
@@ -990,6 +1061,12 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
 
    mtx_lock(&queue->device->bo_mutex);
 
+   /* drm_msm_gem_submit_cmd needs the index of each bo, which can change at
+    * any time while bo_mutex is not held. So we build the submit cmds here,
+    * at the actual point of submission, with the mutex locked.
+    */
+   tu_queue_build_msm_gem_submit_cmds(queue, submit);
+
    struct drm_msm_gem_submit req = {
       .flags = flags,
       .queueid = queue->msm_queue_id,
@@ -1186,22 +1263,12 @@ tu_QueueSubmit(VkQueue _queue,
       if (last_submit && fence)
          out_syncobjs_size += 1;
 
-      uint32_t entry_count = 0;
-      for (uint32_t j = 0; j < submit->commandBufferCount; ++j) {
-         TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->pCommandBuffers[j]);
-
-         if (perf_info)
-            entry_count++;
-
-         entry_count += cmdbuf->cs.entry_count;
-      }
-
       pthread_mutex_lock(&queue->device->submit_mutex);
       struct tu_queue_submit *submit_req = NULL;
 
       VkResult ret = tu_queue_submit_create_locked(queue, submit,
-            entry_count, submit->waitSemaphoreCount, out_syncobjs_size,
-            last_submit, &submit_req);
+            submit->waitSemaphoreCount, out_syncobjs_size,
+            last_submit, perf_info, &submit_req);
 
       if (ret != VK_SUCCESS) {
          pthread_mutex_unlock(&queue->device->submit_mutex);
@@ -1246,38 +1313,6 @@ tu_QueueSubmit(VkQueue _queue,
          };
       }
 
-      struct drm_msm_gem_submit_cmd *cmds = submit_req->cmds;
-
-      uint32_t entry_idx = 0;
-      for (uint32_t j = 0; j < submit->commandBufferCount; ++j) {
-         TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->pCommandBuffers[j]);
-         struct tu_cs *cs = &cmdbuf->cs;
-
-         if (perf_info) {
-            struct tu_cs_entry *perf_cs_entry =
-               &cmdbuf->device->perfcntrs_pass_cs_entries[perf_info->counterPassIndex];
-            cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
-            cmds[entry_idx].submit_idx =
-               queue->device->bo_idx[perf_cs_entry->bo->gem_handle];
-            cmds[entry_idx].submit_offset = perf_cs_entry->offset;
-            cmds[entry_idx].size = perf_cs_entry->size;
-            cmds[entry_idx].pad = 0;
-            cmds[entry_idx].nr_relocs = 0;
-            cmds[entry_idx++].relocs = 0;
-         }
-
-         for (unsigned i = 0; i < cs->entry_count; ++i, ++entry_idx) {
-            cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
-            cmds[entry_idx].submit_idx =
-               queue->device->bo_idx[cs->entries[i].bo->gem_handle];
-            cmds[entry_idx].submit_offset = cs->entries[i].offset;
-            cmds[entry_idx].size = cs->entries[i].size;
-            cmds[entry_idx].pad = 0;
-            cmds[entry_idx].nr_relocs = 0;
-            cmds[entry_idx].relocs = 0;
-         }
-      }
-
       /* Queue the current submit */
       list_addtail(&submit_req->link, &queue->queued_submits);
       ret = tu_device_submit_deferred_locked(queue->device);