Mesa (main): v3dv: handle wait semaphores in the first job by queue

Fri Jan 21 13:23:16 UTC 2022

Module: Mesa
Branch: main
Commit: 0ab98612ef6860751f6a5115229e561eab7bb890
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ab98612ef6860751f6a5115229e561eab7bb890

Author: Melissa Wen <mwen at igalia.com>
Date:   Mon Jan  3 12:55:37 2022 -0100

v3dv: handle wait semaphores in the first job by queue

With multiple semaphore support, we can improve the way we handle
wait semaphores by taking into account the different job types and
command buffer batch scenarios. That means:

- A GPU job depends on wait semaphores whenever it is the first job
submitted to a queue in this command buffer batch (the `first` flag
for the job's queue type is set).
- For the first CPU job, if there are wait semaphores, we should
wait for the CPU and GPU to be idle before processing the job.

Signed-off-by: Melissa Wen <mwen at igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13178>

---

 src/broadcom/vulkan/v3dv_cmd_buffer.c |  6 ++--
 src/broadcom/vulkan/v3dv_private.h    |  2 ++
 src/broadcom/vulkan/v3dv_queue.c      | 55 ++++++++++++++++++++++++++++++-----
 3 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index df2f4882e7e..fa6b85b244a 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -687,8 +687,8 @@ v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer)
    }
 }
 
-static bool
-job_type_is_gpu(struct v3dv_job *job)
+bool
+v3dv_job_type_is_gpu(struct v3dv_job *job)
 {
    switch (job->type) {
    case V3DV_JOB_TYPE_GPU_CL:
@@ -713,7 +713,7 @@ cmd_buffer_serialize_job_if_needed(struct v3dv_cmd_buffer *cmd_buffer,
    /* Serialization only affects GPU jobs, CPU jobs are always automatically
     * serialized.
     */
-   if (!job_type_is_gpu(job))
+   if (!v3dv_job_type_is_gpu(job))
       return;
 
    job->serialize = true;
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index 083ad5a21bc..43ea60d4359 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -1148,6 +1148,8 @@ void v3dv_job_start_frame(struct v3dv_job *job,
                           uint8_t max_internal_bpp,
                           bool msaa);
 
+bool v3dv_job_type_is_gpu(struct v3dv_job *job);
+
 struct v3dv_job *
 v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
                              struct v3dv_cmd_buffer *cmd_buffer);
diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c
index 17a5e2c60d2..0dcf13bd0eb 100644
--- a/src/broadcom/vulkan/v3dv_queue.c
+++ b/src/broadcom/vulkan/v3dv_queue.c
@@ -668,15 +668,23 @@ multisync_free(struct v3dv_device *device,
 static struct drm_v3d_sem *
 set_in_syncs(struct v3dv_device *device,
              struct v3dv_job *job,
+             enum v3dv_queue_type queue,
              uint32_t *count,
              struct v3dv_submit_info_semaphores *sems_info)
 {
-   /* If we are serializing a job in a cmd buffer, we are already making it
-    * wait until the last job submitted to each queue completes before
-    * running, so in that case we can skip waiting for any additional
-    * semaphores.
+   uint32_t n_sems = 0;
+
+   /* If this is the first job submitted to a given GPU queue in this cmd buf
+    * batch, it has to wait on wait semaphores (if any) before running.
+    */
+   if (device->last_job_syncs.first[queue])
+      n_sems = sems_info->wait_sem_count;
+
+   /* If we don't need to wait on wait semaphores but the serialize flag is
+    * set, this job waits for completion of all GPU jobs submitted in any
+    * queue V3DV_QUEUE_(CL/TFU/CSD) before running.
     */
-   *count = job->serialize ? 3 : sems_info->wait_sem_count;
+   *count = n_sems == 0 && job->serialize ? 3 : n_sems;
 
    if (!*count)
       return NULL;
@@ -688,7 +696,7 @@ set_in_syncs(struct v3dv_device *device,
    if (!syncs)
       return NULL;
 
-   if (!job->serialize) {
+   if (n_sems) {
       for (int i = 0; i < *count; i++) {
          struct v3dv_semaphore *sem =
             v3dv_semaphore_from_handle(sems_info->wait_sems[i]);
@@ -768,7 +776,8 @@ set_multisync(struct drm_v3d_multi_sync *ms,
 {
    uint32_t out_sync_count = 0, in_sync_count = 0;
 
-   in_syncs = set_in_syncs(device, job, &in_sync_count, sems_info);
+   in_syncs = set_in_syncs(device, job, queue_sync,
+                           &in_sync_count, sems_info);
    if (!in_syncs && in_sync_count)
       goto fail;
 
@@ -787,6 +796,8 @@ set_multisync(struct drm_v3d_multi_sync *ms,
    ms->in_sync_count = in_sync_count;
    ms->in_syncs = (uintptr_t)(void *)in_syncs;
 
+   device->last_job_syncs.first[queue_sync] = false;
+
    return;
 
 fail:
@@ -1045,6 +1056,30 @@ queue_submit_job(struct v3dv_queue *queue,
 {
    assert(job);
 
+   /* CPU jobs typically execute explicit waits before they are processed. For
+    * example, a query reset CPU job will explicitly wait for the queries
+    * being unused before proceeding, etc. However, if we have any wait
+    * semaphores, we need to honour that too for the first CPU job we process
+    * in the command buffer batch. We do that by waiting for idle to ensure
+    * that any previous work has been completed, at which point any wait
+    * semaphores must be signalled, and we never need to do this again for the
+    * same batch.
+    */
+   if (!v3dv_job_type_is_gpu(job) && sems_info->wait_sem_count) {
+      v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));
+#ifdef DEBUG
+      /* Loop through wait sems and check they are all signalled */
+      for (int i = 0; i < sems_info->wait_sem_count; i++) {
+         int render_fd = queue->device->pdevice->render_fd;
+         struct v3dv_semaphore *sem =
+            v3dv_semaphore_from_handle(sems_info->wait_sems[i]);
+	 int ret = drmSyncobjWait(render_fd, &sem->sync, 1, 0, 0, NULL);
+	 assert(ret == 0);
+      }
+#endif
+      sems_info->wait_sem_count = 0;
+   }
+
    switch (job->type) {
    case V3DV_JOB_TYPE_GPU_CL:
       return handle_cl_job(queue, job, sems_info);
@@ -1212,6 +1247,12 @@ queue_submit_cmd_buffer_batch(struct v3dv_queue *queue,
       .signal_sems = (VkSemaphore *) pSubmit->pSignalSemaphores,
    };
 
+   /* In the beginning of a cmd buffer batch, we set all last_job_syncs as
+    * first. It helps to determine wait semaphores conditions.
+    */
+   for (unsigned i = 0; i < V3DV_QUEUE_COUNT; i++)
+      queue->device->last_job_syncs.first[i] = true;
+
    /* Even if we don't have any actual work to submit we still need to wait
     * on the wait semaphores and signal the signal semaphores and fence, so
     * in this scenario we just submit a trivial no-op job so we don't have