Mesa (main): v3dv: only check binning sync for semaphores for the first CL job

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jul 27 00:28:22 UTC 2022


Module: Mesa
Branch: main
Commit: 903a75245b367916cc51bdb14f1ad91027592471
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=903a75245b367916cc51bdb14f1ad91027592471

Author: Iago Toral Quiroga <itoral at igalia.com>
Date:   Mon Jul 18 11:12:11 2022 +0200

v3dv: only check binning sync for semaphores for the first CL job

Semaphore waits in a command buffer only affect the first jobs we execute
in each hardware queue since jobs in the same queue are serialized against
each other. Binning syncs in particular, only affect CL jobs.

Reviewed-by: Alejandro Piñeiro <apinheiro at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17594>

---

 src/broadcom/vulkan/v3dv_private.h | 12 +++++++--
 src/broadcom/vulkan/v3dv_queue.c   | 52 +++++++++++++++++++-------------------
 2 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index 3107ca10de1..a7a52cb9fa8 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -250,9 +250,17 @@ enum v3dv_queue_type {
  * cmd buf batch.
  */
 struct v3dv_last_job_sync {
-   /* If the job is the first submitted to a GPU queue in a cmd buffer batch */
+   /* If the job is the first submitted to a GPU queue in a cmd buffer batch.
+    *
+    * We use V3DV_QUEUE_{CL,CSD,TFU} both with and without multisync.
+    */
    bool first[V3DV_QUEUE_COUNT];
-   /* Array of syncobj to track the last job submitted to a GPU queue */
+   /* Array of syncobj to track the last job submitted to a GPU queue.
+    *
+    * With multisync we use V3DV_QUEUE_{CL,CSD,TFU} to track syncobjs for each
+    * queue, but without multisync we only track the last job submitted to any
+    * queue in V3DV_QUEUE_ANY.
+    */
    uint32_t syncs[V3DV_QUEUE_COUNT];
 };
 
diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c
index b988ee25218..dddb0405ed2 100644
--- a/src/broadcom/vulkan/v3dv_queue.c
+++ b/src/broadcom/vulkan/v3dv_queue.c
@@ -103,9 +103,6 @@ queue_wait_idle(struct v3dv_queue *queue,
          if (result != VK_SUCCESS)
             return result;
       }
-
-      for (int i = 0; i < 3; i++)
-         queue->last_job_syncs.first[i] = false;
    } else {
       /* Without multisync, all the semaphores are baked into the one syncobj
        * at the start of each submit so we only need to wait on the one.
@@ -120,6 +117,9 @@ queue_wait_idle(struct v3dv_queue *queue,
       }
    }
 
+   for (int i = 0; i < 3; i++)
+      queue->last_job_syncs.first[i] = false;
+
    return VK_SUCCESS;
 }
 
@@ -722,8 +722,6 @@ set_multisync(struct drm_v3d_multi_sync *ms,
    ms->in_sync_count = in_sync_count;
    ms->in_syncs = (uintptr_t)(void *)in_syncs;
 
-   queue->last_job_syncs.first[queue_sync] = false;
-
    return;
 
 fail:
@@ -797,35 +795,33 @@ handle_cl_job(struct v3dv_queue *queue,
    const bool needs_perf_sync = queue->last_perfmon_id != submit.perfmon_id;
    queue->last_perfmon_id = submit.perfmon_id;
 
-   /* We need a binning sync if we are waiting on a semaphore with a wait stage
-    * that involves the geometry pipeline, or if the job comes after a pipeline
-    * barrier that involves geometry stages (needs_bcl_sync), or if
-    * performance queries are in use.
+   /* We need a binning sync if we are the first CL job waiting on a semaphore
+    * with a wait stage that involves the geometry pipeline, or if the job
+    * comes after a pipeline barrier that involves geometry stages
+    * (needs_bcl_sync) or when performance queries are in use.
     *
     * We need a render sync if the job doesn't need a binning sync but has
     * still been flagged for serialization. It should be noted that RCL jobs
     * don't start until the previous RCL job has finished so we don't really
     * need to add a fence for those, however, we might need to wait on a CSD or
     * TFU job, which are not automatically serialized with CL jobs.
-    *
-    * FIXME: see if we can do better and avoid bcl syncs for any jobs in the
-    * command buffer after the first job where we should be able to track bcl
-    * dependencies strictly through barriers.
     */
-   bool needs_bcl_sync = job->needs_bcl_sync;
-   for (int i = 0; !needs_bcl_sync && i < sync_info->wait_count; i++) {
-      needs_bcl_sync = sync_info->waits[i].stage_mask &
-         (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT |
-          VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT |
-          VK_PIPELINE_STAGE_ALL_COMMANDS_BIT |
-          VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
-          VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
-          VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
-          VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
-          VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
-          VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT);
+   bool needs_bcl_sync = job->needs_bcl_sync || needs_perf_sync;
+   if (queue->last_job_syncs.first[V3DV_QUEUE_CL]) {
+      for (int i = 0; !needs_bcl_sync && i < sync_info->wait_count; i++) {
+         needs_bcl_sync = sync_info->waits[i].stage_mask &
+             (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT |
+              VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT |
+              VK_PIPELINE_STAGE_ALL_COMMANDS_BIT |
+              VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
+              VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+              VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+              VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
+              VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
+              VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT);
+      }
    }
-   needs_bcl_sync |= needs_perf_sync;
+
    bool needs_rcl_sync = job->serialize && !needs_bcl_sync;
 
    /* Replace single semaphore settings whenever our kernel-driver supports
@@ -866,6 +862,8 @@ handle_cl_job(struct v3dv_queue *queue,
    free(bo_handles);
    multisync_free(device, &ms);
 
+   queue->last_job_syncs.first[V3DV_QUEUE_CL] = false;
+
    if (ret)
       return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CL failed: %m");
 
@@ -906,6 +904,7 @@ handle_tfu_job(struct v3dv_queue *queue,
                         DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu);
 
    multisync_free(device, &ms);
+   queue->last_job_syncs.first[V3DV_QUEUE_TFU] = false;
 
    if (ret != 0)
       return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_TFU failed: %m");
@@ -984,6 +983,7 @@ handle_csd_job(struct v3dv_queue *queue,
    free(bo_handles);
 
    multisync_free(device, &ms);
+   queue->last_job_syncs.first[V3DV_QUEUE_CSD] = false;
 
    if (ret)
       return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CSD failed: %m");



More information about the mesa-commit mailing list