Mesa (main): v3dv: limit sync for barriers to hw queues selected by source mask

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue May 31 06:30:23 UTC 2022


Module: Mesa
Branch: main
Commit: 0ce346368f3a1d7a7b12ade87645b466850c0f82
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ce346368f3a1d7a7b12ade87645b466850c0f82

Author: Iago Toral Quiroga <itoral at igalia.com>
Date:   Mon May 30 09:12:22 2022 +0200

v3dv: limit sync for barriers to hw queues selected by source mask

Until now, when we consumed a barrier we would implement it by
setting the serialize flag on a job, which would cause it to be
serialized against all hardware queues (CL, CSD, TFU). However,
now that we track the source(s) of the barrier, we can restrict
this to only the relevant queue(s) instead (multisync path only).

It should be noted that we can implement transfers via TFU or CL
jobs, so if the source of a barrier is a transfer we currently
synchronize against both the TFU and CL queues. In the future we
may be able to track this more precisely and restrict
synchronization to just one of these queues.
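
To illustrate the idea, here is a minimal standalone sketch (a
hypothetical helper, not code from the patch below) of how a
V3DV_BARRIER_* source mask selects the hw queues a job must wait on:

#include <stdbool.h>
#include <stdint.h>

enum {
   V3DV_BARRIER_GRAPHICS_BIT = (1 << 0),
   V3DV_BARRIER_COMPUTE_BIT  = (1 << 1),
   V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
};

struct queue_waits {
   bool cl;   /* binning/render (CL) queue */
   bool csd;  /* compute (CSD) queue */
   bool tfu;  /* texture formatting unit (TFU) queue */
};

/* Hypothetical helper: map a job's serialize mask to the queues it
 * must serialize against. Transfers may run as CL or TFU jobs, so a
 * transfer source conservatively waits on both.
 */
static struct queue_waits
queue_waits_for_serialize_mask(uint8_t serialize)
{
   struct queue_waits w;
   w.cl  = serialize & (V3DV_BARRIER_GRAPHICS_BIT |
                        V3DV_BARRIER_TRANSFER_BIT);
   w.csd = serialize & V3DV_BARRIER_COMPUTE_BIT;
   w.tfu = serialize & V3DV_BARRIER_TRANSFER_BIT;
   return w;
}

With this, a job whose barrier source was only compute work waits
solely on the CSD queue instead of stalling on all three queues.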

Also, for secondary command buffers we are taking the easy way
out and always synchronize against all queues, but we should be
able to do the same we do for primaries without too much effort.

Reviewed-by: Alejandro Piñeiro <apinheiro at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16743>

---

 src/broadcom/vulkan/v3dv_cmd_buffer.c  |  9 +++++++--
 src/broadcom/vulkan/v3dv_private.h     |  9 +++++++--
 src/broadcom/vulkan/v3dv_queue.c       | 35 ++++++++++++++++++++++++++--------
 src/broadcom/vulkan/v3dvx_cmd_buffer.c | 10 ++++++++--
 4 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index 6b245d303d4..485e5b9a066 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -707,7 +707,7 @@ cmd_buffer_serialize_job_if_needed(struct v3dv_cmd_buffer *cmd_buffer,
    }
 
    if (barrier_mask & bit) {
-      job->serialize = true;
+      job->serialize = *src_mask;
       *src_mask = 0;
       cmd_buffer->state.barrier.dst_mask &= ~bit;
    }
@@ -1714,7 +1714,12 @@ cmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary,
             return;
 
          if (pending_barrier.dst_mask) {
-            job->serialize = true;
+            /* FIXME: do the same we do for primaries and only choose the
+             * relevant src masks.
+             */
+            job->serialize = pending_barrier.src_mask_graphics |
+                             pending_barrier.src_mask_transfer |
+                             pending_barrier.src_mask_compute;
             if (pending_barrier.bcl_buffer_access ||
                 pending_barrier.bcl_image_access) {
                job->needs_bcl_sync = true;
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index bfde1119e9e..a191e9ed19c 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -1094,8 +1094,10 @@ struct v3dv_job {
     */
    bool always_flush;
 
-   /* Whether we need to serialize this job in our command stream */
-   bool serialize;
+   /* A mask of V3DV_BARRIER_* indicating the source(s) of the barrier. We
+    * can use this to select the hw queues where we need to serialize the job.
+    */
+   uint8_t serialize;
 
    /* If this is a CL job, whether we should sync before binning */
    bool needs_bcl_sync;
@@ -1196,6 +1198,9 @@ enum {
    V3DV_BARRIER_COMPUTE_BIT  = (1 << 1),
    V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
 };
+#define V3DV_BARRIER_ALL (V3DV_BARRIER_GRAPHICS_BIT | \
+                          V3DV_BARRIER_TRANSFER_BIT | \
+                          V3DV_BARRIER_COMPUTE_BIT);
 
 struct v3dv_barrier_state {
    /* Mask of V3DV_BARRIER_* indicating where we consume a barrier. */
diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c
index eb78bef9bc3..83ea99200c3 100644
--- a/src/broadcom/vulkan/v3dv_queue.c
+++ b/src/broadcom/vulkan/v3dv_queue.c
@@ -488,10 +488,24 @@ set_in_syncs(struct v3dv_queue *queue,
    if (queue->last_job_syncs.first[queue_sync])
       n_syncs = sync_info->wait_count;
 
-   /* If the serialize flag is set, this job waits for completion of all GPU
-    * jobs submitted in any queue V3DV_QUEUE_(CL/TFU/CSD) before running.
+   /* If the serialize flag is set the job needs to be serialized in the
+    * corresponding queues. Notice that we may implement transfer operations
+    * as both CL or TFU jobs.
+    *
+    * FIXME: maybe we could track more precisely if the source of a transfer
+    * barrier is a CL and/or a TFU job.
     */
-   *count = n_syncs + (job->serialize ? 3 : 0);
+   bool sync_csd  = job->serialize & V3DV_BARRIER_COMPUTE_BIT;
+   bool sync_tfu  = job->serialize & V3DV_BARRIER_TRANSFER_BIT;
+   bool sync_cl   = job->serialize & (V3DV_BARRIER_GRAPHICS_BIT |
+                                      V3DV_BARRIER_TRANSFER_BIT);
+   *count = n_syncs;
+   if (sync_cl)
+      (*count)++;
+   if (sync_tfu)
+      (*count)++;
+   if (sync_csd)
+      (*count)++;
 
    if (!*count)
       return NULL;
@@ -508,11 +522,16 @@ set_in_syncs(struct v3dv_queue *queue,
          vk_sync_as_drm_syncobj(sync_info->waits[i].sync)->syncobj;
    }
 
-   if (job->serialize) {
-      for (int i = 0; i < 3; i++)
-         syncs[n_syncs + i].handle = queue->last_job_syncs.syncs[i];
-   }
+   if (sync_cl)
+      syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CL];
+
+   if (sync_csd)
+      syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CSD];
+
+   if (sync_tfu)
+      syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_TFU];
 
+   assert(n_syncs == *count);
    return syncs;
 }
 
@@ -887,7 +906,7 @@ queue_create_noop_job(struct v3dv_queue *queue)
     * order requirements, which basically require that signal operations occur
     * in submission order.
     */
-   queue->noop_job->serialize = true;
+   queue->noop_job->serialize = V3DV_BARRIER_ALL;
 
    return VK_SUCCESS;
 }
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
index 933210c84a4..21bf3c4cf9b 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
@@ -1578,7 +1578,8 @@ cmd_buffer_subpass_split_for_barrier(struct v3dv_cmd_buffer *cmd_buffer,
    if (!job)
       return NULL;
 
-   job->serialize = true;
+   /* FIXME: we can do better than all barriers */
+   job->serialize = V3DV_BARRIER_ALL;
    job->needs_bcl_sync = is_bcl_barrier;
    return job;
 }
@@ -1711,7 +1712,12 @@ v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary,
             v3dv_cmd_buffer_finish_job(primary);
             v3dv_job_clone_in_cmd_buffer(secondary_job, primary);
             if (pending_barrier.dst_mask) {
-               secondary_job->serialize = true;
+               /* FIXME: do the same we do for primaries and only choose the
+                * relevant src masks.
+                */
+               secondary_job->serialize = pending_barrier.src_mask_graphics |
+                                          pending_barrier.src_mask_transfer |
+                                          pending_barrier.src_mask_compute;
                if (pending_barrier.bcl_buffer_access ||
                    pending_barrier.bcl_image_access) {
                   secondary_job->needs_bcl_sync = true;


