Mesa (main): v3dv: implement double-buffer mode

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Jan 14 11:23:27 UTC 2022


Module: Mesa
Branch: main
Commit: b9f947457724f90becd820f877bf21c27f6ced99
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b9f947457724f90becd820f877bf21c27f6ced99

Author: Iago Toral Quiroga <itoral at igalia.com>
Date:   Wed Jan  5 11:07:59 2022 +0100

v3dv: implement double-buffer mode

Double buffer mode splits the tile buffer size in half so we can
start processing the next tile while the current one is being
stored to memory. This mode is available only if MSAA is not enabled
and can, in theory, improve performance by reducing tile store
overhead. However, it comes at the cost of reducing the tile size,
which also causes some overhead of its own.

Testing shows that this helps some cases (e.g. the Vulkan Quake
ports) but hurts others (e.g. Unreal Engine 4), so for the time
being we don't enable this by default, but we allow enabling it
selectively by using V3D_DEBUG.

Reviewed-by: Juan A. Suarez <jasuarez at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14551>

---

 src/broadcom/common/v3d_debug.c         |  2 ++
 src/broadcom/common/v3d_debug.h         |  1 +
 src/broadcom/common/v3d_util.c          |  8 ++++++--
 src/broadcom/common/v3d_util.h          |  4 ++--
 src/broadcom/vulkan/v3dv_cmd_buffer.c   | 18 ++++++++++++++++--
 src/broadcom/vulkan/v3dv_pass.c         |  4 +++-
 src/broadcom/vulkan/v3dv_private.h      | 16 ++++++++++++++++
 src/broadcom/vulkan/v3dvx_cmd_buffer.c  | 16 +++++++---------
 src/broadcom/vulkan/v3dvx_meta_common.c |  8 +++++++-
 src/gallium/drivers/v3d/v3d_context.c   |  1 +
 10 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/src/broadcom/common/v3d_debug.c b/src/broadcom/common/v3d_debug.c
index 508a2b7c74c..ed14830b526 100644
--- a/src/broadcom/common/v3d_debug.c
+++ b/src/broadcom/common/v3d_debug.c
@@ -88,6 +88,8 @@ static const struct debug_named_value debug_control[] = {
           "Force 16-bit precision on all TMU operations" },
         { "noloopunroll",  V3D_DEBUG_NO_LOOP_UNROLL,
           "Disable loop unrolling" },
+        { "db", V3D_DEBUG_DOUBLE_BUFFER,
+          "Enable double buffer for Tile Buffer when MSAA is disabled" },
         { NULL }
 };
 
diff --git a/src/broadcom/common/v3d_debug.h b/src/broadcom/common/v3d_debug.h
index 72d632568de..24c7078fa82 100644
--- a/src/broadcom/common/v3d_debug.h
+++ b/src/broadcom/common/v3d_debug.h
@@ -63,6 +63,7 @@ extern uint32_t V3D_DEBUG;
 #define V3D_DEBUG_TMU_16BIT         (1 << 19)
 #define V3D_DEBUG_NO_LOOP_UNROLL    (1 << 20)
 #define V3D_DEBUG_CL_NO_BIN         (1 << 21)
+#define V3D_DEBUG_DOUBLE_BUFFER     (1 << 22)
 
 #define V3D_DEBUG_SHADERS           (V3D_DEBUG_TGSI | V3D_DEBUG_NIR | \
                                      V3D_DEBUG_VIR | V3D_DEBUG_QPU | \
diff --git a/src/broadcom/common/v3d_util.c b/src/broadcom/common/v3d_util.c
index ecbe5382f34..6a0a2103805 100644
--- a/src/broadcom/common/v3d_util.c
+++ b/src/broadcom/common/v3d_util.c
@@ -88,8 +88,8 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
 }
 
 void
-v3d_choose_tile_size(uint32_t color_attachment_count,
-                     uint32_t max_color_bpp, bool msaa,
+v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp,
+                     bool msaa, bool double_buffer,
                      uint32_t *width, uint32_t *height)
 {
    static const uint8_t tile_sizes[] = {
@@ -108,8 +108,12 @@ v3d_choose_tile_size(uint32_t color_attachment_count,
    else if (color_attachment_count > 1)
       idx += 1;
 
+   /* MSAA and double-buffer are mutually exclusive */
+   assert(!msaa || !double_buffer);
    if (msaa)
       idx += 2;
+   else if (double_buffer)
+      idx += 1;
 
    idx += max_color_bpp;
 
diff --git a/src/broadcom/common/v3d_util.h b/src/broadcom/common/v3d_util.h
index 69c91e9a5f5..ab8f859f252 100644
--- a/src/broadcom/common/v3d_util.h
+++ b/src/broadcom/common/v3d_util.h
@@ -35,8 +35,8 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
                                          uint32_t wg_size);
 
 void
-v3d_choose_tile_size(uint32_t color_attachment_count,
-                     uint32_t max_color_bpp, bool msaa,
+v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp,
+                     bool msaa, bool double_buffer,
                      uint32_t *width, uint32_t *height);
 
 #endif
diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index 0acbf1a0582..df2f4882e7e 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -441,8 +441,22 @@ job_compute_frame_tiling(struct v3dv_job *job,
    tiling->msaa = msaa;
    tiling->internal_bpp = max_internal_bpp;
 
-   v3d_choose_tile_size(render_target_count, max_internal_bpp, msaa,
-                         &tiling->tile_width, &tiling->tile_height);
+   /* We can use double-buffer when MSAA is disabled to reduce tile store
+    * overhead.
+    *
+    * FIXME: if we are emitting any tile loads the hardware will serialize
+    * loads and stores across tiles effectively disabling double buffering,
+    * so we would want to check for that and not enable it in that case to
+    * avoid reducing the tile size.
+    */
+   tiling->double_buffer =
+      unlikely(V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa;
+
+   assert(!tiling->msaa || !tiling->double_buffer);
+
+   v3d_choose_tile_size(render_target_count, max_internal_bpp,
+                        tiling->msaa, tiling->double_buffer,
+                        &tiling->tile_width, &tiling->tile_height);
 
    tiling->draw_tiles_x = DIV_ROUND_UP(width, tiling->tile_width);
    tiling->draw_tiles_y = DIV_ROUND_UP(height, tiling->tile_height);
diff --git a/src/broadcom/vulkan/v3dv_pass.c b/src/broadcom/vulkan/v3dv_pass.c
index 7347a189904..0dcb6c91c95 100644
--- a/src/broadcom/vulkan/v3dv_pass.c
+++ b/src/broadcom/vulkan/v3dv_pass.c
@@ -310,7 +310,9 @@ subpass_get_granularity(struct v3dv_device *device,
    }
 
    uint32_t width, height;
-   v3d_choose_tile_size(color_attachment_count, max_bpp, msaa, &width, &height);
+   bool double_buffer = (V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa;
+   v3d_choose_tile_size(color_attachment_count, max_bpp, msaa,
+                        double_buffer, &width, &height);
    *granularity = (VkExtent2D) {
       .width = width,
       .height = height
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index 36b07de5a08..36e2337fb0f 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -745,6 +745,7 @@ struct v3dv_frame_tiling {
    uint32_t render_target_count;
    uint32_t internal_bpp;
    bool     msaa;
+   bool     double_buffer;
    uint32_t tile_width;
    uint32_t tile_height;
    uint32_t draw_tiles_x;
@@ -765,6 +766,21 @@ bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
                                        struct v3dv_render_pass *pass,
                                        uint32_t subpass_idx);
 
+/* Returns true if we need to emit 2 initial tile clears: double-buffer mode
+ * is enabled and we render at least 2 tiles (more than one tile column, row
+ * or layer). In this mode each tile uses a different half of the tile buffer
+ * memory so we can have 2 tiles in flight (one being stored to memory and
+ * the next being rendered). In this scenario, if we emit a single initial
+ * tile clear we would only clear the first half of the tile buffer.
+ */
+static inline bool
+v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling)
+{
+   return tiling->double_buffer &&
+          (tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 ||
+           tiling->layers > 1);
+}
+
 struct v3dv_cmd_pool {
    struct vk_object_base base;
 
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
index 3ec0c4d0e65..f2d67305570 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
@@ -53,11 +53,13 @@ v3dX(job_emit_binning_prolog)(struct v3dv_job *job,
       config.number_of_layers = layers;
    }
 
+   assert(!tiling->double_buffer || !tiling->msaa);
    cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
       config.width_in_pixels = tiling->width;
       config.height_in_pixels = tiling->height;
       config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
       config.multisample_mode_4x = tiling->msaa;
+      config.double_buffer_in_non_ms_mode = tiling->double_buffer;
       config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
    }
 
@@ -762,11 +764,13 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
     */
    bool do_early_zs_clear = false;
    const uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
+   assert(!tiling->msaa || !tiling->double_buffer);
    cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
       config.image_width_pixels = framebuffer->width;
       config.image_height_pixels = framebuffer->height;
       config.number_of_render_targets = MAX2(subpass->color_count, 1);
       config.multisample_mode_4x = tiling->msaa;
+      config.double_buffer_in_non_ms_mode = tiling->double_buffer;
       config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
 
       if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
@@ -944,12 +948,6 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
          tiling->frame_height_in_supertiles;
    }
 
-   /* Start by clearing the tile buffer. */
-   cl_emit(rcl, TILE_COORDINATES, coords) {
-      coords.tile_column_number = 0;
-      coords.tile_row_number = 0;
-   }
-
    /* Emit an initial clear of the tile buffers. This is necessary
     * for any buffers that should be cleared (since clearing
     * normally happens at the *end* of the generic tile list), but
@@ -964,13 +962,13 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
     * changes on V3D 3.x, and 2 dummy stores on 4.x.
     */
    for (int i = 0; i < 2; i++) {
-      if (i > 0)
-         cl_emit(rcl, TILE_COORDINATES, coords);
+      cl_emit(rcl, TILE_COORDINATES, coords);
       cl_emit(rcl, END_OF_LOADS, end);
       cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
          store.buffer_to_store = NONE;
       }
-      if (i == 0 && cmd_buffer->state.tile_aligned_render_area) {
+      if (cmd_buffer->state.tile_aligned_render_area &&
+          (i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
          cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
             clear.clear_z_stencil_buffer = !job->early_zs_clear;
             clear.clear_all_render_targets = true;
diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c
index 93a6f0c609a..73c649e2ead 100644
--- a/src/broadcom/vulkan/v3dvx_meta_common.c
+++ b/src/broadcom/vulkan/v3dvx_meta_common.c
@@ -50,12 +50,14 @@ emit_rcl_prologue(struct v3dv_job *job,
    if (job->cmd_buffer->state.oom)
       return NULL;
 
+   assert(!tiling->msaa || !tiling->double_buffer);
    cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
       config.early_z_disable = true;
       config.image_width_pixels = tiling->width;
       config.image_height_pixels = tiling->height;
       config.number_of_render_targets = 1;
       config.multisample_mode_4x = tiling->msaa;
+      config.double_buffer_in_non_ms_mode = tiling->double_buffer;
       config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
       config.internal_depth_type = fb->internal_depth_type;
    }
@@ -166,7 +168,11 @@ emit_frame_setup(struct v3dv_job *job,
       cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
          store.buffer_to_store = NONE;
       }
-      if (clear_value && i == 0) {
+      /* When using double-buffering, we need to clear both buffers (unless
+       * we only have a single tile to render).
+       */
+      if (clear_value &&
+          (i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
          cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
             clear.clear_z_stencil_buffer = true;
             clear.clear_all_render_targets = true;
diff --git a/src/gallium/drivers/v3d/v3d_context.c b/src/gallium/drivers/v3d/v3d_context.c
index 70b773c0e8d..6ae58d7518d 100644
--- a/src/gallium/drivers/v3d/v3d_context.c
+++ b/src/gallium/drivers/v3d/v3d_context.c
@@ -266,6 +266,7 @@ v3d_get_tile_buffer_size(bool is_msaa,
         }
 
         v3d_choose_tile_size(max_cbuf_idx + 1, *max_bpp, is_msaa,
+                             false /* double-buffer */,
                              tile_width, tile_height);
 }
 



More information about the mesa-commit mailing list