Mesa (main): tu: Implement non-aligned multisample GMEM STORE_OP_STORE

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Aug 2 11:37:31 UTC 2021


Module: Mesa
Branch: main
Commit: b157a5d0d68ee8a1b4cb862a56b97bd881841413
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b157a5d0d68ee8a1b4cb862a56b97bd881841413

Author: Connor Abbott <cwabbott0 at gmail.com>
Date:   Wed Jul 28 14:42:08 2021 +0200

tu: Implement non-aligned multisample GMEM STORE_OP_STORE

We have to be a bit careful here when disabling draw states. This also
necessitates moving the actual recording of the stores to the end so
that we set the dirty flag correctly.

Closes: #4462
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12102>

---

 src/freedreno/ci/deqp-freedreno-a630-fails.txt |   3 -
 src/freedreno/vulkan/tu_clear_blit.c           | 113 +++++++++++++++++++++----
 src/freedreno/vulkan/tu_cmd_buffer.c           |  13 ++-
 src/freedreno/vulkan/tu_private.h              |   2 +
 4 files changed, 104 insertions(+), 27 deletions(-)

diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt
index 463a09a0726..0de08181475 100644
--- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt
+++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt
@@ -17,9 +17,6 @@ dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail,Fail
 dEQP-VK.compute.basic.max_local_size_x,Crash
 dEQP-VK.compute.basic.max_local_size_y,Crash
 
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4462
-dEQP-VK.pipeline.framebuffer_attachment.diff_attachments_2d_19x27_32x32_ms,Fail
-
 # https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3019
 # should be fixed by https://gerrit.khronos.org/c/vk-gl-cts/+/7745
 dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.7,Fail
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index fd8e75b13f2..5ecf11ce530 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -894,6 +894,36 @@ r3d_src_buffer(struct tu_cmd_buffer *cmd,
    r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
 }
 
+static void
+r3d_src_gmem(struct tu_cmd_buffer *cmd,
+             struct tu_cs *cs,
+             const struct tu_image_view *iview,
+             VkFormat format,
+             uint32_t gmem_offset,
+             uint32_t cpp)
+{
+   uint32_t desc[A6XX_TEX_CONST_DWORDS];
+   memcpy(desc, iview->descriptor, sizeof(desc)); /* start from the view's descriptor */
+
+   /* patch the format so that depth/stencil get the right format */
+   desc[0] &= ~A6XX_TEX_CONST_0_FMT__MASK;
+   desc[0] |= A6XX_TEX_CONST_0_FMT(tu6_format_texture(format, TILE6_2).fmt);
+
+   /* patched for gmem: clear swap, force TILE6_2, address = gmem_base + gmem_offset */
+   desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
+   desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
+   desc[2] =
+      A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
+      A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * cpp);
+   desc[3] = 0;
+   desc[4] = cmd->device->physical_device->gmem_base + gmem_offset;
+   desc[5] = A6XX_TEX_CONST_5_DEPTH(1);
+   for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++) /* zero the remaining dwords */
+      desc[i] = 0;
+
+   r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
+}
+
 static void
 r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
 {
@@ -2733,6 +2763,42 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
    tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
 }
 
+static void
+store_3d_blit(struct tu_cmd_buffer *cmd,
+              struct tu_cs *cs,
+              const struct tu_image_view *iview,
+              uint32_t dst_samples,
+              bool separate_stencil, /* selects r3d_dst_stencil() over r3d_dst() */
+              VkFormat format,
+              const VkRect2D *render_area,
+              uint32_t gmem_offset, /* forwarded to r3d_src_gmem() as the source offset */
+              uint32_t cpp)
+{
+   r3d_setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false,
+             iview->ubwc_enabled, dst_samples);
+
+   r3d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
+
+   if (separate_stencil)
+      r3d_dst_stencil(cs, iview, 0);
+   else
+      r3d_dst(cs, iview, 0); /* layer 0 */
+
+   r3d_src_gmem(cmd, cs, iview, format, gmem_offset, cpp);
+
+   /* sync GMEM writes with CACHE before the draw reads GMEM via the texture descriptor. */
+   tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
+
+   r3d_run(cmd, cs);
+
+   /* Draws write to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
+    * sysmem, and we generally assume that GMEM renderpasses leave their
+    * results in sysmem, so we need to flush manually here. The 3d blit path
+    * writes to depth images as a color RT, so there's no need to flush depth.
+    */
+   tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
+}
+
 void
 tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                          struct tu_cs *cs,
@@ -2782,26 +2848,39 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
       return;
    }
 
-   if (dst->samples > 1) {
-      /* I guess we need to use shader path in this case?
-       * need a testcase which fails because of this
-       */
-      tu_finishme("unaligned store of msaa attachment\n");
-      return;
-   }
-
-   r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
-
    VkFormat format = src->format;
    if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
       format = VK_FORMAT_D32_SFLOAT;
 
-   if (dst->store) {
-      store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format,
-                    src->gmem_offset, src->cpp);
-   }
-   if (dst->store_stencil) {
-      store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT,
-                    src->gmem_offset_stencil, src->samples);
+   if (dst->samples > 1) {
+      /* If we hit this path, we have to disable draw states after every tile
+       * instead of once at the end of the renderpass, so that they aren't
+       * executed when calling CP_DRAW.
+       *
+       * TODO: store a flag somewhere so we don't do this more than once and
+       * don't do it after the renderpass when this happens.
+       */
+      if (dst->store || dst->store_stencil)
+         tu_disable_draw_states(cmd, cs);
+
+      if (dst->store) {
+         store_3d_blit(cmd, cs, iview, dst->samples, resolve_d32s8_s8, format,
+                       render_area, src->gmem_offset, src->cpp);
+      }
+      if (dst->store_stencil) { /* stencil plane has its own GMEM offset */
+         store_3d_blit(cmd, cs, iview, dst->samples, true, VK_FORMAT_S8_UINT,
+                       render_area, src->gmem_offset_stencil, src->samples);
+      }
+   } else {
+      r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
+
+      if (dst->store) {
+         store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format,
+                       src->gmem_offset, src->cpp);
+      }
+      if (dst->store_stencil) {
+         store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT,
+                       src->gmem_offset_stencil, src->samples);
+      }
    }
 }
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 45eeea2fbdf..47aa21789fa 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -684,7 +684,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    }
 }
 
-static void
+void
 tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
    tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
@@ -2913,8 +2913,6 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
    cmd->state.framebuffer = fb;
    cmd->state.render_area = pRenderPassBegin->renderArea;
 
-   tu6_emit_tile_store(cmd, &cmd->tile_store_cs);
-
    /* Note: because this is external, any flushes will happen before draw_cs
     * gets called. However deferred flushes could have to happen later as part
     * of the subpass.
@@ -4349,6 +4347,8 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
 {
    TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
 
+   tu6_emit_tile_store(cmd_buffer, &cmd_buffer->tile_store_cs);
+
    tu_cs_end(&cmd_buffer->draw_cs);
    tu_cs_end(&cmd_buffer->tile_store_cs);
    tu_cs_end(&cmd_buffer->draw_epilogue_cs);
@@ -4358,10 +4358,9 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
    else
       tu_cmd_render_tiles(cmd_buffer);
 
-   /* outside of renderpasses we assume all draw states are disabled
-    * we can do this in the main cs because no resolve/store commands
-    * should use a draw command (TODO: this will change if unaligned
-    * GMEM stores are supported)
+   /* Outside of renderpasses we assume all draw states are disabled. We do
+    * this outside the draw CS for the normal case where 3d gmem stores aren't
+    * used.
     */
    tu_disable_draw_states(cmd_buffer, &cmd_buffer->cs);
 
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 153e1880e0a..5400072c859 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -1239,6 +1239,8 @@ void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_
 
 void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
 
+void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
+
 struct tu_pvtmem_config {
    uint64_t iova;
    uint32_t per_fiber_size;



More information about the mesa-commit mailing list