Mesa (main): tu: Implement non-aligned multisample GMEM STORE_OP_STORE
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Aug 2 11:37:31 UTC 2021
Module: Mesa
Branch: main
Commit: b157a5d0d68ee8a1b4cb862a56b97bd881841413
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b157a5d0d68ee8a1b4cb862a56b97bd881841413
Author: Connor Abbott <cwabbott0 at gmail.com>
Date: Wed Jul 28 14:42:08 2021 +0200
tu: Implement non-aligned multisample GMEM STORE_OP_STORE
We have to be a bit careful here when disabling draw states. This also
necessitates moving the actual recording of the stores to the end so
that we set the dirty flag correctly.
Closes: #4462
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12102>
---
src/freedreno/ci/deqp-freedreno-a630-fails.txt | 3 -
src/freedreno/vulkan/tu_clear_blit.c | 113 +++++++++++++++++++++----
src/freedreno/vulkan/tu_cmd_buffer.c | 13 ++-
src/freedreno/vulkan/tu_private.h | 2 +
4 files changed, 104 insertions(+), 27 deletions(-)
diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt
index 463a09a0726..0de08181475 100644
--- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt
+++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt
@@ -17,9 +17,6 @@ dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail,Fail
dEQP-VK.compute.basic.max_local_size_x,Crash
dEQP-VK.compute.basic.max_local_size_y,Crash
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4462
-dEQP-VK.pipeline.framebuffer_attachment.diff_attachments_2d_19x27_32x32_ms,Fail
-
# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3019
# should be fixed by https://gerrit.khronos.org/c/vk-gl-cts/+/7745
dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.7,Fail
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index fd8e75b13f2..5ecf11ce530 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -894,6 +894,36 @@ r3d_src_buffer(struct tu_cmd_buffer *cmd,
r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
}
+static void
+r3d_src_gmem(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ const struct tu_image_view *iview,
+ VkFormat format,
+ uint32_t gmem_offset,
+ uint32_t cpp)
+{
+ uint32_t desc[A6XX_TEX_CONST_DWORDS];
+ memcpy(desc, iview->descriptor, sizeof(desc));
+
+ /* patch the format so that depth/stencil get the right format */
+ desc[0] &= ~A6XX_TEX_CONST_0_FMT__MASK;
+ desc[0] |= A6XX_TEX_CONST_0_FMT(tu6_format_texture(format, TILE6_2).fmt);
+
+ /* patched for gmem */
+ desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
+ desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
+ desc[2] =
+ A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
+ A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * cpp);
+ desc[3] = 0;
+ desc[4] = cmd->device->physical_device->gmem_base + gmem_offset;
+ desc[5] = A6XX_TEX_CONST_5_DEPTH(1);
+ for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
+ desc[i] = 0;
+
+ r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
+}
+
static void
r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
{
@@ -2733,6 +2763,42 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
}
+static void
+store_3d_blit(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ const struct tu_image_view *iview,
+ uint32_t dst_samples,
+ bool separate_stencil,
+ VkFormat format,
+ const VkRect2D *render_area,
+ uint32_t gmem_offset,
+ uint32_t cpp)
+{
+ r3d_setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false,
+ iview->ubwc_enabled, dst_samples);
+
+ r3d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
+
+ if (separate_stencil)
+ r3d_dst_stencil(cs, iview, 0);
+ else
+ r3d_dst(cs, iview, 0);
+
+ r3d_src_gmem(cmd, cs, iview, format, gmem_offset, cpp);
+
+ /* sync GMEM writes with CACHE. */
+ tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
+
+ r3d_run(cmd, cs);
+
+ /* Draws write to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
+ * sysmem, and we generally assume that GMEM renderpasses leave their
+ * results in sysmem, so we need to flush manually here. The 3d blit path
+ * writes to depth images as a color RT, so there's no need to flush depth.
+ */
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
+}
+
void
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
@@ -2782,26 +2848,39 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
return;
}
- if (dst->samples > 1) {
- /* I guess we need to use shader path in this case?
- * need a testcase which fails because of this
- */
- tu_finishme("unaligned store of msaa attachment\n");
- return;
- }
-
- r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
-
VkFormat format = src->format;
if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
format = VK_FORMAT_D32_SFLOAT;
- if (dst->store) {
- store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format,
- src->gmem_offset, src->cpp);
- }
- if (dst->store_stencil) {
- store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT,
- src->gmem_offset_stencil, src->samples);
+ if (dst->samples > 1) {
+ /* If we hit this path, we have to disable draw states after every tile
+ * instead of once at the end of the renderpass, so that they aren't
+ * executed when calling CP_DRAW.
+ *
+ * TODO: store a flag somewhere so we don't do this more than once and
+ * don't do it after the renderpass when this happens.
+ */
+ if (dst->store || dst->store_stencil)
+ tu_disable_draw_states(cmd, cs);
+
+ if (dst->store) {
+ store_3d_blit(cmd, cs, iview, dst->samples, resolve_d32s8_s8, format,
+ render_area, src->gmem_offset, src->cpp);
+ }
+ if (dst->store_stencil) {
+ store_3d_blit(cmd, cs, iview, dst->samples, true, VK_FORMAT_S8_UINT,
+ render_area, src->gmem_offset, src->samples);
+ }
+ } else {
+ r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
+
+ if (dst->store) {
+ store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format,
+ src->gmem_offset, src->cpp);
+ }
+ if (dst->store_stencil) {
+ store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT,
+ src->gmem_offset_stencil, src->samples);
+ }
}
}
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 45eeea2fbdf..47aa21789fa 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -684,7 +684,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
}
}
-static void
+void
tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
@@ -2913,8 +2913,6 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
cmd->state.framebuffer = fb;
cmd->state.render_area = pRenderPassBegin->renderArea;
- tu6_emit_tile_store(cmd, &cmd->tile_store_cs);
-
/* Note: because this is external, any flushes will happen before draw_cs
* gets called. However deferred flushes could have to happen later as part
* of the subpass.
@@ -4349,6 +4347,8 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
+ tu6_emit_tile_store(cmd_buffer, &cmd_buffer->tile_store_cs);
+
tu_cs_end(&cmd_buffer->draw_cs);
tu_cs_end(&cmd_buffer->tile_store_cs);
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
@@ -4358,10 +4358,9 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
else
tu_cmd_render_tiles(cmd_buffer);
- /* outside of renderpasses we assume all draw states are disabled
- * we can do this in the main cs because no resolve/store commands
- * should use a draw command (TODO: this will change if unaligned
- * GMEM stores are supported)
+ /* Outside of renderpasses we assume all draw states are disabled. We do
+ * this outside the draw CS for the normal case where 3d gmem stores aren't
+ * used.
*/
tu_disable_draw_states(cmd_buffer, &cmd_buffer->cs);
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 153e1880e0a..5400072c859 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -1239,6 +1239,8 @@ void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_
void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
+void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
+
struct tu_pvtmem_config {
uint64_t iova;
uint32_t per_fiber_size;
More information about the mesa-commit
mailing list