Mesa (staging/20.2): radv: Fix a hang on CB change by adding flushes.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Dec 10 03:42:21 UTC 2020


Module: Mesa
Branch: staging/20.2
Commit: 5741e39344877c575f5c8ca69cbd4b5f885afd6d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5741e39344877c575f5c8ca69cbd4b5f885afd6d

Author: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Date:   Mon Oct 19 02:53:39 2020 +0200

radv: Fix a hang on CB change by adding flushes.

This workaround fixes a hang while loading a renderdoc trace for me.

Since the workload does 1 mip per cmdbuffer, it is quite hard to confirm
what exactly the conditions for the hang are, but this is the most
restrictive set I found, and it corresponds to a workaround in AMDVLK as
well.

CC: mesa-stable
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7210>
(cherry picked from commit 4cce4d22a72bf84459ee95223cc7d1c6542617fb)

---

 .pick_status.json                |  2 +-
 src/amd/vulkan/radv_cmd_buffer.c | 74 ++++++++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_private.h    |  2 ++
 3 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index c32a538a235..51b06fe1a9a 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -5269,7 +5269,7 @@
         "description": "radv: Fix a hang on CB change by adding flushes.",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 651b5a503d3..3b60b60bdf3 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2222,6 +2222,71 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 	}
 }
 
+/* GFX9+ metadata cache flushing workaround. Metadata cache coherency is
+ * broken if the CB caches data of multiple mips of the same image at the
+ * same time, so request a CB and CB-metadata flush whenever a color
+ * attachment with CB metadata or DCC is bound at a different base mip than
+ * the one last recorded for its slot in cmd_buffer->state.cb_mip.
+ */
+static void
+radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer)
+{
+	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+	bool color_mip_changed = false;
+
+	/* Not applicable before GFX9; nothing to inspect without a framebuffer. */
+	if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ||
+	    !cmd_buffer->state.framebuffer)
+		return;
+
+	for (int i = 0; i < subpass->color_count; ++i) {
+		int idx = subpass->color_attachments[i].attachment;
+		/* Unused slots have no image view; don't index attachments[]. */
+		if (idx == VK_ATTACHMENT_UNUSED)
+			continue;
+
+		struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
+		if ((radv_image_has_CB_metadata(iview->image) ||
+		     radv_image_has_dcc(iview->image)) &&
+		    cmd_buffer->state.cb_mip[i] != iview->base_mip)
+			color_mip_changed = true;
+
+		cmd_buffer->state.cb_mip[i] = iview->base_mip;
+	}
+
+	if (color_mip_changed) {
+		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+		                                RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+	}
+}
+
+/* Return the tracked color mips to the all-zero default: if any slot of
+ * state.cb_mip is non-zero, request a CB and CB-metadata flush, then clear
+ * the array. This way the next cmd_buffer can assume that rendering to
+ * mip 0, the most common case, doesn't need any flushes. */
+static void
+radv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer)
+{
+	/* Entire workaround is not applicable before GFX9 */
+	if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
+		return;
+
+	/* Scan every tracked slot; cb_mip is declared with MAX_RTS entries. */
+	bool need_color_mip_flush = false;
+	for (unsigned i = 0; i < MAX_RTS; ++i) {
+		if (cmd_buffer->state.cb_mip[i]) {
+			need_color_mip_flush = true;
+			break;
+		}
+	}
+	if (need_color_mip_flush) {
+		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+		                                RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+	}
+
+	memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip));
+}
+
 static void
 radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
 {
@@ -4061,6 +4126,8 @@ VkResult radv_EndCommandBuffer(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 
+	radv_emit_mip_change_flush_default(cmd_buffer);
+
 	if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
 		if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
 			cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
@@ -4640,6 +4707,8 @@ void radv_CmdExecuteCommands(
 
 	assert(commandBufferCount > 0);
 
+	radv_emit_mip_change_flush_default(primary);
+
 	/* Emit pending flushes on primary prior to executing secondary */
 	si_emit_cache_flush(primary);
 
@@ -4672,6 +4741,7 @@ void radv_CmdExecuteCommands(
 			 * has been recorded without a framebuffer, otherwise
 			 * fast color/depth clears can't work.
 			 */
+			radv_emit_fb_mip_change_flush(primary);
 			radv_emit_framebuffer_state(primary);
 		}
 
@@ -5279,6 +5349,10 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
 
 	radv_describe_draw(cmd_buffer);
 
+	/* Need to apply this workaround early as it can set flush flags. */
+	if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
+		radv_emit_fb_mip_change_flush(cmd_buffer);
+
 	/* Use optimal packet order based on whether we need to sync the
 	 * pipeline.
 	 */
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 69315e5215d..3067409f0f3 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1332,6 +1332,8 @@ struct radv_cmd_state {
 	uint32_t current_event_type;
 	uint32_t num_events;
 	uint32_t num_layout_transitions;
+
+	uint8_t cb_mip[MAX_RTS];
 };
 
 struct radv_cmd_pool {



More information about the mesa-commit mailing list