[Mesa-stable] [PATCH] radv: emit a dummy ZPASS_DONE to prevent GPU hangs on GFX9

Samuel Pitoiset samuel.pitoiset at gmail.com
Wed Jul 11 09:55:55 UTC 2018


A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
counters) must immediately precede every timestamp event to
prevent a GPU hang on GFX9.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Cc: 18.1 <mesa-stable at lists.freedesktop.org>
---
 src/amd/vulkan/radv_cmd_buffer.c | 15 +++++++++++++--
 src/amd/vulkan/radv_device.c     |  4 ++--
 src/amd/vulkan/radv_private.h    |  7 +++++--
 src/amd/vulkan/radv_query.c      |  9 ++++++---
 src/amd/vulkan/si_cmd_buffer.c   | 26 +++++++++++++++++++++-----
 5 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 9da42fe03e..325e1993f8 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -319,11 +319,21 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
 	}
 
 	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+		unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends;
+		unsigned eop_bug_offset;
 		void *fence_ptr;
+
 		radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
 					     &cmd_buffer->gfx9_fence_offset,
 					     &fence_ptr);
 		cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;
+
+		/* Allocate a buffer for the EOP bug on GFX9. */
+		radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
+					     &eop_bug_offset, &fence_ptr);
+		cmd_buffer->gfx9_eop_bug_va =
+			radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+		cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
 	}
 
 	cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
@@ -473,7 +483,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
 				       cmd_buffer->device->physical_device->rad_info.chip_class,
 				       ptr, va,
 				       radv_cmd_buffer_uses_mec(cmd_buffer),
-				       flags);
+				       flags, cmd_buffer->gfx9_eop_bug_va);
 	}
 
 	if (unlikely(cmd_buffer->device->trace_bo))
@@ -4318,7 +4328,8 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
 					   cmd_buffer->device->physical_device->rad_info.chip_class,
 					   radv_cmd_buffer_uses_mec(cmd_buffer),
 					   V_028A90_BOTTOM_OF_PIPE_TS, 0,
-					   EOP_DATA_SEL_VALUE_32BIT, va, 2, value);
+					   EOP_DATA_SEL_VALUE_32BIT, va, 2, value,
+					   cmd_buffer->gfx9_eop_bug_va);
 	}
 
 	assert(cmd_buffer->cs->cdw <= cdw_max);
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 73c48cef1f..1c0a50c82f 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2240,7 +2240,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
 			                       RADV_CMD_FLAG_INV_SMEM_L1 |
 			                       RADV_CMD_FLAG_INV_VMEM_L1 |
 			                       RADV_CMD_FLAG_INV_GLOBAL_L2 |
-					       RADV_CMD_FLAG_START_PIPELINE_STATS);
+					       RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
 		} else if (i == 1) {
 			si_cs_emit_cache_flush(cs,
 			                       queue->device->physical_device->rad_info.chip_class,
@@ -2251,7 +2251,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
 			                       RADV_CMD_FLAG_INV_SMEM_L1 |
 			                       RADV_CMD_FLAG_INV_VMEM_L1 |
 			                       RADV_CMD_FLAG_INV_GLOBAL_L2 |
-					       RADV_CMD_FLAG_START_PIPELINE_STATS);
+					       RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
 		}
 
 		if (!queue->device->ws->cs_finalize(cs))
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 4e4b3a6037..96218f4be2 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1041,6 +1041,7 @@ struct radv_cmd_buffer {
 	uint32_t gfx9_fence_offset;
 	struct radeon_winsys_bo *gfx9_fence_bo;
 	uint32_t gfx9_fence_idx;
+	uint64_t gfx9_eop_bug_va;
 
 	/**
 	 * Whether a query pool has been resetted and we have to flush caches.
@@ -1072,7 +1073,8 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
 				unsigned data_sel,
 				uint64_t va,
 				uint32_t old_fence,
-				uint32_t new_fence);
+				uint32_t new_fence,
+				uint64_t gfx9_eop_bug_va);
 
 void si_emit_wait_fence(struct radeon_cmdbuf *cs,
 			uint64_t va, uint32_t ref,
@@ -1081,7 +1083,8 @@ void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
 			    enum chip_class chip_class,
 			    uint32_t *fence_ptr, uint64_t va,
 			    bool is_mec,
-			    enum radv_cmd_flush_bits flush_bits);
+			    enum radv_cmd_flush_bits flush_bits,
+			    uint64_t gfx9_eop_bug_va);
 void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
 void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va);
 void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 267d45890e..ba3783905f 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1180,7 +1180,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
 					   radv_cmd_buffer_uses_mec(cmd_buffer),
 					   V_028A90_BOTTOM_OF_PIPE_TS, 0,
 					   EOP_DATA_SEL_VALUE_32BIT,
-					   avail_va, 0, 1);
+					   avail_va, 0, 1,
+					   cmd_buffer->gfx9_eop_bug_va);
 		break;
 	default:
 		unreachable("ending unhandled query type");
@@ -1303,13 +1304,15 @@ void radv_CmdWriteTimestamp(
 						   mec,
 						   V_028A90_BOTTOM_OF_PIPE_TS, 0,
 						   EOP_DATA_SEL_TIMESTAMP,
-						   query_va, 0, 0);
+						   query_va, 0, 0,
+						   cmd_buffer->gfx9_eop_bug_va);
 			si_cs_emit_write_event_eop(cs,
 						   cmd_buffer->device->physical_device->rad_info.chip_class,
 						   mec,
 						   V_028A90_BOTTOM_OF_PIPE_TS, 0,
 						   EOP_DATA_SEL_VALUE_32BIT,
-						   avail_va, 0, 1);
+						   avail_va, 0, 1,
+						   cmd_buffer->gfx9_eop_bug_va);
 			break;
 		}
 		query_va += pool->stride;
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 454fd8c39c..49c5eb1372 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -679,7 +679,8 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
 				unsigned data_sel,
 				uint64_t va,
 				uint32_t old_fence,
-				uint32_t new_fence)
+				uint32_t new_fence,
+				uint64_t gfx9_eop_bug_va)
 {
 	unsigned op = EVENT_TYPE(event) |
 		EVENT_INDEX(5) |
@@ -693,6 +694,17 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
 		sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
 
 	if (chip_class >= GFX9 || is_gfx8_mec) {
+		/* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
+		 * counters) must immediately precede every timestamp event to
+		 * prevent a GPU hang on GFX9.
+		 */
+		if (chip_class == GFX9) {
+			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+			radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+			radeon_emit(cs, gfx9_eop_bug_va);
+			radeon_emit(cs, gfx9_eop_bug_va >> 32);
+		}
+
 		radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, false));
 		radeon_emit(cs, op);
 		radeon_emit(cs, sel);
@@ -772,7 +784,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
 		       uint32_t *flush_cnt,
 		       uint64_t flush_va,
                        bool is_mec,
-                       enum radv_cmd_flush_bits flush_bits)
+                       enum radv_cmd_flush_bits flush_bits,
+		       uint64_t gfx9_eop_bug_va)
 {
 	unsigned cp_coher_cntl = 0;
 	uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
@@ -803,7 +816,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
 							   V_028A90_FLUSH_AND_INV_CB_DATA_TS,
 							   0,
 							   EOP_DATA_SEL_DISCARD,
-							   0, 0, 0);
+							   0, 0, 0,
+							   gfx9_eop_bug_va);
 			}
 		}
 		if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
@@ -873,7 +887,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
 
 		si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event, tc_flags,
 					   EOP_DATA_SEL_VALUE_32BIT,
-					   flush_va, old_fence, *flush_cnt);
+					   flush_va, old_fence, *flush_cnt,
+					   gfx9_eop_bug_va);
 		si_emit_wait_fence(cs, flush_va, *flush_cnt, 0xffffffff);
 	}
 
@@ -975,7 +990,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 	                       cmd_buffer->device->physical_device->rad_info.chip_class,
 			       ptr, va,
 	                       radv_cmd_buffer_uses_mec(cmd_buffer),
-	                       cmd_buffer->state.flush_bits);
+	                       cmd_buffer->state.flush_bits,
+			       cmd_buffer->gfx9_eop_bug_va);
 
 
 	if (unlikely(cmd_buffer->device->trace_bo))
-- 
2.18.0



More information about the mesa-stable mailing list