[Mesa-dev] [PATCH 1/3] r600g: fix CP DMA hazard with index buffer fetches

Marek Olšák maraeo at gmail.com
Fri May 27 18:18:25 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

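CP DMA is executed in ME, but index buffers are read by PFP, so PFP can
start fetching indices before a preceding CP DMA operation has finished.

R600-R700 worked around this by starting a new IB (flushing the gfx CS)
after every buffer copy, which is a bad workaround. Emit PFP_SYNC_ME after
CP DMA on R700 and later instead. Only R600 still has to use the flush,
because it doesn't have the packet.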
---
 src/gallium/drivers/r600/evergreen_hw_context.c | 13 +++++++++++--
 src/gallium/drivers/r600/evergreend.h           |  1 +
 src/gallium/drivers/r600/r600_blit.c            |  6 ------
 src/gallium/drivers/r600/r600_hw_context.c      | 25 ++++++++++++++++++++-----
 src/gallium/drivers/r600/r600d.h                |  1 +
 5 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index f456696..14877ae 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -117,7 +117,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
 		unsigned reloc;
 
-		r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
+		r600_need_cs_space(rctx, 2 + 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
 
 		/* Flush the caches for the first copy only. */
 		if (rctx->b.flags) {
@@ -148,9 +148,18 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 		offset += byte_count;
 	}
 
+	/* CP DMA is executed in ME, but index buffers are read by PFP.
+	 * This ensures that ME (CP DMA) is idle before PFP starts fetching
+	 * indices. If we wanted to execute CP DMA in PFP, this packet
+	 * should precede it.
+	 */
+	if (coher == R600_COHERENCY_SHADER) {
+		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+		radeon_emit(cs, 0);
+	}
+
 	/* Invalidate the read caches. */
 	rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
 			 R600_CONTEXT_INV_VERTEX_CACHE |
 			 R600_CONTEXT_INV_TEX_CACHE;
 }
-
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index c1c6169..457152e 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -88,6 +88,7 @@
 #define		WAIT_REG_MEM_EQUAL		3
 #define PKT3_MEM_WRITE                         0x3D
 #define PKT3_INDIRECT_BUFFER                   0x32
+#define PKT3_PFP_SYNC_ME		       0x42 /* r7xx+ */
 #define PKT3_SURFACE_SYNC                      0x43
 #define PKT3_ME_INITIALIZE                     0x44
 #define PKT3_COND_WRITE                        0x45
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 9230b40..9f309d8 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -519,12 +519,6 @@ static void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst
 	} else {
 		util_resource_copy_region(ctx, dst, 0, dstx, 0, 0, src, 0, src_box);
 	}
-
-	/* The index buffer (VGT) doesn't seem to see the result of the copying.
-	 * Can we somehow flush the index buffer cache? Starting a new IB seems
-	 * to do the trick. */
-	if (rctx->b.chip_class <= R700)
-		rctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 }
 
 /**
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 1f7bed8..5d6200d 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -403,7 +403,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
 		unsigned src_reloc, dst_reloc;
 
-		r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
+		r600_need_cs_space(rctx, 2 + 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
 
 		/* Flush the caches for the first copy only. */
 		if (rctx->b.flags) {
@@ -438,10 +438,25 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 		dst_offset += byte_count;
 	}
 
-	/* Invalidate the read caches. */
-	rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
-			 R600_CONTEXT_INV_VERTEX_CACHE |
-			 R600_CONTEXT_INV_TEX_CACHE;
+	/* CP DMA is executed in ME, but index buffers are read by PFP.
+	 * This ensures that ME (CP DMA) is idle before PFP starts fetching
+	 * indices. If we wanted to execute CP DMA in PFP, this packet
+	 * should precede it.
+	 *
+	 * R6xx is out of luck, as it doesn't have the packet.
+	 * Starting a new IB has the same effect.
+	 */
+	if (rctx->b.chip_class >= R700) {
+		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+		radeon_emit(cs, 0);
+
+		/* Invalidate the read caches. */
+		rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
+				 R600_CONTEXT_INV_VERTEX_CACHE |
+				 R600_CONTEXT_INV_TEX_CACHE;
+	} else {
+		rctx->b.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+	}
 }
 
 void r600_dma_copy_buffer(struct r600_context *rctx,
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 24f599e..0b6dabd 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -98,6 +98,7 @@
 #define		WAIT_REG_MEM_EQUAL		3
 #define PKT3_MEM_WRITE                         0x3D
 #define PKT3_INDIRECT_BUFFER                   0x32
+#define PKT3_PFP_SYNC_ME		       0x42 /* r7xx+ */
 #define PKT3_SURFACE_SYNC                      0x43
 #define PKT3_ME_INITIALIZE                     0x44
 #define PKT3_COND_WRITE                        0x45
-- 
2.7.4



More information about the mesa-dev mailing list