[Mesa-dev] [PATCH 1/3] r600g: fix CP DMA hazard with index buffer fetches
Marek Olšák
maraeo at gmail.com
Fri May 27 19:56:44 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
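CP DMA is executed in ME, but index buffers are read by PFP, so PFP may
start fetching indices before a preceding CP DMA operation has finished.
Emit PFP_SYNC_ME after CP DMA on Evergreen and later; R6xx-R7xx lack the
packet, so start a new IB (flush) there instead.

v2: don't use PFP_SYNC_ME on R700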
---
src/gallium/drivers/r600/evergreen_hw_context.c | 13 +++++++++++--
src/gallium/drivers/r600/evergreend.h | 1 +
src/gallium/drivers/r600/r600_blit.c | 6 ------
src/gallium/drivers/r600/r600_hw_context.c | 25 ++++++++++++++++++++-----
src/gallium/drivers/r600/r600d.h | 1 +
src/gallium/drivers/radeonsi/sid.h | 2 +-
6 files changed, 34 insertions(+), 14 deletions(-)
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index f456696..14877ae 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -117,7 +117,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
unsigned reloc;
- r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
+ r600_need_cs_space(rctx, 2 + 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
/* Flush the caches for the first copy only. */
if (rctx->b.flags) {
@@ -148,9 +148,18 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
offset += byte_count;
}
+ /* CP DMA is executed in ME, but index buffers are read by PFP.
+ * The PFP_SYNC_ME packet below ensures that ME (CP DMA) is idle
+ * before PFP starts fetching indices. If we wanted to execute CP DMA
+ * in PFP, the packet would have to precede the CP DMA instead.
+ */
+ if (coher == R600_COHERENCY_SHADER) {
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+ radeon_emit(cs, 0);
+ }
+
/* Invalidate the read caches. */
rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
R600_CONTEXT_INV_VERTEX_CACHE |
R600_CONTEXT_INV_TEX_CACHE;
}
-
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index c1c6169..a81b6c5 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -88,6 +88,7 @@
#define WAIT_REG_MEM_EQUAL 3
#define PKT3_MEM_WRITE 0x3D
#define PKT3_INDIRECT_BUFFER 0x32
+#define PKT3_PFP_SYNC_ME 0x42
#define PKT3_SURFACE_SYNC 0x43
#define PKT3_ME_INITIALIZE 0x44
#define PKT3_COND_WRITE 0x45
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 9230b40..9f309d8 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -519,12 +519,6 @@ static void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst
} else {
util_resource_copy_region(ctx, dst, 0, dstx, 0, 0, src, 0, src_box);
}
-
- /* The index buffer (VGT) doesn't seem to see the result of the copying.
- * Can we somehow flush the index buffer cache? Starting a new IB seems
- * to do the trick. */
- if (rctx->b.chip_class <= R700)
- rctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
/**
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 1f7bed8..5f63dde 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -403,7 +403,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
unsigned src_reloc, dst_reloc;
- r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
+ r600_need_cs_space(rctx, 2 + 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
/* Flush the caches for the first copy only. */
if (rctx->b.flags) {
@@ -438,10 +438,25 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
dst_offset += byte_count;
}
- /* Invalidate the read caches. */
- rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
- R600_CONTEXT_INV_VERTEX_CACHE |
- R600_CONTEXT_INV_TEX_CACHE;
+ /* CP DMA is executed in ME, but index buffers are read by PFP.
+ * The PFP_SYNC_ME packet below ensures that ME (CP DMA) is idle
+ * before PFP starts fetching indices. If we wanted to execute CP DMA
+ * in PFP, the packet would have to precede the CP DMA instead.
+ *
+ * R6xx-R7xx are out of luck, as they don't have the packet.
+ * Starting a new IB (the flush below) has the same effect.
+ */
+ if (rctx->b.chip_class >= EVERGREEN) {
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+ radeon_emit(cs, 0);
+
+ /* Invalidate the read caches. */
+ rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
+ R600_CONTEXT_INV_VERTEX_CACHE |
+ R600_CONTEXT_INV_TEX_CACHE;
+ } else {
+ rctx->b.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+ }
}
void r600_dma_copy_buffer(struct r600_context *rctx,
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 24f599e..5f79222 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -98,6 +98,7 @@
#define WAIT_REG_MEM_EQUAL 3
#define PKT3_MEM_WRITE 0x3D
#define PKT3_INDIRECT_BUFFER 0x32
+#define PKT3_PFP_SYNC_ME 0x42 /* EG+ */
#define PKT3_SURFACE_SYNC 0x43
#define PKT3_ME_INITIALIZE 0x44
#define PKT3_COND_WRITE 0x45
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 25f8cf5..ddbfe00 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -135,7 +135,7 @@
#define COPY_DATA_DST_SEL(x) (((unsigned)(x) & 0xf) << 8)
#define COPY_DATA_COUNT_SEL (1 << 16)
#define COPY_DATA_WR_CONFIRM (1 << 20)
-#define PKT3_PFP_SYNC_ME 0x42 /* r7xx+ */
+#define PKT3_PFP_SYNC_ME 0x42
#define PKT3_SURFACE_SYNC 0x43 /* deprecated on CIK, use ACQUIRE_MEM */
#define PKT3_ME_INITIALIZE 0x44 /* not on CIK */
#define PKT3_COND_WRITE 0x45
--
2.7.4
More information about the mesa-dev mailing list