[Mesa-dev] [PATCH 04/12] radeonsi: always use SDMA for big buffer clears and first buffer uses

Marek Olšák maraeo at gmail.com
Mon Jan 2 22:54:09 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_cp_dma.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 13b901b..fb6ed26 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -167,20 +167,21 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
 	if (!(user_flags & SI_CPDMA_SKIP_SYNC_AFTER) &&
 	    byte_count == remaining_size)
 		*packet_flags |= CP_DMA_SYNC;
 }
 
 static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 			    uint64_t offset, uint64_t size, unsigned value,
 			    enum r600_coherency coher)
 {
 	struct si_context *sctx = (struct si_context*)ctx;
+	struct radeon_winsys *ws = sctx->b.ws;
 	unsigned tc_l2_flag = get_tc_l2_flag(sctx, coher);
 	unsigned flush_flags = get_flush_flags(sctx, coher);
 	bool is_first = true;
 
 	if (!size)
 		return;
 
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
@@ -193,20 +194,39 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 						      sctx->b.gfx.cs,
 						      PIPE_TRANSFER_WRITE);
 		map += offset;
 		for (uint64_t i = 0; i < size; i++) {
 			unsigned byte_within_dword = (offset + i) % 4;
 			*map++ = (value >> (byte_within_dword * 8)) & 0xff;
 		}
 		return;
 	}
 
+	/* dma_clear_buffer can use clear_buffer on failure. Make sure that
+	 * doesn't happen. We don't want an infinite recursion: */
+	if (sctx->b.chip_class >= CIK && sctx->b.dma.cs &&
+	    /* CP DMA is very slow. Always use SDMA for big clears. This
+	     * alone improves DeusEx:MD performance by 70%. */
+	    (size > 128 * 1024 ||
+	     /* Buffers not used by the GFX IB yet will be cleared by SDMA.
+	      * This happens to move most buffer clears to SDMA, including
+	      * DCC and CMASK clears, because pipe->clear clears them before
+	      * si_emit_framebuffer_state (in a draw call) adds them.
+	      * For example, DeusEx:MD has 21 buffer clears per frame and all
+	      * of them are moved to SDMA thanks to this. */
+	     !ws->cs_is_buffer_referenced(sctx->b.gfx.cs,
+					  r600_resource(dst)->buf,
+				          RADEON_USAGE_READWRITE))) {
+		sctx->b.dma_clear_buffer(ctx, dst, offset, size, value);
+		return;
+	}
+
 	uint64_t va = r600_resource(dst)->gpu_address + offset;
 
 	/* Flush the caches. */
 	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 	                 SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
 
 	while (size) {
 		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
 		unsigned dma_flags = tc_l2_flag  | CP_DMA_CLEAR;
 
-- 
2.7.4



More information about the mesa-dev mailing list