[Mesa-dev] [PATCH 2/7] radeonsi: add flag L2_STREAM for minimal cache usage

Marek Olšák maraeo at gmail.com
Tue Aug 21 05:50:31 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

---
 src/amd/common/sid.h                     |  2 ++
 src/gallium/drivers/radeonsi/si_cp_dma.c | 16 ++++++++++------
 src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 0671f7d3998..d9c4a1a7414 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -287,25 +287,27 @@
  * 5. DST_ADDR_HI [31:0]
  * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
  */
 #define   R_500_DMA_DATA_WORD0		0x500 /* 0x[packet number][word index] */
 #define     S_500_CP_SYNC(x)		(((unsigned)(x) & 0x1) << 31)
 #define     S_500_SRC_SEL(x)		(((unsigned)(x) & 0x3) << 29)
 #define       V_500_SRC_ADDR		0
 #define       V_500_GDS			1 /* program SAS to 1 as well */
 #define       V_500_DATA		2
 #define       V_500_SRC_ADDR_TC_L2	3 /* new for CIK */
+#define     S_500_DST_CACHE_POLICY(x)	(((unsigned)(x) & 0x3) << 25) /* CIK+ */
 #define     S_500_DST_SEL(x)		(((unsigned)(x) & 0x3) << 20)
 #define       V_500_DST_ADDR		0
 #define       V_500_GDS			1 /* program DAS to 1 as well */
 #define       V_500_NOWHERE		2 /* new for GFX9 */
 #define       V_500_DST_ADDR_TC_L2	3 /* new for CIK */
+#define     S_500_SRC_CACHE_POLICY(x)	(((unsigned)(x) & 0x3) << 13) /* CIK+ */
 #define     S_500_ENGINE(x)		((x) & 0x1)
 #define       V_500_ME			0
 #define       V_500_PFP			1
 #define   R_501_SRC_ADDR_LO		0x501
 #define   R_502_SRC_ADDR_HI		0x502
 #define   R_503_DST_ADDR_LO		0x503
 #define   R_504_DST_ADDR_HI		0x504
 
 #define R_000E4C_SRBM_STATUS2                                           0x000E4C
 #define   S_000E4C_SDMA_RQ_PENDING(x)                                 (((unsigned)(x) & 0x1) << 0)
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index bae592a4f7d..61be22f28b5 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -81,29 +81,33 @@ static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va,
 			command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
 		else
 			command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
 	}
 
 	if (flags & CP_DMA_RAW_WAIT)
 		command |= S_414_RAW_WAIT(1);
 
 	/* Src and dst flags. */
 	if (sctx->chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) &&
-	    src_va == dst_va)
+	    src_va == dst_va) {
 		header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
-	else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS)
-		header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
+	} else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) {
+		header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2) |
+			  S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM);
+	}
 
-	if (flags & CP_DMA_CLEAR)
+	if (flags & CP_DMA_CLEAR) {
 		header |= S_411_SRC_SEL(V_411_DATA);
-	else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS)
-		header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
+	} else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) {
+		header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
+			  S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
+	}
 
 	if (sctx->chip_class >= CIK) {
 		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
 		radeon_emit(cs, header);
 		radeon_emit(cs, src_va);	/* SRC_ADDR_LO [31:0] */
 		radeon_emit(cs, src_va >> 32);	/* SRC_ADDR_HI [31:0] */
 		radeon_emit(cs, dst_va);	/* DST_ADDR_LO [31:0] */
 		radeon_emit(cs, dst_va >> 32);	/* DST_ADDR_HI [31:0] */
 		radeon_emit(cs, command);
 	} else {
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5fa8c33f6cb..95489f09612 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1104,20 +1104,21 @@ void si_init_clear_functions(struct si_context *sctx);
 #define SI_CPDMA_SKIP_BO_LIST_UPDATE	(1 << 4) /* don't update the BO list */
 #define SI_CPDMA_SKIP_ALL (SI_CPDMA_SKIP_CHECK_CS_SPACE | \
 			   SI_CPDMA_SKIP_SYNC_AFTER | \
 			   SI_CPDMA_SKIP_SYNC_BEFORE | \
 			   SI_CPDMA_SKIP_GFX_SYNC | \
 			   SI_CPDMA_SKIP_BO_LIST_UPDATE)
 
 enum si_cache_policy {
 	L2_BYPASS,
 	L2_LRU,    /* same as SLC=0 */
+	L2_STREAM, /* same as SLC=1 */
 };
 
 enum si_coherency {
 	SI_COHERENCY_NONE, /* no cache flushes needed */
 	SI_COHERENCY_SHADER,
 	SI_COHERENCY_CB_META,
 };
 
 void si_cp_dma_wait_for_idle(struct si_context *sctx);
 void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
-- 
2.17.1



More information about the mesa-dev mailing list