[Mesa-dev] [PATCH 2/7] radeonsi: add flag L2_STREAM for minimal cache usage
Marek Olšák
maraeo at gmail.com
Tue Aug 21 05:50:31 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
---
src/amd/common/sid.h | 2 ++
src/gallium/drivers/radeonsi/si_cp_dma.c | 16 ++++++++++------
src/gallium/drivers/radeonsi/si_pipe.h | 1 +
3 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 0671f7d3998..d9c4a1a7414 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -287,25 +287,27 @@
* 5. DST_ADDR_HI [31:0]
* 6. COMMAND [29:22] | BYTE_COUNT [20:0]
*/
#define R_500_DMA_DATA_WORD0 0x500 /* 0x[packet number][word index] */
#define S_500_CP_SYNC(x) (((unsigned)(x) & 0x1) << 31)
#define S_500_SRC_SEL(x) (((unsigned)(x) & 0x3) << 29)
#define V_500_SRC_ADDR 0
#define V_500_GDS 1 /* program SAS to 1 as well */
#define V_500_DATA 2
#define V_500_SRC_ADDR_TC_L2 3 /* new for CIK */
+#define S_500_DST_CACHE_POLICY(x) (((unsigned)(x) & 0x3) << 25) /* CIK+ */
#define S_500_DST_SEL(x) (((unsigned)(x) & 0x3) << 20)
#define V_500_DST_ADDR 0
#define V_500_GDS 1 /* program DAS to 1 as well */
#define V_500_NOWHERE 2 /* new for GFX9 */
#define V_500_DST_ADDR_TC_L2 3 /* new for CIK */
+#define S_500_SRC_CACHE_POLICY(x) (((unsigned)(x) & 0x3) << 13) /* CIK+ */
#define S_500_ENGINE(x) ((x) & 0x1)
#define V_500_ME 0
#define V_500_PFP 1
#define R_501_SRC_ADDR_LO 0x501
#define R_502_SRC_ADDR_HI 0x502
#define R_503_DST_ADDR_LO 0x503
#define R_504_DST_ADDR_HI 0x504
#define R_000E4C_SRBM_STATUS2 0x000E4C
#define S_000E4C_SDMA_RQ_PENDING(x) (((unsigned)(x) & 0x1) << 0)
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index bae592a4f7d..61be22f28b5 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -81,29 +81,33 @@ static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va,
command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
else
command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
}
if (flags & CP_DMA_RAW_WAIT)
command |= S_414_RAW_WAIT(1);
/* Src and dst flags. */
if (sctx->chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) &&
- src_va == dst_va)
+ src_va == dst_va) {
header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
- else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS)
- header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
+ } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) {
+ header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2) |
+ S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM);
+ }
- if (flags & CP_DMA_CLEAR)
+ if (flags & CP_DMA_CLEAR) {
header |= S_411_SRC_SEL(V_411_DATA);
- else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS)
- header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
+ } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) {
+ header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
+ S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
+ }
if (sctx->chip_class >= CIK) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
radeon_emit(cs, header);
radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */
radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
radeon_emit(cs, command);
} else {
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5fa8c33f6cb..95489f09612 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1104,20 +1104,21 @@ void si_init_clear_functions(struct si_context *sctx);
#define SI_CPDMA_SKIP_BO_LIST_UPDATE (1 << 4) /* don't update the BO list */
#define SI_CPDMA_SKIP_ALL (SI_CPDMA_SKIP_CHECK_CS_SPACE | \
SI_CPDMA_SKIP_SYNC_AFTER | \
SI_CPDMA_SKIP_SYNC_BEFORE | \
SI_CPDMA_SKIP_GFX_SYNC | \
SI_CPDMA_SKIP_BO_LIST_UPDATE)
enum si_cache_policy {
L2_BYPASS,
L2_LRU, /* same as SLC=0 */
+ L2_STREAM, /* same as SLC=1 */
};
enum si_coherency {
SI_COHERENCY_NONE, /* no cache flushes needed */
SI_COHERENCY_SHADER,
SI_COHERENCY_CB_META,
};
void si_cp_dma_wait_for_idle(struct si_context *sctx);
void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
--
2.17.1
More information about the mesa-dev mailing list