[Mesa-dev] [PATCH 066/140] amd: GFX9 packet changes

Marek Olšák maraeo at gmail.com
Mon Mar 20 22:43:16 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/amd/common/r600d_common.h            |  1 +
 src/amd/common/sid.h                     | 30 +++++++++++++++++++++---------
 src/gallium/drivers/radeonsi/si_cp_dma.c |  4 ++--
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/src/amd/common/r600d_common.h b/src/amd/common/r600d_common.h
index a35108f..3fdfb7c 100644
--- a/src/amd/common/r600d_common.h
+++ b/src/amd/common/r600d_common.h
@@ -56,20 +56,21 @@
 #define         WAIT_REG_MEM_MEM_SPACE(x)       (((unsigned)(x) & 0x3) << 4)
 #define PKT3_EVENT_WRITE                       0x46
 #define PKT3_EVENT_WRITE_EOP                   0x47
 #define         EOP_DATA_SEL(x)                         ((x) << 29)
 		/* 0 - discard
 		 * 1 - send low 32bit data
 		 * 2 - send 64bit data
 		 * 3 - send 64bit GPU counter value
 		 * 4 - send 64bit sys counter value
 		 */
+#define PKT3_RELEASE_MEM                       0x49 /* GFX9+ (any ring) or GFX8 (compute ring only) */
 #define PKT3_SET_CONFIG_REG		       0x68
 #define PKT3_SET_CONTEXT_REG		       0x69
 #define PKT3_STRMOUT_BASE_UPDATE	       0x72 /* r700 only */
 #define PKT3_SURFACE_BASE_UPDATE               0x73 /* r600 only */
 #define		SURFACE_BASE_UPDATE_DEPTH      (1 << 0)
 #define		SURFACE_BASE_UPDATE_COLOR(x)   (2 << (x))
 #define		SURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << x) - 1) << 1)
 #define		SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
 #define PKT3_SET_SH_REG                        0x76 /* SI and later */
 #define PKT3_SET_UCONFIG_REG                   0x79 /* CIK and later */
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 61e1406..e0c3a02 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -43,21 +43,30 @@
 #define EVENT_TYPE_SAMPLE_STREAMOUTSTATS	0x20
 #define		EVENT_TYPE(x)                           ((x) << 0)
 #define		EVENT_INDEX(x)                          ((x) << 8)
                 /* 0 - any non-TS event
 		 * 1 - ZPASS_DONE
 		 * 2 - SAMPLE_PIPELINESTAT
 		 * 3 - SAMPLE_STREAMOUTSTAT*
 		 * 4 - *S_PARTIAL_FLUSH
 		 * 5 - TS events
 		 */
-#define EVENT_WRITE_INV_L2                   0x100000
+
+/* EVENT_WRITE_EOP (SI-VI) & RELEASE_MEM (GFX9) */
+#define EVENT_TCL1_VOL_ACTION_ENA		(1 << 12)
+#define EVENT_TC_VOL_ACTION_ENA			(1 << 13)
+#define EVENT_TC_WB_ACTION_ENA			(1 << 15)
+#define EVENT_TCL1_ACTION_ENA			(1 << 16)
+#define EVENT_TC_ACTION_ENA			(1 << 17)
+#define EVENT_TC_NC_ACTION_ENA			(1 << 19) /* GFX9+ */
+#define EVENT_TC_WC_ACTION_ENA			(1 << 20) /* GFX9+ */
+#define EVENT_TC_MD_ACTION_ENA			(1 << 21) /* GFX9+ */
 
 
 #define PREDICATION_OP_CLEAR 0x0
 #define PREDICATION_OP_ZPASS 0x1
 #define PREDICATION_OP_PRIMCOUNT 0x2
 #define PREDICATION_OP_BOOL64 0x3
 
 #define PRED_OP(x) ((x) << 16)
 
 #define PREDICATION_CONTINUE (1 << 31)
@@ -85,21 +94,21 @@
 #define PKT3_COND_EXEC                         0x22
 #define PKT3_PRED_EXEC                         0x23
 #define PKT3_DRAW_INDIRECT                     0x24
 #define PKT3_DRAW_INDEX_INDIRECT               0x25
 #define PKT3_INDEX_BASE                        0x26
 #define PKT3_DRAW_INDEX_2                      0x27
 #define PKT3_CONTEXT_CONTROL                   0x28
 #define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
 #define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
 #define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
-#define PKT3_INDEX_TYPE                        0x2A
+#define PKT3_INDEX_TYPE                        0x2A /* not on GFX9 */
 #define PKT3_DRAW_INDIRECT_MULTI               0x2C
 #define   R_2C3_DRAW_INDEX_LOC                  0x2C3
 #define     S_2C3_COUNT_INDIRECT_ENABLE(x)      (((unsigned)(x) & 0x1) << 30)
 #define     S_2C3_DRAW_INDEX_ENABLE(x)          (((unsigned)(x) & 0x1) << 31)
 #define PKT3_DRAW_INDEX_AUTO                   0x2D
 #define PKT3_DRAW_INDEX_IMMD                   0x2E /* not on CIK */
 #define PKT3_NUM_INSTANCES                     0x2F
 #define PKT3_DRAW_INDEX_MULTI_AUTO             0x30
 #define PKT3_INDIRECT_BUFFER_SI                0x32 /* not on CIK */
 #define PKT3_INDIRECT_BUFFER_CONST             0x33
@@ -146,27 +155,28 @@
 #define                 COPY_DATA_PERF          4
 #define                 COPY_DATA_IMM           5
 #define		COPY_DATA_DST_SEL(x)		(((unsigned)(x) & 0xf) << 8)
 #define		COPY_DATA_COUNT_SEL		(1 << 16)
 #define		COPY_DATA_WR_CONFIRM		(1 << 20)
 #define PKT3_PFP_SYNC_ME		       0x42
 #define PKT3_SURFACE_SYNC                      0x43 /* deprecated on CIK, use ACQUIRE_MEM */
 #define PKT3_ME_INITIALIZE                     0x44 /* not on CIK */
 #define PKT3_COND_WRITE                        0x45
 #define PKT3_EVENT_WRITE                       0x46
-#define PKT3_EVENT_WRITE_EOP                   0x47
+#define PKT3_EVENT_WRITE_EOP                   0x47 /* not on GFX9 */
 /* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
  * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
  * DST_SEL=MC. Only CIK chips are affected.
  */
-/*#define PKT3_EVENT_WRITE_EOS                   0x48*/ /* fix CP DMA before uncommenting */
-#define PKT3_RELEASE_MEM                       0x49
+/* fix CP DMA before uncommenting: */
+/*#define PKT3_EVENT_WRITE_EOS                   0x48*/ /* not on GFX9 */
+#define PKT3_RELEASE_MEM                       0x49 /* GFX9+ (any ring) or GFX8 (compute ring only) */
 #define PKT3_ONE_REG_WRITE                     0x57 /* not on CIK */
 #define PKT3_ACQUIRE_MEM                       0x58 /* new for CIK */
 #define PKT3_SET_CONFIG_REG                    0x68
 #define PKT3_SET_CONTEXT_REG                   0x69
 #define PKT3_SET_SH_REG                        0x76
 #define PKT3_SET_SH_REG_OFFSET                 0x77
 #define PKT3_SET_UCONFIG_REG                   0x79 /* new for CIK */
 #define PKT3_LOAD_CONST_RAM                    0x80
 #define PKT3_WRITE_CONST_RAM                   0x81
 #define PKT3_DUMP_CONST_RAM                    0x83
@@ -214,45 +224,47 @@
 #define     S_411_DSL_SEL(x)		(((unsigned)(x) & 0x3) << 20)
 #define       V_411_DST_ADDR		0
 #define       V_411_GDS			1 /* program DAS to 1 as well */
 #define       V_411_DST_ADDR_TC_L2	3 /* new for CIK */
 #define     S_411_SRC_ADDR_HI(x)	((x) & 0xffff)
 #define   R_412_CP_DMA_WORD2		0x412 /* 0x[packet number][word index] */
 #define     S_412_DST_ADDR_LO(x)	((x) & 0xffffffff)
 #define   R_413_CP_DMA_WORD3		0x413 /* 0x[packet number][word index] */
 #define     S_413_DST_ADDR_HI(x)	((x) & 0xffff)
 #define   R_414_COMMAND			0x414
-#define     S_414_BYTE_COUNT(x)		((x) & 0x1fffff)
-#define     S_414_DISABLE_WR_CONFIRM(x)	(((unsigned)(x) & 0x1) << 21)
-#define     S_414_SRC_SWAP(x)		(((unsigned)(x) & 0x3) << 22)
+#define     S_414_BYTE_COUNT_GFX6(x)	((x) & 0x1fffff)
+#define     S_414_BYTE_COUNT_GFX9(x)	((x) & 0x3ffffff)
+#define     S_414_DISABLE_WR_CONFIRM_GFX6(x) (((unsigned)(x) & 0x1) << 21) /* not on GFX9 */
+#define     S_414_SRC_SWAP(x)		(((unsigned)(x) & 0x3) << 22) /* not on GFX9 */
 #define       V_414_NONE		0
 #define       V_414_8_IN_16		1
 #define       V_414_8_IN_32		2
 #define       V_414_8_IN_64		3
-#define     S_414_DST_SWAP(x)		(((unsigned)(x) & 0x3) << 24)
+#define     S_414_DST_SWAP(x)		(((unsigned)(x) & 0x3) << 24) /* not on GFX9 */
 #define       V_414_NONE		0
 #define       V_414_8_IN_16		1
 #define       V_414_8_IN_32		2
 #define       V_414_8_IN_64		3
 #define     S_414_SAS(x)		(((unsigned)(x) & 0x1) << 26)
 #define       V_414_MEMORY		0
 #define       V_414_REGISTER		1
 #define     S_414_DAS(x)		(((unsigned)(x) & 0x1) << 27)
 #define       V_414_MEMORY		0
 #define       V_414_REGISTER		1
 #define     S_414_SAIC(x)		(((unsigned)(x) & 0x1) << 28)
 #define       V_414_INCREMENT		0
 #define       V_414_NO_INCREMENT	1
 #define     S_414_DAIC(x)		(((unsigned)(x) & 0x1) << 29)
 #define       V_414_INCREMENT		0
 #define       V_414_NO_INCREMENT	1
 #define     S_414_RAW_WAIT(x)		(((unsigned)(x) & 0x1) << 30)
+#define     S_414_DISABLE_WR_CONFIRM_GFX9(x) (((unsigned)(x) & 0x1) << 31)
 
 #define PKT3_DMA_DATA					0x50 /* new for CIK */
 /* 1. header
  * 2. CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | ENGINE [0]
  * 2. SRC_ADDR_LO [31:0] or DATA [31:0]
  * 3. SRC_ADDR_HI [31:0]
  * 4. DST_ADDR_LO [31:0]
  * 5. DST_ADDR_HI [31:0]
  * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
  */
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 1be7586..a564468 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -44,30 +44,30 @@
 
 /* Emit a CP DMA packet to do a copy from one buffer to another, or to clear
  * a buffer. The size must fit in bits [20:0]. If CP_DMA_CLEAR is set, src_va is a 32-bit
  * clear value.
  */
 static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va,
 			   uint64_t src_va, unsigned size, unsigned flags,
 			   enum r600_coherency coher)
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
-	uint32_t header = 0, command = S_414_BYTE_COUNT(size);
+	uint32_t header = 0, command = S_414_BYTE_COUNT_GFX6(size);
 
 	assert(size);
 	assert(size <= CP_DMA_MAX_BYTE_COUNT);
 
 	/* Sync flags. */
 	if (flags & CP_DMA_SYNC)
 		header |= S_411_CP_SYNC(1);
 	else
-		command |= S_414_DISABLE_WR_CONFIRM(1);
+		command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
 
 	if (flags & CP_DMA_RAW_WAIT)
 		command |= S_414_RAW_WAIT(1);
 
 	/* Src and dst flags. */
 	if (flags & CP_DMA_USE_L2)
 		header |= S_411_DSL_SEL(V_411_DST_ADDR_TC_L2);
 
 	if (flags & CP_DMA_CLEAR)
 		header |= S_411_SRC_SEL(V_411_DATA);
-- 
2.7.4



More information about the mesa-dev mailing list