[Mesa-dev] [PATCH 14/15] radeonsi: add IB parser support for CP DMA packets

Marek Olšák maraeo at gmail.com
Sun Aug 23 05:05:00 PDT 2015


From: Marek Olšák <marek.olsak at amd.com>

If the packet encoding is defined in the same format as the register
definitions, the Python script can process it automatically and parser
support becomes trivial.
---
 src/gallium/drivers/radeonsi/si_cp_dma.c     |  17 ++--
 src/gallium/drivers/radeonsi/si_debug.c      |  24 +++++
 src/gallium/drivers/radeonsi/si_state_draw.c |   6 +-
 src/gallium/drivers/radeonsi/sid.h           | 136 +++++++++++++++++----------
 4 files changed, 122 insertions(+), 61 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index f8a9da4..7bdac97 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -47,10 +47,11 @@ static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
 				       unsigned size, unsigned flags)
 {
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
-	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
-	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
+	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
+	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
 	uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
-			   PKT3_CP_DMA_SRC_SEL(3) | PKT3_CP_DMA_DST_SEL(3) : 0;
+			   S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
+			   S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0;
 
 	assert(size);
 	assert((size & ((1<<21)-1)) == size);
@@ -79,16 +80,16 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
 					uint32_t clear_value, unsigned flags)
 {
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
-	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
-	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
-	uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? PKT3_CP_DMA_DST_SEL(3) : 0;
+	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
+	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
+	uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0;
 
 	assert(size);
 	assert((size & ((1<<21)-1)) == size);
 
 	if (sctx->b.chip_class >= CIK) {
 		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
-		radeon_emit(cs, sync_flag | dst_sel | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
+		radeon_emit(cs, sync_flag | dst_sel | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
 		radeon_emit(cs, clear_value);		/* DATA [31:0] */
 		radeon_emit(cs, 0);
 		radeon_emit(cs, dst_va);		/* DST_ADDR_LO [31:0] */
@@ -97,7 +98,7 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
 	} else {
 		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
 		radeon_emit(cs, clear_value);		/* DATA [31:0] */
-		radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
+		radeon_emit(cs, sync_flag | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
 		radeon_emit(cs, dst_va);			/* DST_ADDR_LO [31:0] */
 		radeon_emit(cs, (dst_va >> 32) & 0xffff);	/* DST_ADDR_HI [15:0] */
 		radeon_emit(cs, size | raw_wait);		/* COMMAND [29:22] | BYTE_COUNT [20:0] */
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index cf09686..22d6f25 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -229,6 +229,30 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
 	case PKT3_NUM_INSTANCES:
 		si_dump_reg(f, R_030934_VGT_NUM_INSTANCES, ib[1], ~0);
 		break;
+	case PKT3_WRITE_DATA:
+		si_dump_reg(f, R_370_CONTROL, ib[1], ~0);
+		si_dump_reg(f, R_371_DST_ADDR_LO, ib[2], ~0);
+		si_dump_reg(f, R_372_DST_ADDR_HI, ib[3], ~0);
+		for (i = 2; i < count; i++) {
+			print_spaces(f, INDENT_PKT);
+			fprintf(f, "0x%08x\n", ib[2+i]);
+		}
+		break;
+	case PKT3_CP_DMA:
+		si_dump_reg(f, R_410_CP_DMA_WORD0, ib[1], ~0);
+		si_dump_reg(f, R_411_CP_DMA_WORD1, ib[2], ~0);
+		si_dump_reg(f, R_412_CP_DMA_WORD2, ib[3], ~0);
+		si_dump_reg(f, R_413_CP_DMA_WORD3, ib[4], ~0);
+		si_dump_reg(f, R_414_COMMAND, ib[5], ~0);
+		break;
+	case PKT3_DMA_DATA:
+		si_dump_reg(f, R_500_DMA_DATA_WORD0, ib[1], ~0);
+		si_dump_reg(f, R_501_SRC_ADDR_LO, ib[2], ~0);
+		si_dump_reg(f, R_502_SRC_ADDR_HI, ib[3], ~0);
+		si_dump_reg(f, R_503_DST_ADDR_LO, ib[4], ~0);
+		si_dump_reg(f, R_504_DST_ADDR_HI, ib[5], ~0);
+		si_dump_reg(f, R_414_COMMAND, ib[6], ~0);
+		break;
 	case PKT3_NOP:
 		if (ib[0] == 0xffff1000) {
 			count = -1; /* One dword NOP. */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index b1aba12..fd2feca 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -879,9 +879,9 @@ void si_trace_emit(struct si_context *sctx)
 	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
 	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-	radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
-				PKT3_WRITE_DATA_WR_CONFIRM |
-				PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME));
+	radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
+		    S_370_WR_CONFIRM(1) |
+		    S_370_ENGINE_SEL(V_370_ME));
 	radeon_emit(cs, sctx->trace_buf->gpu_address);
 	radeon_emit(cs, sctx->trace_buf->gpu_address >> 32);
 	radeon_emit(cs, sctx->trace_id);
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 66660e3..cd6be73 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -69,6 +69,10 @@
 
 #define R600_TEXEL_PITCH_ALIGNMENT_MASK        0x7
 
+/* None of the registers defined in this packet section actually exist. The
+ * only purpose of these definitions is to describe the packet encoding in a
+ * form that the IB parser understands, and to serve as accurate documentation.
+ */
 #define PKT3_NOP                               0x10
 #define PKT3_SET_BASE                          0x11
 #define PKT3_CLEAR_STATE                       0x12
@@ -95,19 +99,23 @@
 #define PKT3_DRAW_INDEX_OFFSET_2               0x35
 #define PKT3_DRAW_PREAMBLE                     0x36 /* new on CIK, required on GFX7.2 and later */
 #define PKT3_WRITE_DATA                        0x37
-#define     PKT3_WRITE_DATA_DST_SEL(x)             ((x) << 8)
-#define     PKT3_WRITE_DATA_DST_SEL_REG            0
-#define     PKT3_WRITE_DATA_DST_SEL_MEM_SYNC       1
-#define     PKT3_WRITE_DATA_DST_SEL_TC_L2          2
-#define     PKT3_WRITE_DATA_DST_SEL_GDS            3
-#define     PKT3_WRITE_DATA_DST_SEL_RESERVED_4     4
-#define     PKT3_WRITE_DATA_DST_SEL_MEM_ASYNC      5
-#define     PKT3_WR_ONE_ADDR                       (1 << 16)
-#define PKT3_WRITE_DATA_WR_CONFIRM                 (1 << 20)
-#define PKT3_WRITE_DATA_ENGINE_SEL(x)              ((x) << 30)
-#define PKT3_WRITE_DATA_ENGINE_SEL_ME              0
-#define PKT3_WRITE_DATA_ENGINE_SEL_PFP             1
-#define PKT3_WRITE_DATA_ENGINE_SEL_CE              2
+#define   R_370_CONTROL				0x370 /* 0x[packet number][word index] */
+#define     S_370_ENGINE_SEL(x)			(((x) & 0x3) << 30)
+#define       V_370_ME				0
+#define       V_370_PFP				1
+#define       V_370_CE				2
+#define       V_370_DE				3
+#define     S_370_WR_CONFIRM(x)			(((x) & 0x1) << 20)
+#define     S_370_WR_ONE_ADDR(x)		(((x) & 0x1) << 16)
+#define     S_370_DST_SEL(x)			(((x) & 0xf) << 8)
+#define       V_370_MEM_MAPPED_REGISTER		0
+#define       V_370_MEMORY_SYNC			1
+#define       V_370_TC_L2			2
+#define       V_370_GDS				3
+#define       V_370_RESERVED			4
+#define       V_370_MEM_ASYNC			5
+#define   R_371_DST_ADDR_LO			0x371
+#define   R_372_DST_ADDR_HI			0x372
 #define PKT3_DRAW_INDEX_INDIRECT_MULTI         0x38
 #define PKT3_MEM_SEMAPHORE                     0x39
 #define PKT3_MPEG_INDEX                        0x3A /* not on CIK */
@@ -159,42 +167,53 @@
  * 5. DST_ADDR_HI [15:0]
  * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
  */
-#define PKT3_CP_DMA_CP_SYNC       (1 << 31)
-#define PKT3_CP_DMA_SRC_SEL(x)       ((x) << 29)
-/* 0 - SRC_ADDR
- * 1 - GDS (program SAS to 1 as well)
- * 2 - DATA
- * 3 - SRC_ADDR using TC L2 (DMA_DATA only)
- */
-#define PKT3_CP_DMA_DST_SEL(x)       ((x) << 20)
-/* 0 - DST_ADDR
- * 1 - GDS (program DAS to 1 as well)
- * 3 - DST_ADDR using TC L2 (DMA_DATA only)
- */
-/* COMMAND */
-#define PKT3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
-/* 0 - none
- * 1 - 8 in 16
- * 2 - 8 in 32
- * 3 - 8 in 64
- */
-#define PKT3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
-/* 0 - none
- * 1 - 8 in 16
- * 2 - 8 in 32
- * 3 - 8 in 64
- */
-#define PKT3_CP_DMA_CMD_SAS       (1 << 26)
-/* 0 - memory
- * 1 - register
- */
-#define PKT3_CP_DMA_CMD_DAS       (1 << 27)
-/* 0 - memory
- * 1 - register
- */
-#define PKT3_CP_DMA_CMD_SAIC      (1 << 28)
-#define PKT3_CP_DMA_CMD_DAIC      (1 << 29)
-#define PKT3_CP_DMA_CMD_RAW_WAIT  (1 << 30)
+#define   R_410_CP_DMA_WORD0		0x410 /* 0x[packet number][word index] */
+#define     S_410_SRC_ADDR_LO(x)	((x) & 0xffffffff)
+#define   R_411_CP_DMA_WORD1		0x411
+#define     S_411_CP_SYNC(x)		(((x) & 0x1) << 31)
+#define     S_411_SRC_SEL(x)		(((x) & 0x3) << 29)
+#define       V_411_SRC_ADDR		0
+#define       V_411_GDS			1 /* program SAS to 1 as well */
+#define       V_411_DATA		2
+#define       V_411_SRC_ADDR_TC_L2	3 /* new for CIK */
+#define     S_411_ENGINE(x)		(((x) & 0x1) << 27)
+#define       V_411_ME			0
+#define       V_411_PFP			1
+#define     S_411_DSL_SEL(x)		(((x) & 0x3) << 20)
+#define       V_411_DST_ADDR		0
+#define       V_411_GDS			1 /* program DAS to 1 as well */
+#define       V_411_DST_ADDR_TC_L2	3 /* new for CIK */
+#define     S_411_SRC_ADDR_HI(x)	((x) & 0xffff)
+#define   R_412_CP_DMA_WORD2		0x412 /* 0x[packet number][word index] */
+#define     S_412_DST_ADDR_LO(x)	((x) & 0xffffffff)
+#define   R_413_CP_DMA_WORD3		0x413 /* 0x[packet number][word index] */
+#define     S_413_DST_ADDR_HI(x)	((x) & 0xffff)
+#define   R_414_COMMAND			0x414
+#define     S_414_BYTE_COUNT(x)		((x) & 0x1fffff)
+#define     S_414_DISABLE_WR_CONFIRM(x)	(((x) & 0x1) << 21)
+#define     S_414_SRC_SWAP(x)		(((x) & 0x3) << 22)
+#define       V_414_NONE		0
+#define       V_414_8_IN_16		1
+#define       V_414_8_IN_32		2
+#define       V_414_8_IN_64		3
+#define     S_414_DST_SWAP(x)		(((x) & 0x3) << 24)
+#define       V_414_NONE		0
+#define       V_414_8_IN_16		1
+#define       V_414_8_IN_32		2
+#define       V_414_8_IN_64		3
+#define     S_414_SAS(x)		(((x) & 0x1) << 26)
+#define       V_414_MEMORY		0
+#define       V_414_REGISTER		1
+#define     S_414_DAS(x)		(((x) & 0x1) << 27)
+#define       V_414_MEMORY		0
+#define       V_414_REGISTER		1
+#define     S_414_SAIC(x)		(((x) & 0x1) << 28)
+#define       V_414_INCREMENT		0
+#define       V_414_NO_INCREMENT	1
+#define     S_414_DAIC(x)		(((x) & 0x1) << 29)
+#define       V_414_INCREMENT		0
+#define       V_414_NO_INCREMENT	1
+#define     S_414_RAW_WAIT(x)		(((x) & 0x1) << 30)
 
 #define PKT3_DMA_DATA					0x50 /* new for CIK */
 /* 1. header
@@ -205,7 +224,24 @@
  * 5. DST_ADDR_HI [31:0]
  * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
  */
-
+#define   R_500_DMA_DATA_WORD0		0x500 /* 0x[packet number][word index] */
+#define     S_500_CP_SYNC(x)		(((x) & 0x1) << 31)
+#define     S_500_SRC_SEL(x)		(((x) & 0x3) << 29)
+#define       V_500_SRC_ADDR		0
+#define       V_500_GDS			1 /* program SAS to 1 as well */
+#define       V_500_DATA		2
+#define       V_500_SRC_ADDR_TC_L2	3 /* new for CIK */
+#define     S_500_DSL_SEL(x)		(((x) & 0x3) << 20)
+#define       V_500_DST_ADDR		0
+#define       V_500_GDS			1 /* program DAS to 1 as well */
+#define       V_500_DST_ADDR_TC_L2	3 /* new for CIK */
+#define     S_500_ENGINE(x)		((x) & 0x1)
+#define       V_500_ME			0
+#define       V_500_PFP			1
+#define   R_501_SRC_ADDR_LO		0x501
+#define   R_502_SRC_ADDR_HI		0x502
+#define   R_503_DST_ADDR_LO		0x503
+#define   R_504_DST_ADDR_HI		0x504
 
 #define R_000E4C_SRBM_STATUS2                                           0x000E4C
 #define   S_000E4C_SDMA_RQ_PENDING(x)                                 (((x) & 0x1) << 0)
-- 
2.1.4



More information about the mesa-dev mailing list