[Mesa-dev] [PATCH 9/9] radeonsi: implement ARB_draw_indirect

Marek Olšák maraeo at gmail.com
Sat Apr 26 06:27:42 PDT 2014


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_commands.c   | 53 ++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_pipe.c       |  1 +
 src/gallium/drivers/radeonsi/si_state.h      |  7 +++
 src/gallium/drivers/radeonsi/si_state_draw.c | 73 ++++++++++++++++++++++------
 src/gallium/drivers/radeonsi/sid.h           | 12 ++++-
 5 files changed, 129 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_commands.c b/src/gallium/drivers/radeonsi/si_commands.c
index 5ddc40e..2efdeda 100644
--- a/src/gallium/drivers/radeonsi/si_commands.c
+++ b/src/gallium/drivers/radeonsi/si_commands.c
@@ -57,6 +57,59 @@ void si_cmd_draw_index_auto(struct si_pm4_state *pm4, uint32_t count,
 	si_pm4_cmd_end(pm4, predicate);
 }
 
+void si_cmd_draw_indirect(struct si_pm4_state *pm4, uint64_t indirect_va,
+			  uint32_t indirect_offset, uint32_t base_vtx_loc,
+			  uint32_t start_inst_loc, bool predicate)
+{ /* Appends a SET_BASE packet followed by a DRAW_INDIRECT packet to pm4. */
+	assert(indirect_va % 8 == 0); /* indirect buffer address must be 8-byte aligned */
+	assert(indirect_offset % 4 == 0); /* offset must be a multiple of 4 bytes */
+
+	si_pm4_cmd_begin(pm4, PKT3_SET_BASE);
+	si_pm4_cmd_add(pm4, 1); /* NOTE(review): presumably the base-index selector for indirect args; confirm against PM4 docs */
+	si_pm4_cmd_add(pm4, indirect_va); /* low part of the address, assuming si_pm4_cmd_add stores 32 bits - TODO confirm */
+	si_pm4_cmd_add(pm4, indirect_va >> 32); /* upper 32 bits of the address */
+	si_pm4_cmd_end(pm4, predicate);
+
+	si_pm4_cmd_begin(pm4, PKT3_DRAW_INDIRECT);
+	si_pm4_cmd_add(pm4, indirect_offset); /* passed through unchanged; presumably relative to the base set above */
+	si_pm4_cmd_add(pm4, (base_vtx_loc - SI_SH_REG_OFFSET) >> 2); /* distance from SI_SH_REG_OFFSET, divided by 4 */
+	si_pm4_cmd_add(pm4, (start_inst_loc - SI_SH_REG_OFFSET) >> 2); /* same conversion for start_inst_loc */
+	si_pm4_cmd_add(pm4, V_0287F0_DI_SRC_SEL_AUTO_INDEX); /* last dword of the packet: source-select value */
+	si_pm4_cmd_end(pm4, predicate);
+}
+
+void si_cmd_draw_index_indirect(struct si_pm4_state *pm4, uint64_t indirect_va,
+				uint64_t index_va, uint32_t index_max_size,
+				uint32_t indirect_offset, uint32_t base_vtx_loc,
+				uint32_t start_inst_loc, bool predicate)
+{ /* Appends SET_BASE, INDEX_BASE, INDEX_BUFFER_SIZE and DRAW_INDEX_INDIRECT packets to pm4, in that order. */
+	assert(indirect_va % 8 == 0); /* indirect buffer address must be 8-byte aligned */
+	assert(index_va % 2 == 0); /* index buffer address must be 2-byte aligned */
+	assert(indirect_offset % 4 == 0); /* offset must be a multiple of 4 bytes */
+
+	si_pm4_cmd_begin(pm4, PKT3_SET_BASE);
+	si_pm4_cmd_add(pm4, 1); /* NOTE(review): presumably the base-index selector for indirect args; confirm against PM4 docs */
+	si_pm4_cmd_add(pm4, indirect_va); /* low part of the address, assuming si_pm4_cmd_add stores 32 bits - TODO confirm */
+	si_pm4_cmd_add(pm4, indirect_va >> 32); /* upper 32 bits of the address */
+	si_pm4_cmd_end(pm4, predicate);
+
+	si_pm4_cmd_begin(pm4, PKT3_INDEX_BASE);
+	si_pm4_cmd_add(pm4, index_va); /* low part of the index buffer address (same 32-bit assumption as above) */
+	si_pm4_cmd_add(pm4, index_va >> 32); /* upper 32 bits of the index buffer address */
+	si_pm4_cmd_end(pm4, predicate);
+
+	si_pm4_cmd_begin(pm4, PKT3_INDEX_BUFFER_SIZE);
+	si_pm4_cmd_add(pm4, index_max_size); /* NOTE(review): units (indices vs. bytes) are not visible here; check the caller */
+	si_pm4_cmd_end(pm4, predicate);
+
+	si_pm4_cmd_begin(pm4, PKT3_DRAW_INDEX_INDIRECT);
+	si_pm4_cmd_add(pm4, indirect_offset); /* passed through unchanged; presumably relative to the base set above */
+	si_pm4_cmd_add(pm4, (base_vtx_loc - SI_SH_REG_OFFSET) >> 2); /* distance from SI_SH_REG_OFFSET, divided by 4 */
+	si_pm4_cmd_add(pm4, (start_inst_loc - SI_SH_REG_OFFSET) >> 2); /* same conversion for start_inst_loc */
+	si_pm4_cmd_add(pm4, V_0287F0_DI_SRC_SEL_DMA); /* last dword of the packet: source-select value */
+	si_pm4_cmd_end(pm4, predicate);
+}
+
 void si_cmd_surface_sync(struct si_pm4_state *pm4, uint32_t cp_coher_cntl)
 {
 	if (pm4->chip_class >= CIK) {
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 373199c..587eded 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -210,6 +210,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
 	case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
 	case PIPE_CAP_CUBE_MAP_ARRAY:
+	case PIPE_CAP_DRAW_INDIRECT:
 		return 1;
 
 	case PIPE_CAP_TEXTURE_MULTISAMPLE:
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 4c5b09e..0e0e480 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -249,6 +249,13 @@ void si_cmd_draw_index_2(struct si_pm4_state *pm4, uint32_t max_size,
 			 uint32_t initiator, bool predicate);
 void si_cmd_draw_index_auto(struct si_pm4_state *pm4, uint32_t count,
 			    uint32_t initiator, bool predicate);
+void si_cmd_draw_indirect(struct si_pm4_state *pm4, uint64_t indirect_va,
+			  uint32_t indirect_offset, uint32_t base_vtx_loc,
+			  uint32_t start_inst_loc, bool predicate);
+void si_cmd_draw_index_indirect(struct si_pm4_state *pm4, uint64_t indirect_va,
+				uint64_t index_va, uint32_t index_max_size,
+				uint32_t indirect_offset, uint32_t base_vtx_loc,
+				uint32_t start_inst_loc, bool predicate);
 void si_cmd_surface_sync(struct si_pm4_state *pm4, uint32_t cp_coher_cntl);
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 34c3399..bc69c94 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -797,15 +797,18 @@ static void si_state_draw(struct si_context *sctx,
 	}
 	si_pm4_cmd_end(pm4, sctx->b.predicate_drawing);
 
-	si_pm4_cmd_begin(pm4, PKT3_NUM_INSTANCES);
-	si_pm4_cmd_add(pm4, info->instance_count);
-	si_pm4_cmd_end(pm4, sctx->b.predicate_drawing);
-
 	if (!info->indirect) {
+		si_pm4_cmd_begin(pm4, PKT3_NUM_INSTANCES);
+		si_pm4_cmd_add(pm4, info->instance_count);
+		si_pm4_cmd_end(pm4, sctx->b.predicate_drawing);
+
 		si_pm4_set_reg(pm4, sh_base_reg + SI_SGPR_BASE_VERTEX * 4,
 			       info->indexed ? info->index_bias : info->start);
 		si_pm4_set_reg(pm4, sh_base_reg + SI_SGPR_START_INSTANCE * 4,
 			       info->start_instance);
+	} else {
+		si_pm4_add_bo(pm4, (struct r600_resource *)info->indirect,
+			      RADEON_USAGE_READ, RADEON_PRIO_MIN);
 	}
 
 	if (info->indexed) {
@@ -817,14 +820,35 @@ static void si_state_draw(struct si_context *sctx,
 
 		si_pm4_add_bo(pm4, (struct r600_resource *)ib->buffer, RADEON_USAGE_READ,
 			      RADEON_PRIO_MIN);
-		va += info->start * ib->index_size;
-		si_cmd_draw_index_2(pm4, max_size, va, info->count,
-				    V_0287F0_DI_SRC_SEL_DMA,
-				    sctx->b.predicate_drawing);
+
+		if (info->indirect) {
+			uint64_t indirect_va = r600_resource_va(&sctx->screen->b.b,
+								info->indirect);
+			si_cmd_draw_index_indirect(pm4, indirect_va, va, max_size,
+						   info->indirect_offset,
+						   sh_base_reg + SI_SGPR_BASE_VERTEX * 4,
+						   sh_base_reg + SI_SGPR_START_INSTANCE * 4,
+						   sctx->b.predicate_drawing);
+		} else {
+			va += info->start * ib->index_size;
+			si_cmd_draw_index_2(pm4, max_size, va, info->count,
+					    V_0287F0_DI_SRC_SEL_DMA,
+					    sctx->b.predicate_drawing);
+		}
 	} else {
-		uint32_t initiator = V_0287F0_DI_SRC_SEL_AUTO_INDEX;
-		initiator |= S_0287F0_USE_OPAQUE(!!info->count_from_stream_output);
-		si_cmd_draw_index_auto(pm4, info->count, initiator, sctx->b.predicate_drawing);
+		if (info->indirect) {
+			uint64_t indirect_va = r600_resource_va(&sctx->screen->b.b,
+								info->indirect);
+			si_cmd_draw_indirect(pm4, indirect_va, info->indirect_offset,
+					     sh_base_reg + SI_SGPR_BASE_VERTEX * 4,
+					     sh_base_reg + SI_SGPR_START_INSTANCE * 4,
+					     sctx->b.predicate_drawing);
+		} else {
+			si_cmd_draw_index_auto(pm4, info->count,
+					       V_0287F0_DI_SRC_SEL_AUTO_INDEX |
+					       S_0287F0_USE_OPAQUE(!!info->count_from_stream_output),
+					       sctx->b.predicate_drawing);
+		}
 	}
 
 	si_pm4_set_state(sctx, draw, pm4);
@@ -912,13 +936,32 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 
 const struct r600_atom si_atom_cache_flush = { si_emit_cache_flush, 13 }; /* number of CS dwords */
 
+static void si_get_draw_start_count(struct si_context *sctx,
+				    const struct pipe_draw_info *info,
+				    unsigned *start, unsigned *count)
+{ /* Writes the draw's start and count to *start / *count; for indirect draws they are read from the mapped indirect buffer. */
+	if (info->indirect) {
+		struct r600_resource *indirect =
+			(struct r600_resource*)info->indirect;
+		int *data = r600_buffer_map_sync_with_rings(&sctx->b, /* NOTE(review): the returned pointer is used unchecked */
+					indirect, PIPE_TRANSFER_READ);
+		data += info->indirect_offset/sizeof(int); /* byte offset converted to an int index */
+		*start = data[2]; /* third int at the indirect offset */
+		*count = data[0]; /* first int at the indirect offset */
+	} else {
+		*start = info->start;
+		*count = info->count;
+	}
+}
+
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct pipe_index_buffer ib = {};
 	uint32_t i;
 
-	if (!info->count && (info->indexed || !info->count_from_stream_output))
+	if (!info->count && !info->indirect &&
+	    (info->indexed || !info->count_from_stream_output))
 		return;
 
 	if (!sctx->ps_shader || !sctx->vs_shader)
@@ -940,8 +983,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 			unsigned out_offset, start, count, start_offset;
 			void *ptr;
 
-			start = info->start;
-			count = info->count;
+			si_get_draw_start_count(sctx, info, &start, &count);
 			start_offset = start * ib.index_size;
 
 			u_upload_alloc(sctx->b.uploader, start_offset, count * 2,
@@ -960,8 +1002,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		} else if (ib.user_buffer && !ib.buffer) {
 			unsigned start, count, start_offset;
 
-			start = info->start;
-			count = info->count;
+			si_get_draw_start_count(sctx, info, &start, &count);
 			start_offset = start * ib.index_size;
 
 			u_upload_data(sctx->b.uploader, start_offset, count * ib.index_size,
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 2bd2cb4..5d6da1f 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -70,19 +70,29 @@
 #define R600_TEXEL_PITCH_ALIGNMENT_MASK        0x7
 
 #define PKT3_NOP                               0x10
+#define PKT3_SET_BASE                          0x11
+#define PKT3_CLEAR_STATE                       0x12
+#define PKT3_INDEX_BUFFER_SIZE                 0x13
 #define PKT3_DISPATCH_DIRECT                   0x15
 #define PKT3_DISPATCH_INDIRECT                 0x16
 #define PKT3_OCCLUSION_QUERY                   0x1F /* new for CIK */
 #define PKT3_SET_PREDICATION                   0x20
 #define PKT3_COND_EXEC                         0x22
 #define PKT3_PRED_EXEC                         0x23
+#define PKT3_DRAW_INDIRECT                     0x24
+#define PKT3_DRAW_INDEX_INDIRECT               0x25
+#define PKT3_INDEX_BASE                        0x26
 #define PKT3_DRAW_INDEX_2                      0x27
 #define PKT3_CONTEXT_CONTROL                   0x28
 #define PKT3_INDEX_TYPE                        0x2A
+#define PKT3_DRAW_INDIRECT_MULTI               0x2C
 #define PKT3_DRAW_INDEX_AUTO                   0x2D
 #define PKT3_DRAW_INDEX_IMMD                   0x2E /* not on CIK */
 #define PKT3_NUM_INSTANCES                     0x2F
+#define PKT3_DRAW_INDEX_MULTI_AUTO             0x30
+#define PKT3_INDIRECT_BUFFER                   0x32
 #define PKT3_STRMOUT_BUFFER_UPDATE             0x34
+#define PKT3_DRAW_INDEX_OFFSET_2               0x35
 #define PKT3_WRITE_DATA                        0x37
 #define     PKT3_WRITE_DATA_DST_SEL(x)             ((x) << 8)
 #define     PKT3_WRITE_DATA_DST_SEL_REG            0
@@ -97,12 +107,12 @@
 #define PKT3_WRITE_DATA_ENGINE_SEL_ME              0
 #define PKT3_WRITE_DATA_ENGINE_SEL_PFP             1
 #define PKT3_WRITE_DATA_ENGINE_SEL_CE              2
+#define PKT3_DRAW_INDEX_INDIRECT_MULTI         0x38
 #define PKT3_MEM_SEMAPHORE                     0x39
 #define PKT3_MPEG_INDEX                        0x3A /* not on CIK */
 #define PKT3_WAIT_REG_MEM                      0x3C
 #define		WAIT_REG_MEM_EQUAL		3
 #define PKT3_MEM_WRITE                         0x3D /* not on CIK */
-#define PKT3_INDIRECT_BUFFER                   0x32
 #define PKT3_COPY_DATA			       0x40
 #define		COPY_DATA_SRC_SEL(x)		((x) & 0xf)
 #define			COPY_DATA_REG		0
-- 
1.8.3.2



More information about the mesa-dev mailing list