[Mesa-dev] [PATCH 41/42] radeonsi: add IB2 indirect buffer support for pm4 states

Marek Olšák maraeo at gmail.com
Sun Aug 30 12:12:11 PDT 2015


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_pm4.c | 48 ++++++++++++++++++++++++++++++++++-
 src/gallium/drivers/radeonsi/si_pm4.h |  5 ++++
 src/gallium/drivers/radeonsi/sid.h    |  3 ++-
 3 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index b06e92b..b1834af 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -107,6 +107,7 @@ void si_pm4_free_state_simple(struct si_pm4_state *state)
 {
 	for (int i = 0; i < state->nbo; ++i)
 		r600_resource_reference(&state->bo[i], NULL);
+	r600_resource_reference(&state->indirect_buffer, NULL);
 	FREE(state);
 }
 
@@ -133,7 +134,19 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
 				      state->bo_usage[i], state->bo_priority[i]);
 	}
 
-	radeon_emit_array(cs, state->pm4, state->ndw);
+	if (!state->indirect_buffer) {
+		radeon_emit_array(cs, state->pm4, state->ndw);
+	} else {
+		struct r600_resource *ib = state->indirect_buffer;
+
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib,
+					  RADEON_USAGE_READ, RADEON_PRIO_MIN);
+
+		radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+		radeon_emit(cs, ib->gpu_address);
+		radeon_emit(cs, (ib->gpu_address >> 32) & 0xffff);
+		radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
+	}
 }
 
 void si_pm4_emit_dirty(struct si_context *sctx)
@@ -153,3 +166,36 @@ void si_pm4_reset_emitted(struct si_context *sctx)
 {
 	memset(&sctx->emitted, 0, sizeof(sctx->emitted));
 }
+
+/* Upload the NOP-padded pm4 stream to state->indirect_buffer, which is left
+ * NULL for an empty or oversized state or when the allocation fails. */
+void si_pm4_upload_indirect_buffer(struct si_context *sctx,
+				   struct si_pm4_state *state)
+{
+	struct pipe_screen *screen = sctx->b.b.screen;
+	unsigned aligned_ndw = align(state->ndw, 8);
+	unsigned nop = sctx->screen->b.info.gfx_ib_pad_with_type2 ?
+		       0x80000000 : 0xffff1000; /* type2 : type3 nop packet */
+
+	/* only supported on CIK and later */
+	if (sctx->b.chip_class < CIK)
+		return;
+
+	r600_resource_reference(&state->indirect_buffer, NULL);
+	/* assert() vanishes under NDEBUG, so also bail out at runtime. */
+	assert(state->ndw && aligned_ndw <= SI_PM4_MAX_DW);
+	if (!state->ndw || aligned_ndw > SI_PM4_MAX_DW)
+		return;
+
+	state->indirect_buffer = (struct r600_resource*)
+		pipe_buffer_create(screen, PIPE_BIND_CUSTOM,
+				   PIPE_USAGE_DEFAULT, aligned_ndw * 4);
+	if (!state->indirect_buffer)
+		return;
+
+	/* Pad to a multiple of 8 DWs to meet CP fetch alignment requirements. */
+	for (unsigned i = state->ndw; i < aligned_ndw; i++)
+		state->pm4[i] = nop;
+	pipe_buffer_write(&sctx->b.b, &state->indirect_buffer->b.b,
+			  0, aligned_ndw * 4, state->pm4);
+}
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index efa2062..5282d00 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -39,6 +39,9 @@ enum chip_class;
 
 struct si_pm4_state
 {
+	/* optional indirect buffer */
+	struct r600_resource	*indirect_buffer;
+
 	/* PKT3_SET_*_REG handling */
 	unsigned	last_opcode;
 	unsigned	last_reg;
@@ -66,6 +69,8 @@ void si_pm4_add_bo(struct si_pm4_state *state,
 		   struct r600_resource *bo,
 		   enum radeon_bo_usage usage,
 		   enum radeon_bo_priority priority);
+void si_pm4_upload_indirect_buffer(struct si_context *sctx,
+				   struct si_pm4_state *state);
 
 void si_pm4_free_state_simple(struct si_pm4_state *state);
 void si_pm4_free_state(struct si_context *sctx,
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index cd6be73..4bb2457 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -94,7 +94,7 @@
 #define PKT3_DRAW_INDEX_IMMD                   0x2E /* not on CIK */
 #define PKT3_NUM_INSTANCES                     0x2F
 #define PKT3_DRAW_INDEX_MULTI_AUTO             0x30
-#define PKT3_INDIRECT_BUFFER                   0x32
+#define PKT3_INDIRECT_BUFFER_SI                0x32 /* not on CIK */
 #define PKT3_STRMOUT_BUFFER_UPDATE             0x34
 #define PKT3_DRAW_INDEX_OFFSET_2               0x35
 #define PKT3_DRAW_PREAMBLE                     0x36 /* new on CIK, required on GFX7.2 and later */
@@ -122,6 +122,7 @@
 #define PKT3_WAIT_REG_MEM                      0x3C
 #define		WAIT_REG_MEM_EQUAL		3
 #define PKT3_MEM_WRITE                         0x3D /* not on CIK */
+#define PKT3_INDIRECT_BUFFER_CIK               0x3F /* new on CIK */
 #define PKT3_COPY_DATA			       0x40
 #define		COPY_DATA_SRC_SEL(x)		((x) & 0xf)
 #define			COPY_DATA_REG		0
-- 
2.1.4



More information about the mesa-dev mailing list