[Mesa-dev] [PATCH 21/23] radeonsi: use a single descriptor for the GSVS ring

Nicolai Hähnle nhaehnle at gmail.com
Wed Nov 30 13:35:23 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

We can hardcode all of the fields for swizzling in the geometry shader.

The advantage is that we use fewer descriptor slots and we no longer have to
update any of the (ring) descriptors when the geometry shader changes.
---
 src/gallium/drivers/radeonsi/si_pipe.h          |  1 -
 src/gallium/drivers/radeonsi/si_shader.c        | 70 ++++++++++++++++++++++---
 src/gallium/drivers/radeonsi/si_state.h         |  6 +--
 src/gallium/drivers/radeonsi/si_state_shaders.c | 37 +------------
 4 files changed, 64 insertions(+), 50 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 42cbecb..2409c85 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -315,21 +315,20 @@ struct si_context {
 	int			last_primitive_restart_en;
 	int			last_restart_index;
 	int			last_gs_out_prim;
 	int			last_prim;
 	int			last_multi_vgt_param;
 	int			last_rast_prim;
 	unsigned		last_sc_line_stipple;
 	int			last_vtx_reuse_depth;
 	int			current_rast_prim; /* primitive type after TES, GS */
 	bool			gs_tri_strip_adj_fix;
-	unsigned		last_gsvs_itemsize;
 
 	/* Scratch buffer */
 	struct r600_resource	*scratch_buffer;
 	bool			emit_scratch_reloc;
 	unsigned		scratch_waves;
 	unsigned		spi_tmpring_size;
 
 	struct r600_resource	*compute_scratch_buffer;
 
 	/* Emitted derived tessellation state. */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 8c1f458..fba98e4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5672,51 +5672,105 @@ static void create_function(struct si_shader_context *ctx)
 }
 
 /**
  * Load ESGS and GSVS ring buffer resource descriptors and save the variables
  * for later use.
  */
 static void preload_ring_buffers(struct si_shader_context *ctx)
 {
 	struct gallivm_state *gallivm =
 		ctx->soa.bld_base.base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
 
 	LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
 					    SI_PARAM_RW_BUFFERS);
 
 	if ((ctx->type == PIPE_SHADER_VERTEX &&
 	     ctx->shader->key.as_es) ||
 	    (ctx->type == PIPE_SHADER_TESS_EVAL &&
 	     ctx->shader->key.as_es) ||
 	    ctx->type == PIPE_SHADER_GEOMETRY) {
 		unsigned ring =
 			ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
 							     : SI_ES_RING_ESGS;
 		LLVMValueRef offset = lp_build_const_int32(gallivm, ring);
 
 		ctx->esgs_ring =
 			build_indexed_load_const(ctx, buf_ptr, offset);
 	}
 
 	if (ctx->shader->is_gs_copy_shader) {
-		LLVMValueRef offset = lp_build_const_int32(gallivm, SI_VS_RING_GSVS);
+		LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
 
 		ctx->gsvs_ring[0] =
 			build_indexed_load_const(ctx, buf_ptr, offset);
-	}
-	if (ctx->type == PIPE_SHADER_GEOMETRY) {
-		int i;
-		for (i = 0; i < 4; i++) {
-			LLVMValueRef offset = lp_build_const_int32(gallivm, SI_GS_RING_GSVS0 + i);
+	} else if (ctx->type == PIPE_SHADER_GEOMETRY) {
+		struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld;
+		LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
+		LLVMValueRef base_ring;
+
+		base_ring = build_indexed_load_const(ctx, buf_ptr, offset);
+
+		/* The conceptual layout of the GSVS ring is
+		 *   v0c0 .. vLc0 v0c1 .. vLc1 ..
+		 * but the real memory layout is swizzled across
+		 * threads:
+		 *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
+		 *   t16v0c0 ..
+		 * Override the buffer descriptor accordingly.
+		 */
+		LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2);
+		unsigned max_gsvs_emit_size = ctx->shader->selector->max_gsvs_emit_size;
+		unsigned num_records;
+
+		num_records = 64;
+		if (ctx->screen->b.chip_class >= VI)
+			num_records *= max_gsvs_emit_size;
+
+		for (unsigned stream = 0; stream < 4; ++stream) {
+			LLVMValueRef ring, tmp;
+
+			if (!ctx->shader->selector->info.num_stream_output_components[stream])
+				continue;
 
-			ctx->gsvs_ring[i] =
-				build_indexed_load_const(ctx, buf_ptr, offset);
+			ring = LLVMBuildBitCast(builder, base_ring, v2i64, "");
+			tmp = LLVMBuildExtractElement(builder, ring, uint->zero, "");
+			tmp = LLVMBuildAdd(builder, tmp,
+					   LLVMConstInt(ctx->i64,
+							max_gsvs_emit_size * 64 * stream, 0), "");
+			ring = LLVMBuildInsertElement(builder, ring, tmp, uint->zero, "");
+			ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, "");
+			tmp = LLVMBuildExtractElement(builder, ring, uint->one, "");
+			tmp = LLVMBuildOr(builder, tmp,
+				LLVMConstInt(ctx->i32,
+					     S_008F04_STRIDE(max_gsvs_emit_size) |
+					     S_008F04_SWIZZLE_ENABLE(1), 0), "");
+			ring = LLVMBuildInsertElement(builder, ring, tmp, uint->one, "");
+			ring = LLVMBuildInsertElement(builder, ring,
+					LLVMConstInt(ctx->i32, num_records, 0),
+					LLVMConstInt(ctx->i32, 2, 0), "");
+			ring = LLVMBuildInsertElement(builder, ring,
+				LLVMConstInt(ctx->i32,
+					     S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+					     S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+					     S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+					     S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+					     S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+					     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+					     S_008F0C_ELEMENT_SIZE(1) | /* element_size = 4 (bytes) */
+					     S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
+					     S_008F0C_ADD_TID_ENABLE(1),
+					     0),
+				LLVMConstInt(ctx->i32, 3, 0), "");
+			ring = LLVMBuildBitCast(builder, ring, ctx->v16i8, "");
+
+			ctx->gsvs_ring[stream] = ring;
 		}
 	}
 }
 
 static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
 					 LLVMValueRef param_rw_buffers,
 					 unsigned param_pos_fixed_pt)
 {
 	struct lp_build_tgsi_context *bld_base =
 		&ctx->soa.bld_base;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 3a9f0cf..9ea52ea 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -160,25 +160,21 @@ struct si_shader_data {
 };
 
 /* Private read-write buffer slots. */
 enum {
 	SI_HS_RING_TESS_FACTOR,
 	SI_HS_RING_TESS_OFFCHIP,
 
 	SI_ES_RING_ESGS,
 	SI_GS_RING_ESGS,
 
-	SI_GS_RING_GSVS0,
-	SI_GS_RING_GSVS1,
-	SI_GS_RING_GSVS2,
-	SI_GS_RING_GSVS3,
-	SI_VS_RING_GSVS,
+	SI_RING_GSVS,
 
 	SI_VS_STREAMOUT_BUF0,
 	SI_VS_STREAMOUT_BUF1,
 	SI_VS_STREAMOUT_BUF2,
 	SI_VS_STREAMOUT_BUF3,
 
 	SI_HS_CONST_DEFAULT_TESS_LEVELS,
 	SI_VS_CONST_CLIP_PLANES,
 	SI_PS_CONST_POLY_STIPPLE,
 	SI_PS_CONST_SAMPLE_POSITIONS,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index ea71569..1e9f5f0 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2032,61 +2032,28 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 	/* Set ring bindings. */
 	if (sctx->esgs_ring) {
 		si_set_ring_buffer(&sctx->b.b, SI_ES_RING_ESGS,
 				   sctx->esgs_ring, 0, sctx->esgs_ring->width0,
 				   true, true, 4, 64, 0);
 		si_set_ring_buffer(&sctx->b.b, SI_GS_RING_ESGS,
 				   sctx->esgs_ring, 0, sctx->esgs_ring->width0,
 				   false, false, 0, 0, 0);
 	}
 	if (sctx->gsvs_ring) {
-		si_set_ring_buffer(&sctx->b.b, SI_VS_RING_GSVS,
+		si_set_ring_buffer(&sctx->b.b, SI_RING_GSVS,
 				   sctx->gsvs_ring, 0, sctx->gsvs_ring->width0,
 				   false, false, 0, 0, 0);
-
-		/* Also update SI_GS_RING_GSVSi descriptors. */
-		sctx->last_gsvs_itemsize = 0;
 	}
 
 	return true;
 }
 
-static void si_update_gsvs_ring_bindings(struct si_context *sctx)
-{
-	unsigned gsvs_itemsize = sctx->gs_shader.cso->max_gsvs_emit_size;
-	uint64_t offset;
-
-	if (!sctx->gsvs_ring || gsvs_itemsize == sctx->last_gsvs_itemsize)
-		return;
-
-	sctx->last_gsvs_itemsize = gsvs_itemsize;
-
-	si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS0,
-			   sctx->gsvs_ring, gsvs_itemsize,
-			   64, true, true, 4, 16, 0);
-
-	offset = gsvs_itemsize * 64;
-	si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS1,
-			   sctx->gsvs_ring, gsvs_itemsize,
-			   64, true, true, 4, 16, offset);
-
-	offset = (gsvs_itemsize * 2) * 64;
-	si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS2,
-			   sctx->gsvs_ring, gsvs_itemsize,
-			   64, true, true, 4, 16, offset);
-
-	offset = (gsvs_itemsize * 3) * 64;
-	si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS3,
-			   sctx->gsvs_ring, gsvs_itemsize,
-			   64, true, true, 4, 16, offset);
-}
-
 /**
  * @returns 1 if \p sel has been updated to use a new scratch buffer
  *          0 if not
  *          < 0 if there was a failure
  */
 static int si_update_scratch_buffer(struct si_context *sctx,
 				    struct si_shader *shader)
 {
 	uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
 	int r;
@@ -2462,22 +2429,20 @@ bool si_update_shaders(struct si_context *sctx)
 	if (sctx->gs_shader.cso) {
 		r = si_shader_select(ctx, &sctx->gs_shader);
 		if (r)
 			return false;
 		si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
 		si_pm4_bind_state(sctx, vs, sctx->gs_shader.cso->gs_copy_shader->pm4);
 		si_update_so(sctx, sctx->gs_shader.cso);
 
 		if (!si_update_gs_ring_buffers(sctx))
 			return false;
-
-		si_update_gsvs_ring_bindings(sctx);
 	} else {
 		si_pm4_bind_state(sctx, gs, NULL);
 		si_pm4_bind_state(sctx, es, NULL);
 	}
 
 	si_update_vgt_shader_config(sctx);
 
 	if (sctx->ps_shader.cso) {
 		unsigned db_shader_control;
 
-- 
2.7.4



More information about the mesa-dev mailing list