[Mesa-dev] [PATCH 13/23] radeonsi: group streamout writes by vertex stream

Nicolai Hähnle nhaehnle at gmail.com
Wed Nov 30 13:35:15 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d26c36a..fd2ed42 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2137,20 +2137,22 @@ static void emit_streamout_output(struct si_shader_context *ctx,
 	unsigned start = stream_out->start_component;
 	unsigned num_comps = stream_out->num_components;
 	LLVMValueRef out[4];
 
 	assert(num_comps && num_comps <= 4);
 	if (!num_comps || num_comps > 4)
 		return;
 
 	/* Load the output as int. */
 	for (int j = 0; j < num_comps; j++) {
+		assert(stream_out->stream == shader_out->vertex_stream[start + j]);
+
 		out[j] = LLVMBuildBitCast(builder,
 					  shader_out->values[start + j],
 				ctx->i32, "");
 	}
 
 	/* Pack the output. */
 	LLVMValueRef vdata = NULL;
 
 	switch (num_comps) {
 	case 1: /* as i32 */
@@ -2173,21 +2175,22 @@ static void emit_streamout_output(struct si_shader_context *ctx,
 				   LLVMConstInt(ctx->i32, 0, 0),
 				   stream_out->dst_offset * 4);
 }
 
 /* On SI, the vertex shader is responsible for writing streamout data
  * to buffers. */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
 				   struct si_shader_output_values *outputs,
 				   unsigned noutput)
 {
-	struct pipe_stream_output_info *so = &ctx->shader->selector->so;
+	struct si_shader_selector *sel = ctx->shader->selector;
+	struct pipe_stream_output_info *so = &sel->so;
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
 	int i;
 	struct lp_build_if_state if_ctx;
 
 	/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
 	LLVMValueRef so_vtx_count =
 		unpack_param(ctx, ctx->param_streamout_config, 16, 7);
 
 	LLVMValueRef tid = get_thread_id(ctx);
@@ -2196,21 +2199,34 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx,
 	LLVMValueRef can_emit =
 		LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
 
 	LLVMValueRef stream_id =
 		unpack_param(ctx, ctx->param_streamout_config, 24, 2);
 
 	/* Emit the streamout code conditionally. This actually avoids
 	 * out-of-bounds buffer access. The hw tells us via the SGPR
 	 * (so_vtx_count) which threads are allowed to emit streamout data. */
 	lp_build_if(&if_ctx, gallivm, can_emit);
-	{
+
+	for (int stream = 0; stream < 4; ++stream) {
+		struct lp_build_if_state if_ctx_stream;
+
+		if (!sel->info.num_stream_output_components[stream])
+			continue;
+
+		LLVMValueRef is_stream =
+			LLVMBuildICmp(builder, LLVMIntEQ,
+				      stream_id,
+				      lp_build_const_int32(gallivm, stream), "");
+
+		lp_build_if(&if_ctx_stream, gallivm, is_stream);
+
 		/* The buffer offset is computed as follows:
 		 *   ByteOffset = streamout_offset[buffer_id]*4 +
 		 *                (streamout_write_index + thread_id)*stride[buffer_id] +
 		 *                attrib_offset
                  */
 
 		LLVMValueRef so_write_index =
 			LLVMGetParam(ctx->main_fn,
 				     ctx->param_streamout_write_index);
 
@@ -2238,36 +2254,32 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx,
 			so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->i32, 4, 0), "");
 
 			so_write_offset[i] = LLVMBuildMul(builder, so_write_index,
 							  LLVMConstInt(ctx->i32, so->stride[i]*4, 0), "");
 			so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, "");
 		}
 
 		/* Write streamout data. */
 		for (i = 0; i < so->num_outputs; i++) {
 			unsigned reg = so->output[i].register_index;
-			unsigned stream = so->output[i].stream;
-			struct lp_build_if_state if_ctx_stream;
 
 			if (reg >= noutput)
 				continue;
 
-			LLVMValueRef can_emit_stream =
-				LLVMBuildICmp(builder, LLVMIntEQ,
-					      stream_id,
-					      lp_build_const_int32(gallivm, stream), "");
+			if (stream != so->output[i].stream)
+				continue;
 
-			lp_build_if(&if_ctx_stream, gallivm, can_emit_stream);
 			emit_streamout_output(ctx, so_buffers, so_write_offset,
 					      &so->output[i], &outputs[reg]);
-			lp_build_endif(&if_ctx_stream);
 		}
+
+		lp_build_endif(&if_ctx_stream);
 	}
 	lp_build_endif(&if_ctx);
 }
 
 
 /* Generate export instructions for hardware VS shader stage */
 static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
 			      struct si_shader_output_values *outputs,
 			      unsigned noutput)
 {
-- 
2.7.4



More information about the mesa-dev mailing list