[Mesa-dev] [PATCH 14/23] radeonsi: pull iteration over vertex streams into GS copy shader logic

Nicolai Hähnle nhaehnle at gmail.com
Wed Nov 30 13:35:16 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

The iteration over vertex streams is only needed for GS copy shaders;
normal vertex shaders always emit streamout data for stream 0.
---
 src/gallium/drivers/radeonsi/si_shader.c | 62 +++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index fd2ed42..cd2fd09 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2169,64 +2169,50 @@ static void emit_streamout_output(struct si_shader_context *ctx,
 		break;
 	}
 
 	build_tbuffer_store_dwords(ctx, so_buffers[buf_idx],
 				   vdata, num_comps,
 				   so_write_offsets[buf_idx],
 				   LLVMConstInt(ctx->i32, 0, 0),
 				   stream_out->dst_offset * 4);
 }
 
-/* On SI, the vertex shader is responsible for writing streamout data
- * to buffers. */
+/**
+ * Write streamout data to buffers for vertex stream @p stream (different
+ * vertex streams can occur for GS copy shaders).
+ */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
 				   struct si_shader_output_values *outputs,
-				   unsigned noutput)
+				   unsigned noutput, unsigned stream)
 {
 	struct si_shader_selector *sel = ctx->shader->selector;
 	struct pipe_stream_output_info *so = &sel->so;
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
 	int i;
 	struct lp_build_if_state if_ctx;
 
 	/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
 	LLVMValueRef so_vtx_count =
 		unpack_param(ctx, ctx->param_streamout_config, 16, 7);
 
 	LLVMValueRef tid = get_thread_id(ctx);
 
 	/* can_emit = tid < so_vtx_count; */
 	LLVMValueRef can_emit =
 		LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
 
-	LLVMValueRef stream_id =
-		unpack_param(ctx, ctx->param_streamout_config, 24, 2);
-
 	/* Emit the streamout code conditionally. This actually avoids
 	 * out-of-bounds buffer access. The hw tells us via the SGPR
 	 * (so_vtx_count) which threads are allowed to emit streamout data. */
 	lp_build_if(&if_ctx, gallivm, can_emit);
-
-	for (int stream = 0; stream < 4; ++stream) {
-		struct lp_build_if_state if_ctx_stream;
-
-		if (!sel->info.num_stream_output_components[stream])
-			continue;
-
-		LLVMValueRef is_stream =
-			LLVMBuildICmp(builder, LLVMIntEQ,
-				      stream_id,
-				      lp_build_const_int32(gallivm, stream), "");
-
-		lp_build_if(&if_ctx_stream, gallivm, is_stream);
-
+	{
 		/* The buffer offset is computed as follows:
 		 *   ByteOffset = streamout_offset[buffer_id]*4 +
 		 *                (streamout_write_index + thread_id)*stride[buffer_id] +
 		 *                attrib_offset
                  */
 
 		LLVMValueRef so_write_index =
 			LLVMGetParam(ctx->main_fn,
 				     ctx->param_streamout_write_index);
 
@@ -2264,22 +2250,20 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx,
 
 			if (reg >= noutput)
 				continue;
 
 			if (stream != so->output[i].stream)
 				continue;
 
 			emit_streamout_output(ctx, so_buffers, so_write_offset,
 					      &so->output[i], &outputs[reg]);
 		}
-
-		lp_build_endif(&if_ctx_stream);
 	}
 	lp_build_endif(&if_ctx);
 }
 
 
 /* Generate export instructions for hardware VS shader stage */
 static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
 			      struct si_shader_output_values *outputs,
 			      unsigned noutput)
 {
@@ -2828,21 +2812,21 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
 
 	/* Return the primitive ID from the LLVM function. */
 	ctx->return_value =
 		LLVMBuildInsertValue(gallivm->builder,
 				     ctx->return_value,
 				     bitcast(bld_base, TGSI_TYPE_FLOAT,
 					     get_primitive_id(bld_base, 0)),
 				     VS_EPILOG_PRIMID_LOC, "");
 
 	if (ctx->shader->selector->so.num_outputs)
-		si_llvm_emit_streamout(ctx, outputs, i);
+		si_llvm_emit_streamout(ctx, outputs, i, 0);
 	si_llvm_export_vs(bld_base, outputs, i);
 	FREE(outputs);
 }
 
 struct si_ps_exports {
 	unsigned num;
 	LLVMValueRef args[10][9];
 };
 
 unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
@@ -6203,20 +6187,21 @@ static void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
 /* Generate code for the hardware VS shader stage to go with a geometry shader */
 struct si_shader *
 si_generate_gs_copy_shader(struct si_screen *sscreen,
 			   LLVMTargetMachineRef tm,
 			   struct si_shader_selector *gs_selector,
 			   struct pipe_debug_callback *debug)
 {
 	struct si_shader_context ctx;
 	struct si_shader *shader;
 	struct gallivm_state *gallivm = &ctx.gallivm;
+	LLVMBuilderRef builder;
 	struct lp_build_tgsi_context *bld_base = &ctx.soa.bld_base;
 	struct lp_build_context *uint = &bld_base->uint_bld;
 	struct si_shader_output_values *outputs;
 	struct tgsi_shader_info *gsinfo = &gs_selector->info;
 	LLVMValueRef args[9];
 	int i, r;
 
 	outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
 
 	if (!outputs)
@@ -6228,36 +6213,46 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 		return NULL;
 	}
 
 
 	shader->selector = gs_selector;
 	shader->is_gs_copy_shader = true;
 
 	si_init_shader_ctx(&ctx, sscreen, shader, tm);
 	ctx.type = PIPE_SHADER_VERTEX;
 
+	builder = gallivm->builder;
+
 	create_meta_data(&ctx);
 	create_function(&ctx);
 	preload_ring_buffers(&ctx);
 
 	args[0] = ctx.gsvs_ring[0];
 	args[1] = lp_build_mul_imm(uint,
 				   LLVMGetParam(ctx.main_fn,
 						ctx.param_vertex_id),
 				   4);
 	args[3] = uint->zero;
 	args[4] = uint->one;  /* OFFEN */
 	args[5] = uint->zero; /* IDXEN */
 	args[6] = uint->one;  /* GLC */
 	args[7] = uint->one;  /* SLC */
 	args[8] = uint->zero; /* TFE */
 
+	/* Fetch the vertex stream ID. */
+	LLVMValueRef stream_id;
+
+	if (gs_selector->so.num_outputs)
+		stream_id = unpack_param(&ctx, ctx.param_streamout_config, 24, 2);
+	else
+		stream_id = uint->zero;
+
 	/* Fetch vertex data from GSVS ring */
 	for (i = 0; i < gsinfo->num_outputs; ++i) {
 		unsigned chan;
 
 		outputs[i].semantic_name = gsinfo->output_semantic_name[i];
 		outputs[i].semantic_index = gsinfo->output_semantic_index[i];
 
 		for (chan = 0; chan < 4; chan++) {
 			outputs[i].vertex_stream[chan] =
 				(gsinfo->output_streams[i] >> (2 * chan)) & 3;
@@ -6269,22 +6264,39 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 			outputs[i].values[chan] =
 				LLVMBuildBitCast(gallivm->builder,
 						 lp_build_intrinsic(gallivm->builder,
 								 "llvm.SI.buffer.load.dword.i32.i32",
 								 ctx.i32, args, 9,
 								 LP_FUNC_ATTR_READONLY),
 						 ctx.f32, "");
 		}
 	}
 
-	if (gs_selector->so.num_outputs)
-		si_llvm_emit_streamout(&ctx, outputs, gsinfo->num_outputs);
+	if (gs_selector->so.num_outputs) {
+		for (int stream = 0; stream < 4; stream++) {
+			struct lp_build_if_state if_ctx_stream;
+
+			if (!gsinfo->num_stream_output_components[stream])
+				continue;
+
+			LLVMValueRef is_stream =
+				LLVMBuildICmp(builder, LLVMIntEQ,
+					      stream_id,
+					      lp_build_const_int32(gallivm, stream), "");
+
+			lp_build_if(&if_ctx_stream, gallivm, is_stream);
+			si_llvm_emit_streamout(&ctx, outputs,
+					       gsinfo->num_outputs,
+					       stream);
+			lp_build_endif(&if_ctx_stream);
+		}
+	}
 	si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
 
 	LLVMBuildRetVoid(gallivm->builder);
 
 	/* Dump LLVM IR before any optimization passes */
 	if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
 	    r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
 		LLVMDumpModule(bld_base->base.gallivm->module);
 
 	si_llvm_finalize_module(&ctx,
-- 
2.7.4



More information about the mesa-dev mailing list