[Mesa-dev] [PATCH 6/9] radeonsi: don't read the LS output vertex stride from an SGPR in LS

Marek Olšák maraeo at gmail.com
Wed Sep 6 17:03:57 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

With the stride known at compile time in LS, the compiler is now able to
generate ds_write2_b64 instead of ds_write2_b32.

This saves 20 bytes in one shader binary (a shader having only 1 output).
---
 src/gallium/drivers/radeonsi/si_shader.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 32a6fa0..7c3bd8b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -391,20 +391,38 @@ static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
 		ctx->shader->selector ?
 		ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : 0;
 
 	/* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
 	if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices)
 		return LLVMConstInt(ctx->i32, tcs_out_vertices, 0);
 
 	return unpack_param(ctx, ctx->param_tcs_offchip_layout, 6, 6);
 }
 
+static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
+{ /* Returns the per-vertex stride, in dwords, used to address TCS inputs in LDS. */
+	unsigned stride; /* slot count derived from the outputs_written mask (VS case only) */
+
+	switch (ctx->type) {
+	case PIPE_SHADER_VERTEX: /* VS (running as LS): stride is emitted as an i32 constant */
+		stride = util_last_bit64(ctx->shader->selector->outputs_written);
+		return LLVMConstInt(ctx->i32, stride * 4, 0); /* 4 dwords per slot (presumably one vec4 each) */
+
+	case PIPE_SHADER_TESS_CTRL: /* TCS: stride is still read from the vs_state_bits parameter */
+		return unpack_param(ctx, ctx->param_vs_state_bits, 24, 8); /* NOTE(review): presumably an 8-bit field at bit 24 - confirm against unpack_param */
+
+	default: /* any other shader type trips the assert */
+		assert(0);
+		return NULL;
+	}
+}
+
 static LLVMValueRef get_instance_index_for_fetch(
 	struct si_shader_context *ctx,
 	unsigned param_start_instance, LLVMValueRef divisor)
 {
 	struct gallivm_state *gallivm = &ctx->gallivm;
 
 	LLVMValueRef result = ctx->abi.instance_id;
 
 	/* The division must be done before START_INSTANCE is added. */
 	if (divisor != ctx->i32_1)
@@ -1040,21 +1058,21 @@ static LLVMValueRef desc_from_addr_base64k(struct si_shader_context *ctx,
 }
 
 static LLVMValueRef fetch_input_tcs(
 	struct lp_build_tgsi_context *bld_base,
 	const struct tgsi_full_src_register *reg,
 	enum tgsi_opcode_type type, unsigned swizzle)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMValueRef dw_addr, stride;
 
-	stride = unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+	stride = get_tcs_in_vertex_dw_stride(ctx);
 	dw_addr = get_tcs_in_current_patch_offset(ctx);
 	dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
 
 	return lds_load(bld_base, type, swizzle, dw_addr);
 }
 
 static LLVMValueRef fetch_output_tcs(
 		struct lp_build_tgsi_context *bld_base,
 		const struct tgsi_full_src_register *reg,
 		enum tgsi_opcode_type type, unsigned swizzle)
@@ -2603,21 +2621,21 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMValueRef invocation_id, buffer, buffer_offset;
 	LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base;
 	uint64_t inputs;
 
 	invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
 	buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
 	buffer_offset = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
 
-	lds_vertex_stride = unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+	lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
 	lds_vertex_offset = LLVMBuildMul(gallivm->builder, invocation_id,
 	                                 lds_vertex_stride, "");
 	lds_base = get_tcs_in_current_patch_offset(ctx);
 	lds_base = LLVMBuildAdd(gallivm->builder, lds_base, lds_vertex_offset, "");
 
 	inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;
 	while (inputs) {
 		unsigned i = u_bit_scan64(&inputs);
 
 		LLVMValueRef lds_ptr = LLVMBuildAdd(gallivm->builder, lds_base,
@@ -3014,22 +3032,21 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
 
 static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct si_shader *shader = ctx->shader;
 	struct tgsi_shader_info *info = &shader->selector->info;
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	unsigned i, chan;
 	LLVMValueRef vertex_id = LLVMGetParam(ctx->main_fn,
 					      ctx->param_rel_auto_id);
-	LLVMValueRef vertex_dw_stride =
-		unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+	LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx);
 	LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id,
 						 vertex_dw_stride, "");
 
 	/* Write outputs to LDS. The next shader (TCS aka HS) will read
 	 * its inputs from it. */
 	for (i = 0; i < info->num_outputs; i++) {
 		LLVMValueRef *out_ptr = ctx->outputs[i];
 		unsigned name = info->output_semantic_name[i];
 		unsigned index = info->output_semantic_index[i];
 
-- 
2.7.4



More information about the mesa-dev mailing list