[Mesa-dev] [PATCH 20/31] radeonsi: use TCS epilog for monolithic shaders

Mon Oct 31 22:11:07 UTC 2016

From: Nicolai Hähnle <nicolai.haehnle at amd.com>

For fixed function TCS, we keep the copying of VS outputs to TES inputs inside
the main function; the call to si_copy_tcs_inputs is moved accordingly.
---
 src/gallium/drivers/radeonsi/si_shader.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b33f54a..b7678e1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -68,20 +68,22 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
 				 struct lp_build_tgsi_context *bld_base,
 				 struct lp_build_emit_data *emit_data);
 
 static void si_dump_shader_key(unsigned shader, union si_shader_key *key,
 			       FILE *f);
 
 static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key);
 static void si_build_vs_epilog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key);
+static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
+					 union si_shader_part_key *key);
 static void si_build_ps_prolog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key);
 static void si_build_ps_epilog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key);
 
 /* Ideally pass the sample mask input to the PS epilog as v13, which
  * is its usual location, so that the shader doesn't have to add v_mov.
  */
 #define PS_EPILOG_SAMPLEMASK_MIN_LOC 13
 
@@ -2476,20 +2478,24 @@ handle_semantic:
 
 		if (pos_idx == shader->info.nr_pos_exports)
 			/* Specify that this is the last export */
 			pos_args[i][2] = uint->one;
 
 		lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
 				   ctx->voidt, pos_args[i], 9, 0);
 	}
 }
 
+/**
+ * Forward all outputs from the vertex shader to the TES. This is only used
+ * for the fixed function TCS.
+ */
 static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	LLVMValueRef invocation_id, rw_buffers, buffer, buffer_offset;
 	LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base;
 	uint64_t inputs;
 
 	invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
 
@@ -2630,20 +2636,22 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 					   stride - 4, byteoffset, tf_base, 20);
 	lp_build_endif(&if_ctx);
 }
 
 /* This only writes the tessellation factor levels. */
 static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
 
+	si_copy_tcs_inputs(bld_base);
+
 	rel_patch_id = get_rel_patch_id(ctx);
 	invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
 	tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
 
 	if (!ctx->no_epilog) {
 		/* Return epilog parameters from this function. */
 		LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 		LLVMValueRef ret = ctx->return_value;
 		LLVMValueRef rw_buffers, rw0, rw1, tf_soffset;
 		unsigned vgpr;
@@ -2672,21 +2680,20 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 		tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
 
 		vgpr = SI_TCS_NUM_USER_SGPR + 2;
 		ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
 		ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
 		ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
 		ctx->return_value = ret;
 		return;
 	}
 
-	si_copy_tcs_inputs(bld_base);
 	si_write_tess_factors(bld_base, rel_patch_id, invocation_id, tf_lds_offset);
 }
 
 static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct si_shader *shader = ctx->shader;
 	struct tgsi_shader_info *info = &shader->selector->info;
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	unsigned i, chan;
@@ -7172,20 +7179,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 		tgsi_dump(sel->tokens, 0);
 		si_dump_streamout(&sel->so);
 	}
 
 	si_init_shader_ctx(&ctx, sscreen, shader, tm);
 	ctx.no_prolog = is_monolithic;
 	ctx.no_epilog = is_monolithic;
 	ctx.separate_prolog = !is_monolithic;
 
 	if (ctx.type == PIPE_SHADER_VERTEX ||
+	    ctx.type == PIPE_SHADER_TESS_CTRL ||
 	    ctx.type == PIPE_SHADER_TESS_EVAL ||
 	    ctx.type == PIPE_SHADER_FRAGMENT) {
 		ctx.no_prolog = false;
 		ctx.no_epilog = false;
 	}
 
 	memset(shader->info.vs_output_param_offset, 0xff,
 	       sizeof(shader->info.vs_output_param_offset));
 
 	shader->info.uses_instanceid = sel->info.uses_instanceid;
@@ -7217,20 +7225,32 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 
 		if (need_epilog) {
 			union si_shader_part_key epilog_key;
 			si_get_vs_epilog_key(shader, &shader->key.vs.epilog, &epilog_key);
 			si_build_vs_epilog_function(&ctx, &epilog_key);
 			parts[need_prolog ? 2 : 1] = ctx.main_fn;
 		}
 
 		si_build_wrapper_function(&ctx, parts, 1 + need_prolog + need_epilog,
 					  need_prolog ? 1 : 0);
+	} else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) {
+		LLVMValueRef parts[2];
+		union si_shader_part_key epilog_key;
+
+		parts[0] = ctx.main_fn;
+
+		memset(&epilog_key, 0, sizeof(epilog_key));
+		epilog_key.tcs_epilog.states = shader->key.tcs.epilog;
+		si_build_tcs_epilog_function(&ctx, &epilog_key);
+		parts[1] = ctx.main_fn;
+
+		si_build_wrapper_function(&ctx, parts, 2, 0);
 	} else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_EVAL &&
 		   !shader->key.tes.as_es) {
 		LLVMValueRef parts[2];
 		union si_shader_part_key epilog_key;
 
 		parts[0] = ctx.main_fn;
 
 		si_get_vs_epilog_key(shader, &shader->key.tes.epilog, &epilog_key);
 		si_build_vs_epilog_function(&ctx, &epilog_key);
 		parts[1] = ctx.main_fn;
-- 
2.7.4