[Mesa-dev] [PATCH 24/61] radeonsi/gfx9: add VS prolog support for merged LS-HS

Marek Olšák maraeo at gmail.com
Mon Apr 24 08:45:21 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

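HS input VGPRs must be reserved: in a merged LS-HS shader they come before
the VS input VGPRs, so the VS prolog has to declare them as parameters and
pass them through to its outputs unchanged.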
---
 src/gallium/drivers/radeonsi/si_shader.c | 29 +++++++++++++++++++----------
 src/gallium/drivers/radeonsi/si_shader.h |  2 ++
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index dab95e2..f0e3f0c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -7207,20 +7207,23 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info,
 				 unsigned num_input_sgprs,
 				 const struct si_vs_prolog_bits *prolog_key,
 				 struct si_shader *shader_out,
 				 union si_shader_part_key *key)
 {
 	memset(key, 0, sizeof(*key));
 	key->vs_prolog.states = *prolog_key;
 	key->vs_prolog.num_input_sgprs = num_input_sgprs;
 	key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
 
+	if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL)
+		key->vs_prolog.num_merged_next_stage_vgprs = 2;
+
 	/* Set the instanceID flag. */
 	for (unsigned i = 0; i < info->num_inputs; i++)
 		if (key->vs_prolog.states.instance_divisors[i])
 			shader_out->info.uses_instanceid = true;
 }
 
 /**
  * Compute the VS epilog key, which contains all the information needed to
  * build the VS epilog function, and set the PrimitiveID output offset.
  */
@@ -7990,43 +7993,47 @@ out:
  *   (InstanceID + StartInstance),
  *   (InstanceID / 2 + StartInstance)
  */
 static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key)
 {
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMTypeRef *params, *returns;
 	LLVMValueRef ret, func;
 	int last_sgpr, num_params, num_returns, i;
+	unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs +
+				 key->vs_prolog.num_merged_next_stage_vgprs;
+	unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4;
+	unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs +
+				      num_input_vgprs;
+	unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
 
-	ctx->param_vertex_id = key->vs_prolog.num_input_sgprs;
-	ctx->param_instance_id = key->vs_prolog.num_input_sgprs + 3;
+	ctx->param_vertex_id = first_vs_vgpr;
+	ctx->param_instance_id = first_vs_vgpr + 3;
 
 	/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
-	params = alloca((key->vs_prolog.num_input_sgprs + 4) *
-			sizeof(LLVMTypeRef));
-	returns = alloca((key->vs_prolog.num_input_sgprs + 4 +
-			  key->vs_prolog.last_input + 1) *
+	params = alloca(num_all_input_regs * sizeof(LLVMTypeRef));
+	returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) *
 			 sizeof(LLVMTypeRef));
 	num_params = 0;
 	num_returns = 0;
 
 	/* Declare input and output SGPRs. */
 	num_params = 0;
 	for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
 		params[num_params++] = ctx->i32;
 		returns[num_returns++] = ctx->i32;
 	}
 	last_sgpr = num_params - 1;
 
-	/* 4 preloaded VGPRs (outputs must be floats) */
-	for (i = 0; i < 4; i++) {
+	/* Preloaded VGPRs (outputs must be floats) */
+	for (i = 0; i < num_input_vgprs; i++) {
 		params[num_params++] = ctx->i32;
 		returns[num_returns++] = ctx->f32;
 	}
 
 	/* Vertex load indices. */
 	for (i = 0; i <= key->vs_prolog.last_input; i++)
 		returns[num_returns++] = ctx->f32;
 
 	/* Create the function. */
 	si_create_function(ctx, "vs_prolog", returns, num_returns, params,
@@ -8034,41 +8041,43 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 	func = ctx->main_fn;
 
 	/* Copy inputs to outputs. This should be no-op, as the registers match,
 	 * but it will prevent the compiler from overwriting them unintentionally.
 	 */
 	ret = ctx->return_value;
 	for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
 		LLVMValueRef p = LLVMGetParam(func, i);
 		ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
 	}
-	for (i = num_params - 4; i < num_params; i++) {
+	for (; i < num_params; i++) {
 		LLVMValueRef p = LLVMGetParam(func, i);
 		p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, "");
 		ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
 	}
 
 	/* Compute vertex load indices from instance divisors. */
 	for (i = 0; i <= key->vs_prolog.last_input; i++) {
 		unsigned divisor = key->vs_prolog.states.instance_divisors[i];
 		LLVMValueRef index;
 
 		if (divisor) {
 			/* InstanceID / Divisor + StartInstance */
 			index = get_instance_index_for_fetch(ctx,
+							     user_sgpr_base +
 							     SI_SGPR_START_INSTANCE,
 							     divisor);
 		} else {
 			/* VertexID + BaseVertex */
 			index = LLVMBuildAdd(gallivm->builder,
 					     LLVMGetParam(func, ctx->param_vertex_id),
-					     LLVMGetParam(func, SI_SGPR_BASE_VERTEX), "");
+					     LLVMGetParam(func, user_sgpr_base +
+								SI_SGPR_BASE_VERTEX), "");
 		}
 
 		index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, "");
 		ret = LLVMBuildInsertValue(gallivm->builder, ret, index,
 					   num_params++, "");
 	}
 
 	si_llvm_build_ret(ctx, ret);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 1fee044..afbe547 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -345,20 +345,22 @@ struct si_ps_epilog_bits {
 	unsigned	alpha_func:3;
 	unsigned	alpha_to_one:1;
 	unsigned	poly_line_smoothing:1;
 	unsigned	clamp_color:1;
 };
 
 union si_shader_part_key {
 	struct {
 		struct si_vs_prolog_bits states;
 		unsigned	num_input_sgprs:6;
+		/* For merged stages such as LS-HS, HS input VGPRs are first. */
+		unsigned	num_merged_next_stage_vgprs:3;
 		unsigned	last_input:4;
 	} vs_prolog;
 	struct {
 		struct si_vs_epilog_bits states;
 		unsigned	prim_id_param_offset:5;
 	} vs_epilog;
 	struct {
 		struct si_tcs_epilog_bits states;
 	} tcs_epilog;
 	struct {
-- 
2.7.4



More information about the mesa-dev mailing list