[Mesa-dev] [PATCH 14/31] radeonsi: set num_input_vgprs for fragment shaders in create_function

Nicolai Hähnle nhaehnle at gmail.com
Mon Oct 31 22:11:01 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

Set num_input_vgprs for fragment shaders in create_function as well, so that
the prolog generated for monolithic fragment shaders will have the right
signature.
---
 src/gallium/drivers/radeonsi/si_shader.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 447293c..b15c60d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5488,20 +5488,21 @@ static void declare_tess_lds(struct si_shader_context *ctx)
 
 static void create_function(struct si_shader_context *ctx)
 {
 	struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	struct si_shader *shader = ctx->shader;
 	LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32;
 	LLVMTypeRef returns[16+32*4];
 	unsigned i, last_sgpr, num_params, num_return_sgprs;
 	unsigned num_returns = 0;
+	unsigned num_prolog_vgprs = 0;
 
 	v3i32 = LLVMVectorType(ctx->i32, 3);
 
 	params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
 	params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
 	params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
 	params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
 	params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
 
 	switch (ctx->type) {
@@ -5538,20 +5539,22 @@ static void create_function(struct si_shader_context *ctx)
 		params[ctx->param_vs_prim_id = num_params++] = ctx->i32;
 		params[ctx->param_instance_id = num_params++] = ctx->i32;
 
 		if (!ctx->no_prolog &&
 		    !ctx->is_gs_copy_shader) {
 			/* Vertex load indices. */
 			ctx->param_vertex_index0 = num_params;
 
 			for (i = 0; i < shader->selector->info.num_inputs; i++)
 				params[num_params++] = ctx->i32;
+
+			num_prolog_vgprs += shader->selector->info.num_inputs;
 		}
 
 		if (!ctx->no_epilog &&
 		    !ctx->is_gs_copy_shader) {
 			/* PrimitiveID output. */
 			if (!shader->key.vs.as_es && !shader->key.vs.as_ls)
 				for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
 					returns[num_returns++] = ctx->f32;
 		}
 		break;
@@ -5637,34 +5640,37 @@ static void create_function(struct si_shader_context *ctx)
 		params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
 		params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32;
 		params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32;
 		params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32;
 		params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32;
 		params[SI_PARAM_POS_X_FLOAT] = ctx->f32;
 		params[SI_PARAM_POS_Y_FLOAT] = ctx->f32;
 		params[SI_PARAM_POS_Z_FLOAT] = ctx->f32;
 		params[SI_PARAM_POS_W_FLOAT] = ctx->f32;
 		params[SI_PARAM_FRONT_FACE] = ctx->i32;
+		shader->info.face_vgpr_index = 20;
 		params[SI_PARAM_ANCILLARY] = ctx->i32;
 		params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
 		params[SI_PARAM_POS_FIXED_PT] = ctx->i32;
 		num_params = SI_PARAM_POS_FIXED_PT+1;
 
 		if (!ctx->no_prolog) {
 			/* Color inputs from the prolog. */
 			if (shader->selector->info.colors_read) {
 				unsigned num_color_elements =
 					util_bitcount(shader->selector->info.colors_read);
 
 				assert(num_params + num_color_elements <= ARRAY_SIZE(params));
 				for (i = 0; i < num_color_elements; i++)
 					params[num_params++] = ctx->f32;
+
+				num_prolog_vgprs += num_color_elements;
 			}
 		}
 
 		if (!ctx->no_epilog) {
 			/* Outputs for the epilog. */
 			num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
 			num_returns =
 				num_return_sgprs +
 				util_bitcount(shader->selector->info.colors_written) * 4 +
 				shader->selector->info.writes_z +
@@ -5733,26 +5739,25 @@ static void create_function(struct si_shader_context *ctx)
 				      "amdgpu-max-work-group-size",
 				      max_work_group_size);
 	}
 
 	shader->info.num_input_sgprs = 0;
 	shader->info.num_input_vgprs = 0;
 
 	for (i = 0; i <= last_sgpr; ++i)
 		shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4;
 
-	/* Unused fragment shader inputs are eliminated by the compiler,
-	 * so we don't know yet how many there will be.
-	 */
-	if (ctx->type != PIPE_SHADER_FRAGMENT)
-		for (; i < num_params; ++i)
-			shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
+	for (; i < num_params; ++i)
+		shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
+
+	assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
+	shader->info.num_input_vgprs -= num_prolog_vgprs;
 
 	if (!ctx->screen->has_ds_bpermute &&
 	    bld_base->info &&
 	    (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
 	     bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
 	     bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
 	     bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
 	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
 	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
 		ctx->lds =
-- 
2.7.4



More information about the mesa-dev mailing list