[Mesa-dev] [PATCH 14/31] radeonsi: set num_input_vgprs for fragment shaders in create_function
Nicolai Hähnle
nhaehnle at gmail.com
Mon Oct 31 22:11:01 UTC 2016
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
So that the prolog generated for monolithic fragment shaders will have the
right signature.
---
src/gallium/drivers/radeonsi/si_shader.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 447293c..b15c60d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5488,20 +5488,21 @@ static void declare_tess_lds(struct si_shader_context *ctx)
static void create_function(struct si_shader_context *ctx)
{
struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct si_shader *shader = ctx->shader;
LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32;
LLVMTypeRef returns[16+32*4];
unsigned i, last_sgpr, num_params, num_return_sgprs;
unsigned num_returns = 0;
+ unsigned num_prolog_vgprs = 0;
v3i32 = LLVMVectorType(ctx->i32, 3);
params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
switch (ctx->type) {
@@ -5538,20 +5539,22 @@ static void create_function(struct si_shader_context *ctx)
params[ctx->param_vs_prim_id = num_params++] = ctx->i32;
params[ctx->param_instance_id = num_params++] = ctx->i32;
if (!ctx->no_prolog &&
!ctx->is_gs_copy_shader) {
/* Vertex load indices. */
ctx->param_vertex_index0 = num_params;
for (i = 0; i < shader->selector->info.num_inputs; i++)
params[num_params++] = ctx->i32;
+
+ num_prolog_vgprs += shader->selector->info.num_inputs;
}
if (!ctx->no_epilog &&
!ctx->is_gs_copy_shader) {
/* PrimitiveID output. */
if (!shader->key.vs.as_es && !shader->key.vs.as_ls)
for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
returns[num_returns++] = ctx->f32;
}
break;
@@ -5637,34 +5640,37 @@ static void create_function(struct si_shader_context *ctx)
params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32;
params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32;
params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32;
params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32;
params[SI_PARAM_POS_X_FLOAT] = ctx->f32;
params[SI_PARAM_POS_Y_FLOAT] = ctx->f32;
params[SI_PARAM_POS_Z_FLOAT] = ctx->f32;
params[SI_PARAM_POS_W_FLOAT] = ctx->f32;
params[SI_PARAM_FRONT_FACE] = ctx->i32;
+ shader->info.face_vgpr_index = 20;
params[SI_PARAM_ANCILLARY] = ctx->i32;
params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
params[SI_PARAM_POS_FIXED_PT] = ctx->i32;
num_params = SI_PARAM_POS_FIXED_PT+1;
if (!ctx->no_prolog) {
/* Color inputs from the prolog. */
if (shader->selector->info.colors_read) {
unsigned num_color_elements =
util_bitcount(shader->selector->info.colors_read);
assert(num_params + num_color_elements <= ARRAY_SIZE(params));
for (i = 0; i < num_color_elements; i++)
params[num_params++] = ctx->f32;
+
+ num_prolog_vgprs += num_color_elements;
}
}
if (!ctx->no_epilog) {
/* Outputs for the epilog. */
num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
num_returns =
num_return_sgprs +
util_bitcount(shader->selector->info.colors_written) * 4 +
shader->selector->info.writes_z +
@@ -5733,26 +5739,25 @@ static void create_function(struct si_shader_context *ctx)
"amdgpu-max-work-group-size",
max_work_group_size);
}
shader->info.num_input_sgprs = 0;
shader->info.num_input_vgprs = 0;
for (i = 0; i <= last_sgpr; ++i)
shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4;
- /* Unused fragment shader inputs are eliminated by the compiler,
- * so we don't know yet how many there will be.
- */
- if (ctx->type != PIPE_SHADER_FRAGMENT)
- for (; i < num_params; ++i)
- shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
+ for (; i < num_params; ++i)
+ shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
+
+ assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
+ shader->info.num_input_vgprs -= num_prolog_vgprs;
if (!ctx->screen->has_ds_bpermute &&
bld_base->info &&
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
ctx->lds =
--
2.7.4
More information about the mesa-dev
mailing list