[Mesa-dev] [PATCH 24/61] radeonsi/gfx9: add VS prolog support for merged LS-HS
Marek Olšák
maraeo at gmail.com
Mon Apr 24 08:45:21 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
For merged LS-HS shaders, the HS input VGPRs come first, so the VS prolog must reserve them and pass them through unchanged.
---
src/gallium/drivers/radeonsi/si_shader.c | 29 +++++++++++++++++++----------
src/gallium/drivers/radeonsi/si_shader.h | 2 ++
2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index dab95e2..f0e3f0c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -7207,20 +7207,23 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info,
unsigned num_input_sgprs,
const struct si_vs_prolog_bits *prolog_key,
struct si_shader *shader_out,
union si_shader_part_key *key)
{
memset(key, 0, sizeof(*key));
key->vs_prolog.states = *prolog_key;
key->vs_prolog.num_input_sgprs = num_input_sgprs;
key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
+ if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL)
+ key->vs_prolog.num_merged_next_stage_vgprs = 2;
+
/* Set the instanceID flag. */
for (unsigned i = 0; i < info->num_inputs; i++)
if (key->vs_prolog.states.instance_divisors[i])
shader_out->info.uses_instanceid = true;
}
/**
* Compute the VS epilog key, which contains all the information needed to
* build the VS epilog function, and set the PrimitiveID output offset.
*/
@@ -7990,43 +7993,47 @@ out:
* (InstanceID + StartInstance),
* (InstanceID / 2 + StartInstance)
*/
static void si_build_vs_prolog_function(struct si_shader_context *ctx,
union si_shader_part_key *key)
{
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMTypeRef *params, *returns;
LLVMValueRef ret, func;
int last_sgpr, num_params, num_returns, i;
+ unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs +
+ key->vs_prolog.num_merged_next_stage_vgprs;
+ unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4;
+ unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs +
+ num_input_vgprs;
+ unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
- ctx->param_vertex_id = key->vs_prolog.num_input_sgprs;
- ctx->param_instance_id = key->vs_prolog.num_input_sgprs + 3;
+ ctx->param_vertex_id = first_vs_vgpr;
+ ctx->param_instance_id = first_vs_vgpr + 3;
/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
- params = alloca((key->vs_prolog.num_input_sgprs + 4) *
- sizeof(LLVMTypeRef));
- returns = alloca((key->vs_prolog.num_input_sgprs + 4 +
- key->vs_prolog.last_input + 1) *
+ params = alloca(num_all_input_regs * sizeof(LLVMTypeRef));
+ returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) *
sizeof(LLVMTypeRef));
num_params = 0;
num_returns = 0;
/* Declare input and output SGPRs. */
num_params = 0;
for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
params[num_params++] = ctx->i32;
returns[num_returns++] = ctx->i32;
}
last_sgpr = num_params - 1;
- /* 4 preloaded VGPRs (outputs must be floats) */
- for (i = 0; i < 4; i++) {
+ /* Preloaded VGPRs (outputs must be floats) */
+ for (i = 0; i < num_input_vgprs; i++) {
params[num_params++] = ctx->i32;
returns[num_returns++] = ctx->f32;
}
/* Vertex load indices. */
for (i = 0; i <= key->vs_prolog.last_input; i++)
returns[num_returns++] = ctx->f32;
/* Create the function. */
si_create_function(ctx, "vs_prolog", returns, num_returns, params,
@@ -8034,41 +8041,43 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
func = ctx->main_fn;
/* Copy inputs to outputs. This should be no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
*/
ret = ctx->return_value;
for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
LLVMValueRef p = LLVMGetParam(func, i);
ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
}
- for (i = num_params - 4; i < num_params; i++) {
+ for (; i < num_params; i++) {
LLVMValueRef p = LLVMGetParam(func, i);
p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, "");
ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
}
/* Compute vertex load indices from instance divisors. */
for (i = 0; i <= key->vs_prolog.last_input; i++) {
unsigned divisor = key->vs_prolog.states.instance_divisors[i];
LLVMValueRef index;
if (divisor) {
/* InstanceID / Divisor + StartInstance */
index = get_instance_index_for_fetch(ctx,
+ user_sgpr_base +
SI_SGPR_START_INSTANCE,
divisor);
} else {
/* VertexID + BaseVertex */
index = LLVMBuildAdd(gallivm->builder,
LLVMGetParam(func, ctx->param_vertex_id),
- LLVMGetParam(func, SI_SGPR_BASE_VERTEX), "");
+ LLVMGetParam(func, user_sgpr_base +
+ SI_SGPR_BASE_VERTEX), "");
}
index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, "");
ret = LLVMBuildInsertValue(gallivm->builder, ret, index,
num_params++, "");
}
si_llvm_build_ret(ctx, ret);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 1fee044..afbe547 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -345,20 +345,22 @@ struct si_ps_epilog_bits {
unsigned alpha_func:3;
unsigned alpha_to_one:1;
unsigned poly_line_smoothing:1;
unsigned clamp_color:1;
};
union si_shader_part_key {
struct {
struct si_vs_prolog_bits states;
unsigned num_input_sgprs:6;
+ /* For merged stages such as LS-HS, HS input VGPRs are first. */
+ unsigned num_merged_next_stage_vgprs:3;
unsigned last_input:4;
} vs_prolog;
struct {
struct si_vs_epilog_bits states;
unsigned prim_id_param_offset:5;
} vs_epilog;
struct {
struct si_tcs_epilog_bits states;
} tcs_epilog;
struct {
--
2.7.4
More information about the mesa-dev
mailing list