[Mesa-dev] [PATCH 13/31] radeonsi: extract si_build_ps_prolog_function
Nicolai Hähnle
nhaehnle at gmail.com
Mon Oct 31 22:11:00 UTC 2016
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/gallium/drivers/radeonsi/si_shader.c | 310 +++++++++++++++++--------------
1 file changed, 171 insertions(+), 139 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 281a4dd..447293c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6756,20 +6756,147 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
return false;
}
si_llvm_build_ret(ctx, ctx->return_value);
return true;
}
/**
+ * Compute the PS prolog key, which contains all the information needed to
+ * build the PS prolog function, and set related bits in shader->config.
+ *
+ * The key is zeroed first and then filled from shader->key.ps.prolog, the
+ * selector's TGSI info and shader->info. For each of the two color inputs
+ * that is read, the key records the input's attribute index and the index
+ * of the first interpolation VGPR to use (-1 for constant interpolation),
+ * and the matching enable bit is ORed into shader->config.spi_ps_input_ena.
+ * With two-sided colors, the number of interpolated inputs and the face
+ * VGPR index are recorded as well and FRONT_FACE_ENA is set.
+ *
+ * shader: source of the key states and info; its config may be modified.
+ * key:    output, completely overwritten.
+ */
+static void si_get_ps_prolog_key(struct si_shader *shader,
+ union si_shader_part_key *key)
+{
+ struct tgsi_shader_info *info = &shader->selector->info;
+
+ memset(key, 0, sizeof(*key));
+ key->ps_prolog.states = shader->key.ps.prolog;
+ key->ps_prolog.colors_read = info->colors_read;
+ key->ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
+ key->ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
+ key->ps_prolog.wqm = info->uses_derivatives &&
+ (key->ps_prolog.colors_read ||
+ key->ps_prolog.states.force_persp_sample_interp ||
+ key->ps_prolog.states.force_linear_sample_interp ||
+ key->ps_prolog.states.force_persp_center_interp ||
+ key->ps_prolog.states.force_linear_center_interp ||
+ key->ps_prolog.states.bc_optimize_for_persp ||
+ key->ps_prolog.states.bc_optimize_for_linear); /* poly_stipple is not part of this condition */
+
+ if (info->colors_read) {
+ unsigned *color = shader->selector->color_attr_index;
+
+ if (shader->key.ps.prolog.color_two_side) {
+ /* BCOLORs are stored after the last input. */
+ key->ps_prolog.num_interp_inputs = info->num_inputs;
+ key->ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
+ shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
+ }
+
+ for (unsigned i = 0; i < 2; i++) {
+ unsigned interp = info->input_interpolate[color[i]];
+ unsigned location = info->input_interpolate_loc[color[i]];
+
+ if (!(info->colors_read & (0xf << i*4))) /* none of the 4 mask bits of color i is set */
+ continue;
+
+ key->ps_prolog.color_attr_index[i] = color[i];
+
+ if (shader->key.ps.prolog.flatshade_colors &&
+ interp == TGSI_INTERPOLATE_COLOR)
+ interp = TGSI_INTERPOLATE_CONSTANT;
+
+ switch (interp) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ key->ps_prolog.color_interp_vgpr_index[i] = -1;
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ case TGSI_INTERPOLATE_COLOR:
+ /* Force the interpolation location for colors here.
+ * If both force flags are set, the center override is applied last. */
+ if (shader->key.ps.prolog.force_persp_sample_interp)
+ location = TGSI_INTERPOLATE_LOC_SAMPLE;
+ if (shader->key.ps.prolog.force_persp_center_interp)
+ location = TGSI_INTERPOLATE_LOC_CENTER;
+
+ switch (location) {
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ key->ps_prolog.color_interp_vgpr_index[i] = 0;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_SAMPLE_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ key->ps_prolog.color_interp_vgpr_index[i] = 2;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_CENTER_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ key->ps_prolog.color_interp_vgpr_index[i] = 4;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_CENTROID_ENA(1);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ /* Force the interpolation location for colors here.
+ * If both force flags are set, the center override is applied last. */
+ if (shader->key.ps.prolog.force_linear_sample_interp)
+ location = TGSI_INTERPOLATE_LOC_SAMPLE;
+ if (shader->key.ps.prolog.force_linear_center_interp)
+ location = TGSI_INTERPOLATE_LOC_CENTER;
+
+ switch (location) {
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ key->ps_prolog.color_interp_vgpr_index[i] = 6;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_SAMPLE_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ key->ps_prolog.color_interp_vgpr_index[i] = 8;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_CENTER_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ key->ps_prolog.color_interp_vgpr_index[i] = 10;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_CENTROID_ENA(1);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ default:
+ assert(0);
+ }
+ }
+ }
+}
+
+/**
+ * Check whether a PS prolog is required based on the key.
+ *
+ * Returns true if the key reads any color input or has any of the forced
+ * sample/center interpolation, bc_optimize or poly_stipple states set;
+ * otherwise returns false.
+ */
+static bool si_need_ps_prolog(const union si_shader_part_key *key)
+{
+ return key->ps_prolog.colors_read ||
+ key->ps_prolog.states.force_persp_sample_interp ||
+ key->ps_prolog.states.force_linear_sample_interp ||
+ key->ps_prolog.states.force_persp_center_interp ||
+ key->ps_prolog.states.force_linear_center_interp ||
+ key->ps_prolog.states.bc_optimize_for_persp ||
+ key->ps_prolog.states.bc_optimize_for_linear ||
+ key->ps_prolog.states.poly_stipple;
+}
+
+/**
* Compute the PS epilog key, which contains all the information needed to
* build the PS epilog function.
*/
static void si_get_ps_epilog_key(struct si_shader *shader,
union si_shader_part_key *key)
{
struct tgsi_shader_info *info = &shader->selector->info;
memset(key, 0, sizeof(*key));
key->ps_epilog.colors_written = info->colors_written;
key->ps_epilog.writes_z = info->writes_z;
@@ -7549,115 +7676,107 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
memset(&epilog_key, 0, sizeof(epilog_key));
epilog_key.tcs_epilog.states = shader->key.tcs.epilog;
shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs,
&epilog_key, tm, debug,
si_compile_tcs_epilog);
return shader->epilog != NULL;
}
/**
- * Compile the pixel shader prolog. This handles:
+ * Build the pixel shader prolog function. This handles:
* - two-side color selection and interpolation
* - overriding interpolation parameters for the API PS
* - polygon stippling
*
* All preloaded SGPRs and VGPRs are passed through unmodified unless they are
* overridden by other states. (e.g. per-sample interpolation)
* Interpolated colors are stored after the preloaded VGPRs.
*/
-static bool si_compile_ps_prolog(struct si_screen *sscreen,
- LLVMTargetMachineRef tm,
- struct pipe_debug_callback *debug,
- struct si_shader_part *out)
+static void si_build_ps_prolog_function(struct si_shader_context *ctx,
+ union si_shader_part_key *key)
{
- union si_shader_part_key *key = &out->key;
- struct si_shader shader = {};
- struct si_shader_context ctx;
- struct gallivm_state *gallivm = &ctx.gallivm;
+ struct gallivm_state *gallivm = &ctx->gallivm;
LLVMTypeRef *params;
LLVMValueRef ret, func;
int last_sgpr, num_params, num_returns, i, num_color_channels;
- bool status = true;
- si_init_shader_ctx(&ctx, sscreen, &shader, tm);
- ctx.type = PIPE_SHADER_FRAGMENT;
- shader.key.ps.prolog = key->ps_prolog.states;
+ assert(si_need_ps_prolog(key));
/* Number of inputs + 8 color elements. */
params = alloca((key->ps_prolog.num_input_sgprs +
key->ps_prolog.num_input_vgprs + 8) *
sizeof(LLVMTypeRef));
/* Declare inputs. */
num_params = 0;
for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
- params[num_params++] = ctx.i32;
+ params[num_params++] = ctx->i32;
last_sgpr = num_params - 1;
for (i = 0; i < key->ps_prolog.num_input_vgprs; i++)
- params[num_params++] = ctx.f32;
+ params[num_params++] = ctx->f32;
/* Declare outputs (same as inputs + add colors if needed) */
num_returns = num_params;
num_color_channels = util_bitcount(key->ps_prolog.colors_read);
for (i = 0; i < num_color_channels; i++)
- params[num_returns++] = ctx.f32;
+ params[num_returns++] = ctx->f32;
/* Create the function. */
- si_create_function(&ctx, "ps_prolog", params, num_returns, params,
+ si_create_function(ctx, "ps_prolog", params, num_returns, params,
num_params, last_sgpr);
- func = ctx.main_fn;
+ func = ctx->main_fn;
/* Copy inputs to outputs. This should be a no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
*/
- ret = ctx.return_value;
+ ret = ctx->return_value;
for (i = 0; i < num_params; i++) {
LLVMValueRef p = LLVMGetParam(func, i);
ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
}
/* Polygon stippling. */
if (key->ps_prolog.states.poly_stipple) {
/* POS_FIXED_PT is always last. */
unsigned pos = key->ps_prolog.num_input_sgprs +
key->ps_prolog.num_input_vgprs - 1;
LLVMValueRef ptr[2], list;
/* Get the pointer to rw buffers. */
ptr[0] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS);
ptr[1] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS_HI);
list = lp_build_gather_values(gallivm, ptr, 2);
- list = LLVMBuildBitCast(gallivm->builder, list, ctx.i64, "");
+ list = LLVMBuildBitCast(gallivm->builder, list, ctx->i64, "");
list = LLVMBuildIntToPtr(gallivm->builder, list,
- const_array(ctx.v16i8, SI_NUM_RW_BUFFERS), "");
+ const_array(ctx->v16i8, SI_NUM_RW_BUFFERS), "");
- si_llvm_emit_polygon_stipple(&ctx, list, pos);
+ si_llvm_emit_polygon_stipple(ctx, list, pos);
}
if (key->ps_prolog.states.bc_optimize_for_persp ||
key->ps_prolog.states.bc_optimize_for_linear) {
unsigned i, base = key->ps_prolog.num_input_sgprs;
LLVMValueRef center[2], centroid[2], tmp, bc_optimize;
/* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
* The hw doesn't compute CENTROID if the whole wave only
* contains fully-covered quads.
*
* PRIM_MASK is after user SGPRs.
*/
bc_optimize = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
bc_optimize = LLVMBuildLShr(gallivm->builder, bc_optimize,
- LLVMConstInt(ctx.i32, 31, 0), "");
+ LLVMConstInt(ctx->i32, 31, 0), "");
bc_optimize = LLVMBuildTrunc(gallivm->builder, bc_optimize,
- ctx.i1, "");
+ ctx->i1, "");
if (key->ps_prolog.states.bc_optimize_for_persp) {
/* Read PERSP_CENTER. */
for (i = 0; i < 2; i++)
center[i] = LLVMGetParam(func, base + 2 + i);
/* Read PERSP_CENTROID. */
for (i = 0; i < 2; i++)
centroid[i] = LLVMGetParam(func, base + 4 + i);
/* Select PERSP_CENTROID. */
for (i = 0; i < 2; i++) {
@@ -7768,53 +7887,75 @@ static bool si_compile_ps_prolog(struct si_screen *sscreen,
unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
key->ps_prolog.color_interp_vgpr_index[i];
/* Get the (i,j) updated by bc_optimize handling. */
interp[0] = LLVMBuildExtractValue(gallivm->builder, ret,
interp_vgpr, "");
interp[1] = LLVMBuildExtractValue(gallivm->builder, ret,
interp_vgpr + 1, "");
interp_ij = lp_build_gather_values(gallivm, interp, 2);
interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij,
- ctx.v2i32, "");
+ ctx->v2i32, "");
}
/* Use the absolute location of the input. */
prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
if (key->ps_prolog.states.color_two_side) {
face = LLVMGetParam(func, face_vgpr);
- face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, "");
+ face = LLVMBuildBitCast(gallivm->builder, face, ctx->i32, "");
}
- interp_fs_input(&ctx,
+ interp_fs_input(ctx,
key->ps_prolog.color_attr_index[i],
TGSI_SEMANTIC_COLOR, i,
key->ps_prolog.num_interp_inputs,
key->ps_prolog.colors_read, interp_ij,
prim_mask, face, color);
while (writemask) {
unsigned chan = u_bit_scan(&writemask);
ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan],
num_params++, "");
}
}
/* Tell LLVM to insert WQM instruction sequence when needed. */
if (key->ps_prolog.wqm) {
LLVMAddTargetDependentFunctionAttr(func,
"amdgpu-ps-wqm-outputs", "");
}
+ si_llvm_build_ret(ctx, ret);
+}
+
+/**
+ * Compile the pixel shader prolog.
+ */
+static bool si_compile_ps_prolog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ struct si_shader_part *out)
+{
+ union si_shader_part_key *key = &out->key;
+ struct si_shader shader = {};
+ struct si_shader_context ctx;
+ struct gallivm_state *gallivm = &ctx.gallivm;
+ bool status = true;
+
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+ ctx.type = PIPE_SHADER_FRAGMENT;
+ shader.key.ps.prolog = key->ps_prolog.states;
+
+ si_build_ps_prolog_function(&ctx, key);
+
/* Compile. */
- si_llvm_build_ret(&ctx, ret);
si_llvm_finalize_module(&ctx,
r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_FRAGMENT));
if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
gallivm->module, debug, ctx.type,
"Fragment Shader Prolog"))
status = false;
si_llvm_dispose(&ctx);
return status;
@@ -7956,137 +8097,28 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
}
/**
* Select and compile (or reuse) pixel shader parts (prolog & epilog).
*/
static bool si_shader_select_ps_parts(struct si_screen *sscreen,
LLVMTargetMachineRef tm,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
- struct tgsi_shader_info *info = &shader->selector->info;
union si_shader_part_key prolog_key;
union si_shader_part_key epilog_key;
- unsigned i;
/* Get the prolog. */
- memset(&prolog_key, 0, sizeof(prolog_key));
- prolog_key.ps_prolog.states = shader->key.ps.prolog;
- prolog_key.ps_prolog.colors_read = info->colors_read;
- prolog_key.ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
- prolog_key.ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
- prolog_key.ps_prolog.wqm = info->uses_derivatives &&
- (prolog_key.ps_prolog.colors_read ||
- prolog_key.ps_prolog.states.force_persp_sample_interp ||
- prolog_key.ps_prolog.states.force_linear_sample_interp ||
- prolog_key.ps_prolog.states.force_persp_center_interp ||
- prolog_key.ps_prolog.states.force_linear_center_interp ||
- prolog_key.ps_prolog.states.bc_optimize_for_persp ||
- prolog_key.ps_prolog.states.bc_optimize_for_linear);
-
- if (info->colors_read) {
- unsigned *color = shader->selector->color_attr_index;
-
- if (shader->key.ps.prolog.color_two_side) {
- /* BCOLORs are stored after the last input. */
- prolog_key.ps_prolog.num_interp_inputs = info->num_inputs;
- prolog_key.ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
- shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
- }
-
- for (i = 0; i < 2; i++) {
- unsigned interp = info->input_interpolate[color[i]];
- unsigned location = info->input_interpolate_loc[color[i]];
-
- if (!(info->colors_read & (0xf << i*4)))
- continue;
-
- prolog_key.ps_prolog.color_attr_index[i] = color[i];
-
- if (shader->key.ps.prolog.flatshade_colors &&
- interp == TGSI_INTERPOLATE_COLOR)
- interp = TGSI_INTERPOLATE_CONSTANT;
-
- switch (interp) {
- case TGSI_INTERPOLATE_CONSTANT:
- prolog_key.ps_prolog.color_interp_vgpr_index[i] = -1;
- break;
- case TGSI_INTERPOLATE_PERSPECTIVE:
- case TGSI_INTERPOLATE_COLOR:
- /* Force the interpolation location for colors here. */
- if (shader->key.ps.prolog.force_persp_sample_interp)
- location = TGSI_INTERPOLATE_LOC_SAMPLE;
- if (shader->key.ps.prolog.force_persp_center_interp)
- location = TGSI_INTERPOLATE_LOC_CENTER;
-
- switch (location) {
- case TGSI_INTERPOLATE_LOC_SAMPLE:
- prolog_key.ps_prolog.color_interp_vgpr_index[i] = 0;
- shader->config.spi_ps_input_ena |=
- S_0286CC_PERSP_SAMPLE_ENA(1);
- break;
- case TGSI_INTERPOLATE_LOC_CENTER:
- prolog_key.ps_prolog.color_interp_vgpr_index[i] = 2;
- shader->config.spi_ps_input_ena |=
- S_0286CC_PERSP_CENTER_ENA(1);
- break;
- case TGSI_INTERPOLATE_LOC_CENTROID:
- prolog_key.ps_prolog.color_interp_vgpr_index[i] = 4;
- shader->config.spi_ps_input_ena |=
- S_0286CC_PERSP_CENTROID_ENA(1);
- break;
- default:
- assert(0);
- }
- break;
- case TGSI_INTERPOLATE_LINEAR:
- /* Force the interpolation location for colors here. */
- if (shader->key.ps.prolog.force_linear_sample_interp)
- location = TGSI_INTERPOLATE_LOC_SAMPLE;
- if (shader->key.ps.prolog.force_linear_center_interp)
- location = TGSI_INTERPOLATE_LOC_CENTER;
-
- switch (location) {
- case TGSI_INTERPOLATE_LOC_SAMPLE:
- prolog_key.ps_prolog.color_interp_vgpr_index[i] = 6;
- shader->config.spi_ps_input_ena |=
- S_0286CC_LINEAR_SAMPLE_ENA(1);
- break;
- case TGSI_INTERPOLATE_LOC_CENTER:
- prolog_key.ps_prolog.color_interp_vgpr_index[i] = 8;
- shader->config.spi_ps_input_ena |=
- S_0286CC_LINEAR_CENTER_ENA(1);
- break;
- case TGSI_INTERPOLATE_LOC_CENTROID:
- prolog_key.ps_prolog.color_interp_vgpr_index[i] = 10;
- shader->config.spi_ps_input_ena |=
- S_0286CC_LINEAR_CENTROID_ENA(1);
- break;
- default:
- assert(0);
- }
- break;
- default:
- assert(0);
- }
- }
- }
+ si_get_ps_prolog_key(shader, &prolog_key);
/* The prolog is a no-op if these aren't set. */
- if (prolog_key.ps_prolog.colors_read ||
- prolog_key.ps_prolog.states.force_persp_sample_interp ||
- prolog_key.ps_prolog.states.force_linear_sample_interp ||
- prolog_key.ps_prolog.states.force_persp_center_interp ||
- prolog_key.ps_prolog.states.force_linear_center_interp ||
- prolog_key.ps_prolog.states.bc_optimize_for_persp ||
- prolog_key.ps_prolog.states.bc_optimize_for_linear ||
- prolog_key.ps_prolog.states.poly_stipple) {
+ if (si_need_ps_prolog(&prolog_key)) {
shader->prolog =
si_get_shader_part(sscreen, &sscreen->ps_prologs,
&prolog_key, tm, debug,
si_compile_ps_prolog);
if (!shader->prolog)
return false;
}
/* Get the epilog. */
si_get_ps_epilog_key(shader, &epilog_key);
--
2.7.4
More information about the mesa-dev
mailing list