[Mesa-dev] [PATCH 3/8] radeonsi: compute only one set of interpolation (i, j) when MSAA is disabled
Marek Olšák
maraeo at gmail.com
Thu Jun 30 23:28:42 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
This should increase the PS launch rate for shaders using at least 2 pairs
of perspective (i,j) and same for linear.
---
src/gallium/drivers/radeonsi/si_shader.c | 74 ++++++++++++++++++++++++-
src/gallium/drivers/radeonsi/si_shader.h | 4 +-
src/gallium/drivers/radeonsi/si_state_shaders.c | 13 +++++
3 files changed, 88 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 438981c..4652fe8 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1300,6 +1300,20 @@ static unsigned select_interp_param(struct si_shader_context *ctx,
return SI_PARAM_LINEAR_SAMPLE;
}
}
+ if (ctx->shader->key.ps.prolog.force_persp_center_interp) {
+ switch (param) {
+ case SI_PARAM_PERSP_CENTROID:
+ case SI_PARAM_PERSP_SAMPLE:
+ return SI_PARAM_PERSP_CENTER;
+ }
+ }
+ if (ctx->shader->key.ps.prolog.force_linear_center_interp) {
+ switch (param) {
+ case SI_PARAM_LINEAR_CENTROID:
+ case SI_PARAM_LINEAR_SAMPLE:
+ return SI_PARAM_PERSP_CENTER;
+ }
+ }
return param;
}
@@ -6394,6 +6408,8 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
fprintf(f, " prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple);
fprintf(f, " prolog.force_persp_sample_interp = %u\n", key->ps.prolog.force_persp_sample_interp);
fprintf(f, " prolog.force_linear_sample_interp = %u\n", key->ps.prolog.force_linear_sample_interp);
+ fprintf(f, " prolog.force_persp_center_interp = %u\n", key->ps.prolog.force_persp_center_interp);
+ fprintf(f, " prolog.force_linear_center_interp = %u\n", key->ps.prolog.force_linear_center_interp);
fprintf(f, " epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format);
fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8);
fprintf(f, " epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf);
@@ -7267,6 +7283,40 @@ static bool si_compile_ps_prolog(struct si_screen *sscreen,
linear_sample[i], base + 10 + i, "");
}
+ /* Force center interpolation. */
+ if (key->ps_prolog.states.force_persp_center_interp) {
+ unsigned i, base = key->ps_prolog.num_input_sgprs;
+ LLVMValueRef persp_center[2];
+
+ /* Read PERSP_CENTER. */
+ for (i = 0; i < 2; i++)
+ persp_center[i] = LLVMGetParam(func, base + 2 + i);
+ /* Overwrite PERSP_SAMPLE. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ persp_center[i], base + i, "");
+ /* Overwrite PERSP_CENTROID. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ persp_center[i], base + 4 + i, "");
+ }
+ if (key->ps_prolog.states.force_linear_center_interp) {
+ unsigned i, base = key->ps_prolog.num_input_sgprs;
+ LLVMValueRef linear_center[2];
+
+ /* Read LINEAR_CENTER. */
+ for (i = 0; i < 2; i++)
+ linear_center[i] = LLVMGetParam(func, base + 8 + i);
+ /* Overwrite LINEAR_SAMPLE. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ linear_center[i], base + 6 + i, "");
+ /* Overwrite LINEAR_CENTROID. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ linear_center[i], base + 10 + i, "");
+ }
+
/* Tell LLVM to insert WQM instruction sequence when needed. */
if (key->ps_prolog.wqm) {
LLVMAddTargetDependentFunctionAttr(func,
@@ -7426,7 +7476,9 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
prolog_key.ps_prolog.wqm = info->uses_derivatives &&
(prolog_key.ps_prolog.colors_read ||
prolog_key.ps_prolog.states.force_persp_sample_interp ||
- prolog_key.ps_prolog.states.force_linear_sample_interp);
+ prolog_key.ps_prolog.states.force_linear_sample_interp ||
+ prolog_key.ps_prolog.states.force_persp_center_interp ||
+ prolog_key.ps_prolog.states.force_linear_center_interp);
if (info->colors_read) {
unsigned *color = shader->selector->color_attr_index;
@@ -7455,6 +7507,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
/* Force the interpolation location for colors here. */
if (shader->key.ps.prolog.force_persp_sample_interp)
location = TGSI_INTERPOLATE_LOC_SAMPLE;
+ if (shader->key.ps.prolog.force_persp_center_interp)
+ location = TGSI_INTERPOLATE_LOC_CENTER;
switch (location) {
case TGSI_INTERPOLATE_LOC_SAMPLE:
@@ -7480,6 +7534,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
/* Force the interpolation location for colors here. */
if (shader->key.ps.prolog.force_linear_sample_interp)
location = TGSI_INTERPOLATE_LOC_SAMPLE;
+ if (shader->key.ps.prolog.force_linear_center_interp)
+ location = TGSI_INTERPOLATE_LOC_CENTER;
switch (location) {
case TGSI_INTERPOLATE_LOC_SAMPLE:
@@ -7511,6 +7567,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
if (prolog_key.ps_prolog.colors_read ||
prolog_key.ps_prolog.states.force_persp_sample_interp ||
prolog_key.ps_prolog.states.force_linear_sample_interp ||
+ prolog_key.ps_prolog.states.force_persp_center_interp ||
+ prolog_key.ps_prolog.states.force_linear_center_interp ||
prolog_key.ps_prolog.states.poly_stipple) {
shader->prolog =
si_get_shader_part(sscreen, &sscreen->ps_prologs,
@@ -7556,6 +7614,20 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
}
+ if (shader->key.ps.prolog.force_persp_center_interp &&
+ (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
+ G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
+ shader->config.spi_ps_input_ena &= C_0286CC_PERSP_SAMPLE_ENA;
+ shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
+ shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
+ }
+ if (shader->key.ps.prolog.force_linear_center_interp &&
+ (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
+ G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
+ shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_SAMPLE_ENA;
+ shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
+ shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
+ }
/* POW_W_FLOAT requires that one of the perspective weights is enabled. */
if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 6c2e832..0647736 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -315,9 +315,9 @@ struct si_ps_prolog_bits {
unsigned poly_stipple:1;
unsigned force_persp_sample_interp:1;
unsigned force_linear_sample_interp:1;
+ unsigned force_persp_center_interp:1;
+ unsigned force_linear_center_interp:1;
/* TODO:
- * - add force_center_interp if MSAA is disabled and centroid or
- * sample are present
* - add force_center_interp_bc_optimize to force center interpolation
* based on the bc_optimize SGPR bit if MSAA is enabled, centroid is
* present and sample isn't present.
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 710d51d..eb3c2f9 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -953,6 +953,19 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
key->ps.prolog.force_linear_sample_interp =
sel->info.uses_linear_center ||
sel->info.uses_linear_centroid;
+ } else if (!rs->multisample_enable ||
+ sctx->framebuffer.nr_samples <= 1) {
+ /* Make sure SPI doesn't compute more than 1 pair
+ * of (i,j), which is the optimization here. */
+ key->ps.prolog.force_persp_center_interp =
+ sel->info.uses_persp_center +
+ sel->info.uses_persp_centroid +
+ sel->info.uses_persp_sample > 1;
+
+ key->ps.prolog.force_linear_center_interp =
+ sel->info.uses_linear_center +
+ sel->info.uses_linear_centroid +
+ sel->info.uses_linear_sample > 1;
}
}
--
2.7.4
More information about the mesa-dev
mailing list