Mesa (master): radeonsi: compute only one set of interpolation (i, j) when MSAA is disabled

Marek Olšák mareko at kemper.freedesktop.org
Mon Jul 4 22:56:11 UTC 2016


Module: Mesa
Branch: master
Commit: 476e9cee1d0cbe321c401277214e6c36ce5b18c9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=476e9cee1d0cbe321c401277214e6c36ce5b18c9

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Thu Jun 30 10:57:34 2016 +0200

radeonsi: compute only one set of interpolation (i,j) when MSAA is disabled

This should increase the PS launch rate for shaders using at least 2 pairs
of perspective (i,j) and same for linear.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

---

 src/gallium/drivers/radeonsi/si_shader.c        | 74 ++++++++++++++++++++++++-
 src/gallium/drivers/radeonsi/si_shader.h        |  4 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 13 +++++
 3 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index da4a6cb..a59c28e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1300,6 +1300,20 @@ static unsigned select_interp_param(struct si_shader_context *ctx,
 			return SI_PARAM_LINEAR_SAMPLE;
 		}
 	}
+	if (ctx->shader->key.ps.prolog.force_persp_center_interp) {
+		switch (param) {
+		case SI_PARAM_PERSP_CENTROID:
+		case SI_PARAM_PERSP_SAMPLE:
+			return SI_PARAM_PERSP_CENTER;
+		}
+	}
+	if (ctx->shader->key.ps.prolog.force_linear_center_interp) {
+		switch (param) {
+		case SI_PARAM_LINEAR_CENTROID:
+		case SI_PARAM_LINEAR_SAMPLE:
+			return SI_PARAM_PERSP_CENTER;
+		}
+	}
 
 	return param;
 }
@@ -6382,6 +6396,8 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
 		fprintf(f, "  prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple);
 		fprintf(f, "  prolog.force_persp_sample_interp = %u\n", key->ps.prolog.force_persp_sample_interp);
 		fprintf(f, "  prolog.force_linear_sample_interp = %u\n", key->ps.prolog.force_linear_sample_interp);
+		fprintf(f, "  prolog.force_persp_center_interp = %u\n", key->ps.prolog.force_persp_center_interp);
+		fprintf(f, "  prolog.force_linear_center_interp = %u\n", key->ps.prolog.force_linear_center_interp);
 		fprintf(f, "  epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format);
 		fprintf(f, "  epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8);
 		fprintf(f, "  epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf);
@@ -7255,6 +7271,40 @@ static bool si_compile_ps_prolog(struct si_screen *sscreen,
 						   linear_sample[i], base + 10 + i, "");
 	}
 
+	/* Force center interpolation. */
+	if (key->ps_prolog.states.force_persp_center_interp) {
+		unsigned i, base = key->ps_prolog.num_input_sgprs;
+		LLVMValueRef persp_center[2];
+
+		/* Read PERSP_CENTER. */
+		for (i = 0; i < 2; i++)
+			persp_center[i] = LLVMGetParam(func, base + 2 + i);
+		/* Overwrite PERSP_SAMPLE. */
+		for (i = 0; i < 2; i++)
+			ret = LLVMBuildInsertValue(gallivm->builder, ret,
+						   persp_center[i], base + i, "");
+		/* Overwrite PERSP_CENTROID. */
+		for (i = 0; i < 2; i++)
+			ret = LLVMBuildInsertValue(gallivm->builder, ret,
+						   persp_center[i], base + 4 + i, "");
+	}
+	if (key->ps_prolog.states.force_linear_center_interp) {
+		unsigned i, base = key->ps_prolog.num_input_sgprs;
+		LLVMValueRef linear_center[2];
+
+		/* Read LINEAR_CENTER. */
+		for (i = 0; i < 2; i++)
+			linear_center[i] = LLVMGetParam(func, base + 8 + i);
+		/* Overwrite LINEAR_SAMPLE. */
+		for (i = 0; i < 2; i++)
+			ret = LLVMBuildInsertValue(gallivm->builder, ret,
+						   linear_center[i], base + 6 + i, "");
+		/* Overwrite LINEAR_CENTROID. */
+		for (i = 0; i < 2; i++)
+			ret = LLVMBuildInsertValue(gallivm->builder, ret,
+						   linear_center[i], base + 10 + i, "");
+	}
+
 	/* Tell LLVM to insert WQM instruction sequence when needed. */
 	if (key->ps_prolog.wqm) {
 		LLVMAddTargetDependentFunctionAttr(func,
@@ -7414,7 +7464,9 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 	prolog_key.ps_prolog.wqm = info->uses_derivatives &&
 		(prolog_key.ps_prolog.colors_read ||
 		 prolog_key.ps_prolog.states.force_persp_sample_interp ||
-		 prolog_key.ps_prolog.states.force_linear_sample_interp);
+		 prolog_key.ps_prolog.states.force_linear_sample_interp ||
+		 prolog_key.ps_prolog.states.force_persp_center_interp ||
+		 prolog_key.ps_prolog.states.force_linear_center_interp);
 
 	if (info->colors_read) {
 		unsigned *color = shader->selector->color_attr_index;
@@ -7443,6 +7495,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 				/* Force the interpolation location for colors here. */
 				if (shader->key.ps.prolog.force_persp_sample_interp)
 					location = TGSI_INTERPOLATE_LOC_SAMPLE;
+				if (shader->key.ps.prolog.force_persp_center_interp)
+					location = TGSI_INTERPOLATE_LOC_CENTER;
 
 				switch (location) {
 				case TGSI_INTERPOLATE_LOC_SAMPLE:
@@ -7468,6 +7522,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 				/* Force the interpolation location for colors here. */
 				if (shader->key.ps.prolog.force_linear_sample_interp)
 					location = TGSI_INTERPOLATE_LOC_SAMPLE;
+				if (shader->key.ps.prolog.force_linear_center_interp)
+					location = TGSI_INTERPOLATE_LOC_CENTER;
 
 				switch (location) {
 				case TGSI_INTERPOLATE_LOC_SAMPLE:
@@ -7499,6 +7555,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 	if (prolog_key.ps_prolog.colors_read ||
 	    prolog_key.ps_prolog.states.force_persp_sample_interp ||
 	    prolog_key.ps_prolog.states.force_linear_sample_interp ||
+	    prolog_key.ps_prolog.states.force_persp_center_interp ||
+	    prolog_key.ps_prolog.states.force_linear_center_interp ||
 	    prolog_key.ps_prolog.states.poly_stipple) {
 		shader->prolog =
 			si_get_shader_part(sscreen, &sscreen->ps_prologs,
@@ -7544,6 +7602,20 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 		shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
 		shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
 	}
+	if (shader->key.ps.prolog.force_persp_center_interp &&
+	    (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
+	     G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
+		shader->config.spi_ps_input_ena &= C_0286CC_PERSP_SAMPLE_ENA;
+		shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
+		shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
+	}
+	if (shader->key.ps.prolog.force_linear_center_interp &&
+	    (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
+	     G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
+		shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_SAMPLE_ENA;
+		shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
+		shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
+	}
 
 	/* POW_W_FLOAT requires that one of the perspective weights is enabled. */
 	if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 6c2e832..0647736 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -315,9 +315,9 @@ struct si_ps_prolog_bits {
 	unsigned	poly_stipple:1;
 	unsigned	force_persp_sample_interp:1;
 	unsigned	force_linear_sample_interp:1;
+	unsigned	force_persp_center_interp:1;
+	unsigned	force_linear_center_interp:1;
 	/* TODO:
-	 * - add force_center_interp if MSAA is disabled and centroid or
-	 *   sample are present
 	 * - add force_center_interp_bc_optimize to force center interpolation
 	 *   based on the bc_optimize SGPR bit if MSAA is enabled, centroid is
 	 *   present and sample isn't present.
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index cf5c1f9..d679825 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -946,6 +946,19 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 				key->ps.prolog.force_linear_sample_interp =
 					sel->info.uses_linear_center ||
 					sel->info.uses_linear_centroid;
+			} else if (!rs->multisample_enable ||
+				   sctx->framebuffer.nr_samples <= 1) {
+				/* Make sure SPI doesn't compute more than 1 pair
+				 * of (i,j), which is the optimization here. */
+				key->ps.prolog.force_persp_center_interp =
+					sel->info.uses_persp_center +
+					sel->info.uses_persp_centroid +
+					sel->info.uses_persp_sample > 1;
+
+				key->ps.prolog.force_linear_center_interp =
+					sel->info.uses_linear_center +
+					sel->info.uses_linear_centroid +
+					sel->info.uses_linear_sample > 1;
 			}
 		}
 




More information about the mesa-commit mailing list