[Mesa-dev] [PATCH 15/31] radeonsi: use PS prolog for monolithic shaders

Nicolai Hähnle nhaehnle at gmail.com
Mon Oct 31 22:11:02 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 42 ++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b15c60d..1955917 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -64,20 +64,22 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
 			       struct si_shader *shader,
 			       LLVMTargetMachineRef tm);
 
 static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
 				 struct lp_build_tgsi_context *bld_base,
 				 struct lp_build_emit_data *emit_data);
 
 static void si_dump_shader_key(unsigned shader, union si_shader_key *key,
 			       FILE *f);
 
+static void si_build_ps_prolog_function(struct si_shader_context *ctx,
+					union si_shader_part_key *key);
 static void si_build_ps_epilog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key);
 
 /* Ideally pass the sample mask input to the PS epilog as v13, which
  * is its usual location, so that the shader doesn't have to add v_mov.
  */
 #define PS_EPILOG_SAMPLEMASK_MIN_LOC 13
 
 /* The VS location of the PrimitiveID input is the same in the epilog,
  * so that the main shader part doesn't have to move it.
@@ -6765,21 +6767,22 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 
 	si_llvm_build_ret(ctx, ctx->return_value);
 	return true;
 }
 
 /**
  * Compute the PS prolog key, which contains all the information needed to
  * build the PS prolog function, and set related bits in shader->config.
  */
 static void si_get_ps_prolog_key(struct si_shader *shader,
-				 union si_shader_part_key *key)
+				 union si_shader_part_key *key,
+				 bool separate_prolog)
 {
 	struct tgsi_shader_info *info = &shader->selector->info;
 
 	memset(key, 0, sizeof(*key));
 	key->ps_prolog.states = shader->key.ps.prolog;
 	key->ps_prolog.colors_read = info->colors_read;
 	key->ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
 	key->ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
 	key->ps_prolog.wqm = info->uses_derivatives &&
 		(key->ps_prolog.colors_read ||
@@ -6845,33 +6848,40 @@ static void si_get_ps_prolog_key(struct si_shader *shader,
 					assert(0);
 				}
 				break;
 			case TGSI_INTERPOLATE_LINEAR:
 				/* Force the interpolation location for colors here. */
 				if (shader->key.ps.prolog.force_linear_sample_interp)
 					location = TGSI_INTERPOLATE_LOC_SAMPLE;
 				if (shader->key.ps.prolog.force_linear_center_interp)
 					location = TGSI_INTERPOLATE_LOC_CENTER;
 
+				/* The separate_prolog (non-monolithic) VGPR indices
+				 * work because InitialPSInputAddr is set on the
+				 * main shader and PERSP_PULL_MODEL is never used.
+				 */
 				switch (location) {
 				case TGSI_INTERPOLATE_LOC_SAMPLE:
-					key->ps_prolog.color_interp_vgpr_index[i] = 6;
+					key->ps_prolog.color_interp_vgpr_index[i] =
+						separate_prolog ? 6 : 9;
 					shader->config.spi_ps_input_ena |=
 						S_0286CC_LINEAR_SAMPLE_ENA(1);
 					break;
 				case TGSI_INTERPOLATE_LOC_CENTER:
-					key->ps_prolog.color_interp_vgpr_index[i] = 8;
+					key->ps_prolog.color_interp_vgpr_index[i] =
+						separate_prolog ? 8 : 11;
 					shader->config.spi_ps_input_ena |=
 						S_0286CC_LINEAR_CENTER_ENA(1);
 					break;
 				case TGSI_INTERPOLATE_LOC_CENTROID:
-					key->ps_prolog.color_interp_vgpr_index[i] = 10;
+					key->ps_prolog.color_interp_vgpr_index[i] =
+						separate_prolog ? 10 : 13;
 					shader->config.spi_ps_input_ena |=
 						S_0286CC_LINEAR_CENTROID_ENA(1);
 					break;
 				default:
 					assert(0);
 				}
 				break;
 			default:
 				assert(0);
 			}
@@ -7115,47 +7125,59 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 	    !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
 		tgsi_dump(sel->tokens, 0);
 		si_dump_streamout(&sel->so);
 	}
 
 	si_init_shader_ctx(&ctx, sscreen, shader, tm);
 	ctx.no_prolog = is_monolithic;
 	ctx.no_epilog = is_monolithic;
 	ctx.separate_prolog = !is_monolithic;
 
-	if (ctx.type == PIPE_SHADER_FRAGMENT)
+	if (ctx.type == PIPE_SHADER_FRAGMENT) {
+		ctx.no_prolog = false;
 		ctx.no_epilog = false;
+	}
 
 	memset(shader->info.vs_output_param_offset, 0xff,
 	       sizeof(shader->info.vs_output_param_offset));
 
 	shader->info.uses_instanceid = sel->info.uses_instanceid;
 
 	bld_base = &ctx.soa.bld_base;
 	ctx.load_system_value = declare_system_value;
 
 	if (!si_compile_tgsi_main(&ctx, shader)) {
 		si_llvm_dispose(&ctx);
 		return -1;
 	}
 
 	if (is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) {
-		LLVMValueRef parts[2];
+		LLVMValueRef parts[3];
+		union si_shader_part_key prolog_key;
 		union si_shader_part_key epilog_key;
+		bool need_prolog;
+
+		si_get_ps_prolog_key(shader, &prolog_key, false);
+		need_prolog = si_need_ps_prolog(&prolog_key);
 
-		parts[0] = ctx.main_fn;
+		parts[need_prolog ? 1 : 0] = ctx.main_fn;
+
+		if (need_prolog) {
+			si_build_ps_prolog_function(&ctx, &prolog_key);
+			parts[0] = ctx.main_fn;
+		}
 
 		si_get_ps_epilog_key(shader, &epilog_key);
 		si_build_ps_epilog_function(&ctx, &epilog_key);
-		parts[1] = ctx.main_fn;
+		parts[need_prolog ? 2 : 1] = ctx.main_fn;
 
-		si_build_wrapper_function(&ctx, parts, 2, 0);
+		si_build_wrapper_function(&ctx, parts, need_prolog ? 3 : 2, need_prolog ? 1 : 0);
 	}
 
 	mod = bld_base->base.gallivm->module;
 
 	/* Dump LLVM IR before any optimization passes */
 	if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
 	    r600_can_dump_shader(&sscreen->b, ctx.type))
 		LLVMDumpModule(mod);
 
 	si_llvm_finalize_module(&ctx,
@@ -8106,21 +8128,21 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
  */
 static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 				      LLVMTargetMachineRef tm,
 				      struct si_shader *shader,
 				      struct pipe_debug_callback *debug)
 {
 	union si_shader_part_key prolog_key;
 	union si_shader_part_key epilog_key;
 
 	/* Get the prolog. */
-	si_get_ps_prolog_key(shader, &prolog_key);
+	si_get_ps_prolog_key(shader, &prolog_key, true);
 
 	/* The prolog is a no-op if these aren't set. */
 	if (si_need_ps_prolog(&prolog_key)) {
 		shader->prolog =
 			si_get_shader_part(sscreen, &sscreen->ps_prologs,
 					   &prolog_key, tm, debug,
 					   si_compile_ps_prolog);
 		if (!shader->prolog)
 			return false;
 	}
-- 
2.7.4



More information about the mesa-dev mailing list