[Mesa-dev] [PATCH 13/31] radeonsi: extract si_build_ps_prolog_function

Nicolai Hähnle nhaehnle at gmail.com
Mon Oct 31 22:11:00 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 310 +++++++++++++++++--------------
 1 file changed, 171 insertions(+), 139 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 281a4dd..447293c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6756,20 +6756,147 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 	if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
 		fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
 		return false;
 	}
 
 	si_llvm_build_ret(ctx, ctx->return_value);
 	return true;
 }
 
 /**
+ * Zero *key, then fill it with everything the PS prolog build needs; also ORs
+ * the input enables that the read colors require into shader->config.
+ */
+static void si_get_ps_prolog_key(struct si_shader *shader,
+				 union si_shader_part_key *key)
+{
+	struct tgsi_shader_info *info = &shader->selector->info;
+
+	memset(key, 0, sizeof(*key));
+	key->ps_prolog.states = shader->key.ps.prolog;
+	key->ps_prolog.colors_read = info->colors_read;
+	key->ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
+	key->ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
+	key->ps_prolog.wqm = info->uses_derivatives &&
+		(key->ps_prolog.colors_read ||
+		 key->ps_prolog.states.force_persp_sample_interp ||
+		 key->ps_prolog.states.force_linear_sample_interp ||
+		 key->ps_prolog.states.force_persp_center_interp ||
+		 key->ps_prolog.states.force_linear_center_interp ||
+		 key->ps_prolog.states.bc_optimize_for_persp ||
+		 key->ps_prolog.states.bc_optimize_for_linear);
+
+	if (info->colors_read) {
+		unsigned *color = shader->selector->color_attr_index;
+
+		if (shader->key.ps.prolog.color_two_side) {
+			/* BCOLORs are stored after the last input; also enable the FRONT_FACE input. */
+			key->ps_prolog.num_interp_inputs = info->num_inputs;
+			key->ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
+			shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
+		}
+
+		for (unsigned i = 0; i < 2; i++) {
+			unsigned interp = info->input_interpolate[color[i]];
+			unsigned location = info->input_interpolate_loc[color[i]];
+
+			if (!(info->colors_read & (0xf << i*4)))
+				continue;
+
+			key->ps_prolog.color_attr_index[i] = color[i];
+
+			if (shader->key.ps.prolog.flatshade_colors &&
+			    interp == TGSI_INTERPOLATE_COLOR)
+				interp = TGSI_INTERPOLATE_CONSTANT;
+
+			switch (interp) {
+			case TGSI_INTERPOLATE_CONSTANT:
+				key->ps_prolog.color_interp_vgpr_index[i] = -1;
+				break;
+			case TGSI_INTERPOLATE_PERSPECTIVE:
+			case TGSI_INTERPOLATE_COLOR:
+				/* A forced persp location overrides the declared one; center is tested last, so it wins. */
+				if (shader->key.ps.prolog.force_persp_sample_interp)
+					location = TGSI_INTERPOLATE_LOC_SAMPLE;
+				if (shader->key.ps.prolog.force_persp_center_interp)
+					location = TGSI_INTERPOLATE_LOC_CENTER;
+
+				switch (location) {
+				case TGSI_INTERPOLATE_LOC_SAMPLE:
+					key->ps_prolog.color_interp_vgpr_index[i] = 0;
+					shader->config.spi_ps_input_ena |=
+						S_0286CC_PERSP_SAMPLE_ENA(1);
+					break;
+				case TGSI_INTERPOLATE_LOC_CENTER:
+					key->ps_prolog.color_interp_vgpr_index[i] = 2;
+					shader->config.spi_ps_input_ena |=
+						S_0286CC_PERSP_CENTER_ENA(1);
+					break;
+				case TGSI_INTERPOLATE_LOC_CENTROID:
+					key->ps_prolog.color_interp_vgpr_index[i] = 4;
+					shader->config.spi_ps_input_ena |=
+						S_0286CC_PERSP_CENTROID_ENA(1);
+					break;
+				default:
+					assert(0);
+				}
+				break;
+			case TGSI_INTERPOLATE_LINEAR:
+				/* A forced linear location overrides the declared one; center is tested last, so it wins. */
+				if (shader->key.ps.prolog.force_linear_sample_interp)
+					location = TGSI_INTERPOLATE_LOC_SAMPLE;
+				if (shader->key.ps.prolog.force_linear_center_interp)
+					location = TGSI_INTERPOLATE_LOC_CENTER;
+
+				switch (location) {
+				case TGSI_INTERPOLATE_LOC_SAMPLE:
+					key->ps_prolog.color_interp_vgpr_index[i] = 6;
+					shader->config.spi_ps_input_ena |=
+						S_0286CC_LINEAR_SAMPLE_ENA(1);
+					break;
+				case TGSI_INTERPOLATE_LOC_CENTER:
+					key->ps_prolog.color_interp_vgpr_index[i] = 8;
+					shader->config.spi_ps_input_ena |=
+						S_0286CC_LINEAR_CENTER_ENA(1);
+					break;
+				case TGSI_INTERPOLATE_LOC_CENTROID:
+					key->ps_prolog.color_interp_vgpr_index[i] = 10;
+					shader->config.spi_ps_input_ena |=
+						S_0286CC_LINEAR_CENTROID_ENA(1);
+					break;
+				default:
+					assert(0);
+				}
+				break;
+			default:
+				assert(0);
+			}
+		}
+	}
+}
+
+/**
+ * A PS prolog is needed iff colors are read or one of the states listed here is set.
+ */
+static bool si_need_ps_prolog(const union si_shader_part_key *key)
+{
+	return key->ps_prolog.colors_read ||
+	       key->ps_prolog.states.force_persp_sample_interp ||
+	       key->ps_prolog.states.force_linear_sample_interp ||
+	       key->ps_prolog.states.force_persp_center_interp ||
+	       key->ps_prolog.states.force_linear_center_interp ||
+	       key->ps_prolog.states.bc_optimize_for_persp ||
+	       key->ps_prolog.states.bc_optimize_for_linear ||
+	       key->ps_prolog.states.poly_stipple;
+}
+
+/**
  * Compute the PS epilog key, which contains all the information needed to
  * build the PS epilog function.
  */
 static void si_get_ps_epilog_key(struct si_shader *shader,
 				 union si_shader_part_key *key)
 {
 	struct tgsi_shader_info *info = &shader->selector->info;
 	memset(key, 0, sizeof(*key));
 	key->ps_epilog.colors_written = info->colors_written;
 	key->ps_epilog.writes_z = info->writes_z;
@@ -7549,115 +7676,107 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
 	memset(&epilog_key, 0, sizeof(epilog_key));
 	epilog_key.tcs_epilog.states = shader->key.tcs.epilog;
 
 	shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs,
 					    &epilog_key, tm, debug,
 					    si_compile_tcs_epilog);
 	return shader->epilog != NULL;
 }
 
 /**
- * Compile the pixel shader prolog. This handles:
+ * Build the pixel shader prolog function. This handles:
  * - two-side color selection and interpolation
  * - overriding interpolation parameters for the API PS
  * - polygon stippling
  *
  * All preloaded SGPRs and VGPRs are passed through unmodified unless they are
  * overriden by other states. (e.g. per-sample interpolation)
  * Interpolated colors are stored after the preloaded VGPRs.
  */
-static bool si_compile_ps_prolog(struct si_screen *sscreen,
-				 LLVMTargetMachineRef tm,
-				 struct pipe_debug_callback *debug,
-				 struct si_shader_part *out)
+static void si_build_ps_prolog_function(struct si_shader_context *ctx,
+					union si_shader_part_key *key)
 {
-	union si_shader_part_key *key = &out->key;
-	struct si_shader shader = {};
-	struct si_shader_context ctx;
-	struct gallivm_state *gallivm = &ctx.gallivm;
+	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMTypeRef *params;
 	LLVMValueRef ret, func;
 	int last_sgpr, num_params, num_returns, i, num_color_channels;
-	bool status = true;
 
-	si_init_shader_ctx(&ctx, sscreen, &shader, tm);
-	ctx.type = PIPE_SHADER_FRAGMENT;
-	shader.key.ps.prolog = key->ps_prolog.states;
+	assert(si_need_ps_prolog(key));
 
 	/* Number of inputs + 8 color elements. */
 	params = alloca((key->ps_prolog.num_input_sgprs +
 			 key->ps_prolog.num_input_vgprs + 8) *
 			sizeof(LLVMTypeRef));
 
 	/* Declare inputs. */
 	num_params = 0;
 	for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
-		params[num_params++] = ctx.i32;
+		params[num_params++] = ctx->i32;
 	last_sgpr = num_params - 1;
 
 	for (i = 0; i < key->ps_prolog.num_input_vgprs; i++)
-		params[num_params++] = ctx.f32;
+		params[num_params++] = ctx->f32;
 
 	/* Declare outputs (same as inputs + add colors if needed) */
 	num_returns = num_params;
 	num_color_channels = util_bitcount(key->ps_prolog.colors_read);
 	for (i = 0; i < num_color_channels; i++)
-		params[num_returns++] = ctx.f32;
+		params[num_returns++] = ctx->f32;
 
 	/* Create the function. */
-	si_create_function(&ctx, "ps_prolog", params, num_returns, params,
+	si_create_function(ctx, "ps_prolog", params, num_returns, params,
 			   num_params, last_sgpr);
-	func = ctx.main_fn;
+	func = ctx->main_fn;
 
 	/* Copy inputs to outputs. This should be no-op, as the registers match,
 	 * but it will prevent the compiler from overwriting them unintentionally.
 	 */
-	ret = ctx.return_value;
+	ret = ctx->return_value;
 	for (i = 0; i < num_params; i++) {
 		LLVMValueRef p = LLVMGetParam(func, i);
 		ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
 	}
 
 	/* Polygon stippling. */
 	if (key->ps_prolog.states.poly_stipple) {
 		/* POS_FIXED_PT is always last. */
 		unsigned pos = key->ps_prolog.num_input_sgprs +
 			       key->ps_prolog.num_input_vgprs - 1;
 		LLVMValueRef ptr[2], list;
 
 		/* Get the pointer to rw buffers. */
 		ptr[0] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS);
 		ptr[1] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS_HI);
 		list = lp_build_gather_values(gallivm, ptr, 2);
-		list = LLVMBuildBitCast(gallivm->builder, list, ctx.i64, "");
+		list = LLVMBuildBitCast(gallivm->builder, list, ctx->i64, "");
 		list = LLVMBuildIntToPtr(gallivm->builder, list,
-					  const_array(ctx.v16i8, SI_NUM_RW_BUFFERS), "");
+					  const_array(ctx->v16i8, SI_NUM_RW_BUFFERS), "");
 
-		si_llvm_emit_polygon_stipple(&ctx, list, pos);
+		si_llvm_emit_polygon_stipple(ctx, list, pos);
 	}
 
 	if (key->ps_prolog.states.bc_optimize_for_persp ||
 	    key->ps_prolog.states.bc_optimize_for_linear) {
 		unsigned i, base = key->ps_prolog.num_input_sgprs;
 		LLVMValueRef center[2], centroid[2], tmp, bc_optimize;
 
 		/* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
 		 * The hw doesn't compute CENTROID if the whole wave only
 		 * contains fully-covered quads.
 		 *
 		 * PRIM_MASK is after user SGPRs.
 		 */
 		bc_optimize = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
 		bc_optimize = LLVMBuildLShr(gallivm->builder, bc_optimize,
-					    LLVMConstInt(ctx.i32, 31, 0), "");
+					    LLVMConstInt(ctx->i32, 31, 0), "");
 		bc_optimize = LLVMBuildTrunc(gallivm->builder, bc_optimize,
-					     ctx.i1, "");
+					     ctx->i1, "");
 
 		if (key->ps_prolog.states.bc_optimize_for_persp) {
 			/* Read PERSP_CENTER. */
 			for (i = 0; i < 2; i++)
 				center[i] = LLVMGetParam(func, base + 2 + i);
 			/* Read PERSP_CENTROID. */
 			for (i = 0; i < 2; i++)
 				centroid[i] = LLVMGetParam(func, base + 4 + i);
 			/* Select PERSP_CENTROID. */
 			for (i = 0; i < 2; i++) {
@@ -7768,53 +7887,75 @@ static bool si_compile_ps_prolog(struct si_screen *sscreen,
 			unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
 					       key->ps_prolog.color_interp_vgpr_index[i];
 
 			/* Get the (i,j) updated by bc_optimize handling. */
 			interp[0] = LLVMBuildExtractValue(gallivm->builder, ret,
 							  interp_vgpr, "");
 			interp[1] = LLVMBuildExtractValue(gallivm->builder, ret,
 							  interp_vgpr + 1, "");
 			interp_ij = lp_build_gather_values(gallivm, interp, 2);
 			interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij,
-						     ctx.v2i32, "");
+						     ctx->v2i32, "");
 		}
 
 		/* Use the absolute location of the input. */
 		prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
 
 		if (key->ps_prolog.states.color_two_side) {
 			face = LLVMGetParam(func, face_vgpr);
-			face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, "");
+			face = LLVMBuildBitCast(gallivm->builder, face, ctx->i32, "");
 		}
 
-		interp_fs_input(&ctx,
+		interp_fs_input(ctx,
 				key->ps_prolog.color_attr_index[i],
 				TGSI_SEMANTIC_COLOR, i,
 				key->ps_prolog.num_interp_inputs,
 				key->ps_prolog.colors_read, interp_ij,
 				prim_mask, face, color);
 
 		while (writemask) {
 			unsigned chan = u_bit_scan(&writemask);
 			ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan],
 						   num_params++, "");
 		}
 	}
 
 	/* Tell LLVM to insert WQM instruction sequence when needed. */
 	if (key->ps_prolog.wqm) {
 		LLVMAddTargetDependentFunctionAttr(func,
 						   "amdgpu-ps-wqm-outputs", "");
 	}
 
+	si_llvm_build_ret(ctx, ret);
+}
+
+/**
+ * Compile the pixel shader prolog.
+ */
+static bool si_compile_ps_prolog(struct si_screen *sscreen,
+				 LLVMTargetMachineRef tm,
+				 struct pipe_debug_callback *debug,
+				 struct si_shader_part *out)
+{
+	union si_shader_part_key *key = &out->key;
+	struct si_shader shader = {};
+	struct si_shader_context ctx;
+	struct gallivm_state *gallivm = &ctx.gallivm;
+	bool status = true;
+
+	si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+	ctx.type = PIPE_SHADER_FRAGMENT;
+	shader.key.ps.prolog = key->ps_prolog.states;
+
+	si_build_ps_prolog_function(&ctx, key);
+
 	/* Compile. */
-	si_llvm_build_ret(&ctx, ret);
 	si_llvm_finalize_module(&ctx,
 		r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_FRAGMENT));
 
 	if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
 			    gallivm->module, debug, ctx.type,
 			    "Fragment Shader Prolog"))
 		status = false;
 
 	si_llvm_dispose(&ctx);
 	return status;
@@ -7956,137 +8097,28 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
 }
 
 /**
  * Select and compile (or reuse) pixel shader parts (prolog & epilog).
  */
 static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 				      LLVMTargetMachineRef tm,
 				      struct si_shader *shader,
 				      struct pipe_debug_callback *debug)
 {
-	struct tgsi_shader_info *info = &shader->selector->info;
 	union si_shader_part_key prolog_key;
 	union si_shader_part_key epilog_key;
-	unsigned i;
 
 	/* Get the prolog. */
-	memset(&prolog_key, 0, sizeof(prolog_key));
-	prolog_key.ps_prolog.states = shader->key.ps.prolog;
-	prolog_key.ps_prolog.colors_read = info->colors_read;
-	prolog_key.ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
-	prolog_key.ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
-	prolog_key.ps_prolog.wqm = info->uses_derivatives &&
-		(prolog_key.ps_prolog.colors_read ||
-		 prolog_key.ps_prolog.states.force_persp_sample_interp ||
-		 prolog_key.ps_prolog.states.force_linear_sample_interp ||
-		 prolog_key.ps_prolog.states.force_persp_center_interp ||
-		 prolog_key.ps_prolog.states.force_linear_center_interp ||
-		 prolog_key.ps_prolog.states.bc_optimize_for_persp ||
-		 prolog_key.ps_prolog.states.bc_optimize_for_linear);
-
-	if (info->colors_read) {
-		unsigned *color = shader->selector->color_attr_index;
-
-		if (shader->key.ps.prolog.color_two_side) {
-			/* BCOLORs are stored after the last input. */
-			prolog_key.ps_prolog.num_interp_inputs = info->num_inputs;
-			prolog_key.ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
-			shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
-		}
-
-		for (i = 0; i < 2; i++) {
-			unsigned interp = info->input_interpolate[color[i]];
-			unsigned location = info->input_interpolate_loc[color[i]];
-
-			if (!(info->colors_read & (0xf << i*4)))
-				continue;
-
-			prolog_key.ps_prolog.color_attr_index[i] = color[i];
-
-			if (shader->key.ps.prolog.flatshade_colors &&
-			    interp == TGSI_INTERPOLATE_COLOR)
-				interp = TGSI_INTERPOLATE_CONSTANT;
-
-			switch (interp) {
-			case TGSI_INTERPOLATE_CONSTANT:
-				prolog_key.ps_prolog.color_interp_vgpr_index[i] = -1;
-				break;
-			case TGSI_INTERPOLATE_PERSPECTIVE:
-			case TGSI_INTERPOLATE_COLOR:
-				/* Force the interpolation location for colors here. */
-				if (shader->key.ps.prolog.force_persp_sample_interp)
-					location = TGSI_INTERPOLATE_LOC_SAMPLE;
-				if (shader->key.ps.prolog.force_persp_center_interp)
-					location = TGSI_INTERPOLATE_LOC_CENTER;
-
-				switch (location) {
-				case TGSI_INTERPOLATE_LOC_SAMPLE:
-					prolog_key.ps_prolog.color_interp_vgpr_index[i] = 0;
-					shader->config.spi_ps_input_ena |=
-						S_0286CC_PERSP_SAMPLE_ENA(1);
-					break;
-				case TGSI_INTERPOLATE_LOC_CENTER:
-					prolog_key.ps_prolog.color_interp_vgpr_index[i] = 2;
-					shader->config.spi_ps_input_ena |=
-						S_0286CC_PERSP_CENTER_ENA(1);
-					break;
-				case TGSI_INTERPOLATE_LOC_CENTROID:
-					prolog_key.ps_prolog.color_interp_vgpr_index[i] = 4;
-					shader->config.spi_ps_input_ena |=
-						S_0286CC_PERSP_CENTROID_ENA(1);
-					break;
-				default:
-					assert(0);
-				}
-				break;
-			case TGSI_INTERPOLATE_LINEAR:
-				/* Force the interpolation location for colors here. */
-				if (shader->key.ps.prolog.force_linear_sample_interp)
-					location = TGSI_INTERPOLATE_LOC_SAMPLE;
-				if (shader->key.ps.prolog.force_linear_center_interp)
-					location = TGSI_INTERPOLATE_LOC_CENTER;
-
-				switch (location) {
-				case TGSI_INTERPOLATE_LOC_SAMPLE:
-					prolog_key.ps_prolog.color_interp_vgpr_index[i] = 6;
-					shader->config.spi_ps_input_ena |=
-						S_0286CC_LINEAR_SAMPLE_ENA(1);
-					break;
-				case TGSI_INTERPOLATE_LOC_CENTER:
-					prolog_key.ps_prolog.color_interp_vgpr_index[i] = 8;
-					shader->config.spi_ps_input_ena |=
-						S_0286CC_LINEAR_CENTER_ENA(1);
-					break;
-				case TGSI_INTERPOLATE_LOC_CENTROID:
-					prolog_key.ps_prolog.color_interp_vgpr_index[i] = 10;
-					shader->config.spi_ps_input_ena |=
-						S_0286CC_LINEAR_CENTROID_ENA(1);
-					break;
-				default:
-					assert(0);
-				}
-				break;
-			default:
-				assert(0);
-			}
-		}
-	}
+	si_get_ps_prolog_key(shader, &prolog_key);
 
 	/* The prolog is a no-op if these aren't set. */
-	if (prolog_key.ps_prolog.colors_read ||
-	    prolog_key.ps_prolog.states.force_persp_sample_interp ||
-	    prolog_key.ps_prolog.states.force_linear_sample_interp ||
-	    prolog_key.ps_prolog.states.force_persp_center_interp ||
-	    prolog_key.ps_prolog.states.force_linear_center_interp ||
-	    prolog_key.ps_prolog.states.bc_optimize_for_persp ||
-	    prolog_key.ps_prolog.states.bc_optimize_for_linear ||
-	    prolog_key.ps_prolog.states.poly_stipple) {
+	if (si_need_ps_prolog(&prolog_key)) {
 		shader->prolog =
 			si_get_shader_part(sscreen, &sscreen->ps_prologs,
 					   &prolog_key, tm, debug,
 					   si_compile_ps_prolog);
 		if (!shader->prolog)
 			return false;
 	}
 
 	/* Get the epilog. */
 	si_get_ps_epilog_key(shader, &epilog_key);
-- 
2.7.4



More information about the mesa-dev mailing list