[Mesa-dev] [PATCH 15/25] radeonsi: add PS epilog

Marek Olšák maraeo at gmail.com
Mon Feb 15 23:59:26 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_pipe.c   |   1 +
 src/gallium/drivers/radeonsi/si_pipe.h   |   1 +
 src/gallium/drivers/radeonsi/si_shader.c | 289 ++++++++++++++++++++++++++++++-
 src/gallium/drivers/radeonsi/si_shader.h |   7 +
 4 files changed, 296 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 645d418..02c430d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -541,6 +541,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
 		sscreen->vs_prologs,
 		sscreen->vs_epilogs,
 		sscreen->tcs_epilogs,
+		sscreen->ps_epilogs
 	};
 	unsigned i;
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index d9175b9..5d204ec 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -92,6 +92,7 @@ struct si_screen {
 	struct si_shader_part		*vs_prologs;
 	struct si_shader_part		*vs_epilogs;
 	struct si_shader_part		*tcs_epilogs;
+	struct si_shader_part		*ps_epilogs;
 };
 
 struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index bc6f8cd..915ac1d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -131,6 +131,10 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
 			       LLVMTargetMachineRef tm,
 			       struct tgsi_shader_info *info);
 
+/* ideally pass the sample mask input to the PS epilog as v13, which
+ * is its usual location, so that the shader doesn't have to add v_mov.
+ */
+#define PS_EPILOG_SAMPLEMASK_MIN_LOC 13
 #define VS_EPILOG_PRIMID_LOC 2
 
 #define PERSPECTIVE_BASE 0
@@ -2527,6 +2531,100 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
 		si_export_mrt_z(bld_base, depth, stencil, samplemask);
 }
 
+/**
+ * Return PS outputs in this order:
+ *
+ * v[0:3] = color0.xyzw
+ * v[4:7] = color1.xyzw
+ * ...
+ * vN+0 = Depth
+ * vN+1 = Stencil
+ * vN+2 = SampleMask
+ * vN+3 = SampleMaskIn (used for OpenGL smoothing)
+ *
+ * The alpha-ref SGPR is returned via its original location.
+ */
+static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
+{
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	struct si_shader *shader = ctx->shader;
+	struct lp_build_context *base = &bld_base->base;
+	struct tgsi_shader_info *info = &shader->selector->info;
+	LLVMBuilderRef builder = base->gallivm->builder;
+	unsigned i, j, first_vgpr, vgpr;
+
+	LLVMValueRef color[8][4] = {};
+	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
+	LLVMValueRef ret;
+
+	/* Read the output values. */
+	for (i = 0; i < info->num_outputs; i++) {
+		unsigned semantic_name = info->output_semantic_name[i];
+		unsigned semantic_index = info->output_semantic_index[i];
+
+		switch (semantic_name) {
+		case TGSI_SEMANTIC_COLOR:
+			assert(semantic_index < 8);
+			for (j = 0; j < 4; j++) {
+				LLVMValueRef ptr = ctx->radeon_bld.soa.outputs[i][j];
+				LLVMValueRef result = LLVMBuildLoad(builder, ptr, "");
+				color[semantic_index][j] = result;
+			}
+			break;
+		case TGSI_SEMANTIC_POSITION:
+			depth = LLVMBuildLoad(builder,
+					      ctx->radeon_bld.soa.outputs[i][2], "");
+			break;
+		case TGSI_SEMANTIC_STENCIL:
+			stencil = LLVMBuildLoad(builder,
+						ctx->radeon_bld.soa.outputs[i][1], "");
+			break;
+		case TGSI_SEMANTIC_SAMPLEMASK:
+			samplemask = LLVMBuildLoad(builder,
+						   ctx->radeon_bld.soa.outputs[i][0], "");
+			break;
+		default:
+			fprintf(stderr, "Warning: SI unhandled fs output type:%d\n",
+				semantic_name);
+		}
+	}
+
+	/* Fill the return structure. */
+	ret = ctx->return_value;
+
+	/* Set SGPRs. */
+	ret = LLVMBuildInsertValue(builder, ret,
+				   bitcast(bld_base, TGSI_TYPE_SIGNED,
+					   LLVMGetParam(ctx->radeon_bld.main_fn,
+							SI_PARAM_ALPHA_REF)),
+				   SI_SGPR_ALPHA_REF, "");
+
+	/* Set VGPRs */
+	first_vgpr = vgpr = SI_SGPR_ALPHA_REF + 1;
+	for (i = 0; i < ARRAY_SIZE(color); i++) {
+		if (!color[i][0])
+			continue;
+
+		for (j = 0; j < 4; j++)
+			ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, "");
+	}
+	if (depth)
+		ret = LLVMBuildInsertValue(builder, ret, depth, vgpr++, "");
+	if (stencil)
+		ret = LLVMBuildInsertValue(builder, ret, stencil, vgpr++, "");
+	if (samplemask)
+		ret = LLVMBuildInsertValue(builder, ret, samplemask, vgpr++, "");
+
+	/* Add the input sample mask for smoothing at the end. */
+	if (vgpr < first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC)
+		vgpr = first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC;
+	ret = LLVMBuildInsertValue(builder, ret,
+				   LLVMGetParam(ctx->radeon_bld.main_fn,
+						SI_PARAM_SAMPLE_COVERAGE), vgpr++, "");
+
+	ctx->return_value = ret;
+}
+
 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 				struct lp_build_tgsi_context *bld_base,
 				struct lp_build_emit_data *emit_data);
@@ -3720,7 +3818,7 @@ static void create_function(struct si_shader_context *ctx)
 	struct si_shader *shader = ctx->shader;
 	LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32;
 	LLVMTypeRef returns[16+32*4];
-	unsigned i, last_array_pointer, last_sgpr, num_params;
+	unsigned i, last_array_pointer, last_sgpr, num_params, num_return_sgprs;
 	unsigned num_returns = 0;
 
 	v3i32 = LLVMVectorType(ctx->i32, 3);
@@ -3866,6 +3964,27 @@ static void create_function(struct si_shader_context *ctx)
 		params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
 		params[SI_PARAM_POS_FIXED_PT] = ctx->f32;
 		num_params = SI_PARAM_POS_FIXED_PT+1;
+
+		if (!ctx->is_monolithic) {
+			/* Outputs for the epilog. */
+			num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
+			num_returns =
+				num_return_sgprs +
+				util_bitcount(shader->selector->info.colors_written) * 4 +
+				shader->selector->info.writes_z +
+				shader->selector->info.writes_stencil +
+				shader->selector->info.writes_samplemask +
+				1 /* SampleMaskIn */;
+
+			num_returns = MAX2(num_returns,
+					   num_return_sgprs +
+					   PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
+
+			for (i = 0; i < num_return_sgprs; i++)
+				returns[i] = ctx->i32;
+			for (; i < num_returns; i++)
+				returns[i] = ctx->f32;
+		}
 		break;
 
 	default:
@@ -4654,7 +4773,10 @@ static int si_compile_tgsi_shader(struct si_screen *sscreen,
 		break;
 	case TGSI_PROCESSOR_FRAGMENT:
 		ctx.radeon_bld.load_input = declare_input_fs;
-		bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
+		if (is_monolithic)
+			bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
+		else
+			bld_base->emit_epilogue = si_llvm_return_fs_outputs;
 		break;
 	default:
 		assert(!"Unsupported shader type");
@@ -5170,6 +5292,159 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
 	return shader->epilog != NULL;
 }
 
+/**
+ * Compile the pixel shader epilog. This handles everything that must be
+ * emulated for pixel shader exports. (alpha-test, format conversions, etc)
+ */
+static bool si_compile_ps_epilog(struct si_screen *sscreen,
+				 LLVMTargetMachineRef tm,
+				 struct pipe_debug_callback *debug,
+				 struct si_shader_part *out)
+{
+	union si_shader_part_key *key = &out->key;
+	struct si_shader shader = {};
+	struct si_shader_context ctx;
+	struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+	struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
+	LLVMTypeRef params[16+8*4+3];
+	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
+	int last_array_pointer, last_sgpr, num_params, i;
+	bool status = true;
+
+	si_init_shader_ctx(&ctx, sscreen, &shader, tm, NULL);
+	ctx.type = TGSI_PROCESSOR_FRAGMENT;
+	shader.key.ps.epilog = key->ps_epilog.states;
+
+	/* Declare input SGPRs. */
+	params[SI_PARAM_RW_BUFFERS] = ctx.i64;
+	params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
+	params[SI_PARAM_SAMPLERS] = ctx.i64;
+	params[SI_PARAM_UNUSED] = ctx.i64;
+	params[SI_PARAM_ALPHA_REF] = ctx.f32;
+	last_array_pointer = -1;
+	last_sgpr = SI_PARAM_ALPHA_REF;
+
+	/* Declare input VGPRs. */
+	num_params = (last_sgpr + 1) +
+		     util_bitcount(key->ps_epilog.colors_written) * 4 +
+		     key->ps_epilog.writes_z +
+		     key->ps_epilog.writes_stencil +
+		     key->ps_epilog.writes_samplemask;
+
+	num_params = MAX2(num_params,
+			  last_sgpr + 1 + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
+
+	assert(num_params <= ARRAY_SIZE(params));
+
+	for (i = last_sgpr + 1; i < num_params; i++)
+		params[i] = ctx.f32;
+
+	/* Create the function. */
+	si_create_function(&ctx, NULL, 0, params, num_params,
+			   last_array_pointer, last_sgpr);
+	/* Disable elimination of unused inputs. */
+	radeon_llvm_add_attribute(ctx.radeon_bld.main_fn,
+				  "InitialPSInputAddr", 0xffffff);
+
+	/* Process colors. */
+	unsigned vgpr = last_sgpr + 1;
+	unsigned colors_written = key->ps_epilog.colors_written;
+	int last_color_export = -1;
+
+	/* Find the last color export. */
+	if (!key->ps_epilog.writes_z &&
+	    !key->ps_epilog.writes_stencil &&
+	    !key->ps_epilog.writes_samplemask) {
+		unsigned spi_format = key->ps_epilog.states.spi_shader_col_format;
+
+		/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
+		if (colors_written == 0x1 && key->ps_epilog.states.last_cbuf > 0) {
+			/* Just set this if any of the colorbuffers are enabled. */
+			if (spi_format &
+			    ((1llu << (4 * (key->ps_epilog.states.last_cbuf + 1))) - 1))
+				last_color_export = 0;
+		} else {
+			for (i = 0; i < 8; i++)
+				if (colors_written & (1 << i) &&
+				    (spi_format >> (i * 4)) & 0xf)
+					last_color_export = i;
+		}
+	}
+
+	while (colors_written) {
+		LLVMValueRef color[4];
+		int mrt = u_bit_scan(&colors_written);
+
+		for (i = 0; i < 4; i++)
+			color[i] = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
+
+		si_export_mrt_color(bld_base, color, mrt,
+				    num_params - 1,
+				    mrt == last_color_export);
+	}
+
+	/* Process depth, stencil, samplemask. */
+	if (key->ps_epilog.writes_z)
+		depth = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
+	if (key->ps_epilog.writes_stencil)
+		stencil = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
+	if (key->ps_epilog.writes_samplemask)
+		samplemask = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
+
+	if (depth || stencil || samplemask)
+		si_export_mrt_z(bld_base, depth, stencil, samplemask);
+	else if (last_color_export == -1)
+		si_export_null(bld_base);
+
+	/* Compile. */
+	LLVMBuildRetVoid(gallivm->builder);
+	radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+	if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+			    gallivm->module, debug, ctx.type,
+			    "Fragment Shader Epilog"))
+		status = false;
+
+	radeon_llvm_dispose(&ctx.radeon_bld);
+	return status;
+}
+
+/**
+ * Select and compile (or reuse) pixel shader parts (prolog & epilog).
+ */
+static bool si_shader_select_ps_parts(struct si_screen *sscreen,
+				      LLVMTargetMachineRef tm,
+				      struct si_shader *shader,
+				      struct pipe_debug_callback *debug)
+{
+	struct tgsi_shader_info *info = &shader->selector->info;
+	union si_shader_part_key epilog_key;
+
+	/* Get the epilog. */
+	memset(&epilog_key, 0, sizeof(epilog_key));
+	epilog_key.ps_epilog.colors_written = info->colors_written;
+	epilog_key.ps_epilog.writes_z = info->writes_z;
+	epilog_key.ps_epilog.writes_stencil = info->writes_stencil;
+	epilog_key.ps_epilog.writes_samplemask = info->writes_samplemask;
+	epilog_key.ps_epilog.states = shader->key.ps.epilog;
+
+	shader->epilog =
+		si_get_shader_part(sscreen, &sscreen->ps_epilogs,
+				   &epilog_key, tm, debug,
+				   si_compile_ps_epilog);
+	if (!shader->epilog)
+		return false;
+
+	/* The sample mask input is always enabled, because the API shader always
+	 * passes it through to the epilog. Disable it here if it's unused.
+	 */
+	if (!shader->key.ps.epilog.poly_line_smoothing &&
+	    !shader->selector->info.reads_samplemask)
+		shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
+
+	return true;
+}
+
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 		     struct si_shader *shader,
 		     struct pipe_debug_callback *debug)
@@ -5196,6 +5471,16 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 			if (!si_shader_select_tes_parts(sscreen, tm, shader, debug))
 				return -1;
 			break;
+		case PIPE_SHADER_FRAGMENT:
+			if (!si_shader_select_ps_parts(sscreen, tm, shader, debug))
+				return -1;
+
+			/* Make sure we have at least as many VGPRs as there
+			 * are allocated inputs.
+			 */
+			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+							shader->num_input_vgprs);
+			break;
 		}
 
 		/* Update SGPR and VGPR counts. */
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index e2b9534..928cb2e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -281,6 +281,13 @@ union si_shader_part_key {
 	struct {
 		struct si_tcs_epilog_bits states;
 	} tcs_epilog;
+	struct {
+		struct si_ps_epilog_bits states;
+		unsigned	colors_written:8;
+		unsigned	writes_z:1;
+		unsigned	writes_stencil:1;
+		unsigned	writes_samplemask:1;
+	} ps_epilog;
 };
 
 union si_shader_key {
-- 
2.5.0



More information about the mesa-dev mailing list