[Mesa-dev] [PATCH] ac/nir: do not emit unnecessary null exports in fragment shaders

Samuel Pitoiset samuel.pitoiset at gmail.com
Tue Mar 6 16:09:44 UTC 2018


Null exports should only be needed when no other exports are
emitted. This removes a bunch of unnecessary
'exp null off, off, off, off done vm' instructions.

Affected games are Dota 2 and Wolfenstein 2. I am not sure this
really helps, but code size decreases there.

Polaris10:
Totals from affected shaders:
SGPRS: 8216 -> 8216 (0.00 %)
VGPRS: 7072 -> 7072 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 454968 -> 453896 (-0.24 %) bytes
Max Waves: 772 -> 772 (0.00 %)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 src/amd/common/ac_nir_to_llvm.c | 98 ++++++++++++++++++++++-------------------
 1 file changed, 52 insertions(+), 46 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index ea51c3a54a..c370c80ec7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -6484,67 +6484,73 @@ handle_tcs_outputs_post(struct radv_shader_context *ctx)
 	write_tess_factors(ctx);
 }
 
-static bool
-si_export_mrt_color(struct radv_shader_context *ctx,
-		    LLVMValueRef *color, unsigned index, bool is_last,
-		    struct ac_export_args *args)
-{
-	/* Export */
-	si_llvm_init_export_args(ctx, color, 0xf,
-				 V_008DFC_SQ_EXP_MRT + index, args);
-
-	if (is_last) {
-		args->valid_mask = 1; /* whether the EXEC mask is valid */
-		args->done = 1; /* DONE bit */
-	} else if (!args->enabled_channels)
-		return false; /* unnecessary NULL export */
-
-	return true;
-}
+struct radv_ps_exports {
+	unsigned num;
+	struct ac_export_args args[10];
+};
 
 static void
 radv_export_mrt_z(struct radv_shader_context *ctx,
 		  LLVMValueRef depth, LLVMValueRef stencil,
-		  LLVMValueRef samplemask)
+		  LLVMValueRef samplemask, struct radv_ps_exports *exp)
 {
 	struct ac_export_args args;
 
 	ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
 
-	ac_build_export(&ctx->ac, &args);
+	memcpy(&exp->args[exp->num++], &args, sizeof(args));
 }
 
 static void
 handle_fs_outputs_post(struct radv_shader_context *ctx)
 {
-	unsigned index = 0;
 	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
-	struct ac_export_args color_args[8];
+	struct radv_ps_exports exp = {};
+	struct ac_export_args args[8];
+	int last_color_export = -1;
+	int last_non_null = -1;
+	unsigned colors_written;
+
+	/* Find the last written color export. */
+	colors_written = ctx->output_mask >> FRAG_RESULT_DATA0;
+	if (!ctx->shader_info->info.ps.writes_z &&
+	    !ctx->shader_info->info.ps.writes_stencil &&
+	    !ctx->shader_info->info.ps.writes_sample_mask) {
+		last_color_export = util_last_bit(colors_written) - 1;
+	}
+
+	/* Get the export arguments, and find the last non-null color export. */
+	for (unsigned mrt = 0; mrt < 8; mrt++) {
+		unsigned index = mrt + FRAG_RESULT_DATA0;
+		LLVMValueRef color[4];
+
+		if (!(colors_written & (1 << mrt)))
+			continue;
 
-	for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
-		LLVMValueRef values[4];
-		bool last = false;
+		for (unsigned chan = 0; chan < 4; chan++) {
+			color[chan] =
+				ac_to_float(&ctx->ac,
+					    radv_load_output(ctx, index, chan));
+		}
 
-		if (!(ctx->output_mask & (1ull << i)))
-			continue;
+		si_llvm_init_export_args(ctx, color, 0xf,
+					 V_008DFC_SQ_EXP_MRT + mrt, &args[mrt]);
+		if (args[mrt].enabled_channels)
+			last_non_null = mrt;
+	}
 
-		if (i < FRAG_RESULT_DATA0)
+	/* Emit all exports. */
+	for (unsigned mrt = 0; mrt < 8; mrt++) {
+		if (!(colors_written & (1 << mrt)))
 			continue;
 
-		for (unsigned j = 0; j < 4; j++)
-			values[j] = ac_to_float(&ctx->ac,
-						radv_load_output(ctx, i, j));
+		if (last_color_export != -1 && last_non_null == mrt) {
+			args[mrt].valid_mask = 1; /* whether the EXEC mask is valid */
+			args[mrt].done = 1; /* DONE bit */
+		} else if (!args[mrt].enabled_channels)
+			continue; /* unnecessary NULL export */
 
-		if (!ctx->shader_info->info.ps.writes_z &&
-		    !ctx->shader_info->info.ps.writes_stencil &&
-		    !ctx->shader_info->info.ps.writes_sample_mask)
-			last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
-
-		bool ret = si_export_mrt_color(ctx, values,
-					       i - FRAG_RESULT_DATA0,
-					       last, &color_args[index]);
-		if (ret)
-			index++;
+		memcpy(&exp.args[exp.num++], &args[mrt], sizeof(args[mrt]));
 	}
 
 	/* Process depth, stencil, samplemask. */
@@ -6561,14 +6567,14 @@ handle_fs_outputs_post(struct radv_shader_context *ctx)
 					 radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0));
 	}
 
-	/* Export PS outputs. */
-	for (unsigned i = 0; i < index; i++)
-		ac_build_export(&ctx->ac, &color_args[i]);
-
 	if (depth || stencil || samplemask)
-		radv_export_mrt_z(ctx, depth, stencil, samplemask);
-	else if (!index)
+		radv_export_mrt_z(ctx, depth, stencil, samplemask, &exp);
+	else if (last_color_export == -1 || last_non_null == -1)
 		ac_build_export_null(&ctx->ac);
+
+	/* Export PS outputs. */
+	for (unsigned i = 0; i < exp.num; i++)
+		ac_build_export(&ctx->ac, &exp.args[i]);
 }
 
 static void
-- 
2.16.2



More information about the mesa-dev mailing list