Mesa (master): radeonsi: eliminate trivial constant VS outputs

Marek Olšák mareko at kemper.freedesktop.org
Wed Oct 19 20:21:52 UTC 2016


Module: Mesa
Branch: master
Commit: 3ec9975555d1cc5365413ad9062f412904f944a3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3ec9975555d1cc5365413ad9062f412904f944a3

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Tue Oct 18 15:20:22 2016 +0200

radeonsi: eliminate trivial constant VS outputs

These constant value VS PARAM exports:
- 0,0,0,0
- 0,0,0,1
- 1,1,1,0
- 1,1,1,1
can be loaded into PS inputs using the DEFAULT_VAL field, and the VS exports
can be removed from the IR to save export & parameter memory.

After LLVM optimizations, analyze the IR to see which exports are equal to
the ones listed above (or undef) and remove them if they are.

Targeted use cases:
- All DX9 eON ports always clear 10 VS outputs to 0.0 even if most of them
  are unused by PS (such as Witcher 2 below).
- VS output arrays with unused elements that the GLSL compiler can't
  eliminate (such as Batman below).

The shader-db deltas are quite interesting:
(not from upstream si-report.py, it won't be upstreamed)

PERCENTAGE DELTAS    Shaders PARAM exports (affected only)
batman_arkham_origins    589  -67.17 %
bioshock-infinite       1769   -0.47 %
dirt-showdown            548   -2.68 %
dota2                   1747   -3.36 %
f1-2015                  776   -4.94 %
left_4_dead_2           1762   -0.07 %
metro_2033_redux        2670   -0.43 %
portal                   474   -0.22 %
talos_principle          324   -3.63 %
warsow                   176   -2.20 %
witcher2                1040  -73.78 %
----------------------------------------
All affected             991  -65.37 %  ... 9681 -> 3353
----------------------------------------
Total                  26725  -10.82 %  ... 58490 -> 52162

v2: treat Undef as both 0 and 1

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com> (v1)
Tested-by: Edmondo Tommasina <edmondo.tommasina at gmail.com> (v1)

---

 src/gallium/drivers/radeonsi/si_shader.c        | 160 ++++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_shader.h        |  11 ++
 src/gallium/drivers/radeonsi/si_state_shaders.c |  17 ++-
 3 files changed, 186 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 6a42a8f..a810d9a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6523,6 +6523,159 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
 	bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
 }
 
+/* Try to replace the PARAM export "inst" (PARAM index "offset") with a
+ * DEFAULT_VAL constant. Return true if the export has been eliminated. */
+static bool si_eliminate_const_output(struct si_shader_context *ctx,
+				      LLVMValueRef inst, unsigned offset)
+{
+	struct si_shader *shader = ctx->shader;
+	unsigned num_outputs = shader->selector->info.num_outputs;
+	unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
+	bool is_zero[4] = {}, is_one[4] = {};
+
+	for (i = 0; i < 4; i++) {
+		LLVMBool loses_info;
+		LLVMValueRef p = LLVMGetOperand(inst, 5 + i);
+
+		/* Undef outputs are eliminated too: undef matches both 0 and 1. */
+		if (LLVMIsUndef(p)) {
+			is_zero[i] = true;
+			is_one[i] = true;
+		} else if (LLVMIsAConstantFP(p)) {
+			double a = LLVMConstRealGetDouble(p, &loses_info);
+
+			if (a == 0)
+				is_zero[i] = true;
+			else if (a == 1)
+				is_one[i] = true;
+			else
+				return false; /* other constant */
+		} else
+			return false; /* not constant, or not a plain FP constant */
+	}
+
+	/* Only certain combinations of 0 and 1 can be eliminated. */
+	if (is_zero[0] && is_zero[1] && is_zero[2])
+		default_val = is_zero[3] ? 0 : 1;
+	else if (is_one[0] && is_one[1] && is_one[2])
+		default_val = is_zero[3] ? 2 : 3;
+	else
+		return false;
+
+	/* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
+	LLVMInstructionEraseFromParent(inst);
+
+	/* Change OFFSET to DEFAULT_VAL. */
+	for (i = 0; i < num_outputs; i++) {
+		if (shader->info.vs_output_param_offset[i] == offset) {
+			shader->info.vs_output_param_offset[i] =
+				EXP_PARAM_DEFAULT_VAL_0000 + default_val;
+			break;
+		}
+	}
+	return true;
+}
+
+struct si_vs_exports { /* PARAM exports that were not eliminated */
+	unsigned num; /* number of valid entries in offset[] and inst[] */
+	unsigned offset[SI_MAX_VS_OUTPUTS]; /* PARAM index of each kept export */
+	LLVMValueRef inst[SI_MAX_VS_OUTPUTS]; /* the matching export call */
+};
+/* Drop constant PARAM exports from main_fn and renumber the remaining ones. */
+static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
+{
+	struct si_shader *shader = ctx->shader;
+	struct tgsi_shader_info *info = &shader->selector->info;
+	LLVMBasicBlockRef bb;
+	struct si_vs_exports exports;
+	bool removed_any = false;
+
+	exports.num = 0;
+
+	if ((ctx->type == PIPE_SHADER_VERTEX &&
+	     (shader->key.vs.as_es || shader->key.vs.as_ls)) ||
+	    (ctx->type == PIPE_SHADER_TESS_EVAL && shader->key.tes.as_es))
+		return; /* VS with as_es/as_ls or TES with as_es: nothing to do */
+
+	/* Walk every instruction of every basic block of the main function. */
+	bb = LLVMGetFirstBasicBlock(ctx->main_fn);
+	while (bb) {
+		LLVMValueRef inst = LLVMGetFirstInstruction(bb);
+
+		while (inst) {
+			LLVMValueRef cur = inst;
+			inst = LLVMGetNextInstruction(inst); /* advance first; "cur" may get erased below */
+
+			if (LLVMGetInstructionOpcode(cur) != LLVMCall)
+				continue;
+
+			LLVMValueRef callee = LLVMGetCalledValue(cur);
+			LLVMValueKind kind = LLVMGetValueKind(callee);
+
+			if (kind != LLVMFunctionValueKind)
+				continue; /* callee is not a function value */
+
+			const char *name = LLVMGetValueName(callee);
+			unsigned num_args = LLVMCountParams(callee);
+
+			/* Check if this is an export instruction (9-parameter llvm.SI.export). */
+			if (num_args != 9 || strcmp(name, "llvm.SI.export"))
+				continue;
+
+			LLVMValueRef arg = LLVMGetOperand(cur, 3); /* export target operand */
+			unsigned target = LLVMConstIntGetZExtValue(arg);
+
+			if (target < V_008DFC_SQ_EXP_PARAM)
+				continue; /* target is below the PARAM range */
+
+			target -= V_008DFC_SQ_EXP_PARAM; /* now relative to the first PARAM target */
+
+			/* Eliminate constant value PARAM exports; remember the others. */
+			if (si_eliminate_const_output(ctx, cur, target)) {
+				removed_any = true;
+			} else {
+				exports.offset[exports.num] = target;
+				exports.inst[exports.num] = cur;
+				exports.num++;
+			}
+		}
+		bb = LLVMGetNextBasicBlock(bb);
+	}
+
+	/* Remove holes in export memory due to removed PARAM exports.
+	 * This is done by renumbering all PARAM exports.
+	 */
+	if (removed_any) {
+		ubyte current_offset[SI_MAX_VS_OUTPUTS];
+		unsigned new_count = 0;
+		unsigned out, i;
+
+		/* Make a copy of the offsets. We need the old version while
+		 * we are modifying some of them. */
+		assert(sizeof(current_offset) ==
+		       sizeof(shader->info.vs_output_param_offset));
+		memcpy(current_offset, shader->info.vs_output_param_offset,
+		       sizeof(current_offset));
+
+		for (i = 0; i < exports.num; i++) {
+			unsigned offset = exports.offset[i];
+
+			for (out = 0; out < info->num_outputs; out++) {
+				if (current_offset[out] != offset)
+					continue; /* compare against the pre-renumbering offsets */
+
+				LLVMSetOperand(exports.inst[i], 3,
+					       LLVMConstInt(ctx->i32,
+							    V_008DFC_SQ_EXP_PARAM + new_count, 0));
+				shader->info.vs_output_param_offset[out] = new_count;
+				new_count++;
+				break; /* only the first matching output is updated */
+			}
+		}
+		shader->info.nr_param_exports = new_count;
+	}
+}
+
 int si_compile_tgsi_shader(struct si_screen *sscreen,
 			   LLVMTargetMachineRef tm,
 			   struct si_shader *shader,
@@ -6546,6 +6699,9 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 	si_init_shader_ctx(&ctx, sscreen, shader, tm);
 	ctx.is_monolithic = is_monolithic;
 
+	memset(shader->info.vs_output_param_offset, 0xff,
+	       sizeof(shader->info.vs_output_param_offset));
+
 	shader->info.uses_instanceid = sel->info.uses_instanceid;
 
 	bld_base = &ctx.soa.bld_base;
@@ -6630,6 +6786,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 	si_llvm_finalize_module(&ctx,
 				    r600_extra_shader_checks(&sscreen->b, ctx.type));
 
+	/* Post-optimization transformations. */
+	si_eliminate_const_vs_outputs(&ctx);
+
+	/* Compile to bytecode. */
 	r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
 			    mod, debug, ctx.type, "TGSI shader");
 	if (r) {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index b07210c..6c7a05f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -415,6 +415,17 @@ struct si_shader_config {
 	unsigned			rsrc2;
 };
 
+enum { /* values stored in si_shader_info.vs_output_param_offset[] */
+	/* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
+	EXP_PARAM_OFFSET_0 = 0,
+	EXP_PARAM_OFFSET_31 = 31,
+	/* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
+	EXP_PARAM_DEFAULT_VAL_0000 = 64, /* constant (0, 0, 0, 0) */
+	EXP_PARAM_DEFAULT_VAL_0001, /* constant (0, 0, 0, 1) */
+	EXP_PARAM_DEFAULT_VAL_1110, /* constant (1, 1, 1, 0) */
+	EXP_PARAM_DEFAULT_VAL_1111, /* constant (1, 1, 1, 1) */
+};
+
 /* GCN-specific shader info. */
 struct si_shader_info {
 	ubyte			vs_output_param_offset[SI_MAX_VS_OUTPUTS];
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 137a5d1..f59bfcd 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1567,7 +1567,7 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
 				     unsigned index, unsigned interpolate)
 {
 	struct tgsi_shader_info *vsinfo = &vs->selector->info;
-	unsigned j, ps_input_cntl = 0;
+	unsigned j, offset, ps_input_cntl = 0;
 
 	if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
 	    (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
@@ -1582,7 +1582,20 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
 	for (j = 0; j < vsinfo->num_outputs; j++) {
 		if (name == vsinfo->output_semantic_name[j] &&
 		    index == vsinfo->output_semantic_index[j]) {
-			ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[j]);
+			offset = vs->info.vs_output_param_offset[j];
+
+			if (offset <= EXP_PARAM_OFFSET_31) {
+				/* The input is loaded from parameter memory. */
+				ps_input_cntl |= S_028644_OFFSET(offset);
+			} else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
+				/* The input is a DEFAULT_VAL constant. */
+				assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
+				       offset <= EXP_PARAM_DEFAULT_VAL_1111);
+
+				offset -= EXP_PARAM_DEFAULT_VAL_0000;
+				ps_input_cntl = S_028644_OFFSET(0x20) |
+						S_028644_DEFAULT_VAL(offset);
+			}
 			break;
 		}
 	}




More information about the mesa-commit mailing list