[Mesa-dev] [PATCH 3/3] radv/ac: eliminate unused vertex shader outputs.

Dave Airlie airlied at gmail.com
Fri Apr 21 02:41:46 UTC 2017


From: Dave Airlie <airlied at redhat.com>

This is ported from radeonsi. At least one Talos shader drops an
export as a result of this change, which also saves some
VGPR usage.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/amd/common/ac_nir_to_llvm.c | 178 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 178 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index ab929bc..38d5359 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -5753,6 +5753,182 @@ static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
 	LLVMDisposePassManager(passmgr);
 }
 
+#define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3) /* operand index of the export target; depends on the LLVM version */
+#define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5) /* operand index of the first of the four exported values; depends on the LLVM version */
+
+/* Try to drop one PARAM export whose four values are all constants.
+ *
+ * An export can be dropped when each of its four value operands is undef,
+ * the FP constant 0 or the FP constant 1, and the first three values agree
+ * on a single constant. In that case the export instruction is erased and
+ * the first entry of outinfo->vs_output_param_offset[] equal to 'offset'
+ * is rewritten to EXP_PARAM_DEFAULT_VAL_0000 + DEFAULT_VAL, where
+ * DEFAULT_VAL (SPI_PS_INPUT_CNTL_i.DEFAULT_VAL) is chosen as:
+ *   0: first three values 0, fourth 0
+ *   1: first three values 0, fourth 1
+ *   2: first three values 1, fourth 0
+ *   3: first three values 1, fourth 1
+ *
+ * 'inst' is the export call, 'offset' its PARAM slot index.
+ * Returns true if the PARAM export has been eliminated, false if it was
+ * left untouched.
+ */
+static bool
+ac_eliminate_const_output(struct nir_to_llvm_context *ctx,
+			  struct ac_vs_output_info *outinfo,
+			  LLVMValueRef inst, unsigned offset)
+{
+	bool can_be_zero[4] = {false}, can_be_one[4] = {false};
+	unsigned chan, slot, default_val;
+
+	for (chan = 0; chan < 4; chan++) {
+		LLVMValueRef value = LLVMGetOperand(inst, EXP_OUT0 + chan);
+
+		if (LLVMIsUndef(value)) {
+			/* Undef matches either constant, so it never
+			 * prevents elimination. */
+			can_be_zero[chan] = true;
+			can_be_one[chan] = true;
+			continue;
+		}
+
+		/* Anything that is not an FP constant must be exported. */
+		if (!LLVMIsAConstantFP(value))
+			return false;
+
+		LLVMBool loses_info;
+		double real = LLVMConstRealGetDouble(value, &loses_info);
+
+		if (real == 0)
+			can_be_zero[chan] = true;
+		else if (real == 1)
+			can_be_one[chan] = true;
+		else
+			return false; /* other constant */
+	}
+
+	/* Only certain combinations of 0 and 1 can be eliminated: the first
+	 * three values have to share one constant. The fourth value is known
+	 * to be 0 or 1 (or undef) at this point. */
+	bool first3_zero = can_be_zero[0] && can_be_zero[1] && can_be_zero[2];
+	bool first3_one = can_be_one[0] && can_be_one[1] && can_be_one[2];
+
+	if (first3_zero)
+		default_val = can_be_zero[3] ? 0 : 1;
+	else if (first3_one)
+		default_val = can_be_zero[3] ? 2 : 3;
+	else
+		return false;
+
+	/* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
+	LLVMInstructionEraseFromParent(inst);
+
+	/* Change OFFSET to DEFAULT_VAL for the first output using this slot. */
+	for (slot = 0; slot < VARYING_SLOT_MAX; slot++) {
+		if (outinfo->vs_output_param_offset[slot] != offset)
+			continue;
+
+		outinfo->vs_output_param_offset[slot] =
+			EXP_PARAM_DEFAULT_VAL_0000 + default_val;
+		break;
+	}
+	return true;
+}
+
+struct si_vs_exports { /* PARAM exports that were not eliminated as constants */
+	unsigned num; /* number of valid entries in offset[] and inst[] */
+	unsigned offset[VARYING_SLOT_MAX]; /* PARAM slot index of each recorded export (target minus V_008DFC_SQ_EXP_PARAM) */
+	LLVMValueRef inst[VARYING_SLOT_MAX]; /* the export call instruction of each recorded export */
+};
+
+/* Return true if the value kind of 'v' is LLVMFunctionValueKind. */
+static bool
+ac_is_function(LLVMValueRef v)
+{
+	if (LLVMGetValueKind(v) != LLVMFunctionValueKind)
+		return false;
+
+	return true;
+}
+
+/* Remove PARAM exports that carry only constant 0/1 (or undef) values and
+ * renumber the PARAM exports that remain so that they are contiguous.
+ *
+ * Only two cases are processed: vertex shaders that are compiled neither as
+ * LS nor as ES, and tessellation evaluation shaders that are not compiled
+ * as ES. For every other stage the function returns without touching the
+ * IR or the shader info.
+ *
+ * The function walks all instructions of ctx->main_function, looks for
+ * calls to "llvm.SI.export" / "llvm.amdgcn.exp.f32" whose target is a PARAM
+ * slot, and hands each one to ac_eliminate_const_output(). Exports that
+ * survive are recorded; if at least one export was removed, the survivors
+ * get new consecutive PARAM targets and outinfo->vs_output_param_offset[]
+ * and outinfo->param_exports are updated to match.
+ */
+static void
+ac_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx)
+{
+	LLVMBasicBlockRef bb;
+	struct si_vs_exports exports;
+	bool removed_any = false;
+	struct ac_vs_output_info *outinfo;
+	exports.num = 0;
+
+	/* Select the output info to patch. Using a switch with a default
+	 * guarantees that outinfo is never used uninitialized, even for a
+	 * stage that is not listed here. */
+	switch (ctx->stage) {
+	case MESA_SHADER_VERTEX:
+		if (ctx->options->key.vs.as_ls ||
+		    ctx->options->key.vs.as_es)
+			return;
+		outinfo = &ctx->shader_info->vs.outinfo;
+		break;
+	case MESA_SHADER_TESS_EVAL:
+		/* NOTE(review): this reads key.vs.as_es while handling a
+		 * tessellation evaluation shader; confirm that this is the
+		 * intended key field for this stage. */
+		if (ctx->options->key.vs.as_es)
+			return;
+		outinfo = &ctx->shader_info->tes.outinfo;
+		break;
+	default:
+		/* Fragment, compute, tess control, geometry and any other
+		 * stage: nothing to do. */
+		return;
+	}
+
+	/* Process all LLVM instructions. */
+	bb = LLVMGetFirstBasicBlock(ctx->main_function);
+	while (bb) {
+		LLVMValueRef inst = LLVMGetFirstInstruction(bb);
+
+		while (inst) {
+			/* Advance first: 'cur' may be erased below. */
+			LLVMValueRef cur = inst;
+			inst = LLVMGetNextInstruction(inst);
+
+			if (LLVMGetInstructionOpcode(cur) != LLVMCall)
+				continue;
+
+			LLVMValueRef callee = LLVMGetCalledValue(cur);
+
+			if (!ac_is_function(callee))
+				continue;
+
+			const char *name = LLVMGetValueName(callee);
+			unsigned num_args = LLVMCountParams(callee);
+
+			/* Check if this is an export instruction. */
+			if ((num_args != 9 && num_args != 8) ||
+			    (strcmp(name, "llvm.SI.export") &&
+			     strcmp(name, "llvm.amdgcn.exp.f32")))
+				continue;
+
+			LLVMValueRef arg = LLVMGetOperand(cur, EXP_TARGET);
+			unsigned target = LLVMConstIntGetZExtValue(arg);
+
+			/* Targets below the PARAM range are not PARAM
+			 * exports; leave them alone. */
+			if (target < V_008DFC_SQ_EXP_PARAM)
+				continue;
+
+			target -= V_008DFC_SQ_EXP_PARAM;
+
+			/* Eliminate constant value PARAM exports. */
+			if (ac_eliminate_const_output(ctx, outinfo, cur, target)) {
+				removed_any = true;
+			} else {
+				/* Remember the surviving export for the
+				 * renumbering pass; never write past the
+				 * end of the bookkeeping arrays. */
+				assert(exports.num < VARYING_SLOT_MAX);
+				exports.offset[exports.num] = target;
+				exports.inst[exports.num] = cur;
+				exports.num++;
+			}
+		}
+		bb = LLVMGetNextBasicBlock(bb);
+	}
+
+	/* Remove holes in export memory due to removed PARAM exports.
+	 * This is done by renumbering all PARAM exports.
+	 */
+	if (removed_any) {
+		ubyte current_offset[VARYING_SLOT_MAX];
+		unsigned new_count = 0;
+		unsigned out, i;
+
+		/* Make a copy of the offsets. We need the old version while
+		 * we are modifying some of them. */
+		assert(sizeof(current_offset) ==
+		       sizeof(outinfo->vs_output_param_offset));
+		memcpy(current_offset, outinfo->vs_output_param_offset,
+		       sizeof(current_offset));
+
+		for (i = 0; i < exports.num; i++) {
+			unsigned offset = exports.offset[i];
+
+			/* Find the first output that used the old slot and
+			 * move both the export and the output to the next
+			 * free slot. */
+			for (out = 0; out < VARYING_SLOT_MAX; out++) {
+				if (current_offset[out] != offset)
+					continue;
+
+				LLVMSetOperand(exports.inst[i], EXP_TARGET,
+					       LLVMConstInt(ctx->i32,
+							    V_008DFC_SQ_EXP_PARAM + new_count, 0));
+				outinfo->vs_output_param_offset[out] = new_count;
+				new_count++;
+				break;
+			}
+		}
+		outinfo->param_exports = new_count;
+	}
+}
+
 static void
 ac_setup_rings(struct nir_to_llvm_context *ctx)
 {
@@ -5890,6 +6066,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 	LLVMBuildRetVoid(ctx.builder);
 
 	ac_llvm_finalize_module(&ctx);
+
+	ac_eliminate_const_vs_outputs(&ctx);
 	free(ctx.locals);
 	ralloc_free(ctx.defs);
 	ralloc_free(ctx.phis);
-- 
2.7.4



More information about the mesa-dev mailing list