[Mesa-dev] [PATCH 1/2] radeonsi/ac: move vertex export remove to common code.

Dave Airlie airlied at gmail.com
Tue Apr 25 23:12:10 UTC 2017


From: Dave Airlie <airlied at redhat.com>

This code can be shared with radv. The size of the export tracking arrays
is bumped from SI_MAX_VS_OUTPUTS to VARYING_SLOT_MAX here, but that
shouldn't cause much fallout.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/amd/common/ac_exp_param.h                   |  40 ++++++
 src/amd/common/ac_llvm_build.c                  | 156 +++++++++++++++++++++++-
 src/amd/common/ac_llvm_build.h                  |   6 +
 src/amd/common/ac_llvm_helper.cpp               |  20 +++
 src/amd/common/ac_llvm_util.h                   |   2 +
 src/gallium/drivers/radeonsi/si_shader.c        | 152 ++---------------------
 src/gallium/drivers/radeonsi/si_shader.h        |  12 --
 src/gallium/drivers/radeonsi/si_state_shaders.c |  13 +-
 8 files changed, 237 insertions(+), 164 deletions(-)
 create mode 100644 src/amd/common/ac_exp_param.h

diff --git a/src/amd/common/ac_exp_param.h b/src/amd/common/ac_exp_param.h
new file mode 100644
index 0000000..b97ce81
--- /dev/null
+++ b/src/amd/common/ac_exp_param.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+#ifndef AC_EXP_PARAM_H
+#define AC_EXP_PARAM_H
+
+enum { /* Per-output encodings stored in vs_output_param_offset[]. */
+	/* 0..31: output lives in parameter memory; value is SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
+	AC_EXP_PARAM_OFFSET_0 = 0,
+	AC_EXP_PARAM_OFFSET_31 = 31,
+	/* 64..67: export was eliminated; (value - 64) is SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
+	AC_EXP_PARAM_DEFAULT_VAL_0000 = 64, /* constant (0, 0, 0, 0) */
+	AC_EXP_PARAM_DEFAULT_VAL_0001, /* constant (0, 0, 0, 1) */
+	AC_EXP_PARAM_DEFAULT_VAL_1110, /* constant (1, 1, 1, 0) */
+	AC_EXP_PARAM_DEFAULT_VAL_1111, /* constant (1, 1, 1, 1) */
+	AC_EXP_PARAM_UNDEFINED = 255, /* no offset assigned; radeonsi memsets the array to this */
+};
+
+#endif
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index d45094c..f452f3e 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -33,11 +33,13 @@
 #include <stdio.h>
 
 #include "ac_llvm_util.h"
-
+#include "ac_exp_param.h"
 #include "util/bitscan.h"
 #include "util/macros.h"
 #include "sid.h"
 
+#include "shader_enums.h"
+
 /* Initialize module-independent parts of the context.
  *
  * The caller is responsible for initializing ctx::module and ctx::builder.
@@ -1244,3 +1246,155 @@ void ac_get_image_intr_name(const char *base_name,
                          data_type_name, coords_type_name, rsrc_type_name);
         }
 }
+
+#define AC_EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3) /* operand index of the export target; differs by LLVM version */
+#define AC_EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5) /* operand index of the first of the 4 exported values */
+
+/* Erase export `inst` if its 4 values form a DEFAULT_VAL pattern of constant 0/1 (or undef); return true if erased. */
+static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset, /* in/out: per-output encoding */
+				      uint32_t num_outputs, /* entries searched in vs_output_param_offset */
+				      LLVMValueRef inst, unsigned offset) /* export call and its PARAM index */
+{
+	unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
+	bool is_zero[4] = {}, is_one[4] = {};
+
+	for (i = 0; i < 4; i++) {
+		LLVMBool loses_info;
+		LLVMValueRef p = LLVMGetOperand(inst, AC_EXP_OUT0 + i);
+
+		/* Classify each channel. Undef sets both flags so it can match either pattern. */
+		if (LLVMIsUndef(p)) {
+			is_zero[i] = true;
+			is_one[i] = true;
+		} else if (LLVMIsAConstantFP(p)) {
+			double a = LLVMConstRealGetDouble(p, &loses_info);
+
+			if (a == 0)
+				is_zero[i] = true;
+			else if (a == 1)
+				is_one[i] = true;
+			else
+				return false; /* other constant */
+		} else
+			return false; /* neither undef nor a floating-point constant */
+	}
+
+	/* Only xyz all-zero or all-one can be eliminated; w then picks between the two variants. */
+	if (is_zero[0] && is_zero[1] && is_zero[2])
+		default_val = is_zero[3] ? 0 : 1; /* (0,0,0,0) or (0,0,0,1) */
+	else if (is_one[0] && is_one[1] && is_one[2])
+		default_val = is_zero[3] ? 2 : 3; /* (1,1,1,0) or (1,1,1,1) */
+	else
+		return false;
+
+	/* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
+	LLVMInstructionEraseFromParent(inst);
+
+	/* Change OFFSET to DEFAULT_VAL for the first output that used this PARAM index. */
+	for (i = 0; i < num_outputs; i++) {
+		if (vs_output_param_offset[i] == offset) {
+			vs_output_param_offset[i] =
+				AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
+			break;
+		}
+	}
+	return true;
+}
+
+struct ac_vs_exports { /* PARAM exports that were kept, in the order they were found */
+	unsigned num; /* number of valid entries in offset[] and inst[] */
+	unsigned offset[VARYING_SLOT_MAX]; /* PARAM index of each kept export (target - V_008DFC_SQ_EXP_PARAM) */
+	LLVMValueRef inst[VARYING_SLOT_MAX]; /* the corresponding export call instruction */
+};
+/* Erase DEFAULT_VAL-representable PARAM exports in main_fn and renumber the survivors so export memory has no holes. */
+void ac_eliminate_const_vs_outputs(struct ac_llvm_context *ctx,
+				   LLVMValueRef main_fn,
+				   uint8_t *vs_output_param_offset, /* in/out: per-output AC_EXP_PARAM_* encoding */
+				   uint32_t num_outputs, /* valid entries in vs_output_param_offset */
+				   uint8_t *num_param_exports) /* out: written only if an export was removed */
+{
+	LLVMBasicBlockRef bb;
+	bool removed_any = false;
+	struct ac_vs_exports exports;
+
+	assert(num_outputs < VARYING_SLOT_MAX); /* current_offset[] below holds VARYING_SLOT_MAX entries */
+	exports.num = 0;
+
+	/* Process all LLVM instructions. */
+	bb = LLVMGetFirstBasicBlock(main_fn);
+	while (bb) {
+		LLVMValueRef inst = LLVMGetFirstInstruction(bb);
+
+		while (inst) {
+			LLVMValueRef cur = inst;
+			inst = LLVMGetNextInstruction(inst); /* advance first: cur may be erased below */
+
+			if (LLVMGetInstructionOpcode(cur) != LLVMCall)
+				continue;
+
+			LLVMValueRef callee = ac_llvm_get_called_value(cur);
+
+			if (!ac_llvm_is_function(callee))
+				continue;
+
+			const char *name = LLVMGetValueName(callee);
+			unsigned num_args = LLVMCountParams(callee);
+
+			/* Check if this is an export instruction. */
+			if ((num_args != 9 && num_args != 8) ||
+			    (strcmp(name, "llvm.SI.export") &&
+			     strcmp(name, "llvm.amdgcn.exp.f32")))
+				continue;
+
+			LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
+			unsigned target = LLVMConstIntGetZExtValue(arg);
+
+			if (target < V_008DFC_SQ_EXP_PARAM)
+				continue; /* not a PARAM export */
+
+			target -= V_008DFC_SQ_EXP_PARAM; /* target is now the PARAM index */
+
+			/* Eliminate constant value PARAM exports. */
+			if (ac_eliminate_const_output(vs_output_param_offset,
+						      num_outputs, cur, target)) {
+				removed_any = true;
+			} else {
+				exports.offset[exports.num] = target;
+				exports.inst[exports.num] = cur;
+				exports.num++;
+			}
+		}
+		bb = LLVMGetNextBasicBlock(bb);
+	}
+
+	/* Remove holes in export memory due to removed PARAM exports.
+	 * This is done by renumbering all PARAM exports.
+	 */
+	if (removed_any) {
+		uint8_t current_offset[VARYING_SLOT_MAX];
+		unsigned new_count = 0;
+		unsigned out, i;
+
+		/* Snapshot the old offsets; they are needed while entries are rewritten.
+		 * Copy only num_outputs entries: the caller's array may be shorter than ours. */
+		memcpy(current_offset, vs_output_param_offset,
+		       num_outputs * sizeof(current_offset[0]));
+
+		for (i = 0; i < exports.num; i++) {
+			unsigned offset = exports.offset[i];
+
+			for (out = 0; out < num_outputs; out++) {
+				if (current_offset[out] != offset)
+					continue;
+
+				LLVMSetOperand(exports.inst[i], AC_EXP_TARGET,
+					       LLVMConstInt(ctx->i32,
+							    V_008DFC_SQ_EXP_PARAM + new_count, 0));
+				vs_output_param_offset[out] = new_count;
+				new_count++;
+				break; /* first matching output only */
+			}
+		}
+		*num_param_exports = new_count;
+	}
+}
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index d6edcde..1c3610a 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -239,6 +239,12 @@ void ac_get_image_intr_name(const char *base_name,
 			    LLVMTypeRef coords_type,
 			    LLVMTypeRef rsrc_type,
 			    char *out_name, unsigned out_len);
+
+void ac_eliminate_const_vs_outputs(struct ac_llvm_context *ac,
+				   LLVMValueRef main_fn,
+				   uint8_t *vs_output_param_offset,
+				   uint32_t num_outputs,
+				   uint8_t *num_param_exports);
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp
index 11fa809..582a8f7 100644
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -61,3 +61,23 @@ bool ac_is_sgpr_param(LLVMValueRef arg)
 	return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) ||
 	       AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
 }
+
+LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call) /* returns the callee operand of a call instruction */
+{
+#if HAVE_LLVM >= 0x0309
+	return LLVMGetCalledValue(call); /* LLVM-C entry point, used when HAVE_LLVM >= 3.9 */
+#elif HAVE_LLVM >= 0x0305
+	return llvm::wrap(llvm::CallSite(llvm::unwrap<llvm::Instruction>(call)).getCalledValue()); /* C++ fallback for 3.5..3.8 */
+#else
+	return NULL; /* radeonsi doesn't support LLVM this old. */
+#endif
+}
+
+bool ac_llvm_is_function(LLVMValueRef v) /* true if v is a function value */
+{
+#if HAVE_LLVM >= 0x0309
+	return LLVMGetValueKind(v) == LLVMFunctionValueKind; /* LLVM-C query, used when HAVE_LLVM >= 3.9 */
+#else
+	return llvm::isa<llvm::Function>(llvm::unwrap(v)); /* C++ fallback for older LLVM */
+#endif
+}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index faecf1e..38e7dde 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -64,6 +64,8 @@ void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
 			    unsigned attrib_mask);
 void ac_dump_module(LLVMModuleRef module);
 
+LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
+bool ac_llvm_is_function(LLVMValueRef v);
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 5d7175d..27d88b1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -41,6 +41,7 @@
 
 #include "ac_binary.h"
 #include "ac_llvm_util.h"
+#include "ac_exp_param.h"
 #include "si_shader_internal.h"
 #include "si_pipe.h"
 #include "sid.h"
@@ -6793,76 +6794,10 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
 	bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
 }
 
-#define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
-#define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
-
-/* Return true if the PARAM export has been eliminated. */
-static bool si_eliminate_const_output(struct si_shader_context *ctx,
-				      LLVMValueRef inst, unsigned offset)
-{
-	struct si_shader *shader = ctx->shader;
-	unsigned num_outputs = shader->selector->info.num_outputs;
-	unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
-	bool is_zero[4] = {}, is_one[4] = {};
-
-	for (i = 0; i < 4; i++) {
-		LLVMBool loses_info;
-		LLVMValueRef p = LLVMGetOperand(inst, EXP_OUT0 + i);
-
-		/* It's a constant expression. Undef outputs are eliminated too. */
-		if (LLVMIsUndef(p)) {
-			is_zero[i] = true;
-			is_one[i] = true;
-		} else if (LLVMIsAConstantFP(p)) {
-			double a = LLVMConstRealGetDouble(p, &loses_info);
-
-			if (a == 0)
-				is_zero[i] = true;
-			else if (a == 1)
-				is_one[i] = true;
-			else
-				return false; /* other constant */
-		} else
-			return false;
-	}
-
-	/* Only certain combinations of 0 and 1 can be eliminated. */
-	if (is_zero[0] && is_zero[1] && is_zero[2])
-		default_val = is_zero[3] ? 0 : 1;
-	else if (is_one[0] && is_one[1] && is_one[2])
-		default_val = is_zero[3] ? 2 : 3;
-	else
-		return false;
-
-	/* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
-	LLVMInstructionEraseFromParent(inst);
-
-	/* Change OFFSET to DEFAULT_VAL. */
-	for (i = 0; i < num_outputs; i++) {
-		if (shader->info.vs_output_param_offset[i] == offset) {
-			shader->info.vs_output_param_offset[i] =
-				EXP_PARAM_DEFAULT_VAL_0000 + default_val;
-			break;
-		}
-	}
-	return true;
-}
-
-struct si_vs_exports {
-	unsigned num;
-	unsigned offset[SI_MAX_VS_OUTPUTS];
-	LLVMValueRef inst[SI_MAX_VS_OUTPUTS];
-};
-
 static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
 {
 	struct si_shader *shader = ctx->shader;
 	struct tgsi_shader_info *info = &shader->selector->info;
-	LLVMBasicBlockRef bb;
-	struct si_vs_exports exports;
-	bool removed_any = false;
-
-	exports.num = 0;
 
 	if (ctx->type == PIPE_SHADER_FRAGMENT ||
 	    ctx->type == PIPE_SHADER_COMPUTE ||
@@ -6870,84 +6805,11 @@ static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
 	    shader->key.as_ls)
 		return;
 
-	/* Process all LLVM instructions. */
-	bb = LLVMGetFirstBasicBlock(ctx->main_fn);
-	while (bb) {
-		LLVMValueRef inst = LLVMGetFirstInstruction(bb);
-
-		while (inst) {
-			LLVMValueRef cur = inst;
-			inst = LLVMGetNextInstruction(inst);
-
-			if (LLVMGetInstructionOpcode(cur) != LLVMCall)
-				continue;
-
-			LLVMValueRef callee = lp_get_called_value(cur);
-
-			if (!lp_is_function(callee))
-				continue;
-
-			const char *name = LLVMGetValueName(callee);
-			unsigned num_args = LLVMCountParams(callee);
-
-			/* Check if this is an export instruction. */
-			if ((num_args != 9 && num_args != 8) ||
-			    (strcmp(name, "llvm.SI.export") &&
-			     strcmp(name, "llvm.amdgcn.exp.f32")))
-				continue;
-
-			LLVMValueRef arg = LLVMGetOperand(cur, EXP_TARGET);
-			unsigned target = LLVMConstIntGetZExtValue(arg);
-
-			if (target < V_008DFC_SQ_EXP_PARAM)
-				continue;
-
-			target -= V_008DFC_SQ_EXP_PARAM;
-
-			/* Eliminate constant value PARAM exports. */
-			if (si_eliminate_const_output(ctx, cur, target)) {
-				removed_any = true;
-			} else {
-				exports.offset[exports.num] = target;
-				exports.inst[exports.num] = cur;
-				exports.num++;
-			}
-		}
-		bb = LLVMGetNextBasicBlock(bb);
-	}
-
-	/* Remove holes in export memory due to removed PARAM exports.
-	 * This is done by renumbering all PARAM exports.
-	 */
-	if (removed_any) {
-		ubyte current_offset[SI_MAX_VS_OUTPUTS];
-		unsigned new_count = 0;
-		unsigned out, i;
-
-		/* Make a copy of the offsets. We need the old version while
-		 * we are modifying some of them. */
-		assert(sizeof(current_offset) ==
-		       sizeof(shader->info.vs_output_param_offset));
-		memcpy(current_offset, shader->info.vs_output_param_offset,
-		       sizeof(current_offset));
-
-		for (i = 0; i < exports.num; i++) {
-			unsigned offset = exports.offset[i];
-
-			for (out = 0; out < info->num_outputs; out++) {
-				if (current_offset[out] != offset)
-					continue;
-
-				LLVMSetOperand(exports.inst[i], EXP_TARGET,
-					       LLVMConstInt(ctx->i32,
-							    V_008DFC_SQ_EXP_PARAM + new_count, 0));
-				shader->info.vs_output_param_offset[out] = new_count;
-				new_count++;
-				break;
-			}
-		}
-		shader->info.nr_param_exports = new_count;
-	}
+	ac_eliminate_const_vs_outputs(&ctx->ac,
+				      ctx->main_fn,
+				      shader->info.vs_output_param_offset,
+				      info->num_outputs,
+				      &shader->info.nr_param_exports);
 }
 
 static void si_count_scratch_private_memory(struct si_shader_context *ctx)
@@ -7521,7 +7383,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 	si_init_shader_ctx(&ctx, sscreen, shader, tm);
 	ctx.separate_prolog = !is_monolithic;
 
-	memset(shader->info.vs_output_param_offset, EXP_PARAM_UNDEFINED,
+	memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
 	       sizeof(shader->info.vs_output_param_offset));
 
 	shader->info.uses_instanceid = sel->info.uses_instanceid;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index cfa691b..2dfb567 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -486,18 +486,6 @@ struct si_shader_config {
 	unsigned			rsrc2;
 };
 
-enum {
-	/* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
-	EXP_PARAM_OFFSET_0 = 0,
-	EXP_PARAM_OFFSET_31 = 31,
-	/* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
-	EXP_PARAM_DEFAULT_VAL_0000 = 64,
-	EXP_PARAM_DEFAULT_VAL_0001,
-	EXP_PARAM_DEFAULT_VAL_1110,
-	EXP_PARAM_DEFAULT_VAL_1111,
-	EXP_PARAM_UNDEFINED = 255,
-};
-
 /* GCN-specific shader info. */
 struct si_shader_info {
 	ubyte			vs_output_param_offset[SI_MAX_VS_OUTPUTS];
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 21185c3..baf1eae 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -39,6 +39,7 @@
 
 #include "util/disk_cache.h"
 #include "util/mesa-sha1.h"
+#include "ac_exp_param.h"
 
 /* SHADER_CACHE */
 
@@ -1506,7 +1507,7 @@ void si_init_shader_selector_async(void *job, int thread_index)
 			for (i = 0; i < sel->info.num_outputs; i++) {
 				unsigned offset = shader->info.vs_output_param_offset[i];
 
-				if (offset <= EXP_PARAM_OFFSET_31)
+				if (offset <= AC_EXP_PARAM_OFFSET_31)
 					continue;
 
 				unsigned name = sel->info.output_semantic_name[i];
@@ -2001,18 +2002,18 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
 		    index == vsinfo->output_semantic_index[j]) {
 			offset = vs->info.vs_output_param_offset[j];
 
-			if (offset <= EXP_PARAM_OFFSET_31) {
+			if (offset <= AC_EXP_PARAM_OFFSET_31) {
 				/* The input is loaded from parameter memory. */
 				ps_input_cntl |= S_028644_OFFSET(offset);
 			} else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
-				if (offset == EXP_PARAM_UNDEFINED) {
+				if (offset == AC_EXP_PARAM_UNDEFINED) {
 					/* This can happen with depth-only rendering. */
 					offset = 0;
 				} else {
 					/* The input is a DEFAULT_VAL constant. */
-					assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
-					       offset <= EXP_PARAM_DEFAULT_VAL_1111);
-					offset -= EXP_PARAM_DEFAULT_VAL_0000;
+					assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
+					       offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
+					offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
 				}
 
 				ps_input_cntl = S_028644_OFFSET(0x20) |
-- 
2.7.4



More information about the mesa-dev mailing list