[Mesa-dev] [PATCH 17/19] radeonsi: strengthen emit_optimization_barrier

Nicolai Hähnle nhaehnle at gmail.com
Fri Mar 31 17:14:17 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

LLVM will lift inline assembly out of if-else-blocks if both paths have
the same inline assembly. Prevent this by adding an irrelevant unique
text to the assembly.

This requires the LLVM assembly parser to be initialized.

Furthermore, allow forcing subsequent computations to happen after the
optimization barrier by defining a data dependency.
---
 src/gallium/drivers/radeonsi/si_shader.c           | 39 +++++++++++++++++++---
 .../drivers/radeonsi/si_shader_tgsi_setup.c        |  3 ++
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index da1db4e..a56e886 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -65,20 +65,22 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
 			       struct si_shader *shader,
 			       LLVMTargetMachineRef tm);
 
 static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
 				 struct lp_build_tgsi_context *bld_base,
 				 struct lp_build_emit_data *emit_data);
 
 static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
 			       FILE *f);
 
+static unsigned llvm_get_type_size(LLVMTypeRef type);
+
 static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key);
 static void si_build_vs_epilog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key);
 static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
 					 union si_shader_part_key *key);
 static void si_build_ps_prolog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key);
 static void si_build_ps_epilog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key);
@@ -3140,28 +3142,57 @@ static LLVMValueRef get_buffer_size(
 	return size;
 }
 
 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 				struct lp_build_tgsi_context *bld_base,
 				struct lp_build_emit_data *emit_data);
 
 /* Prevent optimizations (at least of memory accesses) across the current
  * point in the program by emitting empty inline assembly that is marked as
  * having side effects.
+ *
+ * Optionally, a value can be passed through the inline assembly to prevent
+ * LLVM from hoisting calls to ReadNone functions.
  */
 #if 0 /* unused currently */
-static void emit_optimization_barrier(struct si_shader_context *ctx)
+static void emit_optimization_barrier(struct si_shader_context *ctx,
+				      LLVMValueRef *pvgpr)
 {
+	static int counter = 0;
+
 	LLVMBuilderRef builder = ctx->gallivm.builder;
-	LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
-	LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false);
-	LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+	char code[16];
+
+	snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
+
+	if (!pvgpr) {
+		LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
+		LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
+		LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+	} else {
+		LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
+		LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
+		LLVMValueRef vgpr = *pvgpr;
+		LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
+		unsigned vgpr_size = llvm_get_type_size(vgpr_type);
+		LLVMValueRef vgpr0;
+
+		assert(vgpr_size % 4 == 0);
+
+		vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
+		vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
+		vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
+		vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
+		vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
+
+		*pvgpr = vgpr;
+	}
 }
 #endif
 
 /* Combine these with & instead of |. */
 #define NOOP_WAITCNT 0xf7f
 #define LGKM_CNT 0x07f
 #define VM_CNT 0xf70
 
 static void emit_waitcnt(struct si_shader_context *ctx, unsigned simm16)
 {
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index da463c2..a5725a3 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -119,20 +119,23 @@ void si_llvm_shader_type(LLVMValueRef F, unsigned type)
 }
 
 static void init_amdgpu_target()
 {
 	gallivm_init_llvm_targets();
 	LLVMInitializeAMDGPUTargetInfo();
 	LLVMInitializeAMDGPUTarget();
 	LLVMInitializeAMDGPUTargetMC();
 	LLVMInitializeAMDGPUAsmPrinter();
 
+	/* For inline assembly. */
+	LLVMInitializeAMDGPUAsmParser();
+
 	if (HAVE_LLVM >= 0x0400) {
 		/*
 		 * Workaround for bug in llvm 4.0 that causes image intrinsics
 		 * to disappear.
 		 * https://reviews.llvm.org/D26348
 		 */
 		const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"};
 		LLVMParseCommandLineOptions(2, argv, NULL);
 	}
 }
-- 
2.9.3



More information about the mesa-dev mailing list