Mesa (master): radeonsi: strengthen emit_optimization_barrier

Nicolai Hähnle nh at kemper.freedesktop.org
Wed Apr 5 13:33:01 UTC 2017


Module: Mesa
Branch: master
Commit: 24d4fbe226c1f5b9a216d05c25737b4b14391975
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=24d4fbe226c1f5b9a216d05c25737b4b14391975

Author: Nicolai Hähnle <nicolai.haehnle at amd.com>
Date:   Fri Mar 31 13:04:34 2017 +0200

radeonsi: strengthen emit_optimization_barrier

LLVM will lift inline assembly out of if-else-blocks if both paths have
the same inline assembly. Prevent this by adding an irrelevant unique
text to the assembly.

This requires the LLVM assembly parser to be initialized.

Furthermore, allow forcing subsequent computations to happen after the
optimization barrier by defining a data dependency.

Reviewed-by: Marek Olšák <marek.olsak at amd.com>

---

 src/gallium/drivers/radeonsi/si_shader.c           | 39 +++++++++++++++++++---
 .../drivers/radeonsi/si_shader_tgsi_setup.c        |  3 ++
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 541dc7b943..082e29111b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -72,6 +72,8 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
 static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
 			       FILE *f);
 
+static unsigned llvm_get_type_size(LLVMTypeRef type);
+
 static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key);
 static void si_build_vs_epilog_function(struct si_shader_context *ctx,
@@ -3125,14 +3127,43 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 /* Prevent optimizations (at least of memory accesses) across the current
  * point in the program by emitting empty inline assembly that is marked as
  * having side effects.
+ *
+ * Optionally, a value can be passed through the inline assembly to prevent
+ * LLVM from hoisting calls to ReadNone functions.
  */
 #if 0 /* unused currently */
-static void emit_optimization_barrier(struct si_shader_context *ctx)
+static void emit_optimization_barrier(struct si_shader_context *ctx,
+				      LLVMValueRef *pvgpr)
 {
+	static int counter = 0;
+
 	LLVMBuilderRef builder = ctx->gallivm.builder;
-	LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
-	LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false);
-	LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+	char code[16];
+
+	snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
+
+	if (!pvgpr) {
+		LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
+		LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
+		LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+	} else {
+		LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
+		LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
+		LLVMValueRef vgpr = *pvgpr;
+		LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
+		unsigned vgpr_size = llvm_get_type_size(vgpr_type);
+		LLVMValueRef vgpr0;
+
+		assert(vgpr_size % 4 == 0);
+
+		vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
+		vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
+		vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
+		vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
+		vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
+
+		*pvgpr = vgpr;
+	}
 }
 #endif
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 3b8951cd94..7218d2da11 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -126,6 +126,9 @@ static void init_amdgpu_target()
 	LLVMInitializeAMDGPUTargetMC();
 	LLVMInitializeAMDGPUAsmPrinter();
 
+	/* For inline assembly. */
+	LLVMInitializeAMDGPUAsmParser();
+
 	if (HAVE_LLVM >= 0x0400) {
 		/*
 		 * Workaround for bug in llvm 4.0 that causes image intrinsics




More information about the mesa-commit mailing list