[Mesa-dev] [PATCH v2 12/14] radeonsi: Use barrier instructions for TCS barriers.

Fri May 13 01:37:22 UTC 2016

With potentially more than one wave working on a patch, TCS barriers
need the real barrier instruction.

Also add a barrier before the tessellation factors are loaded and
written to the TF ring.

Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c | 48 ++++++++++----------------------
 1 file changed, 15 insertions(+), 33 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 087b5bb..235983a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2514,6 +2514,19 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 	}
 }
 
+static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
+				 struct lp_build_tgsi_context *bld_base,
+				 struct lp_build_emit_data *emit_data)
+{
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+
+	lp_build_intrinsic(gallivm->builder,
+			   HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier"
+					       : "llvm.AMDGPU.barrier.local",
+			   ctx->voidt, NULL, 0, LLVMNoUnwindAttribute);
+}
+
 static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 				  LLVMValueRef rel_patch_id,
 				  LLVMValueRef invocation_id,
@@ -2528,6 +2541,8 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 	unsigned stride, outer_comps, inner_comps, i;
 	struct lp_build_if_state if_ctx;
 
+	si_llvm_emit_barrier(NULL, bld_base, NULL);
+
 	/* Do this only for invocation 0, because the tess levels are per-patch,
 	 * not per-vertex.
 	 *
@@ -3213,18 +3228,6 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 				struct lp_build_tgsi_context *bld_base,
 				struct lp_build_emit_data *emit_data);
 
-/* Prevent optimizations (at least of memory accesses) across the current
- * point in the program by emitting empty inline assembly that is marked as
- * having side effects.
- */
-static void emit_optimization_barrier(struct si_shader_context *ctx)
-{
-	LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
-	LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
-	LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false);
-	LLVMBuildCall(builder, inlineasm, NULL, 0, "");
-}
-
 static void emit_waitcnt(struct si_shader_context *ctx)
 {
 	struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
@@ -5154,27 +5157,6 @@ static void si_llvm_emit_primitive(
 			   ctx->voidt, args, 2, LLVMNoUnwindAttribute);
 }
 
-static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
-				 struct lp_build_tgsi_context *bld_base,
-				 struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct gallivm_state *gallivm = bld_base->base.gallivm;
-
-	/* The real barrier instruction isn’t needed, because an entire patch
-	 * always fits into a single wave.
-	 */
-	if (ctx->type == PIPE_SHADER_TESS_CTRL) {
-		emit_optimization_barrier(ctx);
-		return;
-	}
-
-	lp_build_intrinsic(gallivm->builder,
-			   HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier"
-					       : "llvm.AMDGPU.barrier.local",
-			   ctx->voidt, NULL, 0, LLVMNoUnwindAttribute);
-}
-
 static const struct lp_build_tgsi_action tex_action = {
 	.fetch_args = tex_fetch_args,
 	.emit = build_tex_intrinsic,
-- 
2.8.2