[Mesa-dev] [PATCH 17/19] radeonsi: simplify si_llvm_emit_ddxy

Marek Olšák maraeo at gmail.com
Sun Oct 2 21:09:32 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

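si_llvm_emit_ddxy is called once per element (channel), so it only needs to
emit code for that one channel and store the result in
emit_data->output[emit_data->chan], instead of generating code for all
4 elements at once.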
---
 src/gallium/drivers/radeonsi/si_shader.c | 80 ++++++++++++--------------------
 1 file changed, 29 insertions(+), 51 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 414810e..c150ae4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4998,98 +4998,76 @@ static void si_llvm_emit_txqs(
 #define TID_MASK_TOP      0xfffffffd
 #define TID_MASK_LEFT     0xfffffffe
 
 static void si_llvm_emit_ddxy(
 	const struct lp_build_tgsi_action *action,
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
-	const struct tgsi_full_instruction *inst = emit_data->inst;
-	unsigned opcode = inst->Instruction.Opcode;
-	LLVMValueRef store_ptr, load_ptr0, load_ptr1, thread_id;
-	LLVMValueRef tl, trbl, result[4];
-	LLVMValueRef tl_tid, trbl_tid;
-	unsigned swizzle[4];
-	unsigned c;
+	unsigned opcode = emit_data->info->opcode;
+	LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, val, args[2];
 	int idx;
 	unsigned mask;
 	bool has_ds_bpermute = HAVE_LLVM >= 0x0309 &&
 			       ctx->screen->b.chip_class >= VI;
 
-	thread_id = get_thread_id(ctx);;
+	thread_id = get_thread_id(ctx);
 
 	if (opcode == TGSI_OPCODE_DDX_FINE)
 		mask = TID_MASK_LEFT;
 	else if (opcode == TGSI_OPCODE_DDY_FINE)
 		mask = TID_MASK_TOP;
 	else
 		mask = TID_MASK_TOP_LEFT;
 
 	tl_tid = LLVMBuildAnd(gallivm->builder, thread_id,
 				lp_build_const_int32(gallivm, mask), "");
 
 	/* for DDX we want to next X pixel, DDY next Y pixel. */
 	idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
 	trbl_tid = LLVMBuildAdd(gallivm->builder, tl_tid,
 				  lp_build_const_int32(gallivm, idx), "");
 
-	if (!has_ds_bpermute) {
-		store_ptr = build_gep0(ctx, ctx->lds, thread_id);
-		load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
-		load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
-	}
-
-	for (c = 0; c < 4; ++c) {
-		unsigned i;
-		LLVMValueRef val;
-		LLVMValueRef args[2];
-
-		swizzle[c] = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c);
-		for (i = 0; i < c; ++i) {
-			if (swizzle[i] == swizzle[c]) {
-				result[c] = result[i];
-				break;
-			}
-		}
-		if (i != c)
-			continue;
-
-		val = LLVMBuildBitCast(gallivm->builder,
-				lp_build_emit_fetch(bld_base, inst, 0, c),
-						ctx->i32, "");
+	val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
 
-		if (has_ds_bpermute) {
-	                args[0] = LLVMBuildMul(gallivm->builder, tl_tid,
-                                        lp_build_const_int32(gallivm, 4), "");
-			args[1] = val;
-			tl = lp_build_intrinsic(gallivm->builder,
+	if (has_ds_bpermute) {
+		args[0] = LLVMBuildMul(gallivm->builder, tl_tid,
+				       lp_build_const_int32(gallivm, 4), "");
+		args[1] = val;
+		tl = lp_build_intrinsic(gallivm->builder,
 					"llvm.amdgcn.ds.bpermute", ctx->i32,
 					args, 2, LLVMReadNoneAttribute);
 
-	                args[0] = LLVMBuildMul(gallivm->builder, trbl_tid,
-                                        lp_build_const_int32(gallivm, 4), "");
-			trbl = lp_build_intrinsic(gallivm->builder,
-					"llvm.amdgcn.ds.bpermute", ctx->i32,
-					args, 2, LLVMReadNoneAttribute);
-		} else {
-			LLVMBuildStore(gallivm->builder, val, store_ptr);
-			tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
-			trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
-		}
-		tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, "");
-		trbl = LLVMBuildBitCast(gallivm->builder, trbl,	ctx->f32, "");
-		result[c] = LLVMBuildFSub(gallivm->builder, trbl, tl, "");
+		args[0] = LLVMBuildMul(gallivm->builder, trbl_tid,
+				       lp_build_const_int32(gallivm, 4), "");
+		trbl = lp_build_intrinsic(gallivm->builder,
+					  "llvm.amdgcn.ds.bpermute", ctx->i32,
+					  args, 2, LLVMReadNoneAttribute);
+	} else {
+		LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+
+		store_ptr = build_gep0(ctx, ctx->lds, thread_id);
+		load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
+		load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
+
+		LLVMBuildStore(gallivm->builder, val, store_ptr);
+		tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
+		trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
 	}
 
-	emit_data->output[0] = lp_build_gather_values(gallivm, result, 4);
+	tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, "");
+	trbl = LLVMBuildBitCast(gallivm->builder, trbl,	ctx->f32, "");
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildFSub(gallivm->builder, trbl, tl, "");
 }
 
 /*
  * this takes an I,J coordinate pair,
  * and works out the X and Y derivatives.
  * it returns DDX(I), DDX(J), DDY(I), DDY(J).
  */
 static LLVMValueRef si_llvm_emit_ddxy_interp(
 	struct lp_build_tgsi_context *bld_base,
 	LLVMValueRef interp_ij)
-- 
2.7.4



More information about the mesa-dev mailing list