[Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.

Dave Airlie airlied at gmail.com
Tue Aug 1 04:14:48 UTC 2017


From: Dave Airlie <airlied at redhat.com>

This looks like it has been supported since LLVM 3.9 at least,
so switch radeonsi and radv over to using it; -pro also
uses this. We can now drop creating the LDS for these operations,
as the ds_swizzle operation doesn't actually write to LDS at all.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/amd/common/ac_llvm_build.c           | 57 +++++++++++++++++++++++---------
 src/amd/common/ac_llvm_build.h           |  1 -
 src/amd/common/ac_nir_to_llvm.c          |  9 +----
 src/gallium/drivers/radeonsi/si_shader.c | 16 +--------
 4 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 9b939c1..a38aad6 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -796,21 +796,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 	      bool has_ds_bpermute,
 	      uint32_t mask,
 	      int idx,
-	      LLVMValueRef lds,
 	      LLVMValueRef val)
 {
-	LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
+	LLVMValueRef tl, trbl, args[2];
 	LLVMValueRef result;
 
-	thread_id = ac_get_thread_id(ctx);
+	if (has_ds_bpermute) {
+		LLVMValueRef thread_id, tl_tid, trbl_tid;
+		thread_id = ac_get_thread_id(ctx);
 
-	tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
-			      LLVMConstInt(ctx->i32, mask, false), "");
+		tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
+				      LLVMConstInt(ctx->i32, mask, false), "");
 
-	trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
-				LLVMConstInt(ctx->i32, idx, false), "");
+		trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
+					LLVMConstInt(ctx->i32, idx, false), "");
 
-	if (has_ds_bpermute) {
 		args[0] = LLVMBuildMul(ctx->builder, tl_tid,
 				       LLVMConstInt(ctx->i32, 4, false), "");
 		args[1] = val;
@@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 					  AC_FUNC_ATTR_READNONE |
 					  AC_FUNC_ATTR_CONVERGENT);
 	} else {
-		LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+		uint32_t masks[2];
+
+		switch (mask) {
+		case AC_TID_MASK_TOP_LEFT:
+			masks[0] = 0x8000;
+			if (idx == 1)
+				masks[1] = 0x8055;
+			else
+				masks[1] = 0x80aa;
+
+			break;
+		case AC_TID_MASK_TOP:
+			masks[0] = 0x8044;
+			masks[1] = 0x80ee;
+			break;
+		case AC_TID_MASK_LEFT:
+			masks[0] = 0x80a0;
+			masks[1] = 0x80f5;
+			break;
+		}
 
-		store_ptr = ac_build_gep0(ctx, lds, thread_id);
-		load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
-		load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
+		args[0] = val;
+		args[1] = LLVMConstInt(ctx->i32, masks[0], false);
 
-		LLVMBuildStore(ctx->builder, val, store_ptr);
-		tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
-		trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
+		tl = ac_build_intrinsic(ctx,
+					"llvm.amdgcn.ds.swizzle", ctx->i32,
+					args, 2,
+					AC_FUNC_ATTR_READNONE |
+					AC_FUNC_ATTR_CONVERGENT);
+
+		args[1] = LLVMConstInt(ctx->i32, masks[1], false);
+		trbl = ac_build_intrinsic(ctx,
+					"llvm.amdgcn.ds.swizzle", ctx->i32,
+					args, 2,
+					AC_FUNC_ATTR_READNONE |
+					AC_FUNC_ATTR_CONVERGENT);
 	}
 
 	tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 09fd585..ee27d3c 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 	      bool has_ds_bpermute,
 	      uint32_t mask,
 	      int idx,
-	      LLVMValueRef lds,
 	      LLVMValueRef val);
 
 #define AC_SENDMSG_GS 2
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 530b581..dc765fe 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -68,8 +68,6 @@ struct ac_nir_context {
 	int num_locals;
 	LLVMValueRef *locals;
 
-	LLVMValueRef ddxy_lds;
-
 	struct nir_to_llvm_context *nctx; /* TODO get rid of this */
 };
 
@@ -1453,11 +1451,6 @@ static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
 	LLVMValueRef result;
 	bool has_ds_bpermute = ctx->abi->chip_class >= VI;
 
-	if (!ctx->ddxy_lds && !has_ds_bpermute)
-		ctx->ddxy_lds = LLVMAddGlobalInAddressSpace(ctx->ac.module,
-						       LLVMArrayType(ctx->ac.i32, 64),
-						       "ddxy_lds", LOCAL_ADDR_SPACE);
-
 	if (op == nir_op_fddx_fine || op == nir_op_fddx)
 		mask = AC_TID_MASK_LEFT;
 	else if (op == nir_op_fddy_fine || op == nir_op_fddy)
@@ -1474,7 +1467,7 @@ static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
 		idx = 2;
 
 	result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
-			      mask, idx, ctx->ddxy_lds,
+			      mask, idx,
 			      src0);
 	return result;
 }
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 628e6f8..09053c3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3591,7 +3591,7 @@ static void si_llvm_emit_ddxy(
 
 	val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
 	val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
-			    mask, idx, ctx->lds, val);
+			    mask, idx, val);
 	emit_data->output[emit_data->chan] = val;
 }
 
@@ -4635,20 +4635,6 @@ static void create_function(struct si_shader_context *ctx)
 	assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
 	shader->info.num_input_vgprs -= num_prolog_vgprs;
 
-	if (!ctx->screen->has_ds_bpermute &&
-	    bld_base->info &&
-	    (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
-	     bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
-	     bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
-	     bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
-	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
-	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
-		ctx->lds =
-			LLVMAddGlobalInAddressSpace(gallivm->module,
-						    LLVMArrayType(ctx->i32, 64),
-						    "ddxy_lds",
-						    LOCAL_ADDR_SPACE);
-
 	if (shader->key.as_ls ||
 	    ctx->type == PIPE_SHADER_TESS_CTRL ||
 	    /* GFX9 has the ESGS ring buffer in LDS. */
-- 
2.9.4



More information about the mesa-dev mailing list