[Mesa-dev] [PATCH v2 11/12] ac: use new LLVM 8 intrinsic when storing 16-bit values

Samuel Pitoiset samuel.pitoiset at gmail.com
Wed Mar 13 10:50:19 UTC 2019


v2: do not force-enable IDXEN when unnecessary

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 src/amd/common/ac_llvm_build.c  | 111 ++++++++++++++++++++++++++++++++
 src/amd/common/ac_llvm_build.h  |  26 ++++++++
 src/amd/common/ac_nir_to_llvm.c |  26 ++------
 3 files changed, 142 insertions(+), 21 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 8d5682f6f7a..06dc1383121 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1554,6 +1554,117 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
 				  ac_get_load_intr_attribs(can_speculate));
 }
 
+static void /* Emit a call to llvm.amdgcn.{raw,struct}.tbuffer.store.<type>. */
+ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef vdata,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     LLVMValueRef soffset,
+			     unsigned num_channels,
+			     unsigned dfmt,
+			     unsigned nfmt,
+			     bool glc,
+			     bool slc,
+			     bool writeonly_memory,
+			     bool structurized) /* true: "struct" variant, vindex is passed */
+{
+	LLVMValueRef args[7]; /* at most 7 operands (the "struct" variant) */
+	int idx = 0;
+	args[idx++] = vdata;
+	args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+	if (structurized) /* the "raw" variant gets no vindex operand */
+		args[idx++] = vindex ? vindex : ctx->i32_0;
+	args[idx++] = voffset ? voffset : ctx->i32_0; /* NULL offsets become 0 */
+	args[idx++] = soffset ? soffset : ctx->i32_0;
+	args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); /* dfmt low bits, nfmt from bit 4 */
+	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); /* bit 0 = glc, bit 1 = slc */
+	unsigned func = CLAMP(num_channels, 1, 3) - 1; /* index into type_names */
+
+	const char *type_names[] = {"i32", "v2i32", "v4i32"}; /* intrinsic name suffix */
+	const char *indexing_kind = structurized ? "struct" : "raw";
+	char name[256];
+
+	snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s",
+		 indexing_kind, type_names[func]);
+
+	ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, /* idx == operand count */
+			   ac_get_store_intr_attribs(writeonly_memory));
+}
+
+void /* Emit a typed buffer store of vdata; the intrinsic used depends on HAVE_LLVM. */
+ac_build_tbuffer_store(struct ac_llvm_context *ctx,
+		       LLVMValueRef rsrc,
+		       LLVMValueRef vdata,
+		       LLVMValueRef vindex, /* may be NULL (treated as 0) */
+		       LLVMValueRef voffset, /* may be NULL (treated as 0) */
+		       LLVMValueRef soffset, /* may be NULL (treated as 0) */
+		       LLVMValueRef immoffset, /* used as-is: no NULL fallback */
+		       unsigned num_channels,
+		       unsigned dfmt,
+		       unsigned nfmt,
+		       bool glc,
+		       bool slc,
+		       bool writeonly_memory)
+{
+	if (HAVE_LLVM >= 0x800) {
+		bool structurized = vindex && vindex != ctx->i32_0; /* "struct" variant only for a real index */
+
+		voffset = LLVMBuildAdd(ctx->builder, /* fold immoffset into voffset */
+				       voffset ? voffset : ctx->i32_0,
+				       immoffset, "");
+
+		ac_build_llvm8_tbuffer_store(ctx, rsrc, vdata, vindex, voffset,
+					     soffset, num_channels, dfmt, nfmt,
+					     glc, slc, writeonly_memory,
+					     structurized);
+	} else {
+		LLVMValueRef params[] = { /* operands of llvm.amdgcn.tbuffer.store */
+			vdata,
+			rsrc,
+			vindex ? vindex : ctx->i32_0, /* accept NULL like the LLVM 8 path */
+			voffset ? voffset : ctx->i32_0,
+			soffset ? soffset : ctx->i32_0,
+			immoffset,
+			LLVMConstInt(ctx->i32, dfmt, false),
+			LLVMConstInt(ctx->i32, nfmt, false),
+			glc ? ctx->i1true : ctx->i1false, /* glc/slc must be i1 here, not i32 */
+			slc ? ctx->i1true : ctx->i1false,
+		};
+		unsigned func = CLAMP(num_channels, 1, 3) - 1; /* index into type_names */
+		const char *type_names[] = {"i32", "v2i32", "v4i32"};
+		char name[256];
+
+		snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
+			 type_names[func]);
+
+		ac_build_intrinsic(ctx, name, ctx->voidt, params, 10, /* 10 == entries in params */
+				   ac_get_store_intr_attribs(writeonly_memory));
+	}
+}
+
+void /* Store one 16-bit value through ac_build_tbuffer_store(). */
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef vdata,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     LLVMValueRef soffset,
+			     bool glc,
+			     bool slc,
+			     bool writeonly_memory)
+{
+	unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16; /* 16-bit data format */
+	unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; /* unsigned-integer number format */
+
+	vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, ""); /* reinterpret the value as i16 */
+	vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, ""); /* widen: one channel is stored as i32 */
+
+	ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset,
+			       ctx->i32_0, 1, dfmt, nfmt, glc, slc, /* immoffset = 0, num_channels = 1 */
+			       writeonly_memory);
+}
+
 /**
  * Set range metadata on an instruction.  This can only be used on load and
  * call instructions.  If you know an instruction can only produce the values
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 069ba7aa3c9..5ca93e66982 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -331,6 +331,32 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
 			    bool can_speculate,
 			    bool structurized);
 
+void /* Store a single 16-bit value with a typed buffer store. */
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef vdata, /* bitcast to i16, then zero-extended to i32 */
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     LLVMValueRef soffset,
+			     bool glc,
+			     bool slc,
+			     bool writeonly_memory);
+
+void /* Emit a typed buffer store of vdata with the given dfmt/nfmt. */
+ac_build_tbuffer_store(struct ac_llvm_context *ctx,
+		       LLVMValueRef rsrc,
+		       LLVMValueRef vdata,
+		       LLVMValueRef vindex,
+		       LLVMValueRef voffset, /* NULL is treated as 0 */
+		       LLVMValueRef soffset, /* NULL is treated as 0 */
+		       LLVMValueRef immoffset, /* no NULL fallback in the implementation */
+		       unsigned num_channels, /* selects the i32 / v2i32 / v4i32 variant */
+		       unsigned dfmt,
+		       unsigned nfmt,
+		       bool glc,
+		       bool slc,
+		       bool writeonly_memory);
+
 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);
 
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index f4d408cd587..f2070eb9a8e 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1521,14 +1521,12 @@ static unsigned get_cache_policy(struct ac_nir_context *ctx,
 static void visit_store_ssbo(struct ac_nir_context *ctx,
                              nir_intrinsic_instr *instr)
 {
-	const char *store_name;
 	LLVMValueRef src_data = get_src(ctx, instr->src[0]);
 	int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
 	unsigned writemask = nir_intrinsic_write_mask(instr);
 	enum gl_access_qualifier access = nir_intrinsic_access(instr);
 	bool writeonly_memory = access & ACCESS_NON_READABLE;
 	unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
-	LLVMValueRef glc = (cache_policy & ac_glc) ? ctx->ac.i1true : ctx->ac.i1false;
 
 	LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
 				        get_src(ctx, instr->src[1]), true);
@@ -1571,25 +1569,11 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 				      LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
 
 		if (num_bytes == 2) {
-			store_name = "llvm.amdgcn.tbuffer.store.i32";
-			data_type = ctx->ac.i32;
-			data = LLVMBuildBitCast(ctx->ac.builder, data, ctx->ac.i16, "");
-			data = LLVMBuildZExt(ctx->ac.builder, data, data_type, "");
-			LLVMValueRef tbuffer_params[] = {
-				data,
-				rsrc,
-				ctx->ac.i32_0, /* vindex */
-				offset,        /* voffset */
-				ctx->ac.i32_0,
-				ctx->ac.i32_0,
-				LLVMConstInt(ctx->ac.i32, 2, false), // dfmt (= 16bit)
-				LLVMConstInt(ctx->ac.i32, 4, false), // nfmt (= uint)
-				glc,
-				ctx->ac.i1false,
-			};
-			ac_build_intrinsic(&ctx->ac, store_name,
-					   ctx->ac.voidt, tbuffer_params, 10,
-					   ac_get_store_intr_attribs(writeonly_memory));
+			ac_build_tbuffer_store_short(&ctx->ac, rsrc, data,
+						     ctx->ac.i32_0, offset,
+						     ctx->ac.i32_0,
+						     cache_policy & ac_glc,
+						     false, writeonly_memory);
 		} else {
 			int num_channels = num_bytes / 4;
 
-- 
2.21.0



More information about the mesa-dev mailing list