[Mesa-dev] [PATCH 10/11] ac: use new LLVM 8 intrinsic when storing 16-bit values
Samuel Pitoiset
samuel.pitoiset at gmail.com
Tue Mar 12 16:19:46 UTC 2019
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
src/amd/common/ac_llvm_build.c | 108 ++++++++++++++++++++++++++++++++
src/amd/common/ac_llvm_build.h | 26 ++++++++
src/amd/common/ac_nir_to_llvm.c | 26 ++------
3 files changed, 139 insertions(+), 21 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 7aec8154a76..ce6639d49bf 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1550,6 +1550,114 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
ac_get_load_intr_attribs(can_speculate));
}
+/**
+ * Emit a call to one of the LLVM 8 typed buffer store intrinsics,
+ * "llvm.amdgcn.{raw,struct}.tbuffer.store.{i32,v2i32,v4i32}".
+ *
+ * \param ctx              context providing the builder and common types
+ * \param rsrc             buffer resource; bitcast to v4i32 before use
+ * \param vdata            value to store; passed as the first operand
+ * \param vindex           index operand, only passed when \p structurized is
+ *                         set; NULL is replaced by the i32 zero constant
+ * \param voffset          offset operand; NULL is replaced by i32 zero
+ * \param soffset          offset operand; NULL is replaced by i32 zero
+ * \param num_channels     clamped to [1, 3] and used to pick the i32, v2i32
+ *                         or v4i32 flavour of the intrinsic
+ * \param dfmt             low part of the packed format operand
+ * \param nfmt             shifted left by 4 and OR'ed into the format operand
+ * \param glc              sets bit 0 of the cache-policy operand
+ * \param slc              sets bit 1 of the cache-policy operand
+ * \param writeonly_memory forwarded to ac_get_store_intr_attribs()
+ * \param structurized     selects the "struct" variant (which takes vindex)
+ *                         instead of the "raw" one
+ */
+static void
+ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef vdata,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     LLVMValueRef soffset,
+			     unsigned num_channels,
+			     unsigned dfmt,
+			     unsigned nfmt,
+			     bool glc,
+			     bool slc,
+			     bool writeonly_memory,
+			     bool structurized)
+{
+	const char *type_names[] = {"i32", "v2i32", "v4i32"};
+	const unsigned type_idx = CLAMP(num_channels, 1, 3) - 1;
+	const unsigned format = dfmt | (nfmt << 4);
+	const unsigned cache_policy = (glc ? 1 : 0) | (slc ? 2 : 0);
+	LLVMValueRef operands[7];
+	unsigned count = 0;
+	char intr_name[256];
+
+	operands[count++] = vdata;
+	operands[count++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+	/* Only the "struct" variant has a vindex operand. */
+	if (structurized) {
+		operands[count++] = vindex ? vindex : ctx->i32_0;
+	}
+	operands[count++] = voffset ? voffset : ctx->i32_0;
+	operands[count++] = soffset ? soffset : ctx->i32_0;
+	operands[count++] = LLVMConstInt(ctx->i32, format, 0);
+	operands[count++] = LLVMConstInt(ctx->i32, cache_policy, 0);
+
+	snprintf(intr_name, sizeof(intr_name),
+		 "llvm.amdgcn.%s.tbuffer.store.%s",
+		 structurized ? "struct" : "raw", type_names[type_idx]);
+
+	ac_build_intrinsic(ctx, intr_name, ctx->voidt, operands, count,
+			   ac_get_store_intr_attribs(writeonly_memory));
+}
+
+/**
+ * Build a typed buffer store, picking the intrinsic by LLVM version.
+ *
+ * When HAVE_LLVM >= 0x800, \p immoffset is added to \p voffset and the
+ * structurized LLVM 8 intrinsic is emitted through
+ * ac_build_llvm8_tbuffer_store(). Otherwise the legacy
+ * "llvm.amdgcn.tbuffer.store.{i32,v2i32,v4i32}" intrinsic is called with
+ * ten operands.
+ *
+ * \param ctx              context providing the builder and common types
+ * \param rsrc             buffer resource
+ * \param vdata            value to store
+ * \param vindex           index operand; NULL is treated as i32 zero
+ * \param voffset          offset operand; NULL is treated as i32 zero
+ * \param soffset          offset operand; NULL is treated as i32 zero
+ * \param immoffset        offset value; must not be NULL
+ * \param num_channels     clamped to [1, 3] to select the intrinsic flavour
+ * \param dfmt             data format value
+ * \param nfmt             number format value
+ * \param glc              glc flag of the store
+ * \param slc              slc flag of the store
+ * \param writeonly_memory forwarded to ac_get_store_intr_attribs()
+ */
+void
+ac_build_tbuffer_store(struct ac_llvm_context *ctx,
+		       LLVMValueRef rsrc,
+		       LLVMValueRef vdata,
+		       LLVMValueRef vindex,
+		       LLVMValueRef voffset,
+		       LLVMValueRef soffset,
+		       LLVMValueRef immoffset,
+		       unsigned num_channels,
+		       unsigned dfmt,
+		       unsigned nfmt,
+		       bool glc,
+		       bool slc,
+		       bool writeonly_memory)
+{
+	if (HAVE_LLVM >= 0x800) {
+		/* The LLVM 8 helper takes no separate immediate offset
+		 * argument, so fold it into voffset.
+		 */
+		voffset = LLVMBuildAdd(ctx->builder,
+				       voffset ? voffset : ctx->i32_0,
+				       immoffset, "");
+
+		ac_build_llvm8_tbuffer_store(ctx, rsrc, vdata, vindex, voffset,
+					     soffset, num_channels, dfmt, nfmt,
+					     glc, slc, writeonly_memory, true);
+	} else {
+		LLVMValueRef params[] = {
+			vdata,
+			rsrc,
+			/* Accept a NULL vindex like the LLVM 8 path does. */
+			vindex ? vindex : ctx->i32_0,
+			voffset ? voffset : ctx->i32_0,
+			soffset ? soffset : ctx->i32_0,
+			immoffset,
+			LLVMConstInt(ctx->i32, dfmt, false),
+			LLVMConstInt(ctx->i32, nfmt, false),
+			/* The legacy intrinsic declares glc and slc as i1,
+			 * so pass i1 constants rather than i32 ones.
+			 */
+			glc ? ctx->i1true : ctx->i1false,
+			slc ? ctx->i1true : ctx->i1false,
+		};
+		unsigned func = CLAMP(num_channels, 1, 3) - 1;
+		const char *type_names[] = {"i32", "v2i32", "v4i32"};
+		char name[256];
+
+		snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
+			 type_names[func]);
+
+		ac_build_intrinsic(ctx, name, ctx->voidt, params, 10,
+				   ac_get_store_intr_attribs(writeonly_memory));
+	}
+}
+
+/**
+ * Store a 16-bit value through ac_build_tbuffer_store().
+ *
+ * \p vdata is bitcast to i16 and zero-extended to i32, then stored as a
+ * single channel using V_008F0C_BUF_DATA_FORMAT_16 and
+ * V_008F0C_BUF_NUM_FORMAT_UINT. The immediate offset is always i32 zero.
+ * The remaining arguments are forwarded unchanged.
+ */
+void
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef vdata,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     LLVMValueRef soffset,
+			     bool glc,
+			     bool slc,
+			     bool writeonly_memory)
+{
+	LLVMValueRef as_i16 =
+		LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
+	LLVMValueRef widened =
+		LLVMBuildZExt(ctx->builder, as_i16, ctx->i32, "");
+
+	ac_build_tbuffer_store(ctx, rsrc, widened, vindex, voffset, soffset,
+			       ctx->i32_0, 1,
+			       V_008F0C_BUF_DATA_FORMAT_16,
+			       V_008F0C_BUF_NUM_FORMAT_UINT,
+			       glc, slc, writeonly_memory);
+}
+
/**
* Set range metadata on an instruction. This can only be used on load and
* call instructions. If you know an instruction can only produce the values
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 069ba7aa3c9..5ca93e66982 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -331,6 +331,32 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
bool can_speculate,
bool structurized);
+/**
+ * Store a 16-bit value with a typed buffer store.
+ *
+ * vdata is bitcast to i16 and zero-extended to i32 before being handed to
+ * ac_build_tbuffer_store() as a single channel with the 16-bit data format,
+ * the UINT number format and a zero immediate offset. All other arguments
+ * are forwarded unchanged.
+ */
+void
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ bool glc,
+ bool slc,
+ bool writeonly_memory);
+
+/**
+ * Build a typed buffer store of vdata into the buffer described by rsrc.
+ *
+ * When HAVE_LLVM >= 0x800, immoffset is added to voffset and the LLVM 8
+ * "struct" tbuffer store intrinsic is used; otherwise the legacy
+ * "llvm.amdgcn.tbuffer.store.*" intrinsic is used. num_channels is clamped
+ * to [1, 3] to choose between the i32, v2i32 and v4i32 flavours. A NULL
+ * voffset or soffset is replaced by the i32 zero constant.
+ * writeonly_memory is forwarded to ac_get_store_intr_attribs().
+ */
+void
+ac_build_tbuffer_store(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool writeonly_memory);
+
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 47a865de36f..b65ccd2f6d3 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1521,14 +1521,12 @@ static unsigned get_cache_policy(struct ac_nir_context *ctx,
static void visit_store_ssbo(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
- const char *store_name;
LLVMValueRef src_data = get_src(ctx, instr->src[0]);
int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
unsigned writemask = nir_intrinsic_write_mask(instr);
enum gl_access_qualifier access = nir_intrinsic_access(instr);
bool writeonly_memory = access & ACCESS_NON_READABLE;
unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
- LLVMValueRef glc = (cache_policy & ac_glc) ? ctx->ac.i1true : ctx->ac.i1false;
LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
get_src(ctx, instr->src[1]), true);
@@ -1571,25 +1569,11 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
if (num_bytes == 2) {
- store_name = "llvm.amdgcn.tbuffer.store.i32";
- data_type = ctx->ac.i32;
- data = LLVMBuildBitCast(ctx->ac.builder, data, ctx->ac.i16, "");
- data = LLVMBuildZExt(ctx->ac.builder, data, data_type, "");
- LLVMValueRef tbuffer_params[] = {
- data,
- rsrc,
- ctx->ac.i32_0, /* vindex */
- offset, /* voffset */
- ctx->ac.i32_0,
- ctx->ac.i32_0,
- LLVMConstInt(ctx->ac.i32, 2, false), // dfmt (= 16bit)
- LLVMConstInt(ctx->ac.i32, 4, false), // nfmt (= uint)
- glc,
- ctx->ac.i1false,
- };
- ac_build_intrinsic(&ctx->ac, store_name,
- ctx->ac.voidt, tbuffer_params, 10,
- ac_get_store_intr_attribs(writeonly_memory));
+ ac_build_tbuffer_store_short(&ctx->ac, rsrc, data,
+ ctx->ac.i32_0, offset,
+ ctx->ac.i32_0,
+ cache_policy & ac_glc,
+ false, writeonly_memory);
} else {
int num_channels = num_bytes / 4;
--
2.21.0
More information about the mesa-dev
mailing list