[Mesa-dev] [PATCH 2/2] radeonsi: Use llvm.amdgcn.s.buffer.load instead of llvm.SI.load.const
Tom Stellard
thomas.stellard at amd.com
Tue Jan 31 21:36:47 UTC 2017
Advantages of using llvm.amdgcn.s.buffer.load:
- We can use a real pointer type, which LLVM can better reason about and do
alias analysis on. This will also ease the transition to using fat pointers
and LLVM IR loads.
- llvm.amdgcn.s.buffer.load is defined in IntrinsicsAMDGPU.td so passes can
query information about it other than just its attributes.
---
src/gallium/auxiliary/gallivm/lp_bld_intr.c | 1 +
src/gallium/auxiliary/gallivm/lp_bld_intr.h | 3 +-
src/gallium/drivers/radeonsi/si_shader.c | 48 +++++++++++++++++-----
src/gallium/drivers/radeonsi/si_shader_internal.h | 8 ++++
.../drivers/radeonsi/si_shader_tgsi_setup.c | 6 +++
5 files changed, 55 insertions(+), 11 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index 049671a..dc8de55 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -144,6 +144,7 @@ static const char *attr_to_str(enum lp_func_attr attr)
{
switch (attr) {
case LP_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
+ case LP_FUNC_ATTR_ARGMEMONLY: return "argmemonly";
case LP_FUNC_ATTR_BYVAL: return "byval";
case LP_FUNC_ATTR_INREG: return "inreg";
case LP_FUNC_ATTR_NOALIAS: return "noalias";
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index f1e075a..7c8f09b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -54,7 +54,8 @@ enum lp_func_attr {
LP_FUNC_ATTR_NOUNWIND = (1 << 4),
LP_FUNC_ATTR_READNONE = (1 << 5),
LP_FUNC_ATTR_READONLY = (1 << 6),
- LP_FUNC_ATTR_LAST = (1 << 7)
+ LP_FUNC_ATTR_ARGMEMONLY = (1 << 7),
+ LP_FUNC_ATTR_LAST = (1 << 8)
};
void
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index a6de7c4..cf13cb5 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -93,11 +93,6 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
*/
#define VS_EPILOG_PRIMID_LOC 2
-enum {
- CONST_ADDR_SPACE = 2,
- LOCAL_ADDR_SPACE = 3,
-};
-
#define SENDMSG_GS 2
#define SENDMSG_GS_DONE 3
@@ -360,8 +355,21 @@ static LLVMValueRef build_indexed_load_const(
struct si_shader_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index)
{
+ LLVMTypeRef ptr_type = LLVMTypeOf(base_ptr);
+ LLVMTypeRef elem_type = LLVMGetElementType(ptr_type);
+ LLVMTypeKind elem_kind = LLVMGetTypeKind(elem_type);
LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+
+ /* Set !dereferenceable metadata */
+ if (elem_kind == LLVMPointerTypeKind ||
+ (elem_kind == LLVMArrayTypeKind && LLVMGetTypeKind(LLVMGetElementType(elem_type)) == LLVMPointerTypeKind)) {
+ LLVMValueRef deref_bytes, deref_md;
+ deref_bytes = LLVMConstInt(ctx->i64, UINT64_MAX, 0);
+ deref_md = LLVMMDNodeInContext(LLVMGetTypeContext(ptr_type),
+ &deref_bytes, 1);
+ LLVMSetMetadata(result, ctx->dereferenceable_md_kind, deref_md);
+ }
return result;
}
@@ -1571,16 +1579,34 @@ static LLVMValueRef get_thread_id(struct si_shader_context *ctx)
/**
* Load a dword from a constant buffer.
+ * @param offset This is a byte offset.
+ * @returns An LLVMValueRef with f32 type.
*/
static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
LLVMValueRef resource,
LLVMValueRef offset)
{
LLVMBuilderRef builder = ctx->gallivm.builder;
- LLVMValueRef args[2] = {resource, offset};
+ LLVMValueRef load;
+ LLVMValueRef args[3] = {resource, offset, LLVMConstInt(ctx->i1, 0, 0) };
+ LLVMTypeRef resource_type = LLVMTypeOf(resource);
+ LLVMTypeKind resource_kind = LLVMGetTypeKind(resource_type);
+
+ /* XXX: We can have a non-pointer resource if we do a constant load
+ * from the RW_BUFFERS which are still represented using the <16 x i8>
+ * type. We can eliminate this once we start using pointer types for
+ * those buffers.
+ */
+ if (resource_kind != LLVMPointerTypeKind) {
+ return lp_build_intrinsic(builder, "llvm.SI.load.const",
+ ctx->f32, args, 2,
+ LP_FUNC_ATTR_READNONE);
+ }
- return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2,
- LP_FUNC_ATTR_READNONE);
+ load = lp_build_intrinsic(builder, "llvm.amdgcn.s.buffer.load.i32",
+ ctx->i32, args, 3,
+ LP_FUNC_ATTR_READONLY | LP_FUNC_ATTR_ARGMEMONLY);
+ return LLVMBuildBitCast(builder, load, ctx->f32, "");
}
static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, LLVMValueRef sample_id)
@@ -5504,9 +5530,10 @@ static void create_meta_data(struct si_shader_context *ctx)
"invariant.load", 14);
ctx->range_md_kind = LLVMGetMDKindIDInContext(gallivm->context,
"range", 5);
+ ctx->dereferenceable_md_kind = LLVMGetMDKindIDInContext(
+ gallivm->context, "dereferenceable", 15);
ctx->uniform_md_kind = LLVMGetMDKindIDInContext(gallivm->context,
"amdgpu.uniform", 14);
-
ctx->empty_md = LLVMMDNodeInContext(gallivm->context, NULL, 0);
}
@@ -5601,7 +5628,7 @@ static void create_function(struct si_shader_context *ctx)
v3i32 = LLVMVectorType(ctx->i32, 3);
params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
- params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
+ params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->const_buffer_rsrc_type, SI_NUM_CONST_BUFFERS);
params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
@@ -7722,6 +7749,7 @@ si_get_shader_part(struct si_screen *sscreen,
struct gallivm_state *gallivm = &ctx.gallivm;
si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+ create_meta_data(&ctx);
ctx.type = type;
switch (type) {
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 9055b4d..943b9a0 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -127,6 +127,7 @@ struct si_shader_context {
unsigned range_md_kind;
unsigned uniform_md_kind;
unsigned fpmath_md_kind;
+ unsigned dereferenceable_md_kind;
LLVMValueRef fpmath_md_2p5_ulp;
LLVMValueRef empty_md;
@@ -150,10 +151,17 @@ struct si_shader_context {
LLVMTypeRef v4i32;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
+ LLVMTypeRef const_buffer_rsrc_type;
LLVMValueRef shared_memory;
};
+enum {
+ CONST_ADDR_SPACE = 2,
+ LOCAL_ADDR_SPACE = 3,
+ CONST_ADDR_SPACE_W_RSRC = 42,
+};
+
static inline struct si_shader_context *
si_shader_context(struct lp_build_tgsi_context *bld_base)
{
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 205686a..7a54e74 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1364,6 +1364,12 @@ void si_llvm_context_init(struct si_shader_context *ctx,
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+ ctx->const_buffer_rsrc_type = ctx->v16i8;
+
+ if (HAVE_LLVM >= 0x0500) {
+ ctx->const_buffer_rsrc_type =
+ LLVMPointerType(ctx->i32, CONST_ADDR_SPACE_W_RSRC);
+ }
}
void si_llvm_create_func(struct si_shader_context *ctx,
--
2.7.4
More information about the mesa-dev
mailing list