[Mesa-dev] [PATCH 2/2] radeonsi: Use llvm.amdgcn.s.buffer.load instead of llvm.SI.load.const

Tom Stellard thomas.stellard at amd.com
Tue Jan 31 21:36:47 UTC 2017


Advantages of using llvm.amdgcn.s.buffer.load:

- We can use a real pointer type, which LLVM can better reason about and do
  alias analysis on.  This will also ease the transition to using fat pointers
  and LLVM IR loads.

- llvm.amdgcn.s.buffer.load is defined in IntrinsicsAMDGPU.td so passes can
  query information about it other than just its attributes.
---
 src/gallium/auxiliary/gallivm/lp_bld_intr.c        |  1 +
 src/gallium/auxiliary/gallivm/lp_bld_intr.h        |  3 +-
 src/gallium/drivers/radeonsi/si_shader.c           | 48 +++++++++++++++++-----
 src/gallium/drivers/radeonsi/si_shader_internal.h  |  8 ++++
 .../drivers/radeonsi/si_shader_tgsi_setup.c        |  6 +++
 5 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index 049671a..dc8de55 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -144,6 +144,7 @@ static const char *attr_to_str(enum lp_func_attr attr)
 {
    switch (attr) {
    case LP_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
+   case LP_FUNC_ATTR_ARGMEMONLY: return "argmemonly";
    case LP_FUNC_ATTR_BYVAL: return "byval";
    case LP_FUNC_ATTR_INREG: return "inreg";
    case LP_FUNC_ATTR_NOALIAS: return "noalias";
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index f1e075a..7c8f09b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -54,7 +54,8 @@ enum lp_func_attr {
    LP_FUNC_ATTR_NOUNWIND     = (1 << 4),
    LP_FUNC_ATTR_READNONE     = (1 << 5),
    LP_FUNC_ATTR_READONLY     = (1 << 6),
-   LP_FUNC_ATTR_LAST         = (1 << 7)
+   LP_FUNC_ATTR_ARGMEMONLY   = (1 << 7),
+   LP_FUNC_ATTR_LAST         = (1 << 8)
 };
 
 void
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index a6de7c4..cf13cb5 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -93,11 +93,6 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
  */
 #define VS_EPILOG_PRIMID_LOC 2
 
-enum {
-	CONST_ADDR_SPACE = 2,
-	LOCAL_ADDR_SPACE = 3,
-};
-
 #define SENDMSG_GS 2
 #define SENDMSG_GS_DONE 3
 
@@ -360,8 +355,21 @@ static LLVMValueRef build_indexed_load_const(
 	struct si_shader_context *ctx,
 	LLVMValueRef base_ptr, LLVMValueRef index)
 {
+	LLVMTypeRef ptr_type = LLVMTypeOf(base_ptr);
+	LLVMTypeRef elem_type = LLVMGetElementType(ptr_type);
+	LLVMTypeKind elem_kind = LLVMGetTypeKind(elem_type);
 	LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
 	LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+
+	/* Set !dereferenceable metadata */
+	if (elem_kind == LLVMPointerTypeKind ||
+		(elem_kind == LLVMArrayTypeKind && LLVMGetTypeKind(LLVMGetElementType(elem_type)) == LLVMPointerTypeKind)) {
+		LLVMValueRef deref_bytes, deref_md;
+	 	deref_bytes = LLVMConstInt(ctx->i64, UINT64_MAX, 0);
+		deref_md = LLVMMDNodeInContext(LLVMGetTypeContext(ptr_type),
+						&deref_bytes, 1);
+		LLVMSetMetadata(result, ctx->dereferenceable_md_kind, deref_md);
+	}
 	return result;
 }
 
@@ -1571,16 +1579,34 @@ static LLVMValueRef get_thread_id(struct si_shader_context *ctx)
 
 /**
  * Load a dword from a constant buffer.
+ * @param offset This is a byte offset.
+ * @returns An LLVMValueRef with f32 type.
  */
 static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
 				      LLVMValueRef resource,
 				      LLVMValueRef offset)
 {
 	LLVMBuilderRef builder = ctx->gallivm.builder;
-	LLVMValueRef args[2] = {resource, offset};
+	LLVMValueRef load;
+	LLVMValueRef args[3] = {resource, offset, LLVMConstInt(ctx->i1, 0, 0) };
+	LLVMTypeRef resource_type = LLVMTypeOf(resource);
+	LLVMTypeKind resource_kind = LLVMGetTypeKind(resource_type);
+
+	/* XXX: We can have a non-pointer resource if we do a constant load
+	 * from the RW_BUFFERS, which are still represented using the <16 x i8>
+	 * type. We can eliminate this once we start using pointer types for
+	 * those buffers.
+	 */
+	if (resource_kind != LLVMPointerTypeKind) {
+		return lp_build_intrinsic(builder, "llvm.SI.load.const",
+					  ctx->f32, args, 2,
+					  LP_FUNC_ATTR_READNONE);
+	}
 
-	return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2,
-			       LP_FUNC_ATTR_READNONE);
+	load = lp_build_intrinsic(builder, "llvm.amdgcn.s.buffer.load.i32",
+				  ctx->i32, args, 3,
+				  LP_FUNC_ATTR_READONLY | LP_FUNC_ATTR_ARGMEMONLY);
+	return LLVMBuildBitCast(builder, load, ctx->f32, "");
 }
 
 static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, LLVMValueRef sample_id)
@@ -5504,9 +5530,10 @@ static void create_meta_data(struct si_shader_context *ctx)
 							       "invariant.load", 14);
 	ctx->range_md_kind = LLVMGetMDKindIDInContext(gallivm->context,
 						     "range", 5);
+	ctx->dereferenceable_md_kind = LLVMGetMDKindIDInContext(
+		gallivm->context, "dereferenceable", 15);
 	ctx->uniform_md_kind = LLVMGetMDKindIDInContext(gallivm->context,
 							"amdgpu.uniform", 14);
-
 	ctx->empty_md = LLVMMDNodeInContext(gallivm->context, NULL, 0);
 }
 
@@ -5601,7 +5628,7 @@ static void create_function(struct si_shader_context *ctx)
 	v3i32 = LLVMVectorType(ctx->i32, 3);
 
 	params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
-	params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
+	params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->const_buffer_rsrc_type, SI_NUM_CONST_BUFFERS);
 	params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
 	params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
 	params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
@@ -7722,6 +7749,7 @@ si_get_shader_part(struct si_screen *sscreen,
 	struct gallivm_state *gallivm = &ctx.gallivm;
 
 	si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+	create_meta_data(&ctx);
 	ctx.type = type;
 
 	switch (type) {
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 9055b4d..943b9a0 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -127,6 +127,7 @@ struct si_shader_context {
 	unsigned range_md_kind;
 	unsigned uniform_md_kind;
 	unsigned fpmath_md_kind;
+	unsigned dereferenceable_md_kind;
 	LLVMValueRef fpmath_md_2p5_ulp;
 	LLVMValueRef empty_md;
 
@@ -150,10 +151,17 @@ struct si_shader_context {
 	LLVMTypeRef v4i32;
 	LLVMTypeRef v4f32;
 	LLVMTypeRef v8i32;
+	LLVMTypeRef const_buffer_rsrc_type;
 
 	LLVMValueRef shared_memory;
 };
 
+enum {
+	CONST_ADDR_SPACE = 2,
+	LOCAL_ADDR_SPACE = 3,
+	CONST_ADDR_SPACE_W_RSRC = 42,
+};
+
 static inline struct si_shader_context *
 si_shader_context(struct lp_build_tgsi_context *bld_base)
 {
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 205686a..7a54e74 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1364,6 +1364,12 @@ void si_llvm_context_init(struct si_shader_context *ctx,
 	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
 	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
 	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+	ctx->const_buffer_rsrc_type = ctx->v16i8;
+
+	if (HAVE_LLVM >= 0x0500) {
+		ctx->const_buffer_rsrc_type =
+			LLVMPointerType(ctx->i32, CONST_ADDR_SPACE_W_RSRC);
+	}
 }
 
 void si_llvm_create_func(struct si_shader_context *ctx,
-- 
2.7.4



More information about the mesa-dev mailing list