[Mesa-dev] [PATCH 2/2] radeonsi: Use llvm.amdgcn.s.buffer.load instead of llvm.SI.load.const

Wed Feb 1 11:52:39 UTC 2017

On 31.01.2017 22:36, Tom Stellard wrote:
> Advantages of using llvm.amdgcn.s.buffer.load
>
> - We can use a real pointer type, which LLVM can better reason about and do
>   alias analysis on.  This will also ease the transition to using fat pointers
>   and LLVM IR loads.
>
> - llvm.amdgcn.s.buffer.load is defined in IntrinsicsAMDGPU.td so passes can
>   query information about it other than just its attributes.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_intr.c        |  1 +
>  src/gallium/auxiliary/gallivm/lp_bld_intr.h        |  3 +-
>  src/gallium/drivers/radeonsi/si_shader.c           | 48 +++++++++++++++++-----
>  src/gallium/drivers/radeonsi/si_shader_internal.h  |  8 ++++
>  .../drivers/radeonsi/si_shader_tgsi_setup.c        |  6 +++
>  5 files changed, 55 insertions(+), 11 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> index 049671a..dc8de55 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> @@ -144,6 +144,7 @@ static const char *attr_to_str(enum lp_func_attr attr)
>  {
>     switch (attr) {
>     case LP_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
> +   case LP_FUNC_ATTR_ARGMEMONLY: return "argmemonly";
>     case LP_FUNC_ATTR_BYVAL: return "byval";
>     case LP_FUNC_ATTR_INREG: return "inreg";
>     case LP_FUNC_ATTR_NOALIAS: return "noalias";
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
> index f1e075a..7c8f09b 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
> @@ -54,7 +54,8 @@ enum lp_func_attr {
>     LP_FUNC_ATTR_NOUNWIND     = (1 << 4),
>     LP_FUNC_ATTR_READNONE     = (1 << 5),
>     LP_FUNC_ATTR_READONLY     = (1 << 6),
> -   LP_FUNC_ATTR_LAST         = (1 << 7)
> +   LP_FUNC_ATTR_ARGMEMONLY   = (1 << 7),
> +   LP_FUNC_ATTR_LAST         = (1 << 8)
>  };
>
>  void
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index a6de7c4..cf13cb5 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -93,11 +93,6 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
>   */
>  #define VS_EPILOG_PRIMID_LOC 2
>
> -enum {
> -	CONST_ADDR_SPACE = 2,
> -	LOCAL_ADDR_SPACE = 3,
> -};
> -
>  #define SENDMSG_GS 2
>  #define SENDMSG_GS_DONE 3
>
> @@ -360,8 +355,21 @@ static LLVMValueRef build_indexed_load_const(
>  	struct si_shader_context *ctx,
>  	LLVMValueRef base_ptr, LLVMValueRef index)
>  {
> +	LLVMTypeRef ptr_type = LLVMTypeOf(base_ptr);
> +	LLVMTypeRef elem_type = LLVMGetElementType(ptr_type);
> +	LLVMTypeKind elem_kind = LLVMGetTypeKind(elem_type);
>  	LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
>  	LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
> +
> +	/* Set !dereferenceable metadata */
> +	if (elem_kind == LLVMPointerTypeKind ||
> +		(elem_kind == LLVMArrayTypeKind && LLVMGetTypeKind(LLVMGetElementType(elem_type)) == LLVMPointerTypeKind)) {
> +		LLVMValueRef deref_bytes, deref_md;
> +	 	deref_bytes = LLVMConstInt(ctx->i64, UINT64_MAX, 0);
> +		deref_md = LLVMMDNodeInContext(LLVMGetTypeContext(ptr_type),
> +						&deref_bytes, 1);
> +		LLVMSetMetadata(result, ctx->dereferenceable_md_kind, deref_md);
> +	}
>  	return result;
>  }
>
> @@ -1571,16 +1579,34 @@ static LLVMValueRef get_thread_id(struct si_shader_context *ctx)
>
>  /**
>   * Load a dword from a constant buffer.
> + * @param offset This is a byte offset.
> + * @returns An LLVMValueRef with f32 type.
>   */
>  static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
>  				      LLVMValueRef resource,
>  				      LLVMValueRef offset)
>  {
>  	LLVMBuilderRef builder = ctx->gallivm.builder;
> -	LLVMValueRef args[2] = {resource, offset};
> +	LLVMValueRef load;
> +	LLVMValueRef args[3] = {resource, offset, LLVMConstInt(ctx->i1, 0, 0) };
> +	LLVMTypeRef resource_type = LLVMTypeOf(resource);
> +	LLVMTypeKind resource_kind = LLVMGetTypeKind(resource_type);
> +
> +	/* XXX: We can have a non-pointer resource if we do a constant load
> +         * from the RW_BUFFERS whicha are still represented using the <16 x i8>

s/whicha/which/

Also, this fallback affects constant buffers in addition to RW buffers (at least 
for older versions of LLVM).

Apart from that and having to wait until the corresponding LLVM changes 
land, the two patches are

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

> +         * type. We can eliminate this once we start using pointer types for
> +	 * those buffers.
> +	 */
> +	if (resource_kind != LLVMPointerTypeKind) {
> +		return lp_build_intrinsic(builder, "llvm.SI.load.const",
> +					  ctx->f32, args, 2,
> +					  LP_FUNC_ATTR_READNONE);
> +	}
>
> -	return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2,
> -			       LP_FUNC_ATTR_READNONE);
> +	load = lp_build_intrinsic(builder, "llvm.amdgcn.s.buffer.load.i32",
> +				  ctx->i32, args, 3,
> +				  LP_FUNC_ATTR_READONLY | LP_FUNC_ATTR_ARGMEMONLY);
> +	return LLVMBuildBitCast(builder, load, ctx->f32, "");
>  }
>
>  static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, LLVMValueRef sample_id)
> @@ -5504,9 +5530,10 @@ static void create_meta_data(struct si_shader_context *ctx)
>  							       "invariant.load", 14);
>  	ctx->range_md_kind = LLVMGetMDKindIDInContext(gallivm->context,
>  						     "range", 5);
> +	ctx->dereferenceable_md_kind = LLVMGetMDKindIDInContext(
> +		gallivm->context, "dereferenceable", 15);
>  	ctx->uniform_md_kind = LLVMGetMDKindIDInContext(gallivm->context,
>  							"amdgpu.uniform", 14);
> -
>  	ctx->empty_md = LLVMMDNodeInContext(gallivm->context, NULL, 0);
>  }
>
> @@ -5601,7 +5628,7 @@ static void create_function(struct si_shader_context *ctx)
>  	v3i32 = LLVMVectorType(ctx->i32, 3);
>
>  	params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
> -	params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
> +	params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->const_buffer_rsrc_type, SI_NUM_CONST_BUFFERS);
>  	params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
>  	params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
>  	params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
> @@ -7722,6 +7749,7 @@ si_get_shader_part(struct si_screen *sscreen,
>  	struct gallivm_state *gallivm = &ctx.gallivm;
>
>  	si_init_shader_ctx(&ctx, sscreen, &shader, tm);
> +	create_meta_data(&ctx);
>  	ctx.type = type;
>
>  	switch (type) {
> diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
> index 9055b4d..943b9a0 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_internal.h
> +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
> @@ -127,6 +127,7 @@ struct si_shader_context {
>  	unsigned range_md_kind;
>  	unsigned uniform_md_kind;
>  	unsigned fpmath_md_kind;
> +	unsigned dereferenceable_md_kind;
>  	LLVMValueRef fpmath_md_2p5_ulp;
>  	LLVMValueRef empty_md;
>
> @@ -150,10 +151,17 @@ struct si_shader_context {
>  	LLVMTypeRef v4i32;
>  	LLVMTypeRef v4f32;
>  	LLVMTypeRef v8i32;
> +	LLVMTypeRef const_buffer_rsrc_type;
>
>  	LLVMValueRef shared_memory;
>  };
>
> +enum {
> +	CONST_ADDR_SPACE = 2,
> +	LOCAL_ADDR_SPACE = 3,
> +	CONST_ADDR_SPACE_W_RSRC = 42,
> +};
> +
>  static inline struct si_shader_context *
>  si_shader_context(struct lp_build_tgsi_context *bld_base)
>  {
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
> index 205686a..7a54e74 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
> @@ -1364,6 +1364,12 @@ void si_llvm_context_init(struct si_shader_context *ctx,
>  	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
>  	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
>  	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
> +	ctx->const_buffer_rsrc_type = ctx->v16i8;
> +
> +	if (HAVE_LLVM >= 0x0500) {
> +		ctx->const_buffer_rsrc_type =
> +			LLVMPointerType(ctx->i32, CONST_ADDR_SPACE_W_RSRC);
> +	}
>  }
>
>  void si_llvm_create_func(struct si_shader_context *ctx,
>