[Mesa-dev] [PATCH 11/15] ac: don't use byval LLVM qualifier in shaders

Samuel Pitoiset samuel.pitoiset at gmail.com
Tue Jan 9 11:08:48 UTC 2018


Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

On 01/06/2018 12:12 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> shader-db doesn't show any regression and 32-bit pointers with byval
> are declared as VGPRs for some reason.
> ---
>   src/amd/common/ac_llvm_helper.cpp           |  3 +--
>   src/amd/common/ac_llvm_util.c               |  2 --
>   src/amd/common/ac_llvm_util.h               |  1 -
>   src/amd/common/ac_nir_to_llvm.c             |  6 ++----
>   src/gallium/auxiliary/gallivm/lp_bld_intr.c |  2 --
>   src/gallium/auxiliary/gallivm/lp_bld_intr.h |  1 -
>   src/gallium/drivers/radeonsi/si_shader.c    | 17 +++++------------
>   7 files changed, 8 insertions(+), 24 deletions(-)
> 
> diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp
> index 4db7036..54562cc 100644
> --- a/src/amd/common/ac_llvm_helper.cpp
> +++ b/src/amd/common/ac_llvm_helper.cpp
> @@ -52,22 +52,21 @@ void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
>   #else
>      A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
>   #endif
>   }
>   
>   bool ac_is_sgpr_param(LLVMValueRef arg)
>   {
>   	llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
>   	llvm::AttributeList AS = A->getParent()->getAttributes();
>   	unsigned ArgNo = A->getArgNo();
> -	return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) ||
> -	       AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
> +	return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
>   }
>   
>   LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
>   {
>   #if HAVE_LLVM >= 0x0309
>   	return LLVMGetCalledValue(call);
>   #else
>   	return llvm::wrap(llvm::CallSite(llvm::unwrap<llvm::Instruction>(call)).getCalledValue());
>   #endif
>   }
> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
> index 429904c..5fd785a 100644
> --- a/src/amd/common/ac_llvm_util.c
> +++ b/src/amd/common/ac_llvm_util.c
> @@ -145,39 +145,37 @@ LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum ac
>   
>   	return tm;
>   }
>   
>   
>   #if HAVE_LLVM < 0x0400
>   static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr)
>   {
>      switch (attr) {
>      case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute;
> -   case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute;
>      case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute;
>      case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute;
>      case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute;
>      case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute;
>      case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute;
>      default:
>   	   fprintf(stderr, "Unhandled function attribute: %x\n", attr);
>   	   return 0;
>      }
>   }
>   
>   #else
>   
>   static const char *attr_to_str(enum ac_func_attr attr)
>   {
>      switch (attr) {
>      case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
> -   case AC_FUNC_ATTR_BYVAL: return "byval";
>      case AC_FUNC_ATTR_INREG: return "inreg";
>      case AC_FUNC_ATTR_NOALIAS: return "noalias";
>      case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
>      case AC_FUNC_ATTR_READNONE: return "readnone";
>      case AC_FUNC_ATTR_READONLY: return "readonly";
>      case AC_FUNC_ATTR_WRITEONLY: return "writeonly";
>      case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: return "inaccessiblememonly";
>      case AC_FUNC_ATTR_CONVERGENT: return "convergent";
>      default:
>   	   fprintf(stderr, "Unhandled function attribute: %x\n", attr);
> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
> index 7c8b6b0..26b0959 100644
> --- a/src/amd/common/ac_llvm_util.h
> +++ b/src/amd/common/ac_llvm_util.h
> @@ -30,21 +30,20 @@
>   #include <llvm-c/TargetMachine.h>
>   
>   #include "amd_family.h"
>   
>   #ifdef __cplusplus
>   extern "C" {
>   #endif
>   
>   enum ac_func_attr {
>   	AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
> -	AC_FUNC_ATTR_BYVAL        = (1 << 1),
>   	AC_FUNC_ATTR_INREG        = (1 << 2),
>   	AC_FUNC_ATTR_NOALIAS      = (1 << 3),
>   	AC_FUNC_ATTR_NOUNWIND     = (1 << 4),
>   	AC_FUNC_ATTR_READNONE     = (1 << 5),
>   	AC_FUNC_ATTR_READONLY     = (1 << 6),
>   	AC_FUNC_ATTR_WRITEONLY    = HAVE_LLVM >= 0x0400 ? (1 << 7) : 0,
>   	AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = HAVE_LLVM >= 0x0400 ? (1 << 8) : 0,
>   	AC_FUNC_ATTR_CONVERGENT = HAVE_LLVM >= 0x0400 ? (1 << 9) : 0,
>   
>   	/* Legacy intrinsic that needs attributes on function declarations
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 48e2920..187fdfb 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -316,28 +316,26 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
>   	main_function_type =
>   	    LLVMFunctionType(ret_type, args->types, args->count, 0);
>   	LLVMValueRef main_function =
>   	    LLVMAddFunction(module, "main", main_function_type);
>   	main_function_body =
>   	    LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
>   	LLVMPositionBuilderAtEnd(builder, main_function_body);
>   
>   	LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
>   	for (unsigned i = 0; i < args->sgpr_count; ++i) {
> +		ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG);
> +
>   		if (args->array_params_mask & (1 << i)) {
>   			LLVMValueRef P = LLVMGetParam(main_function, i);
> -			ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL);
>   			ac_add_attr_dereferenceable(P, UINT64_MAX);
>   		}
> -		else {
> -			ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG);
> -		}
>   	}
>   
>   	if (max_workgroup_size) {
>   		ac_llvm_add_target_dep_function_attr(main_function,
>   						     "amdgpu-max-work-group-size",
>   						     max_workgroup_size);
>   	}
>   	if (unsafe_math) {
>   		/* These were copied from some LLVM test. */
>   		LLVMAddTargetDependentFunctionAttr(main_function,
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> index b924555..74ed16f 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> @@ -119,39 +119,37 @@ lp_declare_intrinsic(LLVMModuleRef module,
>   
>      return function;
>   }
>   
>   
>   #if HAVE_LLVM < 0x0400
>   static LLVMAttribute lp_attr_to_llvm_attr(enum lp_func_attr attr)
>   {
>      switch (attr) {
>      case LP_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute;
> -   case LP_FUNC_ATTR_BYVAL: return LLVMByValAttribute;
>      case LP_FUNC_ATTR_INREG: return LLVMInRegAttribute;
>      case LP_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute;
>      case LP_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute;
>      case LP_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute;
>      case LP_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute;
>      default:
>         _debug_printf("Unhandled function attribute: %x\n", attr);
>         return 0;
>      }
>   }
>   
>   #else
>   
>   static const char *attr_to_str(enum lp_func_attr attr)
>   {
>      switch (attr) {
>      case LP_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
> -   case LP_FUNC_ATTR_BYVAL: return "byval";
>      case LP_FUNC_ATTR_INREG: return "inreg";
>      case LP_FUNC_ATTR_NOALIAS: return "noalias";
>      case LP_FUNC_ATTR_NOUNWIND: return "nounwind";
>      case LP_FUNC_ATTR_READNONE: return "readnone";
>      case LP_FUNC_ATTR_READONLY: return "readonly";
>      case LP_FUNC_ATTR_WRITEONLY: return "writeonly";
>      case LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: return "inaccessiblememonly";
>      case LP_FUNC_ATTR_CONVERGENT: return "convergent";
>      default:
>         _debug_printf("Unhandled function attribute: %x\n", attr);
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
> index 0a929c5..bf8143d 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
> @@ -41,21 +41,20 @@
>   #include "gallivm/lp_bld_init.h"
>   
>   
>   /**
>    * Max number of arguments in an intrinsic.
>    */
>   #define LP_MAX_FUNC_ARGS 32
>   
>   enum lp_func_attr {
>      LP_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
> -   LP_FUNC_ATTR_BYVAL        = (1 << 1),
>      LP_FUNC_ATTR_INREG        = (1 << 2),
>      LP_FUNC_ATTR_NOALIAS      = (1 << 3),
>      LP_FUNC_ATTR_NOUNWIND     = (1 << 4),
>      LP_FUNC_ATTR_READNONE     = (1 << 5),
>      LP_FUNC_ATTR_READONLY     = (1 << 6),
>      LP_FUNC_ATTR_WRITEONLY    = HAVE_LLVM >= 0x0400 ? (1 << 7) : 0,
>      LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = HAVE_LLVM >= 0x0400 ? (1 << 8) : 0,
>      LP_FUNC_ATTR_CONVERGENT   = HAVE_LLVM >= 0x0400 ? (1 << 9) : 0,
>   
>      /* Legacy intrinsic that needs attributes on function declarations
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 84a26a2..708da13 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -4320,32 +4320,32 @@ static void si_create_function(struct si_shader_context *ctx,
>   	int i;
>   
>   	si_llvm_create_func(ctx, name, returns, num_returns,
>   			    fninfo->types, fninfo->num_params);
>   	ctx->return_value = LLVMGetUndef(ctx->return_type);
>   
>   	for (i = 0; i < fninfo->num_sgpr_params; ++i) {
>   		LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
>   
>   		/* The combination of:
> -		 * - ByVal
> +		 * - noalias
>   		 * - dereferenceable
>   		 * - invariant.load
>   		 * allows the optimization passes to move loads and reduces
>   		 * SGPR spilling significantly.
>   		 */
> +		lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
> +
>   		if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
> -			lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_BYVAL);
>   			lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_NOALIAS);
>   			ac_add_attr_dereferenceable(P, UINT64_MAX);
> -		} else
> -			lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
> +		}
>   	}
>   
>   	for (i = 0; i < fninfo->num_params; ++i) {
>   		if (fninfo->assign[i])
>   			*fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i);
>   	}
>   
>   	if (max_workgroup_size) {
>   		si_llvm_add_attribute(ctx->main_fn, "amdgpu-max-work-group-size",
>   				      max_workgroup_size);
> @@ -6459,29 +6459,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
>   			LLVMTypeRef param_type;
>   			bool is_sgpr;
>   			unsigned param_size;
>   			LLVMValueRef arg = NULL;
>   
>   			param = LLVMGetParam(parts[part], param_idx);
>   			param_type = LLVMTypeOf(param);
>   			param_size = ac_get_type_size(param_type) / 4;
>   			is_sgpr = ac_is_sgpr_param(param);
>   
> -			if (is_sgpr) {
> -#if HAVE_LLVM < 0x0400
> -				LLVMRemoveAttribute(param, LLVMByValAttribute);
> -#else
> -				unsigned kind_id = LLVMGetEnumAttributeKindForName("byval", 5);
> -				LLVMRemoveEnumAttributeAtIndex(parts[part], param_idx + 1, kind_id);
> -#endif
> +			if (is_sgpr)
>   				lp_add_function_attr(parts[part], param_idx + 1, LP_FUNC_ATTR_INREG);
> -			}
>   
>   			assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));
>   			assert(is_sgpr || out_idx >= num_out_sgpr);
>   
>   			if (param_size == 1)
>   				arg = out[out_idx];
>   			else
>   				arg = lp_build_gather_values(&ctx->gallivm, &out[out_idx], param_size);
>   
>   			if (LLVMTypeOf(arg) != param_type) {
> 


More information about the mesa-dev mailing list