[Mesa-dev] [PATCH 2/2] gallivm, ac: add function attributes at call sites instead of declarations

Jose Fonseca jfonseca at vmware.com
Wed Mar 1 16:29:21 UTC 2017


On 23/02/17 00:01, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> They can vary at call sites if the intrinsic is NOT a legacy SI intrinsic.
> We need this to force readnone or inaccessiblememonly on some amdgcn
> intrinsics.
>
> This is only used with LLVM 4.0 and later. Intrinsics only used with
> LLVM <= 3.9 don't need the LEGACY flag.
>
> gallivm and ac code is in the same patch, because splitting would be
> more complicated with all the LEGACY uses all over the place.
> ---
>  src/amd/common/ac_llvm_build.c                    | 23 ++++----
>  src/amd/common/ac_llvm_util.c                     | 31 +++++++----
>  src/amd/common/ac_llvm_util.h                     | 17 +++---
>  src/amd/common/ac_nir_to_llvm.c                   | 63 ++++++++++++++--------
>  src/gallium/auxiliary/draw/draw_llvm.c            |  6 ++-
>  src/gallium/auxiliary/gallivm/lp_bld_intr.c       | 51 ++++++++++++------
>  src/gallium/auxiliary/gallivm/lp_bld_intr.h       | 11 ++--
>  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  3 +-
>  src/gallium/drivers/llvmpipe/lp_state_fs.c        |  3 +-
>  src/gallium/drivers/llvmpipe/lp_state_setup.c     |  7 +--
>  src/gallium/drivers/radeonsi/si_shader.c          | 64 ++++++++++++++---------
>  src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 14 +++--
>  12 files changed, 184 insertions(+), 109 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 2f25b14..5c8b7f7 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -75,47 +75,50 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
>  							"amdgpu.uniform", 14);
>
>  	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
>  }
>
>  LLVMValueRef
>  ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
>  		       LLVMTypeRef return_type, LLVMValueRef *params,
>  		       unsigned param_count, unsigned attrib_mask)
>  {
> -	LLVMValueRef function;
> +	LLVMValueRef function, call;
> +	bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
> +				  !(attrib_mask & AC_FUNC_ATTR_LEGACY);
>
>  	function = LLVMGetNamedFunction(ctx->module, name);
>  	if (!function) {
>  		LLVMTypeRef param_types[32], function_type;
>  		unsigned i;
>
>  		assert(param_count <= 32);
>
>  		for (i = 0; i < param_count; ++i) {
>  			assert(params[i]);
>  			param_types[i] = LLVMTypeOf(params[i]);
>  		}
>  		function_type =
>  		    LLVMFunctionType(return_type, param_types, param_count, 0);
>  		function = LLVMAddFunction(ctx->module, name, function_type);
>
>  		LLVMSetFunctionCallConv(function, LLVMCCallConv);
>  		LLVMSetLinkage(function, LLVMExternalLinkage);
>
> -		attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
> -		while (attrib_mask) {
> -			enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
> -			ac_add_function_attr(function, -1, attr);
> -		}
> +		if (!set_callsite_attrs)
> +			ac_add_func_attributes(ctx->context, function, attrib_mask);
>  	}
> -	return LLVMBuildCall(ctx->builder, function, params, param_count, "");
> +
> +	call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
> +	if (set_callsite_attrs)
> +		ac_add_func_attributes(ctx->context, call, attrib_mask);
> +	return call;
>  }
>
>  LLVMValueRef
>  ac_build_gather_values_extended(struct ac_llvm_context *ctx,
>  				LLVMValueRef *values,
>  				unsigned value_count,
>  				unsigned value_stride,
>  				bool load)
>  {
>  	LLVMBuilderRef builder = ctx->builder;
> @@ -524,21 +527,22 @@ ac_build_tbuffer_store(struct ac_llvm_context *ctx,
>  	/* The instruction offset field has 12 bits */
>  	assert(offen || inst_offset < (1 << 12));
>
>  	/* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
>  	unsigned func = CLAMP(num_channels, 1, 3) - 1;
>  	const char *types[] = {"i32", "v2i32", "v4i32"};
>  	char name[256];
>  	snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
>
>  	ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
> -			       args, ARRAY_SIZE(args), 0);
> +			       args, ARRAY_SIZE(args),
> +			       AC_FUNC_ATTR_LEGACY);
>  }
>
>  void
>  ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
>  			      LLVMValueRef rsrc,
>  			      LLVMValueRef vdata,
>  			      unsigned num_channels,
>  			      LLVMValueRef vaddr,
>  			      LLVMValueRef soffset,
>  			      unsigned inst_offset)
> @@ -836,12 +840,13 @@ LLVMValueRef ac_emit_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
>
>  	const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." :
>  						 "llvm.AMDIL.clamp.";
>  	LLVMValueRef args[3] = {
>  		value,
>  		LLVMConstReal(ctx->f32, 0),
>  		LLVMConstReal(ctx->f32, 1),
>  	};
>
>  	return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3,
> -				      AC_FUNC_ATTR_READNONE);
> +				      AC_FUNC_ATTR_READNONE |
> +				      AC_FUNC_ATTR_LEGACY);
>  }
> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
> index be127c5..fb525dd 100644
> --- a/src/amd/common/ac_llvm_util.c
> +++ b/src/amd/common/ac_llvm_util.c
> @@ -17,21 +17,21 @@
>   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>   * USE OR OTHER DEALINGS IN THE SOFTWARE.
>   *
>   * The above copyright notice and this permission notice (including the
>   * next paragraph) shall be included in all copies or substantial portions
>   * of the Software.
>   *
>   */
>  /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
>  #include "ac_llvm_util.h"
> -
> +#include "util/bitscan.h"
>  #include <llvm-c/Core.h>
>
>  #include "c11/threads.h"
>
>  #include <assert.h>
>  #include <stdio.h>
>  #include <string.h>
>
>  static void ac_init_llvm_target()
>  {
> @@ -173,40 +173,53 @@ static const char *attr_to_str(enum ac_func_attr attr)
>     case AC_FUNC_ATTR_READNONE: return "readnone";
>     case AC_FUNC_ATTR_READONLY: return "readonly";
>     default:
>  	   fprintf(stderr, "Unhandled function attribute: %x\n", attr);
>  	   return 0;
>     }
>  }
>
>  #endif
>
> -void
> -ac_add_function_attr(LLVMValueRef function,
> -                     int attr_idx,
> -                     enum ac_func_attr attr)
> +static void
> +ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
> +                     int attr_idx, enum ac_func_attr attr)
>  {
> -
>  #if HAVE_LLVM < 0x0400
>     LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
>     if (attr_idx == -1) {
>        LLVMAddFunctionAttr(function, llvm_attr);
>     } else {
>        LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
>     }
>  #else
> -   LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
>     const char *attr_name = attr_to_str(attr);
>     unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
>                                                        strlen(attr_name));
> -   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
> -   LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
> +   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
> +
> +   if (LLVMIsAFunction(function))
> +      LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
> +   else
> +      LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
>  #endif
>  }
>
> +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
> +			    unsigned attrib_mask)
> +{
> +	attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
> +	attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
> +
> +	while (attrib_mask) {
> +		enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
> +		ac_add_function_attr(ctx, function, -1, attr);
> +	}
> +}
> +
>  void
>  ac_dump_module(LLVMModuleRef module)
>  {
>  	char *str = LLVMPrintModuleToString(module);
>  	fprintf(stderr, "%s", str);
>  	LLVMDisposeMessage(str);
>  }
> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
> index 93d3d27..4fe4ab4 100644
> --- a/src/amd/common/ac_llvm_util.h
> +++ b/src/amd/common/ac_llvm_util.h
> @@ -34,28 +34,29 @@ extern "C" {
>  #endif
>
>  enum ac_func_attr {
>  	AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
>  	AC_FUNC_ATTR_BYVAL        = (1 << 1),
>  	AC_FUNC_ATTR_INREG        = (1 << 2),
>  	AC_FUNC_ATTR_NOALIAS      = (1 << 3),
>  	AC_FUNC_ATTR_NOUNWIND     = (1 << 4),
>  	AC_FUNC_ATTR_READNONE     = (1 << 5),
>  	AC_FUNC_ATTR_READONLY     = (1 << 6),
> +
> +	/* Legacy intrinsic that needs attributes on function declarations
> +	 * and they must match the internal LLVM definition exactly, otherwise
> +	 * intrinsic selection fails.
> +	 */
> +	AC_FUNC_ATTR_LEGACY       = (1u << 31),
>  };
>
>  LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill);
>
>  void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
>  bool ac_is_sgpr_param(LLVMValueRef param);
> -
> -void
> -ac_add_function_attr(LLVMValueRef function,
> -                     int attr_idx,
> -                     enum ac_func_attr attr);
> -
> -void
> -ac_dump_module(LLVMModuleRef module);
> +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
> +			    unsigned attrib_mask);
> +void ac_dump_module(LLVMModuleRef module);
>
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index b3dc63c..4ec19d5 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1039,26 +1039,27 @@ static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
>  	src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
>  	src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
>
>  	dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
>  	dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
>  	result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
>  	return result;
>  }
>
>  static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
> -					  const char *intrin,
> +					  const char *intrin, unsigned attr_mask,
>  					  LLVMValueRef srcs[3])
>  {
>  	LLVMValueRef result;
>  	LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
> -	result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
> +	result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3,
> +					AC_FUNC_ATTR_READNONE | attr_mask);
>
>  	result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
>  	return result;
>  }
>
>  static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
>  					 LLVMValueRef src0, LLVMValueRef src1,
>  					 LLVMValueRef src2, LLVMValueRef src3)
>  {
>  	LLVMValueRef bfi_args[3], result;
> @@ -1418,24 +1419,26 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
>  		break;
>  	case nir_op_fmin:
>  		result = emit_intrin_2f_param(ctx, "llvm.minnum",
>  		                              to_float_type(ctx, def_type), src[0], src[1]);
>  		break;
>  	case nir_op_ffma:
>  		result = emit_intrin_3f_param(ctx, "llvm.fma",
>  		                              to_float_type(ctx, def_type), src[0], src[1], src[2]);
>  		break;
>  	case nir_op_ibitfield_extract:
> -		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
> +		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32",
> +					       AC_FUNC_ATTR_LEGACY, src);
>  		break;
>  	case nir_op_ubitfield_extract:
> -		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
> +		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32",
> +					       AC_FUNC_ATTR_LEGACY, src);
>  		break;
>  	case nir_op_bitfield_insert:
>  		result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
>  		break;
>  	case nir_op_bitfield_reverse:
>  		result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
>  		break;
>  	case nir_op_bit_count:
>  		result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
>  		break;
> @@ -1635,22 +1638,23 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
>  		txq_args[txq_arg_count++] = tinfo->args[1];
>  		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */
>  		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */
>  		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
>  		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
>  		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
>  		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
>  		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
>  		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
>  		size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
> -					   txq_args, txq_arg_count,
> -					   AC_FUNC_ATTR_READNONE);
> +					      txq_args, txq_arg_count,
> +					      AC_FUNC_ATTR_READNONE |
> +					      AC_FUNC_ATTR_LEGACY);
>
>  		for (c = 0; c < 2; c++) {
>  			half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
>  								LLVMConstInt(ctx->i32, c, false), "");
>  			half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
>  			half_texel[c] = ac_emit_fdiv(&ctx->ac, ctx->f32one, half_texel[c]);
>  			half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
>  						      LLVMConstReal(ctx->f32, -0.5), "");
>  		}
>  	}
> @@ -1660,21 +1664,22 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
>  		LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
>  		tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
>  		tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
>  		tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
>  		tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
>  		coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
>  	}
>
>  	tinfo->args[0] = coord;
>  	return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
> -				   AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
> +				      AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
> +				      AC_FUNC_ATTR_LEGACY);
>
>  }
>
>  static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
>  					nir_tex_instr *instr,
>  					struct ac_tex_info *tinfo)
>  {
>  	const char *name = "llvm.SI.image.sample";
>  	const char *infix = "";
>  	char intr_name[127];
> @@ -1728,21 +1733,22 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
>  		has_offset ? ".o" : "", type);
>
>  	if (instr->op == nir_texop_tg4) {
>  		enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
>  		if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
>  			return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name,
>  							  (int)has_offset + (int)is_shadow);
>  		}
>  	}
>  	return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
> -				   AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
> +				      AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
> +				      AC_FUNC_ATTR_LEGACY);
>
>  }
>
>  static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
>                                                  nir_intrinsic_instr *instr)
>  {
>  	LLVMValueRef index = get_src(ctx, instr->src[0]);
>  	unsigned desc_set = nir_intrinsic_desc_set(instr);
>  	unsigned binding = nir_intrinsic_binding(instr);
>  	LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
> @@ -2006,21 +2012,23 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
>  	if (instr->dest.ssa.bit_size == 64)
>  		num_components *= 2;
>
>  	for (unsigned i = 0; i < num_components; ++i) {
>  		LLVMValueRef params[] = {
>  			rsrc,
>  			LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
>  				     offset, "")
>  		};
>  		results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
> -						 params, 2, AC_FUNC_ATTR_READNONE);
> +						    params, 2,
> +						    AC_FUNC_ATTR_READNONE |
> +						    AC_FUNC_ATTR_LEGACY);
>  	}
>
>
>  	ret = ac_build_gather_values(&ctx->ac, results, instr->num_components);
>  	return LLVMBuildBitCast(ctx->builder, ret,
>  	                        get_def_type(ctx, &instr->dest.ssa), "");
>  }
>
>  static void
>  radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
> @@ -2103,21 +2111,23 @@ load_gs_input(struct nir_to_llvm_context *ctx,
>  		args[1] = vtx_offset;
>  		args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false);
>  		args[3] = ctx->i32zero;
>  		args[4] = ctx->i32one; /* OFFEN */
>  		args[5] = ctx->i32zero; /* IDXEN */
>  		args[6] = ctx->i32one; /* GLC */
>  		args[7] = ctx->i32zero; /* SLC */
>  		args[8] = ctx->i32zero; /* TFE */
>
>  		value[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
> -					    ctx->i32, args, 9, AC_FUNC_ATTR_READONLY);
> +						  ctx->i32, args, 9,
> +						  AC_FUNC_ATTR_READONLY |
> +						  AC_FUNC_ATTR_LEGACY);
>  	}
>  	result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
>
>  	return result;
>  }
>
>  static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>  				   nir_intrinsic_instr *instr)
>  {
>  	LLVMValueRef values[8];
> @@ -2685,21 +2695,23 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
>  	params[2] = LLVMConstInt(ctx->i32, 15, false);
>  	params[3] = ctx->i32zero;
>  	params[4] = ctx->i32zero;
>  	params[5] = da ? ctx->i32one : ctx->i32zero;
>  	params[6] = ctx->i32zero;
>  	params[7] = ctx->i32zero;
>  	params[8] = ctx->i32zero;
>  	params[9] = ctx->i32zero;
>
>  	res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
> -				  params, 10, AC_FUNC_ATTR_READNONE);
> +				     params, 10,
> +				     AC_FUNC_ATTR_READNONE |
> +				     AC_FUNC_ATTR_LEGACY);
>
>  	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
>  	    glsl_sampler_type_is_array(type)) {
>  		LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
>  		LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
>  		LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
>  		z = LLVMBuildSDiv(ctx->builder, z, six, "");
>  		res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
>  	}
>  	return res;
> @@ -2729,21 +2741,21 @@ static void emit_discard_if(struct nir_to_llvm_context *ctx,
>
>  	cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
>  			     get_src(ctx, instr->src[0]),
>  			     ctx->i32zero, "");
>
>  	cond = LLVMBuildSelect(ctx->builder, cond,
>  			       LLVMConstReal(ctx->f32, -1.0f),
>  			       ctx->f32zero, "");
>  	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
>  			       ctx->voidt,
> -			       &cond, 1, 0);
> +			       &cond, 1, AC_FUNC_ATTR_LEGACY);
>  }
>
>  static LLVMValueRef
>  visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
>  {
>  	LLVMValueRef result;
>  	LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
>  	result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
>  			      LLVMConstInt(ctx->i32, 0xfc0, false), "");
>
> @@ -2984,21 +2996,21 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
>  	 * have any effect, and GS threads have no externally observable
>  	 * effects other than emitting vertices.
>  	 */
>  	can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
>  				 LLVMConstInt(ctx->i32, ctx->gs_max_out_vertices, false), "");
>
>  	kill = LLVMBuildSelect(ctx->builder, can_emit,
>  			       LLVMConstReal(ctx->f32, 1.0f),
>  			       LLVMConstReal(ctx->f32, -1.0f), "");
>  	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
> -			    ctx->voidt, &kill, 1, 0);
> +			    ctx->voidt, &kill, 1, AC_FUNC_ATTR_LEGACY);
>
>  	/* loop num outputs */
>  	idx = 0;
>  	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
>  		LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
>  		if (!(ctx->output_mask & (1ull << i)))
>  			continue;
>
>  		for (unsigned j = 0; j < 4; j++) {
>  			LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
> @@ -3144,21 +3156,21 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx,
>  	case nir_intrinsic_image_atomic_comp_swap:
>  		result = visit_image_atomic(ctx, instr);
>  		break;
>  	case nir_intrinsic_image_size:
>  		result = visit_image_size(ctx, instr);
>  		break;
>  	case nir_intrinsic_discard:
>  		ctx->shader_info->fs.can_discard = true;
>  		ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
>  				       ctx->voidt,
> -				       NULL, 0, 0);
> +				       NULL, 0, AC_FUNC_ATTR_LEGACY);
>  		break;
>  	case nir_intrinsic_discard_if:
>  		emit_discard_if(ctx, instr);
>  		break;
>  	case nir_intrinsic_memory_barrier:
>  		emit_waitcnt(ctx);
>  		break;
>  	case nir_intrinsic_barrier:
>  		emit_barrier(ctx);
>  		break;
> @@ -3924,21 +3936,22 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
>
>  	for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
>  		t_offset = LLVMConstInt(ctx->i32, index + i, false);
>
>  		t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
>  		args[0] = t_list;
>  		args[1] = LLVMConstInt(ctx->i32, 0, false);
>  		args[2] = buffer_index;
>  		input = ac_emit_llvm_intrinsic(&ctx->ac,
>  			"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
> -			AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
> +			AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
> +			AC_FUNC_ATTR_LEGACY);
>
>  		for (unsigned chan = 0; chan < 4; chan++) {
>  			LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
>  			ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
>  				to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
>  							input, llvm_chan, ""));
>  		}
>  	}
>  }
>
> @@ -4257,22 +4270,23 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
>  			args[4] = ctx->i32one;
>
>  			for (unsigned chan = 0; chan < 2; chan++) {
>  				LLVMValueRef pack_args[2] = {
>  					values[2 * chan],
>  					values[2 * chan + 1]
>  				};
>  				LLVMValueRef packed;
>
>  				packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16",
> -							     ctx->i32, pack_args, 2,
> -							     AC_FUNC_ATTR_READNONE);
> +								ctx->i32, pack_args, 2,
> +								AC_FUNC_ATTR_READNONE |
> +								AC_FUNC_ATTR_LEGACY);
>  				args[chan + 5] = packed;
>  			}
>  			break;
>
>  		case V_028714_SPI_SHADER_UNORM16_ABGR:
>  			for (unsigned chan = 0; chan < 4; chan++) {
>  				val[chan] = emit_float_saturate(ctx, values[chan], 0, 1);
>  				val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
>  							LLVMConstReal(ctx->f32, 65535), "");
>  				val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
> @@ -4443,21 +4457,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
>  		si_llvm_init_export_args(ctx, values, target, args);
>
>  		if (target >= V_008DFC_SQ_EXP_POS &&
>  		    target <= (V_008DFC_SQ_EXP_POS + 3)) {
>  			memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
>  			       args, sizeof(args));
>  		} else {
>  			ac_emit_llvm_intrinsic(&ctx->ac,
>  					       "llvm.SI.export",
>  					       ctx->voidt,
> -					       args, 9, 0);
> +					       args, 9,
> +					       AC_FUNC_ATTR_LEGACY);
>  		}
>  	}
>
>  	/* We need to add the position output manually if it's missing. */
>  	if (!pos_args[0][0]) {
>  		pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
>  		pos_args[0][1] = ctx->i32zero; /* EXEC mask */
>  		pos_args[0][2] = ctx->i32zero; /* last export? */
>  		pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false);
>  		pos_args[0][4] = ctx->i32zero; /* COMPR flag */
> @@ -4498,21 +4513,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
>  		if (!pos_args[i][0])
>  			continue;
>
>  		/* Specify the target we are exporting */
>  		pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false);
>  		if (pos_idx == num_pos_exports)
>  			pos_args[i][2] = ctx->i32one;
>  		ac_emit_llvm_intrinsic(&ctx->ac,
>  				       "llvm.SI.export",
>  				       ctx->voidt,
> -				       pos_args[i], 9, 0);
> +				       pos_args[i], 9,
> +				       AC_FUNC_ATTR_LEGACY);
>  	}
>
>  	ctx->shader_info->vs.pos_exports = num_pos_exports;
>  	ctx->shader_info->vs.param_exports = param_count;
>  }
>
>  static void
>  handle_es_outputs_post(struct nir_to_llvm_context *ctx)
>  {
>  	int j;
> @@ -4554,21 +4570,22 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
>  	si_llvm_init_export_args(ctx, color, param,
>  				 args);
>
>  	if (is_last) {
>  		args[1] = ctx->i32one; /* whether the EXEC mask is valid */
>  		args[2] = ctx->i32one; /* DONE bit */
>  	} else if (args[0] == ctx->i32zero)
>  		return; /* unnecessary NULL export */
>
>  	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
> -			    ctx->voidt, args, 9, 0);
> +			       ctx->voidt, args, 9,
> +			       AC_FUNC_ATTR_LEGACY);
>  }
>
>  static void
>  si_export_mrt_z(struct nir_to_llvm_context *ctx,
>  		LLVMValueRef depth, LLVMValueRef stencil,
>  		LLVMValueRef samplemask)
>  {
>  	LLVMValueRef args[9];
>  	unsigned mask = 0;
>  	args[1] = ctx->i32one; /* whether the EXEC mask is valid */
> @@ -4598,21 +4615,22 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
>  	}
>
>  	/* SI (except OLAND) has a bug that it only looks
>  	 * at the X writemask component. */
>  	if (ctx->options->chip_class == SI &&
>  	    ctx->options->family != CHIP_OLAND)
>  		mask |= 0x01;
>
>  	args[0] = LLVMConstInt(ctx->i32, mask, false);
>  	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
> -			    ctx->voidt, args, 9, 0);
> +			       ctx->voidt, args, 9,
> +			       AC_FUNC_ATTR_LEGACY);
>  }
>
>  static void
>  handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
>  {
>  	unsigned index = 0;
>  	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
>
>  	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
>  		LLVMValueRef values[4];
> @@ -5021,21 +5039,22 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
>
>  		for (unsigned j = 0; j < 4; j++) {
>  			LLVMValueRef value;
>  			args[2] = LLVMConstInt(ctx->i32,
>  					       (idx * 4 + j) *
>  					       ctx->gs_max_out_vertices * 16 * 4, false);
>
>  			value = ac_emit_llvm_intrinsic(&ctx->ac,
>  						       "llvm.SI.buffer.load.dword.i32.i32",
>  						       ctx->i32, args, 9,
> -						       AC_FUNC_ATTR_READONLY);
> +						       AC_FUNC_ATTR_READONLY |
> +						       AC_FUNC_ATTR_LEGACY);
>
>  			LLVMBuildStore(ctx->builder,
>  				       to_float(ctx, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
>  		}
>  		idx++;
>  	}
>  	handle_vs_outputs_post(ctx);
>  }
>
>  void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
> diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
> index 8952dc8..586a9be 100644
> --- a/src/gallium/auxiliary/draw/draw_llvm.c
> +++ b/src/gallium/auxiliary/draw/draw_llvm.c
> @@ -1588,21 +1588,22 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
>
>     func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
>                                  arg_types, num_arg_types, 0);
>
>     variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
>     variant->function = variant_func;
>
>     LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
>     for (i = 0; i < num_arg_types; ++i)
>        if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
> -         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
> +         lp_add_function_attr(context, variant_func, i + 1,
> +                              LP_FUNC_ATTR_NOALIAS);
>
>     context_ptr               = LLVMGetParam(variant_func, 0);
>     io_ptr                    = LLVMGetParam(variant_func, 1);
>     vbuffers_ptr              = LLVMGetParam(variant_func, 2);
>     count                     = LLVMGetParam(variant_func, 3);
>     /*
>      * XXX: the maxelt part is unused. Not really useful, since we cannot
>      * get index buffer overflows due to vsplit (which provides its own
>      * elts buffer, with a different size than what's passed in here).
>      */
> @@ -2262,21 +2263,22 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
>     func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
>
>     variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
>
>     variant->function = variant_func;
>
>     LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
>
>     for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
>        if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
> -         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
> +         lp_add_function_attr(context, variant_func, i + 1,
> +                              LP_FUNC_ATTR_NOALIAS);
>
>     context_ptr               = LLVMGetParam(variant_func, 0);
>     input_array               = LLVMGetParam(variant_func, 1);
>     io_ptr                    = LLVMGetParam(variant_func, 2);
>     num_prims                 = LLVMGetParam(variant_func, 3);
>     system_values.instance_id = LLVMGetParam(variant_func, 4);
>     prim_id_ptr               = LLVMGetParam(variant_func, 5);
>     system_values.invocation_id = LLVMGetParam(variant_func, 6);
>
>     lp_build_name(context_ptr, "context");
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> index 049671a..1b50e68 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> @@ -152,83 +152,100 @@ static const char *attr_to_str(enum lp_func_attr attr)
>     case LP_FUNC_ATTR_READONLY: return "readonly";
>     default:
>        _debug_printf("Unhandled function attribute: %x\n", attr);
>        return 0;
>     }
>  }
>
>  #endif
>
>  void
> -lp_add_function_attr(LLVMValueRef function,
> -                     int attr_idx,
> -                     enum lp_func_attr attr)
> +lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,

If "function" is not always a function, then it would be better to rename
the parameter to functionOrCall or something like that.

> +                     int attr_idx, enum lp_func_attr attr)
>  {
>
>  #if HAVE_LLVM < 0x0400
>     LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr);
>     if (attr_idx == -1) {
>        LLVMAddFunctionAttr(function, llvm_attr);
>     } else {
>        LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
>     }
>  #else
> -   LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));

Even when LLVMIsAFunction(function) is false, we could still get the 
LLVMContextRef:

    LLVMModuleRef module;
    if (LLVMIsAFunction(functionOrCall)) {
       module = LLVMGetGlobalParent(functionOrCall);
    } else {
       LLVMBasicBlockRef bb = LLVMGetInstructionParent(functionOrCall);
       LLVMValueRef function = LLVMGetBasicBlockParent(bb);
       module = LLVMGetGlobalParent(function);

    }
    LLVMContextRef context = LLVMGetModuleContext(module);

This would allow the lp_add_function_attr prototype to be left unchanged.

Otherwise looks good to me.

Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

>     const char *attr_name = attr_to_str(attr);
>     unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
>                                                        strlen(attr_name));
> -   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
> -   LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
> +   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
> +
> +   if (LLVMIsAFunction(function))
> +      LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
> +   else
> +      LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
>  #endif
>  }
>
> +static void
> +lp_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
> +                       unsigned attrib_mask)
> +{
> +   /* NoUnwind indicates that the intrinsic never raises a C++ exception.
> +    * Set it for all intrinsics.
> +    */
> +   attrib_mask |= LP_FUNC_ATTR_NOUNWIND;
> +   attrib_mask &= ~LP_FUNC_ATTR_LEGACY;
> +
> +   while (attrib_mask) {
> +      enum lp_func_attr attr = 1u << u_bit_scan(&attrib_mask);
> +      lp_add_function_attr(ctx, function, -1, attr);
> +   }
> +}
> +
>  LLVMValueRef
>  lp_build_intrinsic(LLVMBuilderRef builder,
>                     const char *name,
>                     LLVMTypeRef ret_type,
>                     LLVMValueRef *args,
>                     unsigned num_args,
>                     unsigned attr_mask)
>  {
>     LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
> -   LLVMValueRef function;
> +   LLVMContextRef ctx = LLVMGetModuleContext(module);
> +   LLVMValueRef function, call;
> +   bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
> +                             !(attr_mask & LP_FUNC_ATTR_LEGACY);
>
>     function = LLVMGetNamedFunction(module, name);
>     if(!function) {
>        LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
>        unsigned i;
>
>        assert(num_args <= LP_MAX_FUNC_ARGS);
>
>        for(i = 0; i < num_args; ++i) {
>           assert(args[i]);
>           arg_types[i] = LLVMTypeOf(args[i]);
>        }
>
>        function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
>
> -      /* NoUnwind indicates that the intrinsic never raises a C++ exception.
> -       * Set it for all intrinsics.
> -       */
> -      attr_mask |= LP_FUNC_ATTR_NOUNWIND;
> -
> -      while (attr_mask) {
> -         enum lp_func_attr attr = 1 << u_bit_scan(&attr_mask);
> -         lp_add_function_attr(function, -1, attr);
> -      }
> +      if (!set_callsite_attrs)
> +         lp_add_func_attributes(ctx, function, attr_mask);
>
>        if (gallivm_debug & GALLIVM_DEBUG_IR) {
>           lp_debug_dump_value(function);
>        }
>     }
>
> -   return LLVMBuildCall(builder, function, args, num_args, "");
> +   call = LLVMBuildCall(builder, function, args, num_args, "");
> +   if (set_callsite_attrs)
> +      lp_add_func_attributes(ctx, call, attr_mask);
> +   return call;
>  }
>
>
>  LLVMValueRef
>  lp_build_intrinsic_unary(LLVMBuilderRef builder,
>                           const char *name,
>                           LLVMTypeRef ret_type,
>                           LLVMValueRef a)
>  {
>     return lp_build_intrinsic(builder, name, ret_type, &a, 1, 0);
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
> index 039e9ab..d279911 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
> @@ -47,39 +47,44 @@
>  #define LP_MAX_FUNC_ARGS 32
>
>  enum lp_func_attr {
>     LP_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
>     LP_FUNC_ATTR_BYVAL        = (1 << 1),
>     LP_FUNC_ATTR_INREG        = (1 << 2),
>     LP_FUNC_ATTR_NOALIAS      = (1 << 3),
>     LP_FUNC_ATTR_NOUNWIND     = (1 << 4),
>     LP_FUNC_ATTR_READNONE     = (1 << 5),
>     LP_FUNC_ATTR_READONLY     = (1 << 6),
> +
> +   /* Legacy intrinsic that needs attributes on function declarations
> +    * and they must match the internal LLVM definition exactly, otherwise
> +    * intrinsic selection fails.
> +    */
> +   LP_FUNC_ATTR_LEGACY       = (1u << 31),
>  };
>
>  void
>  lp_format_intrinsic(char *name,
>                      size_t size,
>                      const char *name_root,
>                      LLVMTypeRef type);
>
>  LLVMValueRef
>  lp_declare_intrinsic(LLVMModuleRef module,
>                       const char *name,
>                       LLVMTypeRef ret_type,
>                       LLVMTypeRef *arg_types,
>                       unsigned num_args);
>
>  void
> -lp_add_function_attr(LLVMValueRef function,
> -                     int attr_idx,
> -                     enum lp_func_attr attr);
> +lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
> +                     int attr_idx, enum lp_func_attr attr);
>
>  LLVMValueRef
>  lp_build_intrinsic(LLVMBuilderRef builder,
>                     const char *name,
>                     LLVMTypeRef ret_type,
>                     LLVMValueRef *args,
>                     unsigned num_args,
>                     unsigned attr_mask);
>
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index 161a03f..a1e2601 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -3311,21 +3311,22 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
>
>        val_type[0] = val_type[1] = val_type[2] = val_type[3] =
>           lp_build_vec_type(gallivm, params->type);
>        ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
>        function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
>        function = LLVMAddFunction(module, func_name, function_type);
>
>        for (i = 0; i < num_param; ++i) {
>           if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
>
> -            lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
> +            lp_add_function_attr(gallivm->context, function, i + 1,
> +                                 LP_FUNC_ATTR_NOALIAS);
>           }
>        }
>
>        LLVMSetFunctionCallConv(function, LLVMFastCallConv);
>        LLVMSetLinkage(function, LLVMInternalLinkage);
>
>        lp_build_sample_gen_func(gallivm,
>                                 static_texture_state,
>                                 static_sampler_state,
>                                 dynamic_state,
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> index af47b52..70b0a67 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> @@ -2488,21 +2488,22 @@ generate_fragment(struct llvmpipe_context *lp,
>     function = LLVMAddFunction(gallivm->module, func_name, func_type);
>     LLVMSetFunctionCallConv(function, LLVMCCallConv);
>
>     variant->function[partial_mask] = function;
>
>     /* XXX: need to propagate noalias down into color param now we are
>      * passing a pointer-to-pointer?
>      */
>     for(i = 0; i < ARRAY_SIZE(arg_types); ++i)
>        if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
> -         lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
> +         lp_add_function_attr(gallivm->context, function, i + 1,
> +                              LP_FUNC_ATTR_NOALIAS);
>
>     context_ptr  = LLVMGetParam(function, 0);
>     x            = LLVMGetParam(function, 1);
>     y            = LLVMGetParam(function, 2);
>     facing       = LLVMGetParam(function, 3);
>     a0_ptr       = LLVMGetParam(function, 4);
>     dadx_ptr     = LLVMGetParam(function, 5);
>     dady_ptr     = LLVMGetParam(function, 6);
>     color_ptr_ptr = LLVMGetParam(function, 7);
>     depth_ptr    = LLVMGetParam(function, 8);
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
> index 6b0df21..66bc42c 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
> @@ -609,29 +609,29 @@ emit_tri_coef( struct gallivm_state *gallivm,
>        default:
>           assert(0);
>        }
>     }
>  }
>
>
>  /* XXX: generic code:
>   */
>  static void
> -set_noalias(LLVMBuilderRef builder,
> +set_noalias(LLVMContextRef ctx,
>              LLVMValueRef function,
>              const LLVMTypeRef *arg_types,
>              int nr_args)
>  {
>     int i;
>     for(i = 0; i < nr_args; ++i)
>        if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
> -         lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
> +         lp_add_function_attr(ctx, function, i + 1, LP_FUNC_ATTR_NOALIAS);
>  }
>
>  static void
>  init_args(struct gallivm_state *gallivm,
>            const struct lp_setup_variant_key *key,
>            struct lp_setup_args *args)
>  {
>     LLVMBuilderRef b = gallivm->builder;
>     LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
>     LLVMValueRef onef = lp_build_const_float(gallivm, 1.0);
> @@ -783,21 +783,22 @@ generate_setup_variant(struct lp_setup_variant_key *key,
>     lp_build_name(args.dadx, "out_dadx");
>     lp_build_name(args.dady, "out_dady");
>
>     /*
>      * Function body
>      */
>     block = LLVMAppendBasicBlockInContext(gallivm->context,
>                                           variant->function, "entry");
>     LLVMPositionBuilderAtEnd(builder, block);
>
> -   set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types));
> +   set_noalias(gallivm->context, variant->function, arg_types,
> +               ARRAY_SIZE(arg_types));
>     init_args(gallivm, &variant->key, &args);
>     emit_tri_coef(gallivm, &variant->key, &args);
>
>     LLVMBuildRetVoid(builder);
>
>     gallivm_verify_function(gallivm, variant->function);
>
>     gallivm_compile_module(gallivm);
>
>     variant->jit_function = (lp_jit_setup_triangle)
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index f9eaea2..ea3f4fd 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -394,21 +394,22 @@ static void declare_input_vs(
>  	}
>
>  	args[0] = t_list;
>  	args[2] = vertex_index;
>
>  	for (unsigned i = 0; i < num_fetches; i++) {
>  		args[1] = LLVMConstInt(ctx->i32, fetch_stride * i, 0);
>
>  		input[i] = lp_build_intrinsic(gallivm->builder,
>  			"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
> -			LP_FUNC_ATTR_READNONE);
> +			LP_FUNC_ATTR_READNONE |
> +			LP_FUNC_ATTR_LEGACY);
>  	}
>
>  	/* Break up the vec4 into individual components */
>  	for (chan = 0; chan < 4; chan++) {
>  		LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
>  		out[chan] = LLVMBuildExtractElement(gallivm->builder,
>  						    input[0], llvm_chan, "");
>  	}
>
>  	switch (fix_fetch) {
> @@ -1116,28 +1117,30 @@ static LLVMValueRef fetch_input_gs(
>  	args[3] = uint->zero;
>  	args[4] = uint->one;  /* OFFEN */
>  	args[5] = uint->zero; /* IDXEN */
>  	args[6] = uint->one;  /* GLC */
>  	args[7] = uint->zero; /* SLC */
>  	args[8] = uint->zero; /* TFE */
>
>  	value = lp_build_intrinsic(gallivm->builder,
>  				   "llvm.SI.buffer.load.dword.i32.i32",
>  				   ctx->i32, args, 9,
> -				   LP_FUNC_ATTR_READONLY);
> +				   LP_FUNC_ATTR_READONLY |
> +				   LP_FUNC_ATTR_LEGACY);
>  	if (tgsi_type_is_64bit(type)) {
>  		LLVMValueRef value2;
>  		args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
>  		value2 = lp_build_intrinsic(gallivm->builder,
>  					    "llvm.SI.buffer.load.dword.i32.i32",
>  					    ctx->i32, args, 9,
> -					    LP_FUNC_ATTR_READONLY);
> +					    LP_FUNC_ATTR_READONLY |
> +					    LP_FUNC_ATTR_LEGACY);
>  		return si_llvm_emit_fetch_64bit(bld_base, type,
>  						value, value2);
>  	}
>  	return LLVMBuildBitCast(gallivm->builder,
>  				value,
>  				tgsi2llvmtype(bld_base, type), "");
>  }
>
>  static int lookup_interp_param_index(unsigned interpolate, unsigned location)
>  {
> @@ -1361,21 +1364,22 @@ static LLVMValueRef get_sample_id(struct si_shader_context *radeon_bld)
>   * Load a dword from a constant buffer.
>   */
>  static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
>  				      LLVMValueRef resource,
>  				      LLVMValueRef offset)
>  {
>  	LLVMBuilderRef builder = ctx->gallivm.builder;
>  	LLVMValueRef args[2] = {resource, offset};
>
>  	return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2,
> -			       LP_FUNC_ATTR_READNONE);
> +				  LP_FUNC_ATTR_READNONE |
> +				  LP_FUNC_ATTR_LEGACY);
>  }
>
>  static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, LLVMValueRef sample_id)
>  {
>  	struct si_shader_context *ctx =
>  		si_shader_context(&radeon_bld->bld_base);
>  	struct lp_build_context *uint_bld = &radeon_bld->bld_base.uint_bld;
>  	struct gallivm_state *gallivm = &radeon_bld->gallivm;
>  	LLVMBuilderRef builder = gallivm->builder;
>  	LLVMValueRef desc = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS);
> @@ -1815,21 +1819,22 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
>  		for (chan = 0; chan < 2; chan++) {
>  			LLVMValueRef pack_args[2] = {
>  				values[2 * chan],
>  				values[2 * chan + 1]
>  			};
>  			LLVMValueRef packed;
>
>  			packed = lp_build_intrinsic(base->gallivm->builder,
>  						    "llvm.SI.packf16",
>  						    ctx->i32, pack_args, 2,
> -						    LP_FUNC_ATTR_READNONE);
> +						    LP_FUNC_ATTR_READNONE |
> +						    LP_FUNC_ATTR_LEGACY);
>  			args[chan + 5] =
>  				LLVMBuildBitCast(base->gallivm->builder,
>  						 packed, ctx->f32, "");
>  		}
>  		break;
>
>  	case V_028714_SPI_SHADER_UNORM16_ABGR:
>  		for (chan = 0; chan < 4; chan++) {
>  			val[chan] = ac_emit_clamp(&ctx->ac, values[chan]);
>  			val[chan] = LLVMBuildFMul(builder, val[chan],
> @@ -1947,24 +1952,24 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
>  			lp_build_cmp(&bld_base->base,
>  				     ctx->shader->key.part.ps.epilog.alpha_func,
>  				     alpha, alpha_ref);
>  		LLVMValueRef arg =
>  			lp_build_select(&bld_base->base,
>  					alpha_pass,
>  					lp_build_const_float(gallivm, 1.0f),
>  					lp_build_const_float(gallivm, -1.0f));
>
>  		lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
> -				   ctx->voidt, &arg, 1, 0);
> +				   ctx->voidt, &arg, 1, LP_FUNC_ATTR_LEGACY);
>  	} else {
>  		lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kilp",
> -				   ctx->voidt, NULL, 0, 0);
> +				   ctx->voidt, NULL, 0, LP_FUNC_ATTR_LEGACY);
>  	}
>  }
>
>  static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
>  						  LLVMValueRef alpha,
>  						  unsigned samplemask_param)
>  {
>  	struct si_shader_context *ctx = si_shader_context(bld_base);
>  	struct gallivm_state *gallivm = bld_base->base.gallivm;
>  	LLVMValueRef coverage;
> @@ -2295,21 +2300,21 @@ handle_semantic:
>
>  		si_llvm_init_export_args(bld_base, outputs[i].values, target, args);
>
>  		if (target >= V_008DFC_SQ_EXP_POS &&
>  		    target <= (V_008DFC_SQ_EXP_POS + 3)) {
>  			memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
>  			       args, sizeof(args));
>  		} else {
>  			lp_build_intrinsic(base->gallivm->builder,
>  					   "llvm.SI.export", ctx->voidt,
> -					   args, 9, 0);
> +					   args, 9, LP_FUNC_ATTR_LEGACY);
>  		}
>
>  		if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
>  			semantic_name = TGSI_SEMANTIC_GENERIC;
>  			goto handle_semantic;
>  		}
>  	}
>
>  	shader->info.nr_param_exports = param_count;
>
> @@ -2381,21 +2386,22 @@ handle_semantic:
>  			continue;
>
>  		/* Specify the target we are exporting */
>  		pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);
>
>  		if (pos_idx == shader->info.nr_pos_exports)
>  			/* Specify that this is the last export */
>  			pos_args[i][2] = uint->one;
>
>  		lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
> -				   ctx->voidt, pos_args[i], 9, 0);
> +				   ctx->voidt, pos_args[i], 9,
> +				   LP_FUNC_ATTR_LEGACY);
>  	}
>  }
>
>  /**
>   * Forward all outputs from the vertex shader to the TES. This is only used
>   * for the fixed function TCS.
>   */
>  static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
>  {
>  	struct si_shader_context *ctx = si_shader_context(bld_base);
> @@ -2972,42 +2978,42 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
>  		memcpy(exp->args[exp->num++], args, sizeof(args));
>  	}
>  }
>
>  static void si_emit_ps_exports(struct si_shader_context *ctx,
>  			       struct si_ps_exports *exp)
>  {
>  	for (unsigned i = 0; i < exp->num; i++)
>  		lp_build_intrinsic(ctx->gallivm.builder,
>  				   "llvm.SI.export", ctx->voidt,
> -				   exp->args[i], 9, 0);
> +				   exp->args[i], 9, LP_FUNC_ATTR_LEGACY);
>  }
>
>  static void si_export_null(struct lp_build_tgsi_context *bld_base)
>  {
>  	struct si_shader_context *ctx = si_shader_context(bld_base);
>  	struct lp_build_context *base = &bld_base->base;
>  	struct lp_build_context *uint = &bld_base->uint_bld;
>  	LLVMValueRef args[9];
>
>  	args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
>  	args[1] = uint->one; /* whether the EXEC mask is valid */
>  	args[2] = uint->one; /* DONE bit */
>  	args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
>  	args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
>  	args[5] = base->undef; /* R */
>  	args[6] = base->undef; /* G */
>  	args[7] = base->undef; /* B */
>  	args[8] = base->undef; /* A */
>
>  	lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
> -			   ctx->voidt, args, 9, 0);
> +			   ctx->voidt, args, 9, LP_FUNC_ATTR_LEGACY);
>  }
>
>  /**
>   * Return PS outputs in this order:
>   *
>   * v[0:3] = color0.xyzw
>   * v[4:7] = color1.xyzw
>   * ...
>   * vN+0 = Depth
>   * vN+1 = Stencil
> @@ -4082,21 +4088,21 @@ static void resq_emit(
>
>  	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
>  		out = LLVMBuildExtractElement(builder, emit_data->args[0],
>  					      lp_build_const_int32(gallivm, 2), "");
>  	} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
>  		out = get_buffer_size(bld_base, emit_data->args[0]);
>  	} else {
>  		out = lp_build_intrinsic(
>  			builder, "llvm.SI.getresinfo.i32", emit_data->dst_type,
>  			emit_data->args, emit_data->arg_count,
> -			LP_FUNC_ATTR_READNONE);
> +			LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
>
>  		/* Divide the number of layers by 6 to get the number of cubes. */
>  		if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
>  			LLVMValueRef imm2 = lp_build_const_int32(gallivm, 2);
>  			LLVMValueRef imm6 = lp_build_const_int32(gallivm, 6);
>
>  			LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, "");
>  			z = LLVMBuildSDiv(builder, z, imm6, "");
>  			out = LLVMBuildInsertElement(builder, out, z, imm2, "");
>  		}
> @@ -4319,21 +4325,21 @@ static void txq_emit(const struct lp_build_tgsi_action *action,
>
>  	if (target == TGSI_TEXTURE_BUFFER) {
>  		/* Just return the buffer size. */
>  		emit_data->output[emit_data->chan] = emit_data->args[0];
>  		return;
>  	}
>
>  	emit_data->output[emit_data->chan] = lp_build_intrinsic(
>  		base->gallivm->builder, "llvm.SI.getresinfo.i32",
>  		emit_data->dst_type, emit_data->args, emit_data->arg_count,
> -		LP_FUNC_ATTR_READNONE);
> +		LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
>
>  	/* Divide the number of layers by 6 to get the number of cubes. */
>  	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
>  	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
>  		LLVMBuilderRef builder = bld_base->base.gallivm->builder;
>  		LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
>  		LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);
>
>  		LLVMValueRef v4 = emit_data->output[emit_data->chan];
>  		LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
> @@ -4728,21 +4734,21 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx,
>  		tmp = LLVMBuildBitCast(builder, tmp, ctx->f32, "");
>  		tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
>  		tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
>  		coord = LLVMBuildInsertElement(builder, coord, tmp, index, "");
>  	}
>
>  	emit_data->args[0] = coord;
>  	emit_data->output[emit_data->chan] =
>  		lp_build_intrinsic(builder, intr_name, emit_data->dst_type,
>  				   emit_data->args, emit_data->arg_count,
> -				   LP_FUNC_ATTR_READNONE);
> +				   LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
>  }
>
>  static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
>  				struct lp_build_tgsi_context *bld_base,
>  				struct lp_build_emit_data *emit_data)
>  {
>  	struct si_shader_context *ctx = si_shader_context(bld_base);
>  	struct lp_build_context *base = &bld_base->base;
>  	const struct tgsi_full_instruction *inst = emit_data->inst;
>  	unsigned opcode = inst->Instruction.Opcode;
> @@ -4752,21 +4758,21 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
>  	bool is_shadow = tgsi_is_shadow_target(target);
>  	char type[64];
>  	const char *name = "llvm.SI.image.sample";
>  	const char *infix = "";
>
>  	if (target == TGSI_TEXTURE_BUFFER) {
>  		emit_data->output[emit_data->chan] = lp_build_intrinsic(
>  			base->gallivm->builder,
>  			"llvm.SI.vs.load.input", emit_data->dst_type,
>  			emit_data->args, emit_data->arg_count,
> -			LP_FUNC_ATTR_READNONE);
> +			LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
>  		return;
>  	}
>
>  	switch (opcode) {
>  	case TGSI_OPCODE_TXF:
>  		name = target == TGSI_TEXTURE_2D_MSAA ||
>  		       target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
>  			       "llvm.SI.image.load" :
>  			       "llvm.SI.image.load.mip";
>  		is_shadow = false;
> @@ -4829,21 +4835,21 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
>  			 */
>  			si_lower_gather4_integer(ctx, emit_data, intr_name,
>  						 (int)has_offset + (int)is_shadow);
>  			return;
>  		}
>  	}
>
>  	emit_data->output[emit_data->chan] = lp_build_intrinsic(
>  		base->gallivm->builder, intr_name, emit_data->dst_type,
>  		emit_data->args, emit_data->arg_count,
> -		LP_FUNC_ATTR_READNONE);
> +		LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
>  }
>
>  static void si_llvm_emit_txqs(
>  	const struct lp_build_tgsi_action *action,
>  	struct lp_build_tgsi_context *bld_base,
>  	struct lp_build_emit_data *emit_data)
>  {
>  	struct si_shader_context *ctx = si_shader_context(bld_base);
>  	struct gallivm_state *gallivm = bld_base->base.gallivm;
>  	LLVMBuilderRef builder = gallivm->builder;
> @@ -5118,21 +5124,21 @@ static void si_llvm_emit_vertex(
>  				 lp_build_const_int32(gallivm,
>  						      shader->selector->gs_max_out_vertices), "");
>
>  	bool use_kill = !info->writes_memory;
>  	if (use_kill) {
>  		kill = lp_build_select(&bld_base->base, can_emit,
>  				       lp_build_const_float(gallivm, 1.0f),
>  				       lp_build_const_float(gallivm, -1.0f));
>
>  		lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
> -				   ctx->voidt, &kill, 1, 0);
> +				   ctx->voidt, &kill, 1, LP_FUNC_ATTR_LEGACY);
>  	} else {
>  		lp_build_if(&if_state, gallivm, can_emit);
>  	}
>
>  	offset = 0;
>  	for (i = 0; i < info->num_outputs; i++) {
>  		LLVMValueRef *out_ptr = ctx->outputs[i];
>
>  		for (chan = 0; chan < 4; chan++) {
>  			if (!(info->output_usagemask[i] & (1 << chan)) ||
> @@ -5238,24 +5244,26 @@ static void si_create_function(struct si_shader_context *ctx,
>  		LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
>
>  		/* The combination of:
>  		 * - ByVal
>  		 * - dereferenceable
>  		 * - invariant.load
>  		 * allows the optimization passes to move loads and reduces
>  		 * SGPR spilling significantly.
>  		 */
>  		if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
> -			lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_BYVAL);
> +			lp_add_function_attr(ctx->gallivm.context, ctx->main_fn,
> +                                             i + 1, LP_FUNC_ATTR_BYVAL);
>  			lp_add_attr_dereferenceable(P, UINT64_MAX);
>  		} else
> -			lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
> +			lp_add_function_attr(ctx->gallivm.context, ctx->main_fn,
> +                                             i + 1, LP_FUNC_ATTR_INREG);
>  	}
>
>  	LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
>  					   "no-signed-zeros-fp-math",
>  					   "true");
>
>  	if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) {
>  		/* These were copied from some LLVM test. */
>  		LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
>  						   "less-precise-fpmad",
> @@ -5740,21 +5748,22 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
>  	offset = LLVMBuildMul(builder, address[1],
>  			      LLVMConstInt(ctx->i32, 4, 0), "");
>  	row = buffer_load_const(ctx, desc, offset);
>  	row = LLVMBuildBitCast(builder, row, ctx->i32, "");
>  	bit = LLVMBuildLShr(builder, row, address[0], "");
>  	bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
>
>  	/* The intrinsic kills the thread if arg < 0. */
>  	bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0),
>  			      LLVMConstReal(ctx->f32, -1), "");
> -	lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, 0);
> +	lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1,
> +			   LP_FUNC_ATTR_LEGACY);
>  }
>
>  void si_shader_binary_read_config(struct radeon_shader_binary *binary,
>  				  struct si_shader_config *conf,
>  				  unsigned symbol_offset)
>  {
>  	unsigned i;
>  	const unsigned char *config =
>  		radeon_shader_binary_config_start(binary, symbol_offset);
>  	bool really_needs_scratch = false;
> @@ -6308,23 +6317,24 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
>  				}
>
>  				args[2] = lp_build_const_int32(
>  					gallivm,
>  					offset * gs_selector->gs_max_out_vertices * 16 * 4);
>  				offset++;
>
>  				outputs[i].values[chan] =
>  					LLVMBuildBitCast(gallivm->builder,
>  						 lp_build_intrinsic(gallivm->builder,
> -								 "llvm.SI.buffer.load.dword.i32.i32",
> -								 ctx.i32, args, 9,
> -								 LP_FUNC_ATTR_READONLY),
> +								    "llvm.SI.buffer.load.dword.i32.i32",
> +								    ctx.i32, args, 9,
> +								    LP_FUNC_ATTR_READONLY |
> +								    LP_FUNC_ATTR_LEGACY),
>  						 ctx.f32, "");
>  			}
>  		}
>
>  		/* Streamout and exports. */
>  		if (gs_selector->so.num_outputs) {
>  			si_llvm_emit_streamout(&ctx, outputs,
>  					       gsinfo->num_outputs,
>  					       stream);
>  		}
> @@ -7047,21 +7057,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
>  	LLVMValueRef out[48];
>  	LLVMTypeRef function_type;
>  	unsigned num_params;
>  	unsigned num_out;
>  	MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */
>  	unsigned num_sgprs, num_vgprs;
>  	unsigned last_sgpr_param;
>  	unsigned gprs;
>
>  	for (unsigned i = 0; i < num_parts; ++i) {
> -		lp_add_function_attr(parts[i], -1, LP_FUNC_ATTR_ALWAYSINLINE);
> +		lp_add_function_attr(gallivm->context, parts[i], -1,
> +				     LP_FUNC_ATTR_ALWAYSINLINE);
>  		LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
>  	}
>
>  	/* The parameters of the wrapper function correspond to those of the
>  	 * first part in terms of SGPRs and VGPRs, but we use the types of the
>  	 * main part to get the right types. This is relevant for the
>  	 * dereferenceable attribute on descriptor table pointers.
>  	 */
>  	num_sgprs = 0;
>  	num_vgprs = 0;
> @@ -7164,21 +7175,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
>  			param_size = llvm_get_type_size(param_type) / 4;
>  			is_sgpr = ac_is_sgpr_param(param);
>
>  			if (is_sgpr) {
>  #if HAVE_LLVM < 0x0400
>  				LLVMRemoveAttribute(param, LLVMByValAttribute);
>  #else
>  				unsigned kind_id = LLVMGetEnumAttributeKindForName("byval", 5);
>  				LLVMRemoveEnumAttributeAtIndex(parts[part], param_idx + 1, kind_id);
>  #endif
> -				lp_add_function_attr(parts[part], param_idx + 1, LP_FUNC_ATTR_INREG);
> +				lp_add_function_attr(gallivm->context, parts[part],
> +						     param_idx + 1, LP_FUNC_ATTR_INREG);
>  			}
>
>  			assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));
>  			assert(is_sgpr || out_idx >= num_out_sgpr);
>
>  			if (param_size == 1)
>  				arg = out[out_idx];
>  			else
>  				arg = lp_build_gather_values(gallivm, &out[out_idx], param_size);
>
> @@ -7679,21 +7691,21 @@ static void si_build_vs_epilog_function(struct si_shader_context *ctx,
>  					       key->vs_epilog.prim_id_param_offset);
>  		args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
>  		args[5] = LLVMGetParam(ctx->main_fn,
>  				       VS_EPILOG_PRIMID_LOC); /* X */
>  		args[6] = base->undef; /* Y */
>  		args[7] = base->undef; /* Z */
>  		args[8] = base->undef; /* W */
>
>  		lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
>  				   LLVMVoidTypeInContext(base->gallivm->context),
> -				   args, 9, 0);
> +				   args, 9, LP_FUNC_ATTR_LEGACY);
>  	}
>
>  	LLVMBuildRetVoid(gallivm->builder);
>  }
>
>  /**
>   * Create & compile a vertex shader epilog. This a helper used by VS and TES.
>   */
>  static bool si_get_vs_epilog(struct si_screen *sscreen,
>  			     LLVMTargetMachineRef tm,
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> index 10268e9..ee59fed 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> @@ -51,27 +51,23 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
>  	emit_data->arg_count = 1;
>  	emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
>  					lp_build_const_float(gallivm, -1.0f),
>  					bld_base->base.zero, "");
>  }
>
>  static void kil_emit(const struct lp_build_tgsi_action *action,
>  		     struct lp_build_tgsi_context *bld_base,
>  		     struct lp_build_emit_data *emit_data)
>  {
> -	unsigned i;
> -	for (i = 0; i < emit_data->arg_count; i++) {
> -		emit_data->output[i] = lp_build_intrinsic_unary(
> -			bld_base->base.gallivm->builder,
> -			action->intr_name,
> -			emit_data->dst_type, emit_data->args[i]);
> -	}
> +	lp_build_intrinsic(bld_base->base.gallivm->builder,
> +			   action->intr_name, emit_data->dst_type,
> +			   &emit_data->args[0], 1, LP_FUNC_ATTR_LEGACY);
>  }
>
>  static void emit_icmp(const struct lp_build_tgsi_action *action,
>  		      struct lp_build_tgsi_context *bld_base,
>  		      struct lp_build_emit_data *emit_data)
>  {
>  	unsigned pred;
>  	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
>  	LLVMContextRef context = bld_base->base.gallivm->context;
>
> @@ -500,21 +496,23 @@ static void emit_bfe(const struct lp_build_tgsi_action *action,
>  		     struct lp_build_tgsi_context *bld_base,
>  		     struct lp_build_emit_data *emit_data)
>  {
>  	struct gallivm_state *gallivm = bld_base->base.gallivm;
>  	LLVMBuilderRef builder = gallivm->builder;
>  	LLVMValueRef bfe_sm5;
>  	LLVMValueRef cond;
>
>  	bfe_sm5 = lp_build_intrinsic(builder, action->intr_name,
>  				     emit_data->dst_type, emit_data->args,
> -				     emit_data->arg_count, LP_FUNC_ATTR_READNONE);
> +				     emit_data->arg_count,
> +				     LP_FUNC_ATTR_READNONE |
> +				     LP_FUNC_ATTR_LEGACY);
>
>  	/* Correct for GLSL semantics. */
>  	cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
>  			     lp_build_const_int32(gallivm, 32), "");
>  	emit_data->output[emit_data->chan] =
>  		LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
>  }
>
>  /* this is ffs in C */
>  static void emit_lsb(const struct lp_build_tgsi_action *action,
>



More information about the mesa-dev mailing list