[Mesa-dev] [PATCH 2/2] gallivm, ac: add function attributes at call sites instead of declarations

Marek Olšák maraeo at gmail.com
Thu Feb 23 00:01:51 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

Function attributes can vary at call sites if the intrinsic is NOT a legacy
SI intrinsic. We need this to force readnone or inaccessiblememonly on some
amdgcn intrinsics.

Call-site attributes are only used with LLVM 4.0 and later. Intrinsics only
used with LLVM <= 3.9 don't need the LEGACY flag.

gallivm and ac code is in the same patch, because splitting would be
more complicated with all the LEGACY uses all over the place.
---
 src/amd/common/ac_llvm_build.c                    | 23 ++++----
 src/amd/common/ac_llvm_util.c                     | 31 +++++++----
 src/amd/common/ac_llvm_util.h                     | 17 +++---
 src/amd/common/ac_nir_to_llvm.c                   | 63 ++++++++++++++--------
 src/gallium/auxiliary/draw/draw_llvm.c            |  6 ++-
 src/gallium/auxiliary/gallivm/lp_bld_intr.c       | 51 ++++++++++++------
 src/gallium/auxiliary/gallivm/lp_bld_intr.h       | 11 ++--
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  3 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c        |  3 +-
 src/gallium/drivers/llvmpipe/lp_state_setup.c     |  7 +--
 src/gallium/drivers/radeonsi/si_shader.c          | 64 ++++++++++++++---------
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 14 +++--
 12 files changed, 184 insertions(+), 109 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 2f25b14..5c8b7f7 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -75,47 +75,50 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
 							"amdgpu.uniform", 14);
 
 	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
 }
 
 LLVMValueRef
 ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
 		       LLVMTypeRef return_type, LLVMValueRef *params,
 		       unsigned param_count, unsigned attrib_mask)
 {
-	LLVMValueRef function;
+	LLVMValueRef function, call;
+	bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
+				  !(attrib_mask & AC_FUNC_ATTR_LEGACY);
 
 	function = LLVMGetNamedFunction(ctx->module, name);
 	if (!function) {
 		LLVMTypeRef param_types[32], function_type;
 		unsigned i;
 
 		assert(param_count <= 32);
 
 		for (i = 0; i < param_count; ++i) {
 			assert(params[i]);
 			param_types[i] = LLVMTypeOf(params[i]);
 		}
 		function_type =
 		    LLVMFunctionType(return_type, param_types, param_count, 0);
 		function = LLVMAddFunction(ctx->module, name, function_type);
 
 		LLVMSetFunctionCallConv(function, LLVMCCallConv);
 		LLVMSetLinkage(function, LLVMExternalLinkage);
 
-		attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
-		while (attrib_mask) {
-			enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
-			ac_add_function_attr(function, -1, attr);
-		}
+		if (!set_callsite_attrs)
+			ac_add_func_attributes(ctx->context, function, attrib_mask);
 	}
-	return LLVMBuildCall(ctx->builder, function, params, param_count, "");
+
+	call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
+	if (set_callsite_attrs)
+		ac_add_func_attributes(ctx->context, call, attrib_mask);
+	return call;
 }
 
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
 				LLVMValueRef *values,
 				unsigned value_count,
 				unsigned value_stride,
 				bool load)
 {
 	LLVMBuilderRef builder = ctx->builder;
@@ -524,21 +527,22 @@ ac_build_tbuffer_store(struct ac_llvm_context *ctx,
 	/* The instruction offset field has 12 bits */
 	assert(offen || inst_offset < (1 << 12));
 
 	/* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
 	unsigned func = CLAMP(num_channels, 1, 3) - 1;
 	const char *types[] = {"i32", "v2i32", "v4i32"};
 	char name[256];
 	snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
 
 	ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
-			       args, ARRAY_SIZE(args), 0);
+			       args, ARRAY_SIZE(args),
+			       AC_FUNC_ATTR_LEGACY);
 }
 
 void
 ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
 			      LLVMValueRef rsrc,
 			      LLVMValueRef vdata,
 			      unsigned num_channels,
 			      LLVMValueRef vaddr,
 			      LLVMValueRef soffset,
 			      unsigned inst_offset)
@@ -836,12 +840,13 @@ LLVMValueRef ac_emit_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
 
 	const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." :
 						 "llvm.AMDIL.clamp.";
 	LLVMValueRef args[3] = {
 		value,
 		LLVMConstReal(ctx->f32, 0),
 		LLVMConstReal(ctx->f32, 1),
 	};
 
 	return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3,
-				      AC_FUNC_ATTR_READNONE);
+				      AC_FUNC_ATTR_READNONE |
+				      AC_FUNC_ATTR_LEGACY);
 }
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index be127c5..fb525dd 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -17,21 +17,21 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
  *
  */
 /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
 #include "ac_llvm_util.h"
-
+#include "util/bitscan.h"
 #include <llvm-c/Core.h>
 
 #include "c11/threads.h"
 
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
 
 static void ac_init_llvm_target()
 {
@@ -173,40 +173,53 @@ static const char *attr_to_str(enum ac_func_attr attr)
    case AC_FUNC_ATTR_READNONE: return "readnone";
    case AC_FUNC_ATTR_READONLY: return "readonly";
    default:
 	   fprintf(stderr, "Unhandled function attribute: %x\n", attr);
 	   return 0;
    }
 }
 
 #endif
 
-void
-ac_add_function_attr(LLVMValueRef function,
-                     int attr_idx,
-                     enum ac_func_attr attr)
+static void
+ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
+                     int attr_idx, enum ac_func_attr attr)
 {
-
 #if HAVE_LLVM < 0x0400
    LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
    if (attr_idx == -1) {
       LLVMAddFunctionAttr(function, llvm_attr);
    } else {
       LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
    }
 #else
-   LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
    const char *attr_name = attr_to_str(attr);
    unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
                                                       strlen(attr_name));
-   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
-   LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
+
+   if (LLVMIsAFunction(function))
+      LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+   else
+      LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
 #endif
 }
 
+void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+			    unsigned attrib_mask)
+{
+	attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
+	attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
+
+	while (attrib_mask) {
+		enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
+		ac_add_function_attr(ctx, function, -1, attr);
+	}
+}
+
 void
 ac_dump_module(LLVMModuleRef module)
 {
 	char *str = LLVMPrintModuleToString(module);
 	fprintf(stderr, "%s", str);
 	LLVMDisposeMessage(str);
 }
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 93d3d27..4fe4ab4 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -34,28 +34,29 @@ extern "C" {
 #endif
 
 enum ac_func_attr {
 	AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
 	AC_FUNC_ATTR_BYVAL        = (1 << 1),
 	AC_FUNC_ATTR_INREG        = (1 << 2),
 	AC_FUNC_ATTR_NOALIAS      = (1 << 3),
 	AC_FUNC_ATTR_NOUNWIND     = (1 << 4),
 	AC_FUNC_ATTR_READNONE     = (1 << 5),
 	AC_FUNC_ATTR_READONLY     = (1 << 6),
+
+	/* Legacy intrinsic that needs attributes on function declarations
+	 * and they must match the internal LLVM definition exactly, otherwise
+	 * intrinsic selection fails.
+	 */
+	AC_FUNC_ATTR_LEGACY       = (1u << 31),
 };
 
 LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill);
 
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
 bool ac_is_sgpr_param(LLVMValueRef param);
-
-void
-ac_add_function_attr(LLVMValueRef function,
-                     int attr_idx,
-                     enum ac_func_attr attr);
-
-void
-ac_dump_module(LLVMModuleRef module);
+void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+			    unsigned attrib_mask);
+void ac_dump_module(LLVMModuleRef module);
 
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b3dc63c..4ec19d5 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1039,26 +1039,27 @@ static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
 	src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
 	src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
 
 	dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
 	dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
 	result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
 	return result;
 }
 
 static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
-					  const char *intrin,
+					  const char *intrin, unsigned attr_mask,
 					  LLVMValueRef srcs[3])
 {
 	LLVMValueRef result;
 	LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
-	result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
+	result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3,
+					AC_FUNC_ATTR_READNONE | attr_mask);
 
 	result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
 	return result;
 }
 
 static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
 					 LLVMValueRef src0, LLVMValueRef src1,
 					 LLVMValueRef src2, LLVMValueRef src3)
 {
 	LLVMValueRef bfi_args[3], result;
@@ -1418,24 +1419,26 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
 		break;
 	case nir_op_fmin:
 		result = emit_intrin_2f_param(ctx, "llvm.minnum",
 		                              to_float_type(ctx, def_type), src[0], src[1]);
 		break;
 	case nir_op_ffma:
 		result = emit_intrin_3f_param(ctx, "llvm.fma",
 		                              to_float_type(ctx, def_type), src[0], src[1], src[2]);
 		break;
 	case nir_op_ibitfield_extract:
-		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
+		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32",
+					       AC_FUNC_ATTR_LEGACY, src);
 		break;
 	case nir_op_ubitfield_extract:
-		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
+		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32",
+					       AC_FUNC_ATTR_LEGACY, src);
 		break;
 	case nir_op_bitfield_insert:
 		result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
 		break;
 	case nir_op_bitfield_reverse:
 		result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
 		break;
 	case nir_op_bit_count:
 		result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
 		break;
@@ -1635,22 +1638,23 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
 		txq_args[txq_arg_count++] = tinfo->args[1];
 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */
 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */
 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
 		size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
-					   txq_args, txq_arg_count,
-					   AC_FUNC_ATTR_READNONE);
+					      txq_args, txq_arg_count,
+					      AC_FUNC_ATTR_READNONE |
+					      AC_FUNC_ATTR_LEGACY);
 
 		for (c = 0; c < 2; c++) {
 			half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
 								LLVMConstInt(ctx->i32, c, false), "");
 			half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
 			half_texel[c] = ac_emit_fdiv(&ctx->ac, ctx->f32one, half_texel[c]);
 			half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
 						      LLVMConstReal(ctx->f32, -0.5), "");
 		}
 	}
@@ -1660,21 +1664,22 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
 		LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
 		tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
 		tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
 		tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
 		tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
 		coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
 	}
 
 	tinfo->args[0] = coord;
 	return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
-				   AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
+				      AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
+				      AC_FUNC_ATTR_LEGACY);
 
 }
 
 static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
 					nir_tex_instr *instr,
 					struct ac_tex_info *tinfo)
 {
 	const char *name = "llvm.SI.image.sample";
 	const char *infix = "";
 	char intr_name[127];
@@ -1728,21 +1733,22 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
 		has_offset ? ".o" : "", type);
 
 	if (instr->op == nir_texop_tg4) {
 		enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
 		if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
 			return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name,
 							  (int)has_offset + (int)is_shadow);
 		}
 	}
 	return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
-				   AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
+				      AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
+				      AC_FUNC_ATTR_LEGACY);
 
 }
 
 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
                                                 nir_intrinsic_instr *instr)
 {
 	LLVMValueRef index = get_src(ctx, instr->src[0]);
 	unsigned desc_set = nir_intrinsic_desc_set(instr);
 	unsigned binding = nir_intrinsic_binding(instr);
 	LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
@@ -2006,21 +2012,23 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
 	if (instr->dest.ssa.bit_size == 64)
 		num_components *= 2;
 
 	for (unsigned i = 0; i < num_components; ++i) {
 		LLVMValueRef params[] = {
 			rsrc,
 			LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
 				     offset, "")
 		};
 		results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
-						 params, 2, AC_FUNC_ATTR_READNONE);
+						    params, 2,
+						    AC_FUNC_ATTR_READNONE |
+						    AC_FUNC_ATTR_LEGACY);
 	}
 
 
 	ret = ac_build_gather_values(&ctx->ac, results, instr->num_components);
 	return LLVMBuildBitCast(ctx->builder, ret,
 	                        get_def_type(ctx, &instr->dest.ssa), "");
 }
 
 static void
 radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
@@ -2103,21 +2111,23 @@ load_gs_input(struct nir_to_llvm_context *ctx,
 		args[1] = vtx_offset;
 		args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false);
 		args[3] = ctx->i32zero;
 		args[4] = ctx->i32one; /* OFFEN */
 		args[5] = ctx->i32zero; /* IDXEN */
 		args[6] = ctx->i32one; /* GLC */
 		args[7] = ctx->i32zero; /* SLC */
 		args[8] = ctx->i32zero; /* TFE */
 
 		value[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
-					    ctx->i32, args, 9, AC_FUNC_ATTR_READONLY);
+						  ctx->i32, args, 9,
+						  AC_FUNC_ATTR_READONLY |
+						  AC_FUNC_ATTR_LEGACY);
 	}
 	result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
 
 	return result;
 }
 
 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
 				   nir_intrinsic_instr *instr)
 {
 	LLVMValueRef values[8];
@@ -2685,21 +2695,23 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
 	params[2] = LLVMConstInt(ctx->i32, 15, false);
 	params[3] = ctx->i32zero;
 	params[4] = ctx->i32zero;
 	params[5] = da ? ctx->i32one : ctx->i32zero;
 	params[6] = ctx->i32zero;
 	params[7] = ctx->i32zero;
 	params[8] = ctx->i32zero;
 	params[9] = ctx->i32zero;
 
 	res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
-				  params, 10, AC_FUNC_ATTR_READNONE);
+				     params, 10,
+				     AC_FUNC_ATTR_READNONE |
+				     AC_FUNC_ATTR_LEGACY);
 
 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
 	    glsl_sampler_type_is_array(type)) {
 		LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
 		LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
 		LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
 		z = LLVMBuildSDiv(ctx->builder, z, six, "");
 		res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
 	}
 	return res;
@@ -2729,21 +2741,21 @@ static void emit_discard_if(struct nir_to_llvm_context *ctx,
 
 	cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
 			     get_src(ctx, instr->src[0]),
 			     ctx->i32zero, "");
 
 	cond = LLVMBuildSelect(ctx->builder, cond,
 			       LLVMConstReal(ctx->f32, -1.0f),
 			       ctx->f32zero, "");
 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
 			       ctx->voidt,
-			       &cond, 1, 0);
+			       &cond, 1, AC_FUNC_ATTR_LEGACY);
 }
 
 static LLVMValueRef
 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
 {
 	LLVMValueRef result;
 	LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
 	result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
 			      LLVMConstInt(ctx->i32, 0xfc0, false), "");
 
@@ -2984,21 +2996,21 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
 	 * have any effect, and GS threads have no externally observable
 	 * effects other than emitting vertices.
 	 */
 	can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
 				 LLVMConstInt(ctx->i32, ctx->gs_max_out_vertices, false), "");
 
 	kill = LLVMBuildSelect(ctx->builder, can_emit,
 			       LLVMConstReal(ctx->f32, 1.0f),
 			       LLVMConstReal(ctx->f32, -1.0f), "");
 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
-			    ctx->voidt, &kill, 1, 0);
+			    ctx->voidt, &kill, 1, AC_FUNC_ATTR_LEGACY);
 
 	/* loop num outputs */
 	idx = 0;
 	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
 		LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
 		if (!(ctx->output_mask & (1ull << i)))
 			continue;
 
 		for (unsigned j = 0; j < 4; j++) {
 			LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
@@ -3144,21 +3156,21 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx,
 	case nir_intrinsic_image_atomic_comp_swap:
 		result = visit_image_atomic(ctx, instr);
 		break;
 	case nir_intrinsic_image_size:
 		result = visit_image_size(ctx, instr);
 		break;
 	case nir_intrinsic_discard:
 		ctx->shader_info->fs.can_discard = true;
 		ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
 				       ctx->voidt,
-				       NULL, 0, 0);
+				       NULL, 0, AC_FUNC_ATTR_LEGACY);
 		break;
 	case nir_intrinsic_discard_if:
 		emit_discard_if(ctx, instr);
 		break;
 	case nir_intrinsic_memory_barrier:
 		emit_waitcnt(ctx);
 		break;
 	case nir_intrinsic_barrier:
 		emit_barrier(ctx);
 		break;
@@ -3924,21 +3936,22 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
 
 	for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
 		t_offset = LLVMConstInt(ctx->i32, index + i, false);
 
 		t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
 		args[0] = t_list;
 		args[1] = LLVMConstInt(ctx->i32, 0, false);
 		args[2] = buffer_index;
 		input = ac_emit_llvm_intrinsic(&ctx->ac,
 			"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
-			AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
+			AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
+			AC_FUNC_ATTR_LEGACY);
 
 		for (unsigned chan = 0; chan < 4; chan++) {
 			LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
 			ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
 				to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
 							input, llvm_chan, ""));
 		}
 	}
 }
 
@@ -4257,22 +4270,23 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 			args[4] = ctx->i32one;
 
 			for (unsigned chan = 0; chan < 2; chan++) {
 				LLVMValueRef pack_args[2] = {
 					values[2 * chan],
 					values[2 * chan + 1]
 				};
 				LLVMValueRef packed;
 
 				packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16",
-							     ctx->i32, pack_args, 2,
-							     AC_FUNC_ATTR_READNONE);
+								ctx->i32, pack_args, 2,
+								AC_FUNC_ATTR_READNONE |
+								AC_FUNC_ATTR_LEGACY);
 				args[chan + 5] = packed;
 			}
 			break;
 
 		case V_028714_SPI_SHADER_UNORM16_ABGR:
 			for (unsigned chan = 0; chan < 4; chan++) {
 				val[chan] = emit_float_saturate(ctx, values[chan], 0, 1);
 				val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
 							LLVMConstReal(ctx->f32, 65535), "");
 				val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
@@ -4443,21 +4457,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
 		si_llvm_init_export_args(ctx, values, target, args);
 
 		if (target >= V_008DFC_SQ_EXP_POS &&
 		    target <= (V_008DFC_SQ_EXP_POS + 3)) {
 			memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
 			       args, sizeof(args));
 		} else {
 			ac_emit_llvm_intrinsic(&ctx->ac,
 					       "llvm.SI.export",
 					       ctx->voidt,
-					       args, 9, 0);
+					       args, 9,
+					       AC_FUNC_ATTR_LEGACY);
 		}
 	}
 
 	/* We need to add the position output manually if it's missing. */
 	if (!pos_args[0][0]) {
 		pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
 		pos_args[0][1] = ctx->i32zero; /* EXEC mask */
 		pos_args[0][2] = ctx->i32zero; /* last export? */
 		pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false);
 		pos_args[0][4] = ctx->i32zero; /* COMPR flag */
@@ -4498,21 +4513,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
 		if (!pos_args[i][0])
 			continue;
 
 		/* Specify the target we are exporting */
 		pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false);
 		if (pos_idx == num_pos_exports)
 			pos_args[i][2] = ctx->i32one;
 		ac_emit_llvm_intrinsic(&ctx->ac,
 				       "llvm.SI.export",
 				       ctx->voidt,
-				       pos_args[i], 9, 0);
+				       pos_args[i], 9,
+				       AC_FUNC_ATTR_LEGACY);
 	}
 
 	ctx->shader_info->vs.pos_exports = num_pos_exports;
 	ctx->shader_info->vs.param_exports = param_count;
 }
 
 static void
 handle_es_outputs_post(struct nir_to_llvm_context *ctx)
 {
 	int j;
@@ -4554,21 +4570,22 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
 	si_llvm_init_export_args(ctx, color, param,
 				 args);
 
 	if (is_last) {
 		args[1] = ctx->i32one; /* whether the EXEC mask is valid */
 		args[2] = ctx->i32one; /* DONE bit */
 	} else if (args[0] == ctx->i32zero)
 		return; /* unnecessary NULL export */
 
 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
-			    ctx->voidt, args, 9, 0);
+			       ctx->voidt, args, 9,
+			       AC_FUNC_ATTR_LEGACY);
 }
 
 static void
 si_export_mrt_z(struct nir_to_llvm_context *ctx,
 		LLVMValueRef depth, LLVMValueRef stencil,
 		LLVMValueRef samplemask)
 {
 	LLVMValueRef args[9];
 	unsigned mask = 0;
 	args[1] = ctx->i32one; /* whether the EXEC mask is valid */
@@ -4598,21 +4615,22 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
 	}
 
 	/* SI (except OLAND) has a bug that it only looks
 	 * at the X writemask component. */
 	if (ctx->options->chip_class == SI &&
 	    ctx->options->family != CHIP_OLAND)
 		mask |= 0x01;
 
 	args[0] = LLVMConstInt(ctx->i32, mask, false);
 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
-			    ctx->voidt, args, 9, 0);
+			       ctx->voidt, args, 9,
+			       AC_FUNC_ATTR_LEGACY);
 }
 
 static void
 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
 {
 	unsigned index = 0;
 	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
 
 	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
 		LLVMValueRef values[4];
@@ -5021,21 +5039,22 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
 
 		for (unsigned j = 0; j < 4; j++) {
 			LLVMValueRef value;
 			args[2] = LLVMConstInt(ctx->i32,
 					       (idx * 4 + j) *
 					       ctx->gs_max_out_vertices * 16 * 4, false);
 
 			value = ac_emit_llvm_intrinsic(&ctx->ac,
 						       "llvm.SI.buffer.load.dword.i32.i32",
 						       ctx->i32, args, 9,
-						       AC_FUNC_ATTR_READONLY);
+						       AC_FUNC_ATTR_READONLY |
+						       AC_FUNC_ATTR_LEGACY);
 
 			LLVMBuildStore(ctx->builder,
 				       to_float(ctx, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
 		}
 		idx++;
 	}
 	handle_vs_outputs_post(ctx);
 }
 
 void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 8952dc8..586a9be 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1588,21 +1588,22 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
 
    func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
                                 arg_types, num_arg_types, 0);
 
    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
    variant->function = variant_func;
 
    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
    for (i = 0; i < num_arg_types; ++i)
       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
+         lp_add_function_attr(context, variant_func, i + 1,
+                              LP_FUNC_ATTR_NOALIAS);
 
    context_ptr               = LLVMGetParam(variant_func, 0);
    io_ptr                    = LLVMGetParam(variant_func, 1);
    vbuffers_ptr              = LLVMGetParam(variant_func, 2);
    count                     = LLVMGetParam(variant_func, 3);
    /*
     * XXX: the maxelt part is unused. Not really useful, since we cannot
     * get index buffer overflows due to vsplit (which provides its own
     * elts buffer, with a different size than what's passed in here).
     */
@@ -2262,21 +2263,22 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
 
    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
 
    variant->function = variant_func;
 
    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
 
    for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
+         lp_add_function_attr(context, variant_func, i + 1,
+                              LP_FUNC_ATTR_NOALIAS);
 
    context_ptr               = LLVMGetParam(variant_func, 0);
    input_array               = LLVMGetParam(variant_func, 1);
    io_ptr                    = LLVMGetParam(variant_func, 2);
    num_prims                 = LLVMGetParam(variant_func, 3);
    system_values.instance_id = LLVMGetParam(variant_func, 4);
    prim_id_ptr               = LLVMGetParam(variant_func, 5);
    system_values.invocation_id = LLVMGetParam(variant_func, 6);
 
    lp_build_name(context_ptr, "context");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index 049671a..1b50e68 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -152,83 +152,100 @@ static const char *attr_to_str(enum lp_func_attr attr)
    case LP_FUNC_ATTR_READONLY: return "readonly";
    default:
       _debug_printf("Unhandled function attribute: %x\n", attr);
       return 0;
    }
 }
 
 #endif
 
 void
-lp_add_function_attr(LLVMValueRef function,
-                     int attr_idx,
-                     enum lp_func_attr attr)
+lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
+                     int attr_idx, enum lp_func_attr attr)
 {
 
 #if HAVE_LLVM < 0x0400
    LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr);
    if (attr_idx == -1) {
       LLVMAddFunctionAttr(function, llvm_attr);
    } else {
       LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
    }
 #else
-   LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
    const char *attr_name = attr_to_str(attr);
    unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
                                                       strlen(attr_name));
-   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
-   LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
+
+   if (LLVMIsAFunction(function))
+      LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+   else
+      LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
 #endif
 }
 
+static void
+lp_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+                       unsigned attrib_mask)
+{
+   /* NoUnwind indicates that the intrinsic never raises a C++ exception.
+    * Set it for all intrinsics.
+    */
+   attrib_mask |= LP_FUNC_ATTR_NOUNWIND;
+   attrib_mask &= ~LP_FUNC_ATTR_LEGACY;
+
+   while (attrib_mask) {
+      enum lp_func_attr attr = 1u << u_bit_scan(&attrib_mask);
+      lp_add_function_attr(ctx, function, -1, attr);
+   }
+}
+
 LLVMValueRef
 lp_build_intrinsic(LLVMBuilderRef builder,
                    const char *name,
                    LLVMTypeRef ret_type,
                    LLVMValueRef *args,
                    unsigned num_args,
                    unsigned attr_mask)
 {
    LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
-   LLVMValueRef function;
+   LLVMContextRef ctx = LLVMGetModuleContext(module);
+   LLVMValueRef function, call;
+   bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
+                             !(attr_mask & LP_FUNC_ATTR_LEGACY);
 
    function = LLVMGetNamedFunction(module, name);
    if(!function) {
       LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
       unsigned i;
 
       assert(num_args <= LP_MAX_FUNC_ARGS);
 
       for(i = 0; i < num_args; ++i) {
          assert(args[i]);
          arg_types[i] = LLVMTypeOf(args[i]);
       }
 
       function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
 
-      /* NoUnwind indicates that the intrinsic never raises a C++ exception.
-       * Set it for all intrinsics.
-       */
-      attr_mask |= LP_FUNC_ATTR_NOUNWIND;
-
-      while (attr_mask) {
-         enum lp_func_attr attr = 1 << u_bit_scan(&attr_mask);
-         lp_add_function_attr(function, -1, attr);
-      }
+      if (!set_callsite_attrs)
+         lp_add_func_attributes(ctx, function, attr_mask);
 
       if (gallivm_debug & GALLIVM_DEBUG_IR) {
          lp_debug_dump_value(function);
       }
    }
 
-   return LLVMBuildCall(builder, function, args, num_args, "");
+   call = LLVMBuildCall(builder, function, args, num_args, "");
+   if (set_callsite_attrs)
+      lp_add_func_attributes(ctx, call, attr_mask);
+   return call;
 }
 
 
 LLVMValueRef
 lp_build_intrinsic_unary(LLVMBuilderRef builder,
                          const char *name,
                          LLVMTypeRef ret_type,
                          LLVMValueRef a)
 {
    return lp_build_intrinsic(builder, name, ret_type, &a, 1, 0);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index 039e9ab..d279911 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -47,39 +47,44 @@
 #define LP_MAX_FUNC_ARGS 32
 
 enum lp_func_attr {
    LP_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
    LP_FUNC_ATTR_BYVAL        = (1 << 1),
    LP_FUNC_ATTR_INREG        = (1 << 2),
    LP_FUNC_ATTR_NOALIAS      = (1 << 3),
    LP_FUNC_ATTR_NOUNWIND     = (1 << 4),
    LP_FUNC_ATTR_READNONE     = (1 << 5),
    LP_FUNC_ATTR_READONLY     = (1 << 6),
+
+   /* Legacy intrinsic: its attributes must be set on the function
+    * declaration and must match the internal LLVM definition exactly;
+    * otherwise intrinsic selection fails.
+    */
+   LP_FUNC_ATTR_LEGACY       = (1u << 31),
 };
 
 void
 lp_format_intrinsic(char *name,
                     size_t size,
                     const char *name_root,
                     LLVMTypeRef type);
 
 LLVMValueRef
 lp_declare_intrinsic(LLVMModuleRef module,
                      const char *name,
                      LLVMTypeRef ret_type,
                      LLVMTypeRef *arg_types,
                      unsigned num_args);
 
 void
-lp_add_function_attr(LLVMValueRef function,
-                     int attr_idx,
-                     enum lp_func_attr attr);
+lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
+                     int attr_idx, enum lp_func_attr attr);
 
 LLVMValueRef
 lp_build_intrinsic(LLVMBuilderRef builder,
                    const char *name,
                    LLVMTypeRef ret_type,
                    LLVMValueRef *args,
                    unsigned num_args,
                    unsigned attr_mask);
 
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 161a03f..a1e2601 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -3311,21 +3311,22 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
 
       val_type[0] = val_type[1] = val_type[2] = val_type[3] =
          lp_build_vec_type(gallivm, params->type);
       ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
       function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
       function = LLVMAddFunction(module, func_name, function_type);
 
       for (i = 0; i < num_param; ++i) {
          if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
 
-            lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
+            lp_add_function_attr(gallivm->context, function, i + 1,
+                                 LP_FUNC_ATTR_NOALIAS);
          }
       }
 
       LLVMSetFunctionCallConv(function, LLVMFastCallConv);
       LLVMSetLinkage(function, LLVMInternalLinkage);
 
       lp_build_sample_gen_func(gallivm,
                                static_texture_state,
                                static_sampler_state,
                                dynamic_state,
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index af47b52..70b0a67 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -2488,21 +2488,22 @@ generate_fragment(struct llvmpipe_context *lp,
    function = LLVMAddFunction(gallivm->module, func_name, func_type);
    LLVMSetFunctionCallConv(function, LLVMCCallConv);
 
    variant->function[partial_mask] = function;
 
    /* XXX: need to propagate noalias down into color param now we are
     * passing a pointer-to-pointer?
     */
    for(i = 0; i < ARRAY_SIZE(arg_types); ++i)
       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
+         lp_add_function_attr(gallivm->context, function, i + 1,
+                              LP_FUNC_ATTR_NOALIAS);
 
    context_ptr  = LLVMGetParam(function, 0);
    x            = LLVMGetParam(function, 1);
    y            = LLVMGetParam(function, 2);
    facing       = LLVMGetParam(function, 3);
    a0_ptr       = LLVMGetParam(function, 4);
    dadx_ptr     = LLVMGetParam(function, 5);
    dady_ptr     = LLVMGetParam(function, 6);
    color_ptr_ptr = LLVMGetParam(function, 7);
    depth_ptr    = LLVMGetParam(function, 8);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 6b0df21..66bc42c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -609,29 +609,29 @@ emit_tri_coef( struct gallivm_state *gallivm,
       default:
          assert(0);
       }
    }
 }
 
 
 /* XXX: generic code:
  */
 static void
-set_noalias(LLVMBuilderRef builder,
+set_noalias(LLVMContextRef ctx,
             LLVMValueRef function,
             const LLVMTypeRef *arg_types,
             int nr_args)
 {
    int i;
    for(i = 0; i < nr_args; ++i)
       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
+         lp_add_function_attr(ctx, function, i + 1, LP_FUNC_ATTR_NOALIAS);
 }
 
 static void
 init_args(struct gallivm_state *gallivm,
           const struct lp_setup_variant_key *key,
           struct lp_setup_args *args)
 {
    LLVMBuilderRef b = gallivm->builder;
    LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
    LLVMValueRef onef = lp_build_const_float(gallivm, 1.0);
@@ -783,21 +783,22 @@ generate_setup_variant(struct lp_setup_variant_key *key,
    lp_build_name(args.dadx, "out_dadx");
    lp_build_name(args.dady, "out_dady");
 
    /*
     * Function body
     */
    block = LLVMAppendBasicBlockInContext(gallivm->context,
                                          variant->function, "entry");
    LLVMPositionBuilderAtEnd(builder, block);
 
-   set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types));
+   set_noalias(gallivm->context, variant->function, arg_types,
+               ARRAY_SIZE(arg_types));
    init_args(gallivm, &variant->key, &args);
    emit_tri_coef(gallivm, &variant->key, &args);
 
    LLVMBuildRetVoid(builder);
 
    gallivm_verify_function(gallivm, variant->function);
 
    gallivm_compile_module(gallivm);
 
    variant->jit_function = (lp_jit_setup_triangle)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index f9eaea2..ea3f4fd 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -394,21 +394,22 @@ static void declare_input_vs(
 	}
 
 	args[0] = t_list;
 	args[2] = vertex_index;
 
 	for (unsigned i = 0; i < num_fetches; i++) {
 		args[1] = LLVMConstInt(ctx->i32, fetch_stride * i, 0);
 
 		input[i] = lp_build_intrinsic(gallivm->builder,
 			"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
-			LP_FUNC_ATTR_READNONE);
+			LP_FUNC_ATTR_READNONE |
+			LP_FUNC_ATTR_LEGACY);
 	}
 
 	/* Break up the vec4 into individual components */
 	for (chan = 0; chan < 4; chan++) {
 		LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
 		out[chan] = LLVMBuildExtractElement(gallivm->builder,
 						    input[0], llvm_chan, "");
 	}
 
 	switch (fix_fetch) {
@@ -1116,28 +1117,30 @@ static LLVMValueRef fetch_input_gs(
 	args[3] = uint->zero;
 	args[4] = uint->one;  /* OFFEN */
 	args[5] = uint->zero; /* IDXEN */
 	args[6] = uint->one;  /* GLC */
 	args[7] = uint->zero; /* SLC */
 	args[8] = uint->zero; /* TFE */
 
 	value = lp_build_intrinsic(gallivm->builder,
 				   "llvm.SI.buffer.load.dword.i32.i32",
 				   ctx->i32, args, 9,
-				   LP_FUNC_ATTR_READONLY);
+				   LP_FUNC_ATTR_READONLY |
+				   LP_FUNC_ATTR_LEGACY);
 	if (tgsi_type_is_64bit(type)) {
 		LLVMValueRef value2;
 		args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
 		value2 = lp_build_intrinsic(gallivm->builder,
 					    "llvm.SI.buffer.load.dword.i32.i32",
 					    ctx->i32, args, 9,
-					    LP_FUNC_ATTR_READONLY);
+					    LP_FUNC_ATTR_READONLY |
+					    LP_FUNC_ATTR_LEGACY);
 		return si_llvm_emit_fetch_64bit(bld_base, type,
 						value, value2);
 	}
 	return LLVMBuildBitCast(gallivm->builder,
 				value,
 				tgsi2llvmtype(bld_base, type), "");
 }
 
 static int lookup_interp_param_index(unsigned interpolate, unsigned location)
 {
@@ -1361,21 +1364,22 @@ static LLVMValueRef get_sample_id(struct si_shader_context *radeon_bld)
  * Load a dword from a constant buffer.
  */
 static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
 				      LLVMValueRef resource,
 				      LLVMValueRef offset)
 {
 	LLVMBuilderRef builder = ctx->gallivm.builder;
 	LLVMValueRef args[2] = {resource, offset};
 
 	return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2,
-			       LP_FUNC_ATTR_READNONE);
+				  LP_FUNC_ATTR_READNONE |
+				  LP_FUNC_ATTR_LEGACY);
 }
 
 static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, LLVMValueRef sample_id)
 {
 	struct si_shader_context *ctx =
 		si_shader_context(&radeon_bld->bld_base);
 	struct lp_build_context *uint_bld = &radeon_bld->bld_base.uint_bld;
 	struct gallivm_state *gallivm = &radeon_bld->gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
 	LLVMValueRef desc = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS);
@@ -1815,21 +1819,22 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
 		for (chan = 0; chan < 2; chan++) {
 			LLVMValueRef pack_args[2] = {
 				values[2 * chan],
 				values[2 * chan + 1]
 			};
 			LLVMValueRef packed;
 
 			packed = lp_build_intrinsic(base->gallivm->builder,
 						    "llvm.SI.packf16",
 						    ctx->i32, pack_args, 2,
-						    LP_FUNC_ATTR_READNONE);
+						    LP_FUNC_ATTR_READNONE |
+						    LP_FUNC_ATTR_LEGACY);
 			args[chan + 5] =
 				LLVMBuildBitCast(base->gallivm->builder,
 						 packed, ctx->f32, "");
 		}
 		break;
 
 	case V_028714_SPI_SHADER_UNORM16_ABGR:
 		for (chan = 0; chan < 4; chan++) {
 			val[chan] = ac_emit_clamp(&ctx->ac, values[chan]);
 			val[chan] = LLVMBuildFMul(builder, val[chan],
@@ -1947,24 +1952,24 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
 			lp_build_cmp(&bld_base->base,
 				     ctx->shader->key.part.ps.epilog.alpha_func,
 				     alpha, alpha_ref);
 		LLVMValueRef arg =
 			lp_build_select(&bld_base->base,
 					alpha_pass,
 					lp_build_const_float(gallivm, 1.0f),
 					lp_build_const_float(gallivm, -1.0f));
 
 		lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
-				   ctx->voidt, &arg, 1, 0);
+				   ctx->voidt, &arg, 1, LP_FUNC_ATTR_LEGACY);
 	} else {
 		lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kilp",
-				   ctx->voidt, NULL, 0, 0);
+				   ctx->voidt, NULL, 0, LP_FUNC_ATTR_LEGACY);
 	}
 }
 
 static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
 						  LLVMValueRef alpha,
 						  unsigned samplemask_param)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	LLVMValueRef coverage;
@@ -2295,21 +2300,21 @@ handle_semantic:
 
 		si_llvm_init_export_args(bld_base, outputs[i].values, target, args);
 
 		if (target >= V_008DFC_SQ_EXP_POS &&
 		    target <= (V_008DFC_SQ_EXP_POS + 3)) {
 			memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
 			       args, sizeof(args));
 		} else {
 			lp_build_intrinsic(base->gallivm->builder,
 					   "llvm.SI.export", ctx->voidt,
-					   args, 9, 0);
+					   args, 9, LP_FUNC_ATTR_LEGACY);
 		}
 
 		if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
 			semantic_name = TGSI_SEMANTIC_GENERIC;
 			goto handle_semantic;
 		}
 	}
 
 	shader->info.nr_param_exports = param_count;
 
@@ -2381,21 +2386,22 @@ handle_semantic:
 			continue;
 
 		/* Specify the target we are exporting */
 		pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);
 
 		if (pos_idx == shader->info.nr_pos_exports)
 			/* Specify that this is the last export */
 			pos_args[i][2] = uint->one;
 
 		lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-				   ctx->voidt, pos_args[i], 9, 0);
+				   ctx->voidt, pos_args[i], 9,
+				   LP_FUNC_ATTR_LEGACY);
 	}
 }
 
 /**
  * Forward all outputs from the vertex shader to the TES. This is only used
  * for the fixed function TCS.
  */
 static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
@@ -2972,42 +2978,42 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
 		memcpy(exp->args[exp->num++], args, sizeof(args));
 	}
 }
 
 static void si_emit_ps_exports(struct si_shader_context *ctx,
 			       struct si_ps_exports *exp)
 {
 	for (unsigned i = 0; i < exp->num; i++)
 		lp_build_intrinsic(ctx->gallivm.builder,
 				   "llvm.SI.export", ctx->voidt,
-				   exp->args[i], 9, 0);
+				   exp->args[i], 9, LP_FUNC_ATTR_LEGACY);
 }
 
 static void si_export_null(struct lp_build_tgsi_context *bld_base)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct lp_build_context *base = &bld_base->base;
 	struct lp_build_context *uint = &bld_base->uint_bld;
 	LLVMValueRef args[9];
 
 	args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
 	args[1] = uint->one; /* whether the EXEC mask is valid */
 	args[2] = uint->one; /* DONE bit */
 	args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
 	args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
 	args[5] = base->undef; /* R */
 	args[6] = base->undef; /* G */
 	args[7] = base->undef; /* B */
 	args[8] = base->undef; /* A */
 
 	lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-			   ctx->voidt, args, 9, 0);
+			   ctx->voidt, args, 9, LP_FUNC_ATTR_LEGACY);
 }
 
 /**
  * Return PS outputs in this order:
  *
  * v[0:3] = color0.xyzw
  * v[4:7] = color1.xyzw
  * ...
  * vN+0 = Depth
  * vN+1 = Stencil
@@ -4082,21 +4088,21 @@ static void resq_emit(
 
 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
 		out = LLVMBuildExtractElement(builder, emit_data->args[0],
 					      lp_build_const_int32(gallivm, 2), "");
 	} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
 		out = get_buffer_size(bld_base, emit_data->args[0]);
 	} else {
 		out = lp_build_intrinsic(
 			builder, "llvm.SI.getresinfo.i32", emit_data->dst_type,
 			emit_data->args, emit_data->arg_count,
-			LP_FUNC_ATTR_READNONE);
+			LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
 
 		/* Divide the number of layers by 6 to get the number of cubes. */
 		if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
 			LLVMValueRef imm2 = lp_build_const_int32(gallivm, 2);
 			LLVMValueRef imm6 = lp_build_const_int32(gallivm, 6);
 
 			LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, "");
 			z = LLVMBuildSDiv(builder, z, imm6, "");
 			out = LLVMBuildInsertElement(builder, out, z, imm2, "");
 		}
@@ -4319,21 +4325,21 @@ static void txq_emit(const struct lp_build_tgsi_action *action,
 
 	if (target == TGSI_TEXTURE_BUFFER) {
 		/* Just return the buffer size. */
 		emit_data->output[emit_data->chan] = emit_data->args[0];
 		return;
 	}
 
 	emit_data->output[emit_data->chan] = lp_build_intrinsic(
 		base->gallivm->builder, "llvm.SI.getresinfo.i32",
 		emit_data->dst_type, emit_data->args, emit_data->arg_count,
-		LP_FUNC_ATTR_READNONE);
+		LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
 
 	/* Divide the number of layers by 6 to get the number of cubes. */
 	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
 	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
 		LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 		LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
 		LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);
 
 		LLVMValueRef v4 = emit_data->output[emit_data->chan];
 		LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
@@ -4728,21 +4734,21 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx,
 		tmp = LLVMBuildBitCast(builder, tmp, ctx->f32, "");
 		tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
 		tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
 		coord = LLVMBuildInsertElement(builder, coord, tmp, index, "");
 	}
 
 	emit_data->args[0] = coord;
 	emit_data->output[emit_data->chan] =
 		lp_build_intrinsic(builder, intr_name, emit_data->dst_type,
 				   emit_data->args, emit_data->arg_count,
-				   LP_FUNC_ATTR_READNONE);
+				   LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
 }
 
 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 				struct lp_build_tgsi_context *bld_base,
 				struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct lp_build_context *base = &bld_base->base;
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	unsigned opcode = inst->Instruction.Opcode;
@@ -4752,21 +4758,21 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 	bool is_shadow = tgsi_is_shadow_target(target);
 	char type[64];
 	const char *name = "llvm.SI.image.sample";
 	const char *infix = "";
 
 	if (target == TGSI_TEXTURE_BUFFER) {
 		emit_data->output[emit_data->chan] = lp_build_intrinsic(
 			base->gallivm->builder,
 			"llvm.SI.vs.load.input", emit_data->dst_type,
 			emit_data->args, emit_data->arg_count,
-			LP_FUNC_ATTR_READNONE);
+			LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
 		return;
 	}
 
 	switch (opcode) {
 	case TGSI_OPCODE_TXF:
 		name = target == TGSI_TEXTURE_2D_MSAA ||
 		       target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
 			       "llvm.SI.image.load" :
 			       "llvm.SI.image.load.mip";
 		is_shadow = false;
@@ -4829,21 +4835,21 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 			 */
 			si_lower_gather4_integer(ctx, emit_data, intr_name,
 						 (int)has_offset + (int)is_shadow);
 			return;
 		}
 	}
 
 	emit_data->output[emit_data->chan] = lp_build_intrinsic(
 		base->gallivm->builder, intr_name, emit_data->dst_type,
 		emit_data->args, emit_data->arg_count,
-		LP_FUNC_ATTR_READNONE);
+		LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
 }
 
 static void si_llvm_emit_txqs(
 	const struct lp_build_tgsi_action *action,
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
@@ -5118,21 +5124,21 @@ static void si_llvm_emit_vertex(
 				 lp_build_const_int32(gallivm,
 						      shader->selector->gs_max_out_vertices), "");
 
 	bool use_kill = !info->writes_memory;
 	if (use_kill) {
 		kill = lp_build_select(&bld_base->base, can_emit,
 				       lp_build_const_float(gallivm, 1.0f),
 				       lp_build_const_float(gallivm, -1.0f));
 
 		lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
-				   ctx->voidt, &kill, 1, 0);
+				   ctx->voidt, &kill, 1, LP_FUNC_ATTR_LEGACY);
 	} else {
 		lp_build_if(&if_state, gallivm, can_emit);
 	}
 
 	offset = 0;
 	for (i = 0; i < info->num_outputs; i++) {
 		LLVMValueRef *out_ptr = ctx->outputs[i];
 
 		for (chan = 0; chan < 4; chan++) {
 			if (!(info->output_usagemask[i] & (1 << chan)) ||
@@ -5238,24 +5244,26 @@ static void si_create_function(struct si_shader_context *ctx,
 		LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
 
 		/* The combination of:
 		 * - ByVal
 		 * - dereferenceable
 		 * - invariant.load
 		 * allows the optimization passes to move loads and reduces
 		 * SGPR spilling significantly.
 		 */
 		if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
-			lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_BYVAL);
+			lp_add_function_attr(ctx->gallivm.context, ctx->main_fn,
+                                             i + 1, LP_FUNC_ATTR_BYVAL);
 			lp_add_attr_dereferenceable(P, UINT64_MAX);
 		} else
-			lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
+			lp_add_function_attr(ctx->gallivm.context, ctx->main_fn,
+                                             i + 1, LP_FUNC_ATTR_INREG);
 	}
 
 	LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
 					   "no-signed-zeros-fp-math",
 					   "true");
 
 	if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) {
 		/* These were copied from some LLVM test. */
 		LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
 						   "less-precise-fpmad",
@@ -5740,21 +5748,22 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
 	offset = LLVMBuildMul(builder, address[1],
 			      LLVMConstInt(ctx->i32, 4, 0), "");
 	row = buffer_load_const(ctx, desc, offset);
 	row = LLVMBuildBitCast(builder, row, ctx->i32, "");
 	bit = LLVMBuildLShr(builder, row, address[0], "");
 	bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
 
 	/* The intrinsic kills the thread if arg < 0. */
 	bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0),
 			      LLVMConstReal(ctx->f32, -1), "");
-	lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, 0);
+	lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1,
+			   LP_FUNC_ATTR_LEGACY);
 }
 
 void si_shader_binary_read_config(struct radeon_shader_binary *binary,
 				  struct si_shader_config *conf,
 				  unsigned symbol_offset)
 {
 	unsigned i;
 	const unsigned char *config =
 		radeon_shader_binary_config_start(binary, symbol_offset);
 	bool really_needs_scratch = false;
@@ -6308,23 +6317,24 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 				}
 
 				args[2] = lp_build_const_int32(
 					gallivm,
 					offset * gs_selector->gs_max_out_vertices * 16 * 4);
 				offset++;
 
 				outputs[i].values[chan] =
 					LLVMBuildBitCast(gallivm->builder,
 						 lp_build_intrinsic(gallivm->builder,
-								 "llvm.SI.buffer.load.dword.i32.i32",
-								 ctx.i32, args, 9,
-								 LP_FUNC_ATTR_READONLY),
+								    "llvm.SI.buffer.load.dword.i32.i32",
+								    ctx.i32, args, 9,
+								    LP_FUNC_ATTR_READONLY |
+								    LP_FUNC_ATTR_LEGACY),
 						 ctx.f32, "");
 			}
 		}
 
 		/* Streamout and exports. */
 		if (gs_selector->so.num_outputs) {
 			si_llvm_emit_streamout(&ctx, outputs,
 					       gsinfo->num_outputs,
 					       stream);
 		}
@@ -7047,21 +7057,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
 	LLVMValueRef out[48];
 	LLVMTypeRef function_type;
 	unsigned num_params;
 	unsigned num_out;
 	MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */
 	unsigned num_sgprs, num_vgprs;
 	unsigned last_sgpr_param;
 	unsigned gprs;
 
 	for (unsigned i = 0; i < num_parts; ++i) {
-		lp_add_function_attr(parts[i], -1, LP_FUNC_ATTR_ALWAYSINLINE);
+		lp_add_function_attr(gallivm->context, parts[i], -1,
+				     LP_FUNC_ATTR_ALWAYSINLINE);
 		LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
 	}
 
 	/* The parameters of the wrapper function correspond to those of the
 	 * first part in terms of SGPRs and VGPRs, but we use the types of the
 	 * main part to get the right types. This is relevant for the
 	 * dereferenceable attribute on descriptor table pointers.
 	 */
 	num_sgprs = 0;
 	num_vgprs = 0;
@@ -7164,21 +7175,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
 			param_size = llvm_get_type_size(param_type) / 4;
 			is_sgpr = ac_is_sgpr_param(param);
 
 			if (is_sgpr) {
 #if HAVE_LLVM < 0x0400
 				LLVMRemoveAttribute(param, LLVMByValAttribute);
 #else
 				unsigned kind_id = LLVMGetEnumAttributeKindForName("byval", 5);
 				LLVMRemoveEnumAttributeAtIndex(parts[part], param_idx + 1, kind_id);
 #endif
-				lp_add_function_attr(parts[part], param_idx + 1, LP_FUNC_ATTR_INREG);
+				lp_add_function_attr(gallivm->context, parts[part],
+						     param_idx + 1, LP_FUNC_ATTR_INREG);
 			}
 
 			assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));
 			assert(is_sgpr || out_idx >= num_out_sgpr);
 
 			if (param_size == 1)
 				arg = out[out_idx];
 			else
 				arg = lp_build_gather_values(gallivm, &out[out_idx], param_size);
 
@@ -7679,21 +7691,21 @@ static void si_build_vs_epilog_function(struct si_shader_context *ctx,
 					       key->vs_epilog.prim_id_param_offset);
 		args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
 		args[5] = LLVMGetParam(ctx->main_fn,
 				       VS_EPILOG_PRIMID_LOC); /* X */
 		args[6] = base->undef; /* Y */
 		args[7] = base->undef; /* Z */
 		args[8] = base->undef; /* W */
 
 		lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
 				   LLVMVoidTypeInContext(base->gallivm->context),
-				   args, 9, 0);
+				   args, 9, LP_FUNC_ATTR_LEGACY);
 	}
 
 	LLVMBuildRetVoid(gallivm->builder);
 }
 
 /**
  * Create & compile a vertex shader epilog. This a helper used by VS and TES.
  */
 static bool si_get_vs_epilog(struct si_screen *sscreen,
 			     LLVMTargetMachineRef tm,
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index 10268e9..ee59fed 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -51,27 +51,23 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
 	emit_data->arg_count = 1;
 	emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
 					lp_build_const_float(gallivm, -1.0f),
 					bld_base->base.zero, "");
 }
 
 static void kil_emit(const struct lp_build_tgsi_action *action,
 		     struct lp_build_tgsi_context *bld_base,
 		     struct lp_build_emit_data *emit_data)
 {
-	unsigned i;
-	for (i = 0; i < emit_data->arg_count; i++) {
-		emit_data->output[i] = lp_build_intrinsic_unary(
-			bld_base->base.gallivm->builder,
-			action->intr_name,
-			emit_data->dst_type, emit_data->args[i]);
-	}
+	lp_build_intrinsic(bld_base->base.gallivm->builder,
+			   action->intr_name, emit_data->dst_type,
+			   &emit_data->args[0], 1, LP_FUNC_ATTR_LEGACY);
 }
 
 static void emit_icmp(const struct lp_build_tgsi_action *action,
 		      struct lp_build_tgsi_context *bld_base,
 		      struct lp_build_emit_data *emit_data)
 {
 	unsigned pred;
 	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 	LLVMContextRef context = bld_base->base.gallivm->context;
 
@@ -500,21 +496,23 @@ static void emit_bfe(const struct lp_build_tgsi_action *action,
 		     struct lp_build_tgsi_context *bld_base,
 		     struct lp_build_emit_data *emit_data)
 {
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
 	LLVMValueRef bfe_sm5;
 	LLVMValueRef cond;
 
 	bfe_sm5 = lp_build_intrinsic(builder, action->intr_name,
 				     emit_data->dst_type, emit_data->args,
-				     emit_data->arg_count, LP_FUNC_ATTR_READNONE);
+				     emit_data->arg_count,
+				     LP_FUNC_ATTR_READNONE |
+				     LP_FUNC_ATTR_LEGACY);
 
 	/* Correct for GLSL semantics. */
 	cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
 			     lp_build_const_int32(gallivm, 32), "");
 	emit_data->output[emit_data->chan] =
 		LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
 }
 
 /* this is ffs in C */
 static void emit_lsb(const struct lp_build_tgsi_action *action,
-- 
2.7.4



More information about the mesa-dev mailing list