Mesa (master): gallivm,ac: add function attributes at call sites instead of declarations

Marek Olšák mareko at kemper.freedesktop.org
Wed Mar 1 17:59:58 UTC 2017


Module: Mesa
Branch: master
Commit: 940da36a65c767ec9fc6817df3d0262b52de42f6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=940da36a65c767ec9fc6817df3d0262b52de42f6

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Wed Feb 22 02:29:12 2017 +0100

gallivm,ac: add function attributes at call sites instead of declarations

Function attributes can vary at call sites if the intrinsic is NOT a legacy
SI intrinsic. We need this to force readnone or inaccessiblememonly on some
amdgcn intrinsics.

Call-site attributes are only used with LLVM 4.0 and later. Intrinsics only
used with LLVM <= 3.9 don't need the LEGACY flag.

The gallivm and ac changes are in the same patch, because splitting them
would be more complicated with all the LEGACY uses all over the place.

v2: don't change the prototype of lp_add_function_attr.

Reviewed-by: Jose Fonseca <jfonseca at vmware.com> (v1)

---

 src/amd/common/ac_llvm_build.c                    | 23 +++++---
 src/amd/common/ac_llvm_util.c                     | 31 ++++++++---
 src/amd/common/ac_llvm_util.h                     | 17 +++---
 src/amd/common/ac_nir_to_llvm.c                   | 63 +++++++++++++--------
 src/gallium/auxiliary/gallivm/lp_bld_intr.c       | 68 ++++++++++++++++-------
 src/gallium/auxiliary/gallivm/lp_bld_intr.h       | 11 +++-
 src/gallium/drivers/radeonsi/si_shader.c          | 52 +++++++++--------
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 14 ++---
 8 files changed, 177 insertions(+), 102 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index a70984e..a0b74a5 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -82,7 +82,9 @@ ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
 		       LLVMTypeRef return_type, LLVMValueRef *params,
 		       unsigned param_count, unsigned attrib_mask)
 {
-	LLVMValueRef function;
+	LLVMValueRef function, call;
+	bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
+				  !(attrib_mask & AC_FUNC_ATTR_LEGACY);
 
 	function = LLVMGetNamedFunction(ctx->module, name);
 	if (!function) {
@@ -102,13 +104,14 @@ ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
 		LLVMSetFunctionCallConv(function, LLVMCCallConv);
 		LLVMSetLinkage(function, LLVMExternalLinkage);
 
-		attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
-		while (attrib_mask) {
-			enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
-			ac_add_function_attr(function, -1, attr);
-		}
+		if (!set_callsite_attrs)
+			ac_add_func_attributes(ctx->context, function, attrib_mask);
 	}
-	return LLVMBuildCall(ctx->builder, function, params, param_count, "");
+
+	call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
+	if (set_callsite_attrs)
+		ac_add_func_attributes(ctx->context, call, attrib_mask);
+	return call;
 }
 
 LLVMValueRef
@@ -530,7 +533,8 @@ ac_build_tbuffer_store(struct ac_llvm_context *ctx,
 	snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
 
 	ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
-			       args, ARRAY_SIZE(args), 0);
+			       args, ARRAY_SIZE(args),
+			       AC_FUNC_ATTR_LEGACY);
 }
 
 void
@@ -842,5 +846,6 @@ LLVMValueRef ac_emit_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
 	};
 
 	return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3,
-				      AC_FUNC_ATTR_READNONE);
+				      AC_FUNC_ATTR_READNONE |
+				      AC_FUNC_ATTR_LEGACY);
 }
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index be127c5..fb525dd 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -24,7 +24,7 @@
  */
 /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
 #include "ac_llvm_util.h"
-
+#include "util/bitscan.h"
 #include <llvm-c/Core.h>
 
 #include "c11/threads.h"
@@ -180,12 +180,10 @@ static const char *attr_to_str(enum ac_func_attr attr)
 
 #endif
 
-void
-ac_add_function_attr(LLVMValueRef function,
-                     int attr_idx,
-                     enum ac_func_attr attr)
+static void
+ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
+                     int attr_idx, enum ac_func_attr attr)
 {
-
 #if HAVE_LLVM < 0x0400
    LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
    if (attr_idx == -1) {
@@ -194,15 +192,30 @@ ac_add_function_attr(LLVMValueRef function,
       LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
    }
 #else
-   LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
    const char *attr_name = attr_to_str(attr);
    unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
                                                       strlen(attr_name));
-   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
-   LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
+
+   if (LLVMIsAFunction(function))
+      LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+   else
+      LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
 #endif
 }
 
+void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+			    unsigned attrib_mask)
+{
+	attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
+	attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
+
+	while (attrib_mask) {
+		enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
+		ac_add_function_attr(ctx, function, -1, attr);
+	}
+}
+
 void
 ac_dump_module(LLVMModuleRef module)
 {
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 93d3d27..4fe4ab4 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -41,20 +41,21 @@ enum ac_func_attr {
 	AC_FUNC_ATTR_NOUNWIND     = (1 << 4),
 	AC_FUNC_ATTR_READNONE     = (1 << 5),
 	AC_FUNC_ATTR_READONLY     = (1 << 6),
+
+	/* Legacy intrinsic that needs attributes on function declarations
+	 * and they must match the internal LLVM definition exactly, otherwise
+	 * intrinsic selection fails.
+	 */
+	AC_FUNC_ATTR_LEGACY       = (1u << 31),
 };
 
 LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill);
 
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
 bool ac_is_sgpr_param(LLVMValueRef param);
-
-void
-ac_add_function_attr(LLVMValueRef function,
-                     int attr_idx,
-                     enum ac_func_attr attr);
-
-void
-ac_dump_module(LLVMModuleRef module);
+void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+			    unsigned attrib_mask);
+void ac_dump_module(LLVMModuleRef module);
 
 #ifdef __cplusplus
 }
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index a3310e1..a800ff0 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1052,12 +1052,13 @@ static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
 }
 
 static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
-					  const char *intrin,
+					  const char *intrin, unsigned attr_mask,
 					  LLVMValueRef srcs[3])
 {
 	LLVMValueRef result;
 	LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
-	result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
+	result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3,
+					AC_FUNC_ATTR_READNONE | attr_mask);
 
 	result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
 	return result;
@@ -1431,10 +1432,12 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
 		                              to_float_type(ctx, def_type), src[0], src[1], src[2]);
 		break;
 	case nir_op_ibitfield_extract:
-		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
+		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32",
+					       AC_FUNC_ATTR_LEGACY, src);
 		break;
 	case nir_op_ubitfield_extract:
-		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
+		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32",
+					       AC_FUNC_ATTR_LEGACY, src);
 		break;
 	case nir_op_bitfield_insert:
 		result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
@@ -1666,8 +1669,9 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
 		size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
-					   txq_args, txq_arg_count,
-					   AC_FUNC_ATTR_READNONE);
+					      txq_args, txq_arg_count,
+					      AC_FUNC_ATTR_READNONE |
+					      AC_FUNC_ATTR_LEGACY);
 
 		for (c = 0; c < 2; c++) {
 			half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
@@ -1691,7 +1695,8 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
 
 	tinfo->args[0] = coord;
 	return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
-				   AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
+				      AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
+				      AC_FUNC_ATTR_LEGACY);
 
 }
 
@@ -1759,7 +1764,8 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
 		}
 	}
 	return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
-				   AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
+				      AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
+				      AC_FUNC_ATTR_LEGACY);
 
 }
 
@@ -2037,7 +2043,9 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
 				     offset, "")
 		};
 		results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
-						 params, 2, AC_FUNC_ATTR_READNONE);
+						    params, 2,
+						    AC_FUNC_ATTR_READNONE |
+						    AC_FUNC_ATTR_LEGACY);
 	}
 
 
@@ -2138,7 +2146,9 @@ load_gs_input(struct nir_to_llvm_context *ctx,
 		args[8] = ctx->i32zero; /* TFE */
 
 		value[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
-					    ctx->i32, args, 9, AC_FUNC_ATTR_READONLY);
+						  ctx->i32, args, 9,
+						  AC_FUNC_ATTR_READONLY |
+						  AC_FUNC_ATTR_LEGACY);
 	}
 	result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
 
@@ -2833,7 +2843,9 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
 	params[9] = ctx->i32zero;
 
 	res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
-				  params, 10, AC_FUNC_ATTR_READNONE);
+				     params, 10,
+				     AC_FUNC_ATTR_READNONE |
+				     AC_FUNC_ATTR_LEGACY);
 
 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
 	    glsl_sampler_type_is_array(type)) {
@@ -2877,7 +2889,7 @@ static void emit_discard_if(struct nir_to_llvm_context *ctx,
 			       ctx->f32zero, "");
 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
 			       ctx->voidt,
-			       &cond, 1, 0);
+			       &cond, 1, AC_FUNC_ATTR_LEGACY);
 }
 
 static LLVMValueRef
@@ -3134,7 +3146,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
 			       LLVMConstReal(ctx->f32, 1.0f),
 			       LLVMConstReal(ctx->f32, -1.0f), "");
 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
-			    ctx->voidt, &kill, 1, 0);
+			    ctx->voidt, &kill, 1, AC_FUNC_ATTR_LEGACY);
 
 	/* loop num outputs */
 	idx = 0;
@@ -3324,7 +3336,7 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx,
 		ctx->shader_info->fs.can_discard = true;
 		ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
 				       ctx->voidt,
-				       NULL, 0, 0);
+				       NULL, 0, AC_FUNC_ATTR_LEGACY);
 		break;
 	case nir_intrinsic_discard_if:
 		emit_discard_if(ctx, instr);
@@ -4064,7 +4076,8 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
 		args[2] = buffer_index;
 		input = ac_emit_llvm_intrinsic(&ctx->ac,
 			"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
-			AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
+			AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
+			AC_FUNC_ATTR_LEGACY);
 
 		for (unsigned chan = 0; chan < 4; chan++) {
 			LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
@@ -4415,8 +4428,9 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 				LLVMValueRef packed;
 
 				packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16",
-							     ctx->i32, pack_args, 2,
-							     AC_FUNC_ATTR_READNONE);
+								ctx->i32, pack_args, 2,
+								AC_FUNC_ATTR_READNONE |
+								AC_FUNC_ATTR_LEGACY);
 				args[chan + 5] = packed;
 			}
 			break;
@@ -4601,7 +4615,8 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
 			ac_emit_llvm_intrinsic(&ctx->ac,
 					       "llvm.SI.export",
 					       ctx->voidt,
-					       args, 9, 0);
+					       args, 9,
+					       AC_FUNC_ATTR_LEGACY);
 		}
 	}
 
@@ -4656,7 +4671,8 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
 		ac_emit_llvm_intrinsic(&ctx->ac,
 				       "llvm.SI.export",
 				       ctx->voidt,
-				       pos_args[i], 9, 0);
+				       pos_args[i], 9,
+				       AC_FUNC_ATTR_LEGACY);
 	}
 
 	ctx->shader_info->vs.pos_exports = num_pos_exports;
@@ -4720,7 +4736,8 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
 		return; /* unnecessary NULL export */
 
 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
-			    ctx->voidt, args, 9, 0);
+			       ctx->voidt, args, 9,
+			       AC_FUNC_ATTR_LEGACY);
 }
 
 static void
@@ -4764,7 +4781,8 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
 
 	args[0] = LLVMConstInt(ctx->i32, mask, false);
 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
-			    ctx->voidt, args, 9, 0);
+			       ctx->voidt, args, 9,
+			       AC_FUNC_ATTR_LEGACY);
 }
 
 static void
@@ -5219,7 +5237,8 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
 			value = ac_emit_llvm_intrinsic(&ctx->ac,
 						       "llvm.SI.buffer.load.dword.i32.i32",
 						       ctx->i32, args, 9,
-						       AC_FUNC_ATTR_READONLY);
+						       AC_FUNC_ATTR_READONLY |
+						       AC_FUNC_ATTR_LEGACY);
 
 			LLVMBuildStore(ctx->builder,
 				       to_float(ctx, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index 049671a..0b25ae5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -159,28 +159,56 @@ static const char *attr_to_str(enum lp_func_attr attr)
 #endif
 
 void
-lp_add_function_attr(LLVMValueRef function,
-                     int attr_idx,
-                     enum lp_func_attr attr)
+lp_add_function_attr(LLVMValueRef function_or_call,
+                     int attr_idx, enum lp_func_attr attr)
 {
 
 #if HAVE_LLVM < 0x0400
    LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr);
    if (attr_idx == -1) {
-      LLVMAddFunctionAttr(function, llvm_attr);
+      LLVMAddFunctionAttr(function_or_call, llvm_attr);
    } else {
-      LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
+      LLVMAddAttribute(LLVMGetParam(function_or_call, attr_idx - 1), llvm_attr);
    }
 #else
-   LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
+
+   LLVMModuleRef module;
+   if (LLVMIsAFunction(function_or_call)) {
+      module = LLVMGetGlobalParent(function_or_call);
+   } else {
+      LLVMBasicBlockRef bb = LLVMGetInstructionParent(function_or_call);
+      LLVMValueRef function = LLVMGetBasicBlockParent(bb);
+      module = LLVMGetGlobalParent(function);
+   }
+   LLVMContextRef ctx = LLVMGetModuleContext(module);
+
    const char *attr_name = attr_to_str(attr);
    unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
                                                       strlen(attr_name));
-   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
-   LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
+
+   if (LLVMIsAFunction(function_or_call))
+      LLVMAddAttributeAtIndex(function_or_call, attr_idx, llvm_attr);
+   else
+      LLVMAddCallSiteAttribute(function_or_call, attr_idx, llvm_attr);
 #endif
 }
 
+static void
+lp_add_func_attributes(LLVMValueRef function, unsigned attrib_mask)
+{
+   /* NoUnwind indicates that the intrinsic never raises a C++ exception.
+    * Set it for all intrinsics.
+    */
+   attrib_mask |= LP_FUNC_ATTR_NOUNWIND;
+   attrib_mask &= ~LP_FUNC_ATTR_LEGACY;
+
+   while (attrib_mask) {
+      enum lp_func_attr attr = 1u << u_bit_scan(&attrib_mask);
+      lp_add_function_attr(function, -1, attr);
+   }
+}
+
 LLVMValueRef
 lp_build_intrinsic(LLVMBuilderRef builder,
                    const char *name,
@@ -190,7 +218,9 @@ lp_build_intrinsic(LLVMBuilderRef builder,
                    unsigned attr_mask)
 {
    LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
-   LLVMValueRef function;
+   LLVMValueRef function, call;
+   bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
+                             !(attr_mask & LP_FUNC_ATTR_LEGACY);
 
    function = LLVMGetNamedFunction(module, name);
    if(!function) {
@@ -206,22 +236,18 @@ lp_build_intrinsic(LLVMBuilderRef builder,
 
       function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
 
-      /* NoUnwind indicates that the intrinsic never raises a C++ exception.
-       * Set it for all intrinsics.
-       */
-      attr_mask |= LP_FUNC_ATTR_NOUNWIND;
-
-      while (attr_mask) {
-         enum lp_func_attr attr = 1 << u_bit_scan(&attr_mask);
-         lp_add_function_attr(function, -1, attr);
-      }
+      if (!set_callsite_attrs)
+         lp_add_func_attributes(function, attr_mask);
 
       if (gallivm_debug & GALLIVM_DEBUG_IR) {
          lp_debug_dump_value(function);
       }
    }
 
-   return LLVMBuildCall(builder, function, args, num_args, "");
+   call = LLVMBuildCall(builder, function, args, num_args, "");
+   if (set_callsite_attrs)
+      lp_add_func_attributes(call, attr_mask);
+   return call;
 }
 
 
@@ -309,9 +335,9 @@ lp_build_intrinsic_binary_anylength(struct gallivm_state *gallivm,
       unsigned num_vec = src_type.length / intrin_length;
       LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
 
-      /* don't support arbitrary size here as this is so yuck */
+      /* don't support arbitrary size here as this is so yuck */
       if (src_type.length % intrin_length) {
-         /* FIXME: This is something which should be supported
+         /* FIXME: This is something which should be supported
           * but there doesn't seem to be any need for it currently
           * so crash and burn.
           */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index 039e9ab..0032df5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -54,6 +54,12 @@ enum lp_func_attr {
    LP_FUNC_ATTR_NOUNWIND     = (1 << 4),
    LP_FUNC_ATTR_READNONE     = (1 << 5),
    LP_FUNC_ATTR_READONLY     = (1 << 6),
+
+   /* Legacy intrinsic that needs attributes on function declarations
+    * and they must match the internal LLVM definition exactly, otherwise
+    * intrinsic selection fails.
+    */
+   LP_FUNC_ATTR_LEGACY       = (1u << 31),
 };
 
 void
@@ -70,9 +76,8 @@ lp_declare_intrinsic(LLVMModuleRef module,
                      unsigned num_args);
 
 void
-lp_add_function_attr(LLVMValueRef function,
-                     int attr_idx,
-                     enum lp_func_attr attr);
+lp_add_function_attr(LLVMValueRef function_or_call,
+                     int attr_idx, enum lp_func_attr attr);
 
 LLVMValueRef
 lp_build_intrinsic(LLVMBuilderRef builder,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 8e51ae8..212a9be 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -401,7 +401,8 @@ static void declare_input_vs(
 
 		input[i] = lp_build_intrinsic(gallivm->builder,
 			"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
-			LP_FUNC_ATTR_READNONE);
+			LP_FUNC_ATTR_READNONE |
+			LP_FUNC_ATTR_LEGACY);
 	}
 
 	/* Break up the vec4 into individual components */
@@ -1123,14 +1124,16 @@ static LLVMValueRef fetch_input_gs(
 	value = lp_build_intrinsic(gallivm->builder,
 				   "llvm.SI.buffer.load.dword.i32.i32",
 				   ctx->i32, args, 9,
-				   LP_FUNC_ATTR_READONLY);
+				   LP_FUNC_ATTR_READONLY |
+				   LP_FUNC_ATTR_LEGACY);
 	if (tgsi_type_is_64bit(type)) {
 		LLVMValueRef value2;
 		args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
 		value2 = lp_build_intrinsic(gallivm->builder,
 					    "llvm.SI.buffer.load.dword.i32.i32",
 					    ctx->i32, args, 9,
-					    LP_FUNC_ATTR_READONLY);
+					    LP_FUNC_ATTR_READONLY |
+					    LP_FUNC_ATTR_LEGACY);
 		return si_llvm_emit_fetch_64bit(bld_base, type,
 						value, value2);
 	}
@@ -1368,7 +1371,8 @@ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
 	LLVMValueRef args[2] = {resource, offset};
 
 	return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2,
-			       LP_FUNC_ATTR_READNONE);
+				  LP_FUNC_ATTR_READNONE |
+				  LP_FUNC_ATTR_LEGACY);
 }
 
 static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, LLVMValueRef sample_id)
@@ -1822,7 +1826,8 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
 			packed = lp_build_intrinsic(base->gallivm->builder,
 						    "llvm.SI.packf16",
 						    ctx->i32, pack_args, 2,
-						    LP_FUNC_ATTR_READNONE);
+						    LP_FUNC_ATTR_READNONE |
+						    LP_FUNC_ATTR_LEGACY);
 			args[chan + 5] =
 				LLVMBuildBitCast(base->gallivm->builder,
 						 packed, ctx->f32, "");
@@ -1954,10 +1959,10 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
 					lp_build_const_float(gallivm, -1.0f));
 
 		lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
-				   ctx->voidt, &arg, 1, 0);
+				   ctx->voidt, &arg, 1, LP_FUNC_ATTR_LEGACY);
 	} else {
 		lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kilp",
-				   ctx->voidt, NULL, 0, 0);
+				   ctx->voidt, NULL, 0, LP_FUNC_ATTR_LEGACY);
 	}
 }
 
@@ -2302,7 +2307,7 @@ handle_semantic:
 		} else {
 			lp_build_intrinsic(base->gallivm->builder,
 					   "llvm.SI.export", ctx->voidt,
-					   args, 9, 0);
+					   args, 9, LP_FUNC_ATTR_LEGACY);
 		}
 
 		if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
@@ -2388,7 +2393,8 @@ handle_semantic:
 			pos_args[i][2] = uint->one;
 
 		lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-				   ctx->voidt, pos_args[i], 9, 0);
+				   ctx->voidt, pos_args[i], 9,
+				   LP_FUNC_ATTR_LEGACY);
 	}
 }
 
@@ -2979,7 +2985,7 @@ static void si_emit_ps_exports(struct si_shader_context *ctx,
 	for (unsigned i = 0; i < exp->num; i++)
 		lp_build_intrinsic(ctx->gallivm.builder,
 				   "llvm.SI.export", ctx->voidt,
-				   exp->args[i], 9, 0);
+				   exp->args[i], 9, LP_FUNC_ATTR_LEGACY);
 }
 
 static void si_export_null(struct lp_build_tgsi_context *bld_base)
@@ -3000,7 +3006,7 @@ static void si_export_null(struct lp_build_tgsi_context *bld_base)
 	args[8] = base->undef; /* A */
 
 	lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-			   ctx->voidt, args, 9, 0);
+			   ctx->voidt, args, 9, LP_FUNC_ATTR_LEGACY);
 }
 
 /**
@@ -4089,7 +4095,7 @@ static void resq_emit(
 		out = lp_build_intrinsic(
 			builder, "llvm.SI.getresinfo.i32", emit_data->dst_type,
 			emit_data->args, emit_data->arg_count,
-			LP_FUNC_ATTR_READNONE);
+			LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
 
 		/* Divide the number of layers by 6 to get the number of cubes. */
 		if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
@@ -4326,7 +4332,7 @@ static void txq_emit(const struct lp_build_tgsi_action *action,
 	emit_data->output[emit_data->chan] = lp_build_intrinsic(
 		base->gallivm->builder, "llvm.SI.getresinfo.i32",
 		emit_data->dst_type, emit_data->args, emit_data->arg_count,
-		LP_FUNC_ATTR_READNONE);
+		LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
 
 	/* Divide the number of layers by 6 to get the number of cubes. */
 	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
@@ -4735,7 +4741,7 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx,
 	emit_data->output[emit_data->chan] =
 		lp_build_intrinsic(builder, intr_name, emit_data->dst_type,
 				   emit_data->args, emit_data->arg_count,
-				   LP_FUNC_ATTR_READNONE);
+				   LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
 }
 
 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
@@ -4759,7 +4765,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 			base->gallivm->builder,
 			"llvm.SI.vs.load.input", emit_data->dst_type,
 			emit_data->args, emit_data->arg_count,
-			LP_FUNC_ATTR_READNONE);
+			LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
 		return;
 	}
 
@@ -4836,7 +4842,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 	emit_data->output[emit_data->chan] = lp_build_intrinsic(
 		base->gallivm->builder, intr_name, emit_data->dst_type,
 		emit_data->args, emit_data->arg_count,
-		LP_FUNC_ATTR_READNONE);
+		LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
 }
 
 static void si_llvm_emit_txqs(
@@ -5125,7 +5131,7 @@ static void si_llvm_emit_vertex(
 				       lp_build_const_float(gallivm, -1.0f));
 
 		lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
-				   ctx->voidt, &kill, 1, 0);
+				   ctx->voidt, &kill, 1, LP_FUNC_ATTR_LEGACY);
 	} else {
 		lp_build_if(&if_state, gallivm, can_emit);
 	}
@@ -5747,7 +5753,8 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
 	/* The intrinsic kills the thread if arg < 0. */
 	bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0),
 			      LLVMConstReal(ctx->f32, -1), "");
-	lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, 0);
+	lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1,
+			   LP_FUNC_ATTR_LEGACY);
 }
 
 void si_shader_binary_read_config(struct ac_shader_binary *binary,
@@ -6315,9 +6322,10 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 				outputs[i].values[chan] =
 					LLVMBuildBitCast(gallivm->builder,
 						 lp_build_intrinsic(gallivm->builder,
-								 "llvm.SI.buffer.load.dword.i32.i32",
-								 ctx.i32, args, 9,
-								 LP_FUNC_ATTR_READONLY),
+								    "llvm.SI.buffer.load.dword.i32.i32",
+								    ctx.i32, args, 9,
+								    LP_FUNC_ATTR_READONLY |
+								    LP_FUNC_ATTR_LEGACY),
 						 ctx.f32, "");
 			}
 		}
@@ -7686,7 +7694,7 @@ static void si_build_vs_epilog_function(struct si_shader_context *ctx,
 
 		lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
 				   LLVMVoidTypeInContext(base->gallivm->context),
-				   args, 9, 0);
+				   args, 9, LP_FUNC_ATTR_LEGACY);
 	}
 
 	LLVMBuildRetVoid(gallivm->builder);
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index 10268e9..ee59fed 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -58,13 +58,9 @@ static void kil_emit(const struct lp_build_tgsi_action *action,
 		     struct lp_build_tgsi_context *bld_base,
 		     struct lp_build_emit_data *emit_data)
 {
-	unsigned i;
-	for (i = 0; i < emit_data->arg_count; i++) {
-		emit_data->output[i] = lp_build_intrinsic_unary(
-			bld_base->base.gallivm->builder,
-			action->intr_name,
-			emit_data->dst_type, emit_data->args[i]);
-	}
+	lp_build_intrinsic(bld_base->base.gallivm->builder,
+			   action->intr_name, emit_data->dst_type,
+			   &emit_data->args[0], 1, LP_FUNC_ATTR_LEGACY);
 }
 
 static void emit_icmp(const struct lp_build_tgsi_action *action,
@@ -507,7 +503,9 @@ static void emit_bfe(const struct lp_build_tgsi_action *action,
 
 	bfe_sm5 = lp_build_intrinsic(builder, action->intr_name,
 				     emit_data->dst_type, emit_data->args,
-				     emit_data->arg_count, LP_FUNC_ATTR_READNONE);
+				     emit_data->arg_count,
+				     LP_FUNC_ATTR_READNONE |
+				     LP_FUNC_ATTR_LEGACY);
 
 	/* Correct for GLSL semantics. */
 	cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],




More information about the mesa-commit mailing list