[Mesa-dev] [PATCH 2/2] gallivm, ac: add function attributes at call sites instead of declarations
Jose Fonseca
jfonseca at vmware.com
Wed Mar 1 16:29:21 UTC 2017
On 23/02/17 00:01, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> They can vary at call sites if the intrinsic is NOT a legacy SI intrinsic.
> We need this to force readnone or inaccessiblememonly on some amdgcn
> intrinsics.
>
> This is only used with LLVM 4.0 and later. Intrinsics only used with
> LLVM <= 3.9 don't need the LEGACY flag.
>
> gallivm and ac code is in the same patch, because splitting would be
> more complicated with all the LEGACY uses all over the place.
> ---
> src/amd/common/ac_llvm_build.c | 23 ++++----
> src/amd/common/ac_llvm_util.c | 31 +++++++----
> src/amd/common/ac_llvm_util.h | 17 +++---
> src/amd/common/ac_nir_to_llvm.c | 63 ++++++++++++++--------
> src/gallium/auxiliary/draw/draw_llvm.c | 6 ++-
> src/gallium/auxiliary/gallivm/lp_bld_intr.c | 51 ++++++++++++------
> src/gallium/auxiliary/gallivm/lp_bld_intr.h | 11 ++--
> src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 3 +-
> src/gallium/drivers/llvmpipe/lp_state_fs.c | 3 +-
> src/gallium/drivers/llvmpipe/lp_state_setup.c | 7 +--
> src/gallium/drivers/radeonsi/si_shader.c | 64 ++++++++++++++---------
> src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 14 +++--
> 12 files changed, 184 insertions(+), 109 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 2f25b14..5c8b7f7 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -75,47 +75,50 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
> "amdgpu.uniform", 14);
>
> ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
> }
>
> LLVMValueRef
> ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
> LLVMTypeRef return_type, LLVMValueRef *params,
> unsigned param_count, unsigned attrib_mask)
> {
> - LLVMValueRef function;
> + LLVMValueRef function, call;
> + bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
> + !(attrib_mask & AC_FUNC_ATTR_LEGACY);
>
> function = LLVMGetNamedFunction(ctx->module, name);
> if (!function) {
> LLVMTypeRef param_types[32], function_type;
> unsigned i;
>
> assert(param_count <= 32);
>
> for (i = 0; i < param_count; ++i) {
> assert(params[i]);
> param_types[i] = LLVMTypeOf(params[i]);
> }
> function_type =
> LLVMFunctionType(return_type, param_types, param_count, 0);
> function = LLVMAddFunction(ctx->module, name, function_type);
>
> LLVMSetFunctionCallConv(function, LLVMCCallConv);
> LLVMSetLinkage(function, LLVMExternalLinkage);
>
> - attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
> - while (attrib_mask) {
> - enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
> - ac_add_function_attr(function, -1, attr);
> - }
> + if (!set_callsite_attrs)
> + ac_add_func_attributes(ctx->context, function, attrib_mask);
> }
> - return LLVMBuildCall(ctx->builder, function, params, param_count, "");
> +
> + call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
> + if (set_callsite_attrs)
> + ac_add_func_attributes(ctx->context, call, attrib_mask);
> + return call;
> }
>
> LLVMValueRef
> ac_build_gather_values_extended(struct ac_llvm_context *ctx,
> LLVMValueRef *values,
> unsigned value_count,
> unsigned value_stride,
> bool load)
> {
> LLVMBuilderRef builder = ctx->builder;
> @@ -524,21 +527,22 @@ ac_build_tbuffer_store(struct ac_llvm_context *ctx,
> /* The instruction offset field has 12 bits */
> assert(offen || inst_offset < (1 << 12));
>
> /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
> unsigned func = CLAMP(num_channels, 1, 3) - 1;
> const char *types[] = {"i32", "v2i32", "v4i32"};
> char name[256];
> snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
>
> ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
> - args, ARRAY_SIZE(args), 0);
> + args, ARRAY_SIZE(args),
> + AC_FUNC_ATTR_LEGACY);
> }
>
> void
> ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
> LLVMValueRef rsrc,
> LLVMValueRef vdata,
> unsigned num_channels,
> LLVMValueRef vaddr,
> LLVMValueRef soffset,
> unsigned inst_offset)
> @@ -836,12 +840,13 @@ LLVMValueRef ac_emit_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
>
> const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." :
> "llvm.AMDIL.clamp.";
> LLVMValueRef args[3] = {
> value,
> LLVMConstReal(ctx->f32, 0),
> LLVMConstReal(ctx->f32, 1),
> };
>
> return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3,
> - AC_FUNC_ATTR_READNONE);
> + AC_FUNC_ATTR_READNONE |
> + AC_FUNC_ATTR_LEGACY);
> }
> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
> index be127c5..fb525dd 100644
> --- a/src/amd/common/ac_llvm_util.c
> +++ b/src/amd/common/ac_llvm_util.c
> @@ -17,21 +17,21 @@
> * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> * USE OR OTHER DEALINGS IN THE SOFTWARE.
> *
> * The above copyright notice and this permission notice (including the
> * next paragraph) shall be included in all copies or substantial portions
> * of the Software.
> *
> */
> /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
> #include "ac_llvm_util.h"
> -
> +#include "util/bitscan.h"
> #include <llvm-c/Core.h>
>
> #include "c11/threads.h"
>
> #include <assert.h>
> #include <stdio.h>
> #include <string.h>
>
> static void ac_init_llvm_target()
> {
> @@ -173,40 +173,53 @@ static const char *attr_to_str(enum ac_func_attr attr)
> case AC_FUNC_ATTR_READNONE: return "readnone";
> case AC_FUNC_ATTR_READONLY: return "readonly";
> default:
> fprintf(stderr, "Unhandled function attribute: %x\n", attr);
> return 0;
> }
> }
>
> #endif
>
> -void
> -ac_add_function_attr(LLVMValueRef function,
> - int attr_idx,
> - enum ac_func_attr attr)
> +static void
> +ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
> + int attr_idx, enum ac_func_attr attr)
> {
> -
> #if HAVE_LLVM < 0x0400
> LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
> if (attr_idx == -1) {
> LLVMAddFunctionAttr(function, llvm_attr);
> } else {
> LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
> }
> #else
> - LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
> const char *attr_name = attr_to_str(attr);
> unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
> strlen(attr_name));
> - LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
> - LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
> + LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
> +
> + if (LLVMIsAFunction(function))
> + LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
> + else
> + LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
> #endif
> }
>
> +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
> + unsigned attrib_mask)
> +{
> + attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
> + attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
> +
> + while (attrib_mask) {
> + enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
> + ac_add_function_attr(ctx, function, -1, attr);
> + }
> +}
> +
> void
> ac_dump_module(LLVMModuleRef module)
> {
> char *str = LLVMPrintModuleToString(module);
> fprintf(stderr, "%s", str);
> LLVMDisposeMessage(str);
> }
> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
> index 93d3d27..4fe4ab4 100644
> --- a/src/amd/common/ac_llvm_util.h
> +++ b/src/amd/common/ac_llvm_util.h
> @@ -34,28 +34,29 @@ extern "C" {
> #endif
>
> enum ac_func_attr {
> AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
> AC_FUNC_ATTR_BYVAL = (1 << 1),
> AC_FUNC_ATTR_INREG = (1 << 2),
> AC_FUNC_ATTR_NOALIAS = (1 << 3),
> AC_FUNC_ATTR_NOUNWIND = (1 << 4),
> AC_FUNC_ATTR_READNONE = (1 << 5),
> AC_FUNC_ATTR_READONLY = (1 << 6),
> +
> + /* Legacy intrinsic that needs attributes on function declarations
> + * and they must match the internal LLVM definition exactly, otherwise
> + * intrinsic selection fails.
> + */
> + AC_FUNC_ATTR_LEGACY = (1u << 31),
> };
>
> LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill);
>
> void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
> bool ac_is_sgpr_param(LLVMValueRef param);
> -
> -void
> -ac_add_function_attr(LLVMValueRef function,
> - int attr_idx,
> - enum ac_func_attr attr);
> -
> -void
> -ac_dump_module(LLVMModuleRef module);
> +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
> + unsigned attrib_mask);
> +void ac_dump_module(LLVMModuleRef module);
>
> #ifdef __cplusplus
> }
> #endif
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index b3dc63c..4ec19d5 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1039,26 +1039,27 @@ static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
> src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
> src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
>
> dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
> dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
> result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
> return result;
> }
>
> static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
> - const char *intrin,
> + const char *intrin, unsigned attr_mask,
> LLVMValueRef srcs[3])
> {
> LLVMValueRef result;
> LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
> - result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
> + result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3,
> + AC_FUNC_ATTR_READNONE | attr_mask);
>
> result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
> return result;
> }
>
> static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
> LLVMValueRef src0, LLVMValueRef src1,
> LLVMValueRef src2, LLVMValueRef src3)
> {
> LLVMValueRef bfi_args[3], result;
> @@ -1418,24 +1419,26 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
> break;
> case nir_op_fmin:
> result = emit_intrin_2f_param(ctx, "llvm.minnum",
> to_float_type(ctx, def_type), src[0], src[1]);
> break;
> case nir_op_ffma:
> result = emit_intrin_3f_param(ctx, "llvm.fma",
> to_float_type(ctx, def_type), src[0], src[1], src[2]);
> break;
> case nir_op_ibitfield_extract:
> - result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
> + result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32",
> + AC_FUNC_ATTR_LEGACY, src);
> break;
> case nir_op_ubitfield_extract:
> - result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
> + result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32",
> + AC_FUNC_ATTR_LEGACY, src);
> break;
> case nir_op_bitfield_insert:
> result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
> break;
> case nir_op_bitfield_reverse:
> result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
> break;
> case nir_op_bit_count:
> result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
> break;
> @@ -1635,22 +1638,23 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
> txq_args[txq_arg_count++] = tinfo->args[1];
> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */
> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */
> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
> size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
> - txq_args, txq_arg_count,
> - AC_FUNC_ATTR_READNONE);
> + txq_args, txq_arg_count,
> + AC_FUNC_ATTR_READNONE |
> + AC_FUNC_ATTR_LEGACY);
>
> for (c = 0; c < 2; c++) {
> half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
> LLVMConstInt(ctx->i32, c, false), "");
> half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
> half_texel[c] = ac_emit_fdiv(&ctx->ac, ctx->f32one, half_texel[c]);
> half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
> LLVMConstReal(ctx->f32, -0.5), "");
> }
> }
> @@ -1660,21 +1664,22 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
> LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
> tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
> tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
> tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
> tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
> coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
> }
>
> tinfo->args[0] = coord;
> return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
> - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
> + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
> + AC_FUNC_ATTR_LEGACY);
>
> }
>
> static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
> nir_tex_instr *instr,
> struct ac_tex_info *tinfo)
> {
> const char *name = "llvm.SI.image.sample";
> const char *infix = "";
> char intr_name[127];
> @@ -1728,21 +1733,22 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
> has_offset ? ".o" : "", type);
>
> if (instr->op == nir_texop_tg4) {
> enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
> if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
> return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name,
> (int)has_offset + (int)is_shadow);
> }
> }
> return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
> - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
> + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
> + AC_FUNC_ATTR_LEGACY);
>
> }
>
> static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
> nir_intrinsic_instr *instr)
> {
> LLVMValueRef index = get_src(ctx, instr->src[0]);
> unsigned desc_set = nir_intrinsic_desc_set(instr);
> unsigned binding = nir_intrinsic_binding(instr);
> LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
> @@ -2006,21 +2012,23 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
> if (instr->dest.ssa.bit_size == 64)
> num_components *= 2;
>
> for (unsigned i = 0; i < num_components; ++i) {
> LLVMValueRef params[] = {
> rsrc,
> LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
> offset, "")
> };
> results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
> - params, 2, AC_FUNC_ATTR_READNONE);
> + params, 2,
> + AC_FUNC_ATTR_READNONE |
> + AC_FUNC_ATTR_LEGACY);
> }
>
>
> ret = ac_build_gather_values(&ctx->ac, results, instr->num_components);
> return LLVMBuildBitCast(ctx->builder, ret,
> get_def_type(ctx, &instr->dest.ssa), "");
> }
>
> static void
> radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
> @@ -2103,21 +2111,23 @@ load_gs_input(struct nir_to_llvm_context *ctx,
> args[1] = vtx_offset;
> args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false);
> args[3] = ctx->i32zero;
> args[4] = ctx->i32one; /* OFFEN */
> args[5] = ctx->i32zero; /* IDXEN */
> args[6] = ctx->i32one; /* GLC */
> args[7] = ctx->i32zero; /* SLC */
> args[8] = ctx->i32zero; /* TFE */
>
> value[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
> - ctx->i32, args, 9, AC_FUNC_ATTR_READONLY);
> + ctx->i32, args, 9,
> + AC_FUNC_ATTR_READONLY |
> + AC_FUNC_ATTR_LEGACY);
> }
> result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
>
> return result;
> }
>
> static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
> nir_intrinsic_instr *instr)
> {
> LLVMValueRef values[8];
> @@ -2685,21 +2695,23 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
> params[2] = LLVMConstInt(ctx->i32, 15, false);
> params[3] = ctx->i32zero;
> params[4] = ctx->i32zero;
> params[5] = da ? ctx->i32one : ctx->i32zero;
> params[6] = ctx->i32zero;
> params[7] = ctx->i32zero;
> params[8] = ctx->i32zero;
> params[9] = ctx->i32zero;
>
> res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
> - params, 10, AC_FUNC_ATTR_READNONE);
> + params, 10,
> + AC_FUNC_ATTR_READNONE |
> + AC_FUNC_ATTR_LEGACY);
>
> if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
> glsl_sampler_type_is_array(type)) {
> LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
> LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
> LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
> z = LLVMBuildSDiv(ctx->builder, z, six, "");
> res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
> }
> return res;
> @@ -2729,21 +2741,21 @@ static void emit_discard_if(struct nir_to_llvm_context *ctx,
>
> cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
> get_src(ctx, instr->src[0]),
> ctx->i32zero, "");
>
> cond = LLVMBuildSelect(ctx->builder, cond,
> LLVMConstReal(ctx->f32, -1.0f),
> ctx->f32zero, "");
> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
> ctx->voidt,
> - &cond, 1, 0);
> + &cond, 1, AC_FUNC_ATTR_LEGACY);
> }
>
> static LLVMValueRef
> visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
> {
> LLVMValueRef result;
> LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
> result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
> LLVMConstInt(ctx->i32, 0xfc0, false), "");
>
> @@ -2984,21 +2996,21 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
> * have any effect, and GS threads have no externally observable
> * effects other than emitting vertices.
> */
> can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
> LLVMConstInt(ctx->i32, ctx->gs_max_out_vertices, false), "");
>
> kill = LLVMBuildSelect(ctx->builder, can_emit,
> LLVMConstReal(ctx->f32, 1.0f),
> LLVMConstReal(ctx->f32, -1.0f), "");
> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
> - ctx->voidt, &kill, 1, 0);
> + ctx->voidt, &kill, 1, AC_FUNC_ATTR_LEGACY);
>
> /* loop num outputs */
> idx = 0;
> for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
> LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
> if (!(ctx->output_mask & (1ull << i)))
> continue;
>
> for (unsigned j = 0; j < 4; j++) {
> LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
> @@ -3144,21 +3156,21 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx,
> case nir_intrinsic_image_atomic_comp_swap:
> result = visit_image_atomic(ctx, instr);
> break;
> case nir_intrinsic_image_size:
> result = visit_image_size(ctx, instr);
> break;
> case nir_intrinsic_discard:
> ctx->shader_info->fs.can_discard = true;
> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
> ctx->voidt,
> - NULL, 0, 0);
> + NULL, 0, AC_FUNC_ATTR_LEGACY);
> break;
> case nir_intrinsic_discard_if:
> emit_discard_if(ctx, instr);
> break;
> case nir_intrinsic_memory_barrier:
> emit_waitcnt(ctx);
> break;
> case nir_intrinsic_barrier:
> emit_barrier(ctx);
> break;
> @@ -3924,21 +3936,22 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
>
> for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
> t_offset = LLVMConstInt(ctx->i32, index + i, false);
>
> t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
> args[0] = t_list;
> args[1] = LLVMConstInt(ctx->i32, 0, false);
> args[2] = buffer_index;
> input = ac_emit_llvm_intrinsic(&ctx->ac,
> "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
> - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
> + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
> + AC_FUNC_ATTR_LEGACY);
>
> for (unsigned chan = 0; chan < 4; chan++) {
> LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
> ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
> to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
> input, llvm_chan, ""));
> }
> }
> }
>
> @@ -4257,22 +4270,23 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
> args[4] = ctx->i32one;
>
> for (unsigned chan = 0; chan < 2; chan++) {
> LLVMValueRef pack_args[2] = {
> values[2 * chan],
> values[2 * chan + 1]
> };
> LLVMValueRef packed;
>
> packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16",
> - ctx->i32, pack_args, 2,
> - AC_FUNC_ATTR_READNONE);
> + ctx->i32, pack_args, 2,
> + AC_FUNC_ATTR_READNONE |
> + AC_FUNC_ATTR_LEGACY);
> args[chan + 5] = packed;
> }
> break;
>
> case V_028714_SPI_SHADER_UNORM16_ABGR:
> for (unsigned chan = 0; chan < 4; chan++) {
> val[chan] = emit_float_saturate(ctx, values[chan], 0, 1);
> val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
> LLVMConstReal(ctx->f32, 65535), "");
> val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
> @@ -4443,21 +4457,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
> si_llvm_init_export_args(ctx, values, target, args);
>
> if (target >= V_008DFC_SQ_EXP_POS &&
> target <= (V_008DFC_SQ_EXP_POS + 3)) {
> memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
> args, sizeof(args));
> } else {
> ac_emit_llvm_intrinsic(&ctx->ac,
> "llvm.SI.export",
> ctx->voidt,
> - args, 9, 0);
> + args, 9,
> + AC_FUNC_ATTR_LEGACY);
> }
> }
>
> /* We need to add the position output manually if it's missing. */
> if (!pos_args[0][0]) {
> pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
> pos_args[0][1] = ctx->i32zero; /* EXEC mask */
> pos_args[0][2] = ctx->i32zero; /* last export? */
> pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false);
> pos_args[0][4] = ctx->i32zero; /* COMPR flag */
> @@ -4498,21 +4513,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
> if (!pos_args[i][0])
> continue;
>
> /* Specify the target we are exporting */
> pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false);
> if (pos_idx == num_pos_exports)
> pos_args[i][2] = ctx->i32one;
> ac_emit_llvm_intrinsic(&ctx->ac,
> "llvm.SI.export",
> ctx->voidt,
> - pos_args[i], 9, 0);
> + pos_args[i], 9,
> + AC_FUNC_ATTR_LEGACY);
> }
>
> ctx->shader_info->vs.pos_exports = num_pos_exports;
> ctx->shader_info->vs.param_exports = param_count;
> }
>
> static void
> handle_es_outputs_post(struct nir_to_llvm_context *ctx)
> {
> int j;
> @@ -4554,21 +4570,22 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
> si_llvm_init_export_args(ctx, color, param,
> args);
>
> if (is_last) {
> args[1] = ctx->i32one; /* whether the EXEC mask is valid */
> args[2] = ctx->i32one; /* DONE bit */
> } else if (args[0] == ctx->i32zero)
> return; /* unnecessary NULL export */
>
> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
> - ctx->voidt, args, 9, 0);
> + ctx->voidt, args, 9,
> + AC_FUNC_ATTR_LEGACY);
> }
>
> static void
> si_export_mrt_z(struct nir_to_llvm_context *ctx,
> LLVMValueRef depth, LLVMValueRef stencil,
> LLVMValueRef samplemask)
> {
> LLVMValueRef args[9];
> unsigned mask = 0;
> args[1] = ctx->i32one; /* whether the EXEC mask is valid */
> @@ -4598,21 +4615,22 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
> }
>
> /* SI (except OLAND) has a bug that it only looks
> * at the X writemask component. */
> if (ctx->options->chip_class == SI &&
> ctx->options->family != CHIP_OLAND)
> mask |= 0x01;
>
> args[0] = LLVMConstInt(ctx->i32, mask, false);
> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
> - ctx->voidt, args, 9, 0);
> + ctx->voidt, args, 9,
> + AC_FUNC_ATTR_LEGACY);
> }
>
> static void
> handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
> {
> unsigned index = 0;
> LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
>
> for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
> LLVMValueRef values[4];
> @@ -5021,21 +5039,22 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
>
> for (unsigned j = 0; j < 4; j++) {
> LLVMValueRef value;
> args[2] = LLVMConstInt(ctx->i32,
> (idx * 4 + j) *
> ctx->gs_max_out_vertices * 16 * 4, false);
>
> value = ac_emit_llvm_intrinsic(&ctx->ac,
> "llvm.SI.buffer.load.dword.i32.i32",
> ctx->i32, args, 9,
> - AC_FUNC_ATTR_READONLY);
> + AC_FUNC_ATTR_READONLY |
> + AC_FUNC_ATTR_LEGACY);
>
> LLVMBuildStore(ctx->builder,
> to_float(ctx, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
> }
> idx++;
> }
> handle_vs_outputs_post(ctx);
> }
>
> void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
> diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
> index 8952dc8..586a9be 100644
> --- a/src/gallium/auxiliary/draw/draw_llvm.c
> +++ b/src/gallium/auxiliary/draw/draw_llvm.c
> @@ -1588,21 +1588,22 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
>
> func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
> arg_types, num_arg_types, 0);
>
> variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
> variant->function = variant_func;
>
> LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
> for (i = 0; i < num_arg_types; ++i)
> if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
> - lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
> + lp_add_function_attr(context, variant_func, i + 1,
> + LP_FUNC_ATTR_NOALIAS);
>
> context_ptr = LLVMGetParam(variant_func, 0);
> io_ptr = LLVMGetParam(variant_func, 1);
> vbuffers_ptr = LLVMGetParam(variant_func, 2);
> count = LLVMGetParam(variant_func, 3);
> /*
> * XXX: the maxelt part is unused. Not really useful, since we cannot
> * get index buffer overflows due to vsplit (which provides its own
> * elts buffer, with a different size than what's passed in here).
> */
> @@ -2262,21 +2263,22 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
> func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
>
> variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
>
> variant->function = variant_func;
>
> LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
>
> for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
> if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
> - lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
> + lp_add_function_attr(context, variant_func, i + 1,
> + LP_FUNC_ATTR_NOALIAS);
>
> context_ptr = LLVMGetParam(variant_func, 0);
> input_array = LLVMGetParam(variant_func, 1);
> io_ptr = LLVMGetParam(variant_func, 2);
> num_prims = LLVMGetParam(variant_func, 3);
> system_values.instance_id = LLVMGetParam(variant_func, 4);
> prim_id_ptr = LLVMGetParam(variant_func, 5);
> system_values.invocation_id = LLVMGetParam(variant_func, 6);
>
> lp_build_name(context_ptr, "context");
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> index 049671a..1b50e68 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> @@ -152,83 +152,100 @@ static const char *attr_to_str(enum lp_func_attr attr)
> case LP_FUNC_ATTR_READONLY: return "readonly";
> default:
> _debug_printf("Unhandled function attribute: %x\n", attr);
> return 0;
> }
> }
>
> #endif
>
> void
> -lp_add_function_attr(LLVMValueRef function,
> - int attr_idx,
> - enum lp_func_attr attr)
> +lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
If "function" is not always a function, then it would be better to rename
the parameter to functionOrCall or something like that.
> + int attr_idx, enum lp_func_attr attr)
> {
>
> #if HAVE_LLVM < 0x0400
> LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr);
> if (attr_idx == -1) {
> LLVMAddFunctionAttr(function, llvm_attr);
> } else {
> LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
> }
> #else
> - LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
Even when LLVMIsAFunction(function) is false, we could still get the
LLVMContextRef:
LLVMModuleRef module;
if (LLVMIsAFunction(functionOrCall)) {
module = LLVMGetGlobalParent(functionOrCall);
} else {
LLVMBasicBlockRef bb = LLVMGetInstructionParent(functionOrCall);
LLVMValueRef function = LLVMGetBasicBlockParent(bb);
module = LLVMGetGlobalParent(function);
}
LLVMContextRef context = LLVMGetModuleContext(module);
This would allow us to leave the lp_add_function_attr prototype unchanged.
Otherwise looks good to me.
Reviewed-by: Jose Fonseca <jfonseca at vmware.com>
> const char *attr_name = attr_to_str(attr);
> unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
> strlen(attr_name));
> - LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
> - LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
> + LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
> +
> + if (LLVMIsAFunction(function))
> + LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
> + else
> + LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
> #endif
> }
>
> +static void
> +lp_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
> + unsigned attrib_mask)
> +{
> + /* NoUnwind indicates that the intrinsic never raises a C++ exception.
> + * Set it for all intrinsics.
> + */
> + attrib_mask |= LP_FUNC_ATTR_NOUNWIND;
> + attrib_mask &= ~LP_FUNC_ATTR_LEGACY;
> +
> + while (attrib_mask) {
> + enum lp_func_attr attr = 1u << u_bit_scan(&attrib_mask);
> + lp_add_function_attr(ctx, function, -1, attr);
> + }
> +}
> +
> LLVMValueRef
> lp_build_intrinsic(LLVMBuilderRef builder,
> const char *name,
> LLVMTypeRef ret_type,
> LLVMValueRef *args,
> unsigned num_args,
> unsigned attr_mask)
> {
> LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
> - LLVMValueRef function;
> + LLVMContextRef ctx = LLVMGetModuleContext(module);
> + LLVMValueRef function, call;
> + bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
> + !(attr_mask & LP_FUNC_ATTR_LEGACY);
>
> function = LLVMGetNamedFunction(module, name);
> if(!function) {
> LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
> unsigned i;
>
> assert(num_args <= LP_MAX_FUNC_ARGS);
>
> for(i = 0; i < num_args; ++i) {
> assert(args[i]);
> arg_types[i] = LLVMTypeOf(args[i]);
> }
>
> function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
>
> - /* NoUnwind indicates that the intrinsic never raises a C++ exception.
> - * Set it for all intrinsics.
> - */
> - attr_mask |= LP_FUNC_ATTR_NOUNWIND;
> -
> - while (attr_mask) {
> - enum lp_func_attr attr = 1 << u_bit_scan(&attr_mask);
> - lp_add_function_attr(function, -1, attr);
> - }
> + if (!set_callsite_attrs)
> + lp_add_func_attributes(ctx, function, attr_mask);
>
> if (gallivm_debug & GALLIVM_DEBUG_IR) {
> lp_debug_dump_value(function);
> }
> }
>
> - return LLVMBuildCall(builder, function, args, num_args, "");
> + call = LLVMBuildCall(builder, function, args, num_args, "");
> + if (set_callsite_attrs)
> + lp_add_func_attributes(ctx, call, attr_mask);
> + return call;
> }
>
>
> LLVMValueRef
> lp_build_intrinsic_unary(LLVMBuilderRef builder,
> const char *name,
> LLVMTypeRef ret_type,
> LLVMValueRef a)
> {
> return lp_build_intrinsic(builder, name, ret_type, &a, 1, 0);
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
> index 039e9ab..d279911 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
> @@ -47,39 +47,44 @@
> #define LP_MAX_FUNC_ARGS 32
>
> enum lp_func_attr {
> LP_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
> LP_FUNC_ATTR_BYVAL = (1 << 1),
> LP_FUNC_ATTR_INREG = (1 << 2),
> LP_FUNC_ATTR_NOALIAS = (1 << 3),
> LP_FUNC_ATTR_NOUNWIND = (1 << 4),
> LP_FUNC_ATTR_READNONE = (1 << 5),
> LP_FUNC_ATTR_READONLY = (1 << 6),
> +
> + /* Legacy intrinsic that needs attributes on function declarations
> + * and they must match the internal LLVM definition exactly, otherwise
> + * intrinsic selection fails.
> + */
> + LP_FUNC_ATTR_LEGACY = (1u << 31),
> };
>
> void
> lp_format_intrinsic(char *name,
> size_t size,
> const char *name_root,
> LLVMTypeRef type);
>
> LLVMValueRef
> lp_declare_intrinsic(LLVMModuleRef module,
> const char *name,
> LLVMTypeRef ret_type,
> LLVMTypeRef *arg_types,
> unsigned num_args);
>
> void
> -lp_add_function_attr(LLVMValueRef function,
> - int attr_idx,
> - enum lp_func_attr attr);
> +lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
> + int attr_idx, enum lp_func_attr attr);
>
> LLVMValueRef
> lp_build_intrinsic(LLVMBuilderRef builder,
> const char *name,
> LLVMTypeRef ret_type,
> LLVMValueRef *args,
> unsigned num_args,
> unsigned attr_mask);
>
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index 161a03f..a1e2601 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -3311,21 +3311,22 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
>
> val_type[0] = val_type[1] = val_type[2] = val_type[3] =
> lp_build_vec_type(gallivm, params->type);
> ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
> function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
> function = LLVMAddFunction(module, func_name, function_type);
>
> for (i = 0; i < num_param; ++i) {
> if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
>
> - lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
> + lp_add_function_attr(gallivm->context, function, i + 1,
> + LP_FUNC_ATTR_NOALIAS);
> }
> }
>
> LLVMSetFunctionCallConv(function, LLVMFastCallConv);
> LLVMSetLinkage(function, LLVMInternalLinkage);
>
> lp_build_sample_gen_func(gallivm,
> static_texture_state,
> static_sampler_state,
> dynamic_state,
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> index af47b52..70b0a67 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> @@ -2488,21 +2488,22 @@ generate_fragment(struct llvmpipe_context *lp,
> function = LLVMAddFunction(gallivm->module, func_name, func_type);
> LLVMSetFunctionCallConv(function, LLVMCCallConv);
>
> variant->function[partial_mask] = function;
>
> /* XXX: need to propagate noalias down into color param now we are
> * passing a pointer-to-pointer?
> */
> for(i = 0; i < ARRAY_SIZE(arg_types); ++i)
> if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
> - lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
> + lp_add_function_attr(gallivm->context, function, i + 1,
> + LP_FUNC_ATTR_NOALIAS);
>
> context_ptr = LLVMGetParam(function, 0);
> x = LLVMGetParam(function, 1);
> y = LLVMGetParam(function, 2);
> facing = LLVMGetParam(function, 3);
> a0_ptr = LLVMGetParam(function, 4);
> dadx_ptr = LLVMGetParam(function, 5);
> dady_ptr = LLVMGetParam(function, 6);
> color_ptr_ptr = LLVMGetParam(function, 7);
> depth_ptr = LLVMGetParam(function, 8);
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
> index 6b0df21..66bc42c 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
> @@ -609,29 +609,29 @@ emit_tri_coef( struct gallivm_state *gallivm,
> default:
> assert(0);
> }
> }
> }
>
>
> /* XXX: generic code:
> */
> static void
> -set_noalias(LLVMBuilderRef builder,
> +set_noalias(LLVMContextRef ctx,
> LLVMValueRef function,
> const LLVMTypeRef *arg_types,
> int nr_args)
> {
> int i;
> for(i = 0; i < nr_args; ++i)
> if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
> - lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
> + lp_add_function_attr(ctx, function, i + 1, LP_FUNC_ATTR_NOALIAS);
> }
>
> static void
> init_args(struct gallivm_state *gallivm,
> const struct lp_setup_variant_key *key,
> struct lp_setup_args *args)
> {
> LLVMBuilderRef b = gallivm->builder;
> LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
> LLVMValueRef onef = lp_build_const_float(gallivm, 1.0);
> @@ -783,21 +783,22 @@ generate_setup_variant(struct lp_setup_variant_key *key,
> lp_build_name(args.dadx, "out_dadx");
> lp_build_name(args.dady, "out_dady");
>
> /*
> * Function body
> */
> block = LLVMAppendBasicBlockInContext(gallivm->context,
> variant->function, "entry");
> LLVMPositionBuilderAtEnd(builder, block);
>
> - set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types));
> + set_noalias(gallivm->context, variant->function, arg_types,
> + ARRAY_SIZE(arg_types));
> init_args(gallivm, &variant->key, &args);
> emit_tri_coef(gallivm, &variant->key, &args);
>
> LLVMBuildRetVoid(builder);
>
> gallivm_verify_function(gallivm, variant->function);
>
> gallivm_compile_module(gallivm);
>
> variant->jit_function = (lp_jit_setup_triangle)
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index f9eaea2..ea3f4fd 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -394,21 +394,22 @@ static void declare_input_vs(
> }
>
> args[0] = t_list;
> args[2] = vertex_index;
>
> for (unsigned i = 0; i < num_fetches; i++) {
> args[1] = LLVMConstInt(ctx->i32, fetch_stride * i, 0);
>
> input[i] = lp_build_intrinsic(gallivm->builder,
> "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
> - LP_FUNC_ATTR_READNONE);
> + LP_FUNC_ATTR_READNONE |
> + LP_FUNC_ATTR_LEGACY);
> }
>
> /* Break up the vec4 into individual components */
> for (chan = 0; chan < 4; chan++) {
> LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
> out[chan] = LLVMBuildExtractElement(gallivm->builder,
> input[0], llvm_chan, "");
> }
>
> switch (fix_fetch) {
> @@ -1116,28 +1117,30 @@ static LLVMValueRef fetch_input_gs(
> args[3] = uint->zero;
> args[4] = uint->one; /* OFFEN */
> args[5] = uint->zero; /* IDXEN */
> args[6] = uint->one; /* GLC */
> args[7] = uint->zero; /* SLC */
> args[8] = uint->zero; /* TFE */
>
> value = lp_build_intrinsic(gallivm->builder,
> "llvm.SI.buffer.load.dword.i32.i32",
> ctx->i32, args, 9,
> - LP_FUNC_ATTR_READONLY);
> + LP_FUNC_ATTR_READONLY |
> + LP_FUNC_ATTR_LEGACY);
> if (tgsi_type_is_64bit(type)) {
> LLVMValueRef value2;
> args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
> value2 = lp_build_intrinsic(gallivm->builder,
> "llvm.SI.buffer.load.dword.i32.i32",
> ctx->i32, args, 9,
> - LP_FUNC_ATTR_READONLY);
> + LP_FUNC_ATTR_READONLY |
> + LP_FUNC_ATTR_LEGACY);
> return si_llvm_emit_fetch_64bit(bld_base, type,
> value, value2);
> }
> return LLVMBuildBitCast(gallivm->builder,
> value,
> tgsi2llvmtype(bld_base, type), "");
> }
>
> static int lookup_interp_param_index(unsigned interpolate, unsigned location)
> {
> @@ -1361,21 +1364,22 @@ static LLVMValueRef get_sample_id(struct si_shader_context *radeon_bld)
> * Load a dword from a constant buffer.
> */
> static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
> LLVMValueRef resource,
> LLVMValueRef offset)
> {
> LLVMBuilderRef builder = ctx->gallivm.builder;
> LLVMValueRef args[2] = {resource, offset};
>
> return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2,
> - LP_FUNC_ATTR_READNONE);
> + LP_FUNC_ATTR_READNONE |
> + LP_FUNC_ATTR_LEGACY);
> }
>
> static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, LLVMValueRef sample_id)
> {
> struct si_shader_context *ctx =
> si_shader_context(&radeon_bld->bld_base);
> struct lp_build_context *uint_bld = &radeon_bld->bld_base.uint_bld;
> struct gallivm_state *gallivm = &radeon_bld->gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> LLVMValueRef desc = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS);
> @@ -1815,21 +1819,22 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
> for (chan = 0; chan < 2; chan++) {
> LLVMValueRef pack_args[2] = {
> values[2 * chan],
> values[2 * chan + 1]
> };
> LLVMValueRef packed;
>
> packed = lp_build_intrinsic(base->gallivm->builder,
> "llvm.SI.packf16",
> ctx->i32, pack_args, 2,
> - LP_FUNC_ATTR_READNONE);
> + LP_FUNC_ATTR_READNONE |
> + LP_FUNC_ATTR_LEGACY);
> args[chan + 5] =
> LLVMBuildBitCast(base->gallivm->builder,
> packed, ctx->f32, "");
> }
> break;
>
> case V_028714_SPI_SHADER_UNORM16_ABGR:
> for (chan = 0; chan < 4; chan++) {
> val[chan] = ac_emit_clamp(&ctx->ac, values[chan]);
> val[chan] = LLVMBuildFMul(builder, val[chan],
> @@ -1947,24 +1952,24 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
> lp_build_cmp(&bld_base->base,
> ctx->shader->key.part.ps.epilog.alpha_func,
> alpha, alpha_ref);
> LLVMValueRef arg =
> lp_build_select(&bld_base->base,
> alpha_pass,
> lp_build_const_float(gallivm, 1.0f),
> lp_build_const_float(gallivm, -1.0f));
>
> lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
> - ctx->voidt, &arg, 1, 0);
> + ctx->voidt, &arg, 1, LP_FUNC_ATTR_LEGACY);
> } else {
> lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kilp",
> - ctx->voidt, NULL, 0, 0);
> + ctx->voidt, NULL, 0, LP_FUNC_ATTR_LEGACY);
> }
> }
>
> static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
> LLVMValueRef alpha,
> unsigned samplemask_param)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> LLVMValueRef coverage;
> @@ -2295,21 +2300,21 @@ handle_semantic:
>
> si_llvm_init_export_args(bld_base, outputs[i].values, target, args);
>
> if (target >= V_008DFC_SQ_EXP_POS &&
> target <= (V_008DFC_SQ_EXP_POS + 3)) {
> memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
> args, sizeof(args));
> } else {
> lp_build_intrinsic(base->gallivm->builder,
> "llvm.SI.export", ctx->voidt,
> - args, 9, 0);
> + args, 9, LP_FUNC_ATTR_LEGACY);
> }
>
> if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
> semantic_name = TGSI_SEMANTIC_GENERIC;
> goto handle_semantic;
> }
> }
>
> shader->info.nr_param_exports = param_count;
>
> @@ -2381,21 +2386,22 @@ handle_semantic:
> continue;
>
> /* Specify the target we are exporting */
> pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);
>
> if (pos_idx == shader->info.nr_pos_exports)
> /* Specify that this is the last export */
> pos_args[i][2] = uint->one;
>
> lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
> - ctx->voidt, pos_args[i], 9, 0);
> + ctx->voidt, pos_args[i], 9,
> + LP_FUNC_ATTR_LEGACY);
> }
> }
>
> /**
> * Forward all outputs from the vertex shader to the TES. This is only used
> * for the fixed function TCS.
> */
> static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> @@ -2972,42 +2978,42 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
> memcpy(exp->args[exp->num++], args, sizeof(args));
> }
> }
>
> static void si_emit_ps_exports(struct si_shader_context *ctx,
> struct si_ps_exports *exp)
> {
> for (unsigned i = 0; i < exp->num; i++)
> lp_build_intrinsic(ctx->gallivm.builder,
> "llvm.SI.export", ctx->voidt,
> - exp->args[i], 9, 0);
> + exp->args[i], 9, LP_FUNC_ATTR_LEGACY);
> }
>
> static void si_export_null(struct lp_build_tgsi_context *bld_base)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct lp_build_context *base = &bld_base->base;
> struct lp_build_context *uint = &bld_base->uint_bld;
> LLVMValueRef args[9];
>
> args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
> args[1] = uint->one; /* whether the EXEC mask is valid */
> args[2] = uint->one; /* DONE bit */
> args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
> args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
> args[5] = base->undef; /* R */
> args[6] = base->undef; /* G */
> args[7] = base->undef; /* B */
> args[8] = base->undef; /* A */
>
> lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
> - ctx->voidt, args, 9, 0);
> + ctx->voidt, args, 9, LP_FUNC_ATTR_LEGACY);
> }
>
> /**
> * Return PS outputs in this order:
> *
> * v[0:3] = color0.xyzw
> * v[4:7] = color1.xyzw
> * ...
> * vN+0 = Depth
> * vN+1 = Stencil
> @@ -4082,21 +4088,21 @@ static void resq_emit(
>
> if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
> out = LLVMBuildExtractElement(builder, emit_data->args[0],
> lp_build_const_int32(gallivm, 2), "");
> } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
> out = get_buffer_size(bld_base, emit_data->args[0]);
> } else {
> out = lp_build_intrinsic(
> builder, "llvm.SI.getresinfo.i32", emit_data->dst_type,
> emit_data->args, emit_data->arg_count,
> - LP_FUNC_ATTR_READNONE);
> + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
>
> /* Divide the number of layers by 6 to get the number of cubes. */
> if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
> LLVMValueRef imm2 = lp_build_const_int32(gallivm, 2);
> LLVMValueRef imm6 = lp_build_const_int32(gallivm, 6);
>
> LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, "");
> z = LLVMBuildSDiv(builder, z, imm6, "");
> out = LLVMBuildInsertElement(builder, out, z, imm2, "");
> }
> @@ -4319,21 +4325,21 @@ static void txq_emit(const struct lp_build_tgsi_action *action,
>
> if (target == TGSI_TEXTURE_BUFFER) {
> /* Just return the buffer size. */
> emit_data->output[emit_data->chan] = emit_data->args[0];
> return;
> }
>
> emit_data->output[emit_data->chan] = lp_build_intrinsic(
> base->gallivm->builder, "llvm.SI.getresinfo.i32",
> emit_data->dst_type, emit_data->args, emit_data->arg_count,
> - LP_FUNC_ATTR_READNONE);
> + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
>
> /* Divide the number of layers by 6 to get the number of cubes. */
> if (target == TGSI_TEXTURE_CUBE_ARRAY ||
> target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
> LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
> LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);
>
> LLVMValueRef v4 = emit_data->output[emit_data->chan];
> LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
> @@ -4728,21 +4734,21 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx,
> tmp = LLVMBuildBitCast(builder, tmp, ctx->f32, "");
> tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
> tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
> coord = LLVMBuildInsertElement(builder, coord, tmp, index, "");
> }
>
> emit_data->args[0] = coord;
> emit_data->output[emit_data->chan] =
> lp_build_intrinsic(builder, intr_name, emit_data->dst_type,
> emit_data->args, emit_data->arg_count,
> - LP_FUNC_ATTR_READNONE);
> + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
> }
>
> static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
> struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct lp_build_context *base = &bld_base->base;
> const struct tgsi_full_instruction *inst = emit_data->inst;
> unsigned opcode = inst->Instruction.Opcode;
> @@ -4752,21 +4758,21 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
> bool is_shadow = tgsi_is_shadow_target(target);
> char type[64];
> const char *name = "llvm.SI.image.sample";
> const char *infix = "";
>
> if (target == TGSI_TEXTURE_BUFFER) {
> emit_data->output[emit_data->chan] = lp_build_intrinsic(
> base->gallivm->builder,
> "llvm.SI.vs.load.input", emit_data->dst_type,
> emit_data->args, emit_data->arg_count,
> - LP_FUNC_ATTR_READNONE);
> + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
> return;
> }
>
> switch (opcode) {
> case TGSI_OPCODE_TXF:
> name = target == TGSI_TEXTURE_2D_MSAA ||
> target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
> "llvm.SI.image.load" :
> "llvm.SI.image.load.mip";
> is_shadow = false;
> @@ -4829,21 +4835,21 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
> */
> si_lower_gather4_integer(ctx, emit_data, intr_name,
> (int)has_offset + (int)is_shadow);
> return;
> }
> }
>
> emit_data->output[emit_data->chan] = lp_build_intrinsic(
> base->gallivm->builder, intr_name, emit_data->dst_type,
> emit_data->args, emit_data->arg_count,
> - LP_FUNC_ATTR_READNONE);
> + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
> }
>
> static void si_llvm_emit_txqs(
> const struct lp_build_tgsi_action *action,
> struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> @@ -5118,21 +5124,21 @@ static void si_llvm_emit_vertex(
> lp_build_const_int32(gallivm,
> shader->selector->gs_max_out_vertices), "");
>
> bool use_kill = !info->writes_memory;
> if (use_kill) {
> kill = lp_build_select(&bld_base->base, can_emit,
> lp_build_const_float(gallivm, 1.0f),
> lp_build_const_float(gallivm, -1.0f));
>
> lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
> - ctx->voidt, &kill, 1, 0);
> + ctx->voidt, &kill, 1, LP_FUNC_ATTR_LEGACY);
> } else {
> lp_build_if(&if_state, gallivm, can_emit);
> }
>
> offset = 0;
> for (i = 0; i < info->num_outputs; i++) {
> LLVMValueRef *out_ptr = ctx->outputs[i];
>
> for (chan = 0; chan < 4; chan++) {
> if (!(info->output_usagemask[i] & (1 << chan)) ||
> @@ -5238,24 +5244,26 @@ static void si_create_function(struct si_shader_context *ctx,
> LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
>
> /* The combination of:
> * - ByVal
> * - dereferenceable
> * - invariant.load
> * allows the optimization passes to move loads and reduces
> * SGPR spilling significantly.
> */
> if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
> - lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_BYVAL);
> + lp_add_function_attr(ctx->gallivm.context, ctx->main_fn,
> + i + 1, LP_FUNC_ATTR_BYVAL);
> lp_add_attr_dereferenceable(P, UINT64_MAX);
> } else
> - lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
> + lp_add_function_attr(ctx->gallivm.context, ctx->main_fn,
> + i + 1, LP_FUNC_ATTR_INREG);
> }
>
> LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
> "no-signed-zeros-fp-math",
> "true");
>
> if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) {
> /* These were copied from some LLVM test. */
> LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
> "less-precise-fpmad",
> @@ -5740,21 +5748,22 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
> offset = LLVMBuildMul(builder, address[1],
> LLVMConstInt(ctx->i32, 4, 0), "");
> row = buffer_load_const(ctx, desc, offset);
> row = LLVMBuildBitCast(builder, row, ctx->i32, "");
> bit = LLVMBuildLShr(builder, row, address[0], "");
> bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
>
> /* The intrinsic kills the thread if arg < 0. */
> bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0),
> LLVMConstReal(ctx->f32, -1), "");
> - lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, 0);
> + lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1,
> + LP_FUNC_ATTR_LEGACY);
> }
>
> void si_shader_binary_read_config(struct radeon_shader_binary *binary,
> struct si_shader_config *conf,
> unsigned symbol_offset)
> {
> unsigned i;
> const unsigned char *config =
> radeon_shader_binary_config_start(binary, symbol_offset);
> bool really_needs_scratch = false;
> @@ -6308,23 +6317,24 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
> }
>
> args[2] = lp_build_const_int32(
> gallivm,
> offset * gs_selector->gs_max_out_vertices * 16 * 4);
> offset++;
>
> outputs[i].values[chan] =
> LLVMBuildBitCast(gallivm->builder,
> lp_build_intrinsic(gallivm->builder,
> - "llvm.SI.buffer.load.dword.i32.i32",
> - ctx.i32, args, 9,
> - LP_FUNC_ATTR_READONLY),
> + "llvm.SI.buffer.load.dword.i32.i32",
> + ctx.i32, args, 9,
> + LP_FUNC_ATTR_READONLY |
> + LP_FUNC_ATTR_LEGACY),
> ctx.f32, "");
> }
> }
>
> /* Streamout and exports. */
> if (gs_selector->so.num_outputs) {
> si_llvm_emit_streamout(&ctx, outputs,
> gsinfo->num_outputs,
> stream);
> }
> @@ -7047,21 +7057,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
> LLVMValueRef out[48];
> LLVMTypeRef function_type;
> unsigned num_params;
> unsigned num_out;
> MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */
> unsigned num_sgprs, num_vgprs;
> unsigned last_sgpr_param;
> unsigned gprs;
>
> for (unsigned i = 0; i < num_parts; ++i) {
> - lp_add_function_attr(parts[i], -1, LP_FUNC_ATTR_ALWAYSINLINE);
> + lp_add_function_attr(gallivm->context, parts[i], -1,
> + LP_FUNC_ATTR_ALWAYSINLINE);
> LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
> }
>
> /* The parameters of the wrapper function correspond to those of the
> * first part in terms of SGPRs and VGPRs, but we use the types of the
> * main part to get the right types. This is relevant for the
> * dereferenceable attribute on descriptor table pointers.
> */
> num_sgprs = 0;
> num_vgprs = 0;
> @@ -7164,21 +7175,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
> param_size = llvm_get_type_size(param_type) / 4;
> is_sgpr = ac_is_sgpr_param(param);
>
> if (is_sgpr) {
> #if HAVE_LLVM < 0x0400
> LLVMRemoveAttribute(param, LLVMByValAttribute);
> #else
> unsigned kind_id = LLVMGetEnumAttributeKindForName("byval", 5);
> LLVMRemoveEnumAttributeAtIndex(parts[part], param_idx + 1, kind_id);
> #endif
> - lp_add_function_attr(parts[part], param_idx + 1, LP_FUNC_ATTR_INREG);
> + lp_add_function_attr(gallivm->context, parts[part],
> + param_idx + 1, LP_FUNC_ATTR_INREG);
> }
>
> assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));
> assert(is_sgpr || out_idx >= num_out_sgpr);
>
> if (param_size == 1)
> arg = out[out_idx];
> else
> arg = lp_build_gather_values(gallivm, &out[out_idx], param_size);
>
> @@ -7679,21 +7691,21 @@ static void si_build_vs_epilog_function(struct si_shader_context *ctx,
> key->vs_epilog.prim_id_param_offset);
> args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
> args[5] = LLVMGetParam(ctx->main_fn,
> VS_EPILOG_PRIMID_LOC); /* X */
> args[6] = base->undef; /* Y */
> args[7] = base->undef; /* Z */
> args[8] = base->undef; /* W */
>
> lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
> LLVMVoidTypeInContext(base->gallivm->context),
> - args, 9, 0);
> + args, 9, LP_FUNC_ATTR_LEGACY);
> }
>
> LLVMBuildRetVoid(gallivm->builder);
> }
>
> /**
> * Create & compile a vertex shader epilog. This a helper used by VS and TES.
> */
> static bool si_get_vs_epilog(struct si_screen *sscreen,
> LLVMTargetMachineRef tm,
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> index 10268e9..ee59fed 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> @@ -51,27 +51,23 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
> emit_data->arg_count = 1;
> emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
> lp_build_const_float(gallivm, -1.0f),
> bld_base->base.zero, "");
> }
>
> static void kil_emit(const struct lp_build_tgsi_action *action,
> struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data)
> {
> - unsigned i;
> - for (i = 0; i < emit_data->arg_count; i++) {
> - emit_data->output[i] = lp_build_intrinsic_unary(
> - bld_base->base.gallivm->builder,
> - action->intr_name,
> - emit_data->dst_type, emit_data->args[i]);
> - }
> + lp_build_intrinsic(bld_base->base.gallivm->builder,
> + action->intr_name, emit_data->dst_type,
> + &emit_data->args[0], 1, LP_FUNC_ATTR_LEGACY);
> }
>
> static void emit_icmp(const struct lp_build_tgsi_action *action,
> struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data)
> {
> unsigned pred;
> LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> LLVMContextRef context = bld_base->base.gallivm->context;
>
> @@ -500,21 +496,23 @@ static void emit_bfe(const struct lp_build_tgsi_action *action,
> struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data)
> {
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> LLVMValueRef bfe_sm5;
> LLVMValueRef cond;
>
> bfe_sm5 = lp_build_intrinsic(builder, action->intr_name,
> emit_data->dst_type, emit_data->args,
> - emit_data->arg_count, LP_FUNC_ATTR_READNONE);
> + emit_data->arg_count,
> + LP_FUNC_ATTR_READNONE |
> + LP_FUNC_ATTR_LEGACY);
>
> /* Correct for GLSL semantics. */
> cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
> lp_build_const_int32(gallivm, 32), "");
> emit_data->output[emit_data->chan] =
> LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
> }
>
> /* this is ffs in C */
> static void emit_lsb(const struct lp_build_tgsi_action *action,
>
More information about the mesa-dev
mailing list