[Mesa-dev] [PATCH 2/2] gallivm, ac: add function attributes at call sites instead of declarations

Marek Olšák maraeo at gmail.com
Wed Mar 1 17:21:37 UTC 2017


On Wed, Mar 1, 2017 at 5:29 PM, Jose Fonseca <jfonseca at vmware.com> wrote:
> On 23/02/17 00:01, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> Function attributes can vary at call sites if the intrinsic is NOT a legacy
>> SI intrinsic. We need this to force readnone or inaccessiblememonly on some
>> amdgcn intrinsics.
>>
>> This is only used with LLVM 4.0 and later. Intrinsics only used with
>> LLVM <= 3.9 don't need the LEGACY flag.
>>
>> gallivm and ac code is in the same patch, because splitting would be
>> more complicated with all the LEGACY uses all over the place.
>> ---
>>  src/amd/common/ac_llvm_build.c                    | 23 ++++----
>>  src/amd/common/ac_llvm_util.c                     | 31 +++++++----
>>  src/amd/common/ac_llvm_util.h                     | 17 +++---
>>  src/amd/common/ac_nir_to_llvm.c                   | 63 ++++++++++++++--------
>>  src/gallium/auxiliary/draw/draw_llvm.c            |  6 ++-
>>  src/gallium/auxiliary/gallivm/lp_bld_intr.c       | 51 ++++++++++++------
>>  src/gallium/auxiliary/gallivm/lp_bld_intr.h       | 11 ++--
>>  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  3 +-
>>  src/gallium/drivers/llvmpipe/lp_state_fs.c        |  3 +-
>>  src/gallium/drivers/llvmpipe/lp_state_setup.c     |  7 +--
>>  src/gallium/drivers/radeonsi/si_shader.c          | 64 ++++++++++++++---------
>>  src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 14 +++--
>>  12 files changed, 184 insertions(+), 109 deletions(-)
>>
>> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
>> index 2f25b14..5c8b7f7 100644
>> --- a/src/amd/common/ac_llvm_build.c
>> +++ b/src/amd/common/ac_llvm_build.c
>> @@ -75,47 +75,50 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
>> LLVMContextRef context)
>>                                                         "amdgpu.uniform",
>> 14);
>>
>>         ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
>>  }
>>
>>  LLVMValueRef
>>  ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
>>                        LLVMTypeRef return_type, LLVMValueRef *params,
>>                        unsigned param_count, unsigned attrib_mask)
>>  {
>> -       LLVMValueRef function;
>> +       LLVMValueRef function, call;
>> +       bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
>> +                                 !(attrib_mask & AC_FUNC_ATTR_LEGACY);
>>
>>         function = LLVMGetNamedFunction(ctx->module, name);
>>         if (!function) {
>>                 LLVMTypeRef param_types[32], function_type;
>>                 unsigned i;
>>
>>                 assert(param_count <= 32);
>>
>>                 for (i = 0; i < param_count; ++i) {
>>                         assert(params[i]);
>>                         param_types[i] = LLVMTypeOf(params[i]);
>>                 }
>>                 function_type =
>>                     LLVMFunctionType(return_type, param_types,
>> param_count, 0);
>>                 function = LLVMAddFunction(ctx->module, name,
>> function_type);
>>
>>                 LLVMSetFunctionCallConv(function, LLVMCCallConv);
>>                 LLVMSetLinkage(function, LLVMExternalLinkage);
>>
>> -               attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
>> -               while (attrib_mask) {
>> -                       enum ac_func_attr attr = 1u <<
>> u_bit_scan(&attrib_mask);
>> -                       ac_add_function_attr(function, -1, attr);
>> -               }
>> +               if (!set_callsite_attrs)
>> +                       ac_add_func_attributes(ctx->context, function,
>> attrib_mask);
>>         }
>> -       return LLVMBuildCall(ctx->builder, function, params, param_count,
>> "");
>> +
>> +       call = LLVMBuildCall(ctx->builder, function, params, param_count,
>> "");
>> +       if (set_callsite_attrs)
>> +               ac_add_func_attributes(ctx->context, call, attrib_mask);
>> +       return call;
>>  }
>>
>>  LLVMValueRef
>>  ac_build_gather_values_extended(struct ac_llvm_context *ctx,
>>                                 LLVMValueRef *values,
>>                                 unsigned value_count,
>>                                 unsigned value_stride,
>>                                 bool load)
>>  {
>>         LLVMBuilderRef builder = ctx->builder;
>> @@ -524,21 +527,22 @@ ac_build_tbuffer_store(struct ac_llvm_context *ctx,
>>         /* The instruction offset field has 12 bits */
>>         assert(offen || inst_offset < (1 << 12));
>>
>>         /* The intrinsic is overloaded, we need to add a type suffix for
>> overloading to work. */
>>         unsigned func = CLAMP(num_channels, 1, 3) - 1;
>>         const char *types[] = {"i32", "v2i32", "v4i32"};
>>         char name[256];
>>         snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s",
>> types[func]);
>>
>>         ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
>> -                              args, ARRAY_SIZE(args), 0);
>> +                              args, ARRAY_SIZE(args),
>> +                              AC_FUNC_ATTR_LEGACY);
>>  }
>>
>>  void
>>  ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
>>                               LLVMValueRef rsrc,
>>                               LLVMValueRef vdata,
>>                               unsigned num_channels,
>>                               LLVMValueRef vaddr,
>>                               LLVMValueRef soffset,
>>                               unsigned inst_offset)
>> @@ -836,12 +840,13 @@ LLVMValueRef ac_emit_clamp(struct ac_llvm_context
>> *ctx, LLVMValueRef value)
>>
>>         const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." :
>>                                                  "llvm.AMDIL.clamp.";
>>         LLVMValueRef args[3] = {
>>                 value,
>>                 LLVMConstReal(ctx->f32, 0),
>>                 LLVMConstReal(ctx->f32, 1),
>>         };
>>
>>         return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3,
>> -                                     AC_FUNC_ATTR_READNONE);
>> +                                     AC_FUNC_ATTR_READNONE |
>> +                                     AC_FUNC_ATTR_LEGACY);
>>  }
>> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
>> index be127c5..fb525dd 100644
>> --- a/src/amd/common/ac_llvm_util.c
>> +++ b/src/amd/common/ac_llvm_util.c
>> @@ -17,21 +17,21 @@
>>   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>> THE
>>   * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>   *
>>   * The above copyright notice and this permission notice (including the
>>   * next paragraph) shall be included in all copies or substantial
>> portions
>>   * of the Software.
>>   *
>>   */
>>  /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
>>  #include "ac_llvm_util.h"
>> -
>> +#include "util/bitscan.h"
>>  #include <llvm-c/Core.h>
>>
>>  #include "c11/threads.h"
>>
>>  #include <assert.h>
>>  #include <stdio.h>
>>  #include <string.h>
>>
>>  static void ac_init_llvm_target()
>>  {
>> @@ -173,40 +173,53 @@ static const char *attr_to_str(enum ac_func_attr
>> attr)
>>     case AC_FUNC_ATTR_READNONE: return "readnone";
>>     case AC_FUNC_ATTR_READONLY: return "readonly";
>>     default:
>>            fprintf(stderr, "Unhandled function attribute: %x\n", attr);
>>            return 0;
>>     }
>>  }
>>
>>  #endif
>>
>> -void
>> -ac_add_function_attr(LLVMValueRef function,
>> -                     int attr_idx,
>> -                     enum ac_func_attr attr)
>> +static void
>> +ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
>> +                     int attr_idx, enum ac_func_attr attr)
>>  {
>> -
>>  #if HAVE_LLVM < 0x0400
>>     LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
>>     if (attr_idx == -1) {
>>        LLVMAddFunctionAttr(function, llvm_attr);
>>     } else {
>>        LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
>>     }
>>  #else
>> -   LLVMContextRef context =
>> LLVMGetModuleContext(LLVMGetGlobalParent(function));
>>     const char *attr_name = attr_to_str(attr);
>>     unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
>>                                                        strlen(attr_name));
>> -   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id,
>> 0);
>> -   LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
>> +   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
>> +
>> +   if (LLVMIsAFunction(function))
>> +      LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
>> +   else
>> +      LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
>>  #endif
>>  }
>>
>> +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
>> +                           unsigned attrib_mask)
>> +{
>> +       attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
>> +       attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
>> +
>> +       while (attrib_mask) {
>> +               enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
>> +               ac_add_function_attr(ctx, function, -1, attr);
>> +       }
>> +}
>> +
>>  void
>>  ac_dump_module(LLVMModuleRef module)
>>  {
>>         char *str = LLVMPrintModuleToString(module);
>>         fprintf(stderr, "%s", str);
>>         LLVMDisposeMessage(str);
>>  }
>> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
>> index 93d3d27..4fe4ab4 100644
>> --- a/src/amd/common/ac_llvm_util.h
>> +++ b/src/amd/common/ac_llvm_util.h
>> @@ -34,28 +34,29 @@ extern "C" {
>>  #endif
>>
>>  enum ac_func_attr {
>>         AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
>>         AC_FUNC_ATTR_BYVAL        = (1 << 1),
>>         AC_FUNC_ATTR_INREG        = (1 << 2),
>>         AC_FUNC_ATTR_NOALIAS      = (1 << 3),
>>         AC_FUNC_ATTR_NOUNWIND     = (1 << 4),
>>         AC_FUNC_ATTR_READNONE     = (1 << 5),
>>         AC_FUNC_ATTR_READONLY     = (1 << 6),
>> +
>> +       /* Legacy intrinsic that needs attributes on function declarations
>> +        * and they must match the internal LLVM definition exactly,
>> otherwise
>> +        * intrinsic selection fails.
>> +        */
>> +       AC_FUNC_ATTR_LEGACY       = (1u << 31),
>>  };
>>
>>  LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
>> bool supports_spill);
>>
>>  void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
>>  bool ac_is_sgpr_param(LLVMValueRef param);
>> -
>> -void
>> -ac_add_function_attr(LLVMValueRef function,
>> -                     int attr_idx,
>> -                     enum ac_func_attr attr);
>> -
>> -void
>> -ac_dump_module(LLVMModuleRef module);
>> +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
>> +                           unsigned attrib_mask);
>> +void ac_dump_module(LLVMModuleRef module);
>>
>>  #ifdef __cplusplus
>>  }
>>  #endif
>> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
>> index b3dc63c..4ec19d5 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -1039,26 +1039,27 @@ static LLVMValueRef emit_imul_high(struct
>> nir_to_llvm_context *ctx,
>>         src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
>>         src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
>>
>>         dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
>>         dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64,
>> 32, false), "");
>>         result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
>>         return result;
>>  }
>>
>>  static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context
>> *ctx,
>> -                                         const char *intrin,
>> +                                         const char *intrin, unsigned
>> attr_mask,
>>                                           LLVMValueRef srcs[3])
>>  {
>>         LLVMValueRef result;
>>         LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
>> srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
>> -       result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs,
>> 3, AC_FUNC_ATTR_READNONE);
>> +       result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs,
>> 3,
>> +                                       AC_FUNC_ATTR_READNONE |
>> attr_mask);
>>
>>         result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result,
>> "");
>>         return result;
>>  }
>>
>>  static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
>>                                          LLVMValueRef src0, LLVMValueRef
>> src1,
>>                                          LLVMValueRef src2, LLVMValueRef
>> src3)
>>  {
>>         LLVMValueRef bfi_args[3], result;
>> @@ -1418,24 +1419,26 @@ static void visit_alu(struct nir_to_llvm_context
>> *ctx, nir_alu_instr *instr)
>>                 break;
>>         case nir_op_fmin:
>>                 result = emit_intrin_2f_param(ctx, "llvm.minnum",
>>                                               to_float_type(ctx,
>> def_type), src[0], src[1]);
>>                 break;
>>         case nir_op_ffma:
>>                 result = emit_intrin_3f_param(ctx, "llvm.fma",
>>                                               to_float_type(ctx,
>> def_type), src[0], src[1], src[2]);
>>                 break;
>>         case nir_op_ibitfield_extract:
>> -               result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32",
>> src);
>> +               result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32",
>> +                                              AC_FUNC_ATTR_LEGACY, src);
>>                 break;
>>         case nir_op_ubitfield_extract:
>> -               result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32",
>> src);
>> +               result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32",
>> +                                              AC_FUNC_ATTR_LEGACY, src);
>>                 break;
>>         case nir_op_bitfield_insert:
>>                 result = emit_bitfield_insert(ctx, src[0], src[1], src[2],
>> src[3]);
>>                 break;
>>         case nir_op_bitfield_reverse:
>>                 result = ac_emit_llvm_intrinsic(&ctx->ac,
>> "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
>>                 break;
>>         case nir_op_bit_count:
>>                 result = ac_emit_llvm_intrinsic(&ctx->ac,
>> "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
>>                 break;
>> @@ -1635,22 +1638,23 @@ static LLVMValueRef
>> radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
>>                 txq_args[txq_arg_count++] = tinfo->args[1];
>>                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf,
>> 0); /* dmask */
>>                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0);
>> /* unorm */
>>                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0);
>> /* r128 */
>>                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1
>> : 0, 0);
>>                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0);
>> /* glc */
>>                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0);
>> /* slc */
>>                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0);
>> /* tfe */
>>                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0);
>> /* lwe */
>>                 size = ac_emit_llvm_intrinsic(&ctx->ac,
>> "llvm.SI.getresinfo.i32", ctx->v4i32,
>> -                                          txq_args, txq_arg_count,
>> -                                          AC_FUNC_ATTR_READNONE);
>> +                                             txq_args, txq_arg_count,
>> +                                             AC_FUNC_ATTR_READNONE |
>> +                                             AC_FUNC_ATTR_LEGACY);
>>
>>                 for (c = 0; c < 2; c++) {
>>                         half_texel[c] =
>> LLVMBuildExtractElement(ctx->builder, size,
>>
>> LLVMConstInt(ctx->i32, c, false), "");
>>                         half_texel[c] = LLVMBuildUIToFP(ctx->builder,
>> half_texel[c], ctx->f32, "");
>>                         half_texel[c] = ac_emit_fdiv(&ctx->ac,
>> ctx->f32one, half_texel[c]);
>>                         half_texel[c] = LLVMBuildFMul(ctx->builder,
>> half_texel[c],
>>
>> LLVMConstReal(ctx->f32, -0.5), "");
>>                 }
>>         }
>> @@ -1660,21 +1664,22 @@ static LLVMValueRef
>> radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
>>                 LLVMValueRef index = LLVMConstInt(ctx->i32,
>> coord_vgpr_index + c, 0);
>>                 tmp = LLVMBuildExtractElement(ctx->builder, coord, index,
>> "");
>>                 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
>>                 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
>>                 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
>>                 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp,
>> index, "");
>>         }
>>
>>         tinfo->args[0] = coord;
>>         return ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
>> tinfo->dst_type, tinfo->args, tinfo->arg_count,
>> -                                  AC_FUNC_ATTR_READNONE |
>> AC_FUNC_ATTR_NOUNWIND);
>> +                                     AC_FUNC_ATTR_READNONE |
>> AC_FUNC_ATTR_NOUNWIND |
>> +                                     AC_FUNC_ATTR_LEGACY);
>>
>>  }
>>
>>  static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
>>                                         nir_tex_instr *instr,
>>                                         struct ac_tex_info *tinfo)
>>  {
>>         const char *name = "llvm.SI.image.sample";
>>         const char *infix = "";
>>         char intr_name[127];
>> @@ -1728,21 +1733,22 @@ static LLVMValueRef build_tex_intrinsic(struct
>> nir_to_llvm_context *ctx,
>>                 has_offset ? ".o" : "", type);
>>
>>         if (instr->op == nir_texop_tg4) {
>>                 enum glsl_base_type stype =
>> glsl_get_sampler_result_type(instr->texture->var->type);
>>                 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
>>                         return radv_lower_gather4_integer(ctx, tinfo,
>> instr, intr_name,
>>                                                           (int)has_offset
>> + (int)is_shadow);
>>                 }
>>         }
>>         return ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
>> tinfo->dst_type, tinfo->args, tinfo->arg_count,
>> -                                  AC_FUNC_ATTR_READNONE |
>> AC_FUNC_ATTR_NOUNWIND);
>> +                                     AC_FUNC_ATTR_READNONE |
>> AC_FUNC_ATTR_NOUNWIND |
>> +                                     AC_FUNC_ATTR_LEGACY);
>>
>>  }
>>
>>  static LLVMValueRef visit_vulkan_resource_index(struct
>> nir_to_llvm_context *ctx,
>>                                                  nir_intrinsic_instr
>> *instr)
>>  {
>>         LLVMValueRef index = get_src(ctx, instr->src[0]);
>>         unsigned desc_set = nir_intrinsic_desc_set(instr);
>>         unsigned binding = nir_intrinsic_binding(instr);
>>         LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
>> @@ -2006,21 +2012,23 @@ static LLVMValueRef visit_load_ubo_buffer(struct
>> nir_to_llvm_context *ctx,
>>         if (instr->dest.ssa.bit_size == 64)
>>                 num_components *= 2;
>>
>>         for (unsigned i = 0; i < num_components; ++i) {
>>                 LLVMValueRef params[] = {
>>                         rsrc,
>>                         LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32,
>> 4 * i, 0),
>>                                      offset, "")
>>                 };
>>                 results[i] = ac_emit_llvm_intrinsic(&ctx->ac,
>> "llvm.SI.load.const", ctx->f32,
>> -                                                params, 2,
>> AC_FUNC_ATTR_READNONE);
>> +                                                   params, 2,
>> +                                                   AC_FUNC_ATTR_READNONE
>> |
>> +                                                   AC_FUNC_ATTR_LEGACY);
>>         }
>>
>>
>>         ret = ac_build_gather_values(&ctx->ac, results,
>> instr->num_components);
>>         return LLVMBuildBitCast(ctx->builder, ret,
>>                                 get_def_type(ctx, &instr->dest.ssa), "");
>>  }
>>
>>  static void
>>  radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
>> @@ -2103,21 +2111,23 @@ load_gs_input(struct nir_to_llvm_context *ctx,
>>                 args[1] = vtx_offset;
>>                 args[2] = LLVMConstInt(ctx->i32, (param * 4 + i +
>> const_index) * 256, false);
>>                 args[3] = ctx->i32zero;
>>                 args[4] = ctx->i32one; /* OFFEN */
>>                 args[5] = ctx->i32zero; /* IDXEN */
>>                 args[6] = ctx->i32one; /* GLC */
>>                 args[7] = ctx->i32zero; /* SLC */
>>                 args[8] = ctx->i32zero; /* TFE */
>>
>>                 value[i] = ac_emit_llvm_intrinsic(&ctx->ac,
>> "llvm.SI.buffer.load.dword.i32.i32",
>> -                                           ctx->i32, args, 9,
>> AC_FUNC_ATTR_READONLY);
>> +                                                 ctx->i32, args, 9,
>> +                                                 AC_FUNC_ATTR_READONLY |
>> +                                                 AC_FUNC_ATTR_LEGACY);
>>         }
>>         result = ac_build_gather_values(&ctx->ac, value,
>> instr->num_components);
>>
>>         return result;
>>  }
>>
>>  static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>>                                    nir_intrinsic_instr *instr)
>>  {
>>         LLVMValueRef values[8];
>> @@ -2685,21 +2695,23 @@ static LLVMValueRef visit_image_size(struct
>> nir_to_llvm_context *ctx,
>>         params[2] = LLVMConstInt(ctx->i32, 15, false);
>>         params[3] = ctx->i32zero;
>>         params[4] = ctx->i32zero;
>>         params[5] = da ? ctx->i32one : ctx->i32zero;
>>         params[6] = ctx->i32zero;
>>         params[7] = ctx->i32zero;
>>         params[8] = ctx->i32zero;
>>         params[9] = ctx->i32zero;
>>
>>         res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32",
>> ctx->v4i32,
>> -                                 params, 10, AC_FUNC_ATTR_READNONE);
>> +                                    params, 10,
>> +                                    AC_FUNC_ATTR_READNONE |
>> +                                    AC_FUNC_ATTR_LEGACY);
>>
>>         if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
>>             glsl_sampler_type_is_array(type)) {
>>                 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
>>                 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
>>                 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder,
>> res, two, "");
>>                 z = LLVMBuildSDiv(ctx->builder, z, six, "");
>>                 res = LLVMBuildInsertElement(ctx->builder, res, z, two,
>> "");
>>         }
>>         return res;
>> @@ -2729,21 +2741,21 @@ static void emit_discard_if(struct
>> nir_to_llvm_context *ctx,
>>
>>         cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
>>                              get_src(ctx, instr->src[0]),
>>                              ctx->i32zero, "");
>>
>>         cond = LLVMBuildSelect(ctx->builder, cond,
>>                                LLVMConstReal(ctx->f32, -1.0f),
>>                                ctx->f32zero, "");
>>         ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
>>                                ctx->voidt,
>> -                              &cond, 1, 0);
>> +                              &cond, 1, AC_FUNC_ATTR_LEGACY);
>>  }
>>
>>  static LLVMValueRef
>>  visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
>>  {
>>         LLVMValueRef result;
>>         LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
>>         result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
>>                               LLVMConstInt(ctx->i32, 0xfc0, false), "");
>>
>> @@ -2984,21 +2996,21 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
>>          * have any effect, and GS threads have no externally observable
>>          * effects other than emitting vertices.
>>          */
>>         can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
>>                                  LLVMConstInt(ctx->i32,
>> ctx->gs_max_out_vertices, false), "");
>>
>>         kill = LLVMBuildSelect(ctx->builder, can_emit,
>>                                LLVMConstReal(ctx->f32, 1.0f),
>>                                LLVMConstReal(ctx->f32, -1.0f), "");
>>         ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
>> -                           ctx->voidt, &kill, 1, 0);
>> +                           ctx->voidt, &kill, 1, AC_FUNC_ATTR_LEGACY);
>>
>>         /* loop num outputs */
>>         idx = 0;
>>         for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
>>                 LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
>>                 if (!(ctx->output_mask & (1ull << i)))
>>                         continue;
>>
>>                 for (unsigned j = 0; j < 4; j++) {
>>                         LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
>> @@ -3144,21 +3156,21 @@ static void visit_intrinsic(struct
>> nir_to_llvm_context *ctx,
>>         case nir_intrinsic_image_atomic_comp_swap:
>>                 result = visit_image_atomic(ctx, instr);
>>                 break;
>>         case nir_intrinsic_image_size:
>>                 result = visit_image_size(ctx, instr);
>>                 break;
>>         case nir_intrinsic_discard:
>>                 ctx->shader_info->fs.can_discard = true;
>>                 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
>>                                        ctx->voidt,
>> -                                      NULL, 0, 0);
>> +                                      NULL, 0, AC_FUNC_ATTR_LEGACY);
>>                 break;
>>         case nir_intrinsic_discard_if:
>>                 emit_discard_if(ctx, instr);
>>                 break;
>>         case nir_intrinsic_memory_barrier:
>>                 emit_waitcnt(ctx);
>>                 break;
>>         case nir_intrinsic_barrier:
>>                 emit_barrier(ctx);
>>                 break;
>> @@ -3924,21 +3936,22 @@ handle_vs_input_decl(struct nir_to_llvm_context
>> *ctx,
>>
>>         for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
>>                 t_offset = LLVMConstInt(ctx->i32, index + i, false);
>>
>>                 t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr,
>> t_offset);
>>                 args[0] = t_list;
>>                 args[1] = LLVMConstInt(ctx->i32, 0, false);
>>                 args[2] = buffer_index;
>>                 input = ac_emit_llvm_intrinsic(&ctx->ac,
>>                         "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
>> -                       AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
>> +                       AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
>> +                       AC_FUNC_ATTR_LEGACY);
>>
>>                 for (unsigned chan = 0; chan < 4; chan++) {
>>                         LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32,
>> chan, false);
>>                         ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)]
>> =
>>                                 to_integer(ctx,
>> LLVMBuildExtractElement(ctx->builder,
>>                                                         input, llvm_chan,
>> ""));
>>                 }
>>         }
>>  }
>>
>> @@ -4257,22 +4270,23 @@ si_llvm_init_export_args(struct
>> nir_to_llvm_context *ctx,
>>                         args[4] = ctx->i32one;
>>
>>                         for (unsigned chan = 0; chan < 2; chan++) {
>>                                 LLVMValueRef pack_args[2] = {
>>                                         values[2 * chan],
>>                                         values[2 * chan + 1]
>>                                 };
>>                                 LLVMValueRef packed;
>>
>>                                 packed = ac_emit_llvm_intrinsic(&ctx->ac,
>> "llvm.SI.packf16",
>> -                                                            ctx->i32,
>> pack_args, 2,
>> -
>> AC_FUNC_ATTR_READNONE);
>> +                                                               ctx->i32,
>> pack_args, 2,
>> +
>> AC_FUNC_ATTR_READNONE |
>> +
>> AC_FUNC_ATTR_LEGACY);
>>                                 args[chan + 5] = packed;
>>                         }
>>                         break;
>>
>>                 case V_028714_SPI_SHADER_UNORM16_ABGR:
>>                         for (unsigned chan = 0; chan < 4; chan++) {
>>                                 val[chan] = emit_float_saturate(ctx,
>> values[chan], 0, 1);
>>                                 val[chan] = LLVMBuildFMul(ctx->builder,
>> val[chan],
>>
>> LLVMConstReal(ctx->f32, 65535), "");
>>                                 val[chan] = LLVMBuildFAdd(ctx->builder,
>> val[chan],
>> @@ -4443,21 +4457,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context
>> *ctx)
>>                 si_llvm_init_export_args(ctx, values, target, args);
>>
>>                 if (target >= V_008DFC_SQ_EXP_POS &&
>>                     target <= (V_008DFC_SQ_EXP_POS + 3)) {
>>                         memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
>>                                args, sizeof(args));
>>                 } else {
>>                         ac_emit_llvm_intrinsic(&ctx->ac,
>>                                                "llvm.SI.export",
>>                                                ctx->voidt,
>> -                                              args, 9, 0);
>> +                                              args, 9,
>> +                                              AC_FUNC_ATTR_LEGACY);
>>                 }
>>         }
>>
>>         /* We need to add the position output manually if it's missing. */
>>         if (!pos_args[0][0]) {
>>                 pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
>>                 pos_args[0][1] = ctx->i32zero; /* EXEC mask */
>>                 pos_args[0][2] = ctx->i32zero; /* last export? */
>>                 pos_args[0][3] = LLVMConstInt(ctx->i32,
>> V_008DFC_SQ_EXP_POS, false);
>>                 pos_args[0][4] = ctx->i32zero; /* COMPR flag */
>> @@ -4498,21 +4513,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context
>> *ctx)
>>                 if (!pos_args[i][0])
>>                         continue;
>>
>>                 /* Specify the target we are exporting */
>>                 pos_args[i][3] = LLVMConstInt(ctx->i32,
>> V_008DFC_SQ_EXP_POS + pos_idx++, false);
>>                 if (pos_idx == num_pos_exports)
>>                         pos_args[i][2] = ctx->i32one;
>>                 ac_emit_llvm_intrinsic(&ctx->ac,
>>                                        "llvm.SI.export",
>>                                        ctx->voidt,
>> -                                      pos_args[i], 9, 0);
>> +                                      pos_args[i], 9,
>> +                                      AC_FUNC_ATTR_LEGACY);
>>         }
>>
>>         ctx->shader_info->vs.pos_exports = num_pos_exports;
>>         ctx->shader_info->vs.param_exports = param_count;
>>  }
>>
>>  static void
>>  handle_es_outputs_post(struct nir_to_llvm_context *ctx)
>>  {
>>         int j;
>> @@ -4554,21 +4570,22 @@ si_export_mrt_color(struct nir_to_llvm_context
>> *ctx,
>>         si_llvm_init_export_args(ctx, color, param,
>>                                  args);
>>
>>         if (is_last) {
>>                 args[1] = ctx->i32one; /* whether the EXEC mask is valid
>> */
>>                 args[2] = ctx->i32one; /* DONE bit */
>>         } else if (args[0] == ctx->i32zero)
>>                 return; /* unnecessary NULL export */
>>
>>         ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
>> -                           ctx->voidt, args, 9, 0);
>> +                              ctx->voidt, args, 9,
>> +                              AC_FUNC_ATTR_LEGACY);
>>  }
>>
>>  static void
>>  si_export_mrt_z(struct nir_to_llvm_context *ctx,
>>                 LLVMValueRef depth, LLVMValueRef stencil,
>>                 LLVMValueRef samplemask)
>>  {
>>         LLVMValueRef args[9];
>>         unsigned mask = 0;
>>         args[1] = ctx->i32one; /* whether the EXEC mask is valid */
>> @@ -4598,21 +4615,22 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
>>         }
>>
>>         /* SI (except OLAND) has a bug that it only looks
>>          * at the X writemask component. */
>>         if (ctx->options->chip_class == SI &&
>>             ctx->options->family != CHIP_OLAND)
>>                 mask |= 0x01;
>>
>>         args[0] = LLVMConstInt(ctx->i32, mask, false);
>>         ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
>> -                           ctx->voidt, args, 9, 0);
>> +                              ctx->voidt, args, 9,
>> +                              AC_FUNC_ATTR_LEGACY);
>>  }
>>
>>  static void
>>  handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
>>  {
>>         unsigned index = 0;
>>         LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
>>
>>         for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
>>                 LLVMValueRef values[4];
>> @@ -5021,21 +5039,22 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context
>> *ctx)
>>
>>                 for (unsigned j = 0; j < 4; j++) {
>>                         LLVMValueRef value;
>>                         args[2] = LLVMConstInt(ctx->i32,
>>                                                (idx * 4 + j) *
>>                                                ctx->gs_max_out_vertices *
>> 16 * 4, false);
>>
>>                         value = ac_emit_llvm_intrinsic(&ctx->ac,
>>
>> "llvm.SI.buffer.load.dword.i32.i32",
>>                                                        ctx->i32, args, 9,
>> -
>> AC_FUNC_ATTR_READONLY);
>> +
>> AC_FUNC_ATTR_READONLY |
>> +
>> AC_FUNC_ATTR_LEGACY);
>>
>>                         LLVMBuildStore(ctx->builder,
>>                                        to_float(ctx, value),
>> ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
>>                 }
>>                 idx++;
>>         }
>>         handle_vs_outputs_post(ctx);
>>  }
>>
>>  void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
>> diff --git a/src/gallium/auxiliary/draw/draw_llvm.c
>> b/src/gallium/auxiliary/draw/draw_llvm.c
>> index 8952dc8..586a9be 100644
>> --- a/src/gallium/auxiliary/draw/draw_llvm.c
>> +++ b/src/gallium/auxiliary/draw/draw_llvm.c
>> @@ -1588,21 +1588,22 @@ draw_llvm_generate(struct draw_llvm *llvm, struct
>> draw_llvm_variant *variant)
>>
>>     func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
>>                                  arg_types, num_arg_types, 0);
>>
>>     variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
>>     variant->function = variant_func;
>>
>>     LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
>>     for (i = 0; i < num_arg_types; ++i)
>>        if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
>> -         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
>> +         lp_add_function_attr(context, variant_func, i + 1,
>> +                              LP_FUNC_ATTR_NOALIAS);
>>
>>     context_ptr               = LLVMGetParam(variant_func, 0);
>>     io_ptr                    = LLVMGetParam(variant_func, 1);
>>     vbuffers_ptr              = LLVMGetParam(variant_func, 2);
>>     count                     = LLVMGetParam(variant_func, 3);
>>     /*
>>      * XXX: the maxelt part is unused. Not really useful, since we cannot
>>      * get index buffer overflows due to vsplit (which provides its own
>>      * elts buffer, with a different size than what's passed in here).
>>      */
>> @@ -2262,21 +2263,22 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
>>     func_type = LLVMFunctionType(int32_type, arg_types,
>> ARRAY_SIZE(arg_types), 0);
>>
>>     variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
>>
>>     variant->function = variant_func;
>>
>>     LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
>>
>>     for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
>>        if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
>> -         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
>> +         lp_add_function_attr(context, variant_func, i + 1,
>> +                              LP_FUNC_ATTR_NOALIAS);
>>
>>     context_ptr               = LLVMGetParam(variant_func, 0);
>>     input_array               = LLVMGetParam(variant_func, 1);
>>     io_ptr                    = LLVMGetParam(variant_func, 2);
>>     num_prims                 = LLVMGetParam(variant_func, 3);
>>     system_values.instance_id = LLVMGetParam(variant_func, 4);
>>     prim_id_ptr               = LLVMGetParam(variant_func, 5);
>>     system_values.invocation_id = LLVMGetParam(variant_func, 6);
>>
>>     lp_build_name(context_ptr, "context");
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
>> index 049671a..1b50e68 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
>> @@ -152,83 +152,100 @@ static const char *attr_to_str(enum lp_func_attr
>> attr)
>>     case LP_FUNC_ATTR_READONLY: return "readonly";
>>     default:
>>        _debug_printf("Unhandled function attribute: %x\n", attr);
>>        return 0;
>>     }
>>  }
>>
>>  #endif
>>
>>  void
>> -lp_add_function_attr(LLVMValueRef function,
>> -                     int attr_idx,
>> -                     enum lp_func_attr attr)
>> +lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
>
>
> If function is not always a function, then it would be better to rename the
> parameter to functionOrCall or something like that.
>
>> +                     int attr_idx, enum lp_func_attr attr)
>>  {
>>
>>  #if HAVE_LLVM < 0x0400
>>     LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr);
>>     if (attr_idx == -1) {
>>        LLVMAddFunctionAttr(function, llvm_attr);
>>     } else {
>>        LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
>>     }
>>  #else
>> -   LLVMContextRef context =
>> LLVMGetModuleContext(LLVMGetGlobalParent(function));
>
>
> Even when LLVMIsAFunction(function) is false, we could still get the
> LLVMContextRef:
>
>    LLVMModuleRef module;
>    if (LLVMIsAFunction(functionOrCall)) {
>       module = LLVMGetGlobalParent(functionOrCall);
>    } else {
>       LLVMBasicBlockRef bb = LLVMValueAsBasicBlock(functionOrCall);

The correct function here is LLVMGetInstructionParent, not LLVMValueAsBasicBlock.

>       LLVMValueRef function = LLVMGetBasicBlockParent(bb)
>       module = LLVMGetGlobalParent(function);
>
>    }
>    LLVMContextRef context = LLVMGetModuleContext(module);
>
> This would allow keeping the lp_add_function_attr prototype unchanged.
>
> Otherwise looks good to me.
>
> Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

Thanks.

Marek


More information about the mesa-dev mailing list