[Mesa-dev] [PATCH 2/2] gallivm, ac: add function attributes at call sites instead of declarations
Mike Lothian
mike at fireburn.co.uk
Wed Mar 1 22:34:40 UTC 2017
Hi,
I think this patch is causing build problems for me:
/var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c:
In function ‘create_llvm_function’:
/var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c:265:4:
error: implicit declaration of function ‘ac_add_function_attr’
[-Werror=implicit-function-declaration]
ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL);
^~~~~~~~~~~~~~~~~~~~
/var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c:
In function ‘visit_interp’:
/var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c:3022:11:
warning: ‘location’ may be used uninitialized in this function
[-Wmaybe-uninitialized]
unsigned location;
^~~~~~~~
/var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c:3089:10:
warning: ‘src_c1’ may be used uninitialized in this function
[-Wmaybe-uninitialized]
temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c:3086:10:
warning: ‘src_c0’ may be used uninitialized in this function
[-Wmaybe-uninitialized]
temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c:3044:12:
warning: ‘src0’ may be used uninitialized in this function
[-Wmaybe-uninitialized]
src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0,
ctx->i32one, ""));
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors
Was it tested?
Mike
On Wed, 1 Mar 2017 at 17:22 Marek Olšák <maraeo at gmail.com> wrote:
> On Wed, Mar 1, 2017 at 5:29 PM, Jose Fonseca <jfonseca at vmware.com> wrote:
> > On 23/02/17 00:01, Marek Olšák wrote:
> >>
> >> From: Marek Olšák <marek.olsak at amd.com>
> >>
> >> They can vary at call sites if the intrinsic is NOT a legacy SI
> intrinsic.
> >> We need this to force readnone or inaccessiblememonly on some amdgcn
> >> intrinsics.
> >>
> >> This is only used with LLVM 4.0 and later. Intrinsics only used with
> >> LLVM <= 3.9 don't need the LEGACY flag.
> >>
> >> gallivm and ac code is in the same patch, because splitting would be
> >> more complicated with all the LEGACY uses all over the place.
> >> ---
> >> src/amd/common/ac_llvm_build.c | 23 ++++----
> >> src/amd/common/ac_llvm_util.c | 31 +++++++----
> >> src/amd/common/ac_llvm_util.h | 17 +++---
> >> src/amd/common/ac_nir_to_llvm.c | 63
> >> ++++++++++++++--------
> >> src/gallium/auxiliary/draw/draw_llvm.c | 6 ++-
> >> src/gallium/auxiliary/gallivm/lp_bld_intr.c | 51
> ++++++++++++------
> >> src/gallium/auxiliary/gallivm/lp_bld_intr.h | 11 ++--
> >> src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 3 +-
> >> src/gallium/drivers/llvmpipe/lp_state_fs.c | 3 +-
> >> src/gallium/drivers/llvmpipe/lp_state_setup.c | 7 +--
> >> src/gallium/drivers/radeonsi/si_shader.c | 64
> >> ++++++++++++++---------
> >> src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 14 +++--
> >> 12 files changed, 184 insertions(+), 109 deletions(-)
> >>
> >> diff --git a/src/amd/common/ac_llvm_build.c
> >> b/src/amd/common/ac_llvm_build.c
> >> index 2f25b14..5c8b7f7 100644
> >> --- a/src/amd/common/ac_llvm_build.c
> >> +++ b/src/amd/common/ac_llvm_build.c
> >> @@ -75,47 +75,50 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
> >> LLVMContextRef context)
> >>
> "amdgpu.uniform",
> >> 14);
> >>
> >> ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
> >> }
> >>
> >> LLVMValueRef
> >> ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
> >> LLVMTypeRef return_type, LLVMValueRef *params,
> >> unsigned param_count, unsigned attrib_mask)
> >> {
> >> - LLVMValueRef function;
> >> + LLVMValueRef function, call;
> >> + bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
> >> + !(attrib_mask & AC_FUNC_ATTR_LEGACY);
> >>
> >> function = LLVMGetNamedFunction(ctx->module, name);
> >> if (!function) {
> >> LLVMTypeRef param_types[32], function_type;
> >> unsigned i;
> >>
> >> assert(param_count <= 32);
> >>
> >> for (i = 0; i < param_count; ++i) {
> >> assert(params[i]);
> >> param_types[i] = LLVMTypeOf(params[i]);
> >> }
> >> function_type =
> >> LLVMFunctionType(return_type, param_types,
> >> param_count, 0);
> >> function = LLVMAddFunction(ctx->module, name,
> >> function_type);
> >>
> >> LLVMSetFunctionCallConv(function, LLVMCCallConv);
> >> LLVMSetLinkage(function, LLVMExternalLinkage);
> >>
> >> - attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
> >> - while (attrib_mask) {
> >> - enum ac_func_attr attr = 1u <<
> >> u_bit_scan(&attrib_mask);
> >> - ac_add_function_attr(function, -1, attr);
> >> - }
> >> + if (!set_callsite_attrs)
> >> + ac_add_func_attributes(ctx->context, function,
> >> attrib_mask);
> >> }
> >> - return LLVMBuildCall(ctx->builder, function, params,
> param_count,
> >> "");
> >> +
> >> + call = LLVMBuildCall(ctx->builder, function, params,
> param_count,
> >> "");
> >> + if (set_callsite_attrs)
> >> + ac_add_func_attributes(ctx->context, call, attrib_mask);
> >> + return call;
> >> }
> >>
> >> LLVMValueRef
> >> ac_build_gather_values_extended(struct ac_llvm_context *ctx,
> >> LLVMValueRef *values,
> >> unsigned value_count,
> >> unsigned value_stride,
> >> bool load)
> >> {
> >> LLVMBuilderRef builder = ctx->builder;
> >> @@ -524,21 +527,22 @@ ac_build_tbuffer_store(struct ac_llvm_context
> *ctx,
> >> /* The instruction offset field has 12 bits */
> >> assert(offen || inst_offset < (1 << 12));
> >>
> >> /* The intrinsic is overloaded, we need to add a type suffix for
> >> overloading to work. */
> >> unsigned func = CLAMP(num_channels, 1, 3) - 1;
> >> const char *types[] = {"i32", "v2i32", "v4i32"};
> >> char name[256];
> >> snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s",
> >> types[func]);
> >>
> >> ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
> >> - args, ARRAY_SIZE(args), 0);
> >> + args, ARRAY_SIZE(args),
> >> + AC_FUNC_ATTR_LEGACY);
> >> }
> >>
> >> void
> >> ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
> >> LLVMValueRef rsrc,
> >> LLVMValueRef vdata,
> >> unsigned num_channels,
> >> LLVMValueRef vaddr,
> >> LLVMValueRef soffset,
> >> unsigned inst_offset)
> >> @@ -836,12 +840,13 @@ LLVMValueRef ac_emit_clamp(struct ac_llvm_context
> >> *ctx, LLVMValueRef value)
> >>
> >> const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." :
> >> "llvm.AMDIL.clamp.";
> >> LLVMValueRef args[3] = {
> >> value,
> >> LLVMConstReal(ctx->f32, 0),
> >> LLVMConstReal(ctx->f32, 1),
> >> };
> >>
> >> return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3,
> >> - AC_FUNC_ATTR_READNONE);
> >> + AC_FUNC_ATTR_READNONE |
> >> + AC_FUNC_ATTR_LEGACY);
> >> }
> >> diff --git a/src/amd/common/ac_llvm_util.c
> b/src/amd/common/ac_llvm_util.c
> >> index be127c5..fb525dd 100644
> >> --- a/src/amd/common/ac_llvm_util.c
> >> +++ b/src/amd/common/ac_llvm_util.c
> >> @@ -17,21 +17,21 @@
> >> * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
> OR
> >> THE
> >> * USE OR OTHER DEALINGS IN THE SOFTWARE.
> >> *
> >> * The above copyright notice and this permission notice (including the
> >> * next paragraph) shall be included in all copies or substantial
> >> portions
> >> * of the Software.
> >> *
> >> */
> >> /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
> >> #include "ac_llvm_util.h"
> >> -
> >> +#include "util/bitscan.h"
> >> #include <llvm-c/Core.h>
> >>
> >> #include "c11/threads.h"
> >>
> >> #include <assert.h>
> >> #include <stdio.h>
> >> #include <string.h>
> >>
> >> static void ac_init_llvm_target()
> >> {
> >> @@ -173,40 +173,53 @@ static const char *attr_to_str(enum ac_func_attr
> >> attr)
> >> case AC_FUNC_ATTR_READNONE: return "readnone";
> >> case AC_FUNC_ATTR_READONLY: return "readonly";
> >> default:
> >> fprintf(stderr, "Unhandled function attribute: %x\n", attr);
> >> return 0;
> >> }
> >> }
> >>
> >> #endif
> >>
> >> -void
> >> -ac_add_function_attr(LLVMValueRef function,
> >> - int attr_idx,
> >> - enum ac_func_attr attr)
> >> +static void
> >> +ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
> >> + int attr_idx, enum ac_func_attr attr)
> >> {
> >> -
> >> #if HAVE_LLVM < 0x0400
> >> LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
> >> if (attr_idx == -1) {
> >> LLVMAddFunctionAttr(function, llvm_attr);
> >> } else {
> >> LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1),
> llvm_attr);
> >> }
> >> #else
> >> - LLVMContextRef context =
> >> LLVMGetModuleContext(LLVMGetGlobalParent(function));
> >> const char *attr_name = attr_to_str(attr);
> >> unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
> >>
> strlen(attr_name));
> >> - LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context,
> kind_id,
> >> 0);
> >> - LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
> >> + LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id,
> 0);
> >> +
> >> + if (LLVMIsAFunction(function))
> >> + LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
> >> + else
> >> + LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
> >> #endif
> >> }
> >>
> >> +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
> >> + unsigned attrib_mask)
> >> +{
> >> + attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
> >> + attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
> >> +
> >> + while (attrib_mask) {
> >> + enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
> >> + ac_add_function_attr(ctx, function, -1, attr);
> >> + }
> >> +}
> >> +
> >> void
> >> ac_dump_module(LLVMModuleRef module)
> >> {
> >> char *str = LLVMPrintModuleToString(module);
> >> fprintf(stderr, "%s", str);
> >> LLVMDisposeMessage(str);
> >> }
> >> diff --git a/src/amd/common/ac_llvm_util.h
> b/src/amd/common/ac_llvm_util.h
> >> index 93d3d27..4fe4ab4 100644
> >> --- a/src/amd/common/ac_llvm_util.h
> >> +++ b/src/amd/common/ac_llvm_util.h
> >> @@ -34,28 +34,29 @@ extern "C" {
> >> #endif
> >>
> >> enum ac_func_attr {
> >> AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
> >> AC_FUNC_ATTR_BYVAL = (1 << 1),
> >> AC_FUNC_ATTR_INREG = (1 << 2),
> >> AC_FUNC_ATTR_NOALIAS = (1 << 3),
> >> AC_FUNC_ATTR_NOUNWIND = (1 << 4),
> >> AC_FUNC_ATTR_READNONE = (1 << 5),
> >> AC_FUNC_ATTR_READONLY = (1 << 6),
> >> +
> >> + /* Legacy intrinsic that needs attributes on function
> declarations
> >> + * and they must match the internal LLVM definition exactly,
> >> otherwise
> >> + * intrinsic selection fails.
> >> + */
> >> + AC_FUNC_ATTR_LEGACY = (1u << 31),
> >> };
> >>
> >> LLVMTargetMachineRef ac_create_target_machine(enum radeon_family
> family,
> >> bool supports_spill);
> >>
> >> void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
> >> bool ac_is_sgpr_param(LLVMValueRef param);
> >> -
> >> -void
> >> -ac_add_function_attr(LLVMValueRef function,
> >> - int attr_idx,
> >> - enum ac_func_attr attr);
> >> -
> >> -void
> >> -ac_dump_module(LLVMModuleRef module);
> >> +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
> >> + unsigned attrib_mask);
> >> +void ac_dump_module(LLVMModuleRef module);
> >>
> >> #ifdef __cplusplus
> >> }
> >> #endif
> >> diff --git a/src/amd/common/ac_nir_to_llvm.c
> >> b/src/amd/common/ac_nir_to_llvm.c
> >> index b3dc63c..4ec19d5 100644
> >> --- a/src/amd/common/ac_nir_to_llvm.c
> >> +++ b/src/amd/common/ac_nir_to_llvm.c
> >> @@ -1039,26 +1039,27 @@ static LLVMValueRef emit_imul_high(struct
> >> nir_to_llvm_context *ctx,
> >> src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
> >> src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
> >>
> >> dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
> >> dst64 = LLVMBuildAShr(ctx->builder, dst64,
> LLVMConstInt(ctx->i64,
> >> 32, false), "");
> >> result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
> >> return result;
> >> }
> >>
> >> static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context
> >> *ctx,
> >> - const char *intrin,
> >> + const char *intrin, unsigned
> >> attr_mask,
> >> LLVMValueRef srcs[3])
> >> {
> >> LLVMValueRef result;
> >> LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
> >> srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
> >> - result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32,
> srcs,
> >> 3, AC_FUNC_ATTR_READNONE);
> >> + result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32,
> srcs,
> >> 3,
> >> + AC_FUNC_ATTR_READNONE |
> >> attr_mask);
> >>
> >> result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result,
> >> "");
> >> return result;
> >> }
> >>
> >> static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context
> *ctx,
> >> LLVMValueRef src0, LLVMValueRef
> >> src1,
> >> LLVMValueRef src2, LLVMValueRef
> >> src3)
> >> {
> >> LLVMValueRef bfi_args[3], result;
> >> @@ -1418,24 +1419,26 @@ static void visit_alu(struct nir_to_llvm_context
> >> *ctx, nir_alu_instr *instr)
> >> break;
> >> case nir_op_fmin:
> >> result = emit_intrin_2f_param(ctx, "llvm.minnum",
> >> to_float_type(ctx,
> >> def_type), src[0], src[1]);
> >> break;
> >> case nir_op_ffma:
> >> result = emit_intrin_3f_param(ctx, "llvm.fma",
> >> to_float_type(ctx,
> >> def_type), src[0], src[1], src[2]);
> >> break;
> >> case nir_op_ibitfield_extract:
> >> - result = emit_bitfield_extract(ctx,
> "llvm.AMDGPU.bfe.i32",
> >> src);
> >> + result = emit_bitfield_extract(ctx,
> "llvm.AMDGPU.bfe.i32",
> >> + AC_FUNC_ATTR_LEGACY,
> src);
> >> break;
> >> case nir_op_ubitfield_extract:
> >> - result = emit_bitfield_extract(ctx,
> "llvm.AMDGPU.bfe.u32",
> >> src);
> >> + result = emit_bitfield_extract(ctx,
> "llvm.AMDGPU.bfe.u32",
> >> + AC_FUNC_ATTR_LEGACY,
> src);
> >> break;
> >> case nir_op_bitfield_insert:
> >> result = emit_bitfield_insert(ctx, src[0], src[1],
> src[2],
> >> src[3]);
> >> break;
> >> case nir_op_bitfield_reverse:
> >> result = ac_emit_llvm_intrinsic(&ctx->ac,
> >> "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
> >> break;
> >> case nir_op_bit_count:
> >> result = ac_emit_llvm_intrinsic(&ctx->ac,
> >> "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
> >> break;
> >> @@ -1635,22 +1638,23 @@ static LLVMValueRef
> >> radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
> >> txq_args[txq_arg_count++] = tinfo->args[1];
> >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf,
> >> 0); /* dmask */
> >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0,
> 0);
> >> /* unorm */
> >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0,
> 0);
> >> /* r128 */
> >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ?
> 1
> >> : 0, 0);
> >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0,
> 0);
> >> /* glc */
> >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0,
> 0);
> >> /* slc */
> >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0,
> 0);
> >> /* tfe */
> >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0,
> 0);
> >> /* lwe */
> >> size = ac_emit_llvm_intrinsic(&ctx->ac,
> >> "llvm.SI.getresinfo.i32", ctx->v4i32,
> >> - txq_args, txq_arg_count,
> >> - AC_FUNC_ATTR_READNONE);
> >> + txq_args, txq_arg_count,
> >> + AC_FUNC_ATTR_READNONE |
> >> + AC_FUNC_ATTR_LEGACY);
> >>
> >> for (c = 0; c < 2; c++) {
> >> half_texel[c] =
> >> LLVMBuildExtractElement(ctx->builder, size,
> >>
> >> LLVMConstInt(ctx->i32, c, false), "");
> >> half_texel[c] = LLVMBuildUIToFP(ctx->builder,
> >> half_texel[c], ctx->f32, "");
> >> half_texel[c] = ac_emit_fdiv(&ctx->ac,
> >> ctx->f32one, half_texel[c]);
> >> half_texel[c] = LLVMBuildFMul(ctx->builder,
> >> half_texel[c],
> >>
> >> LLVMConstReal(ctx->f32, -0.5), "");
> >> }
> >> }
> >> @@ -1660,21 +1664,22 @@ static LLVMValueRef
> >> radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
> >> LLVMValueRef index = LLVMConstInt(ctx->i32,
> >> coord_vgpr_index + c, 0);
> >> tmp = LLVMBuildExtractElement(ctx->builder, coord,
> index,
> >> "");
> >> tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
> >> tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c],
> "");
> >> tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
> >> coord = LLVMBuildInsertElement(ctx->builder, coord, tmp,
> >> index, "");
> >> }
> >>
> >> tinfo->args[0] = coord;
> >> return ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
> >> tinfo->dst_type, tinfo->args, tinfo->arg_count,
> >> - AC_FUNC_ATTR_READNONE |
> >> AC_FUNC_ATTR_NOUNWIND);
> >> + AC_FUNC_ATTR_READNONE |
> >> AC_FUNC_ATTR_NOUNWIND |
> >> + AC_FUNC_ATTR_LEGACY);
> >>
> >> }
> >>
> >> static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context
> *ctx,
> >> nir_tex_instr *instr,
> >> struct ac_tex_info *tinfo)
> >> {
> >> const char *name = "llvm.SI.image.sample";
> >> const char *infix = "";
> >> char intr_name[127];
> >> @@ -1728,21 +1733,22 @@ static LLVMValueRef build_tex_intrinsic(struct
> >> nir_to_llvm_context *ctx,
> >> has_offset ? ".o" : "", type);
> >>
> >> if (instr->op == nir_texop_tg4) {
> >> enum glsl_base_type stype =
> >> glsl_get_sampler_result_type(instr->texture->var->type);
> >> if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
> >> return radv_lower_gather4_integer(ctx, tinfo,
> >> instr, intr_name,
> >>
> (int)has_offset
> >> + (int)is_shadow);
> >> }
> >> }
> >> return ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
> >> tinfo->dst_type, tinfo->args, tinfo->arg_count,
> >> - AC_FUNC_ATTR_READNONE |
> >> AC_FUNC_ATTR_NOUNWIND);
> >> + AC_FUNC_ATTR_READNONE |
> >> AC_FUNC_ATTR_NOUNWIND |
> >> + AC_FUNC_ATTR_LEGACY);
> >>
> >> }
> >>
> >> static LLVMValueRef visit_vulkan_resource_index(struct
> >> nir_to_llvm_context *ctx,
> >> nir_intrinsic_instr
> >> *instr)
> >> {
> >> LLVMValueRef index = get_src(ctx, instr->src[0]);
> >> unsigned desc_set = nir_intrinsic_desc_set(instr);
> >> unsigned binding = nir_intrinsic_binding(instr);
> >> LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
> >> @@ -2006,21 +2012,23 @@ static LLVMValueRef visit_load_ubo_buffer(struct
> >> nir_to_llvm_context *ctx,
> >> if (instr->dest.ssa.bit_size == 64)
> >> num_components *= 2;
> >>
> >> for (unsigned i = 0; i < num_components; ++i) {
> >> LLVMValueRef params[] = {
> >> rsrc,
> >> LLVMBuildAdd(ctx->builder,
> LLVMConstInt(ctx->i32,
> >> 4 * i, 0),
> >> offset, "")
> >> };
> >> results[i] = ac_emit_llvm_intrinsic(&ctx->ac,
> >> "llvm.SI.load.const", ctx->f32,
> >> - params, 2,
> >> AC_FUNC_ATTR_READNONE);
> >> + params, 2,
> >> +
> AC_FUNC_ATTR_READNONE
> >> |
> >> +
> AC_FUNC_ATTR_LEGACY);
> >> }
> >>
> >>
> >> ret = ac_build_gather_values(&ctx->ac, results,
> >> instr->num_components);
> >> return LLVMBuildBitCast(ctx->builder, ret,
> >> get_def_type(ctx, &instr->dest.ssa),
> "");
> >> }
> >>
> >> static void
> >> radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
> >> @@ -2103,21 +2111,23 @@ load_gs_input(struct nir_to_llvm_context *ctx,
> >> args[1] = vtx_offset;
> >> args[2] = LLVMConstInt(ctx->i32, (param * 4 + i +
> >> const_index) * 256, false);
> >> args[3] = ctx->i32zero;
> >> args[4] = ctx->i32one; /* OFFEN */
> >> args[5] = ctx->i32zero; /* IDXEN */
> >> args[6] = ctx->i32one; /* GLC */
> >> args[7] = ctx->i32zero; /* SLC */
> >> args[8] = ctx->i32zero; /* TFE */
> >>
> >> value[i] = ac_emit_llvm_intrinsic(&ctx->ac,
> >> "llvm.SI.buffer.load.dword.i32.i32",
> >> - ctx->i32, args, 9,
> >> AC_FUNC_ATTR_READONLY);
> >> + ctx->i32, args, 9,
> >> + AC_FUNC_ATTR_READONLY
> |
> >> + AC_FUNC_ATTR_LEGACY);
> >> }
> >> result = ac_build_gather_values(&ctx->ac, value,
> >> instr->num_components);
> >>
> >> return result;
> >> }
> >>
> >> static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
> >> nir_intrinsic_instr *instr)
> >> {
> >> LLVMValueRef values[8];
> >> @@ -2685,21 +2695,23 @@ static LLVMValueRef visit_image_size(struct
> >> nir_to_llvm_context *ctx,
> >> params[2] = LLVMConstInt(ctx->i32, 15, false);
> >> params[3] = ctx->i32zero;
> >> params[4] = ctx->i32zero;
> >> params[5] = da ? ctx->i32one : ctx->i32zero;
> >> params[6] = ctx->i32zero;
> >> params[7] = ctx->i32zero;
> >> params[8] = ctx->i32zero;
> >> params[9] = ctx->i32zero;
> >>
> >> res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32",
> >> ctx->v4i32,
> >> - params, 10, AC_FUNC_ATTR_READNONE);
> >> + params, 10,
> >> + AC_FUNC_ATTR_READNONE |
> >> + AC_FUNC_ATTR_LEGACY);
> >>
> >> if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
> >> glsl_sampler_type_is_array(type)) {
> >> LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
> >> LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
> >> LLVMValueRef z = LLVMBuildExtractElement(ctx->builder,
> >> res, two, "");
> >> z = LLVMBuildSDiv(ctx->builder, z, six, "");
> >> res = LLVMBuildInsertElement(ctx->builder, res, z, two,
> >> "");
> >> }
> >> return res;
> >> @@ -2729,21 +2741,21 @@ static void emit_discard_if(struct
> >> nir_to_llvm_context *ctx,
> >>
> >> cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
> >> get_src(ctx, instr->src[0]),
> >> ctx->i32zero, "");
> >>
> >> cond = LLVMBuildSelect(ctx->builder, cond,
> >> LLVMConstReal(ctx->f32, -1.0f),
> >> ctx->f32zero, "");
> >> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
> >> ctx->voidt,
> >> - &cond, 1, 0);
> >> + &cond, 1, AC_FUNC_ATTR_LEGACY);
> >> }
> >>
> >> static LLVMValueRef
> >> visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
> >> {
> >> LLVMValueRef result;
> >> LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
> >> result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
> >> LLVMConstInt(ctx->i32, 0xfc0, false), "");
> >>
> >> @@ -2984,21 +2996,21 @@ visit_emit_vertex(struct nir_to_llvm_context
> *ctx,
> >> * have any effect, and GS threads have no externally observable
> >> * effects other than emitting vertices.
> >> */
> >> can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT,
> gs_next_vertex,
> >> LLVMConstInt(ctx->i32,
> >> ctx->gs_max_out_vertices, false), "");
> >>
> >> kill = LLVMBuildSelect(ctx->builder, can_emit,
> >> LLVMConstReal(ctx->f32, 1.0f),
> >> LLVMConstReal(ctx->f32, -1.0f), "");
> >> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
> >> - ctx->voidt, &kill, 1, 0);
> >> + ctx->voidt, &kill, 1, AC_FUNC_ATTR_LEGACY);
> >>
> >> /* loop num outputs */
> >> idx = 0;
> >> for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
> >> LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
> >> if (!(ctx->output_mask & (1ull << i)))
> >> continue;
> >>
> >> for (unsigned j = 0; j < 4; j++) {
> >> LLVMValueRef out_val =
> LLVMBuildLoad(ctx->builder,
> >> @@ -3144,21 +3156,21 @@ static void visit_intrinsic(struct
> >> nir_to_llvm_context *ctx,
> >> case nir_intrinsic_image_atomic_comp_swap:
> >> result = visit_image_atomic(ctx, instr);
> >> break;
> >> case nir_intrinsic_image_size:
> >> result = visit_image_size(ctx, instr);
> >> break;
> >> case nir_intrinsic_discard:
> >> ctx->shader_info->fs.can_discard = true;
> >> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
> >> ctx->voidt,
> >> - NULL, 0, 0);
> >> + NULL, 0, AC_FUNC_ATTR_LEGACY);
> >> break;
> >> case nir_intrinsic_discard_if:
> >> emit_discard_if(ctx, instr);
> >> break;
> >> case nir_intrinsic_memory_barrier:
> >> emit_waitcnt(ctx);
> >> break;
> >> case nir_intrinsic_barrier:
> >> emit_barrier(ctx);
> >> break;
> >> @@ -3924,21 +3936,22 @@ handle_vs_input_decl(struct nir_to_llvm_context
> >> *ctx,
> >>
> >> for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
> >> t_offset = LLVMConstInt(ctx->i32, index + i, false);
> >>
> >> t_list = ac_build_indexed_load_const(&ctx->ac,
> t_list_ptr,
> >> t_offset);
> >> args[0] = t_list;
> >> args[1] = LLVMConstInt(ctx->i32, 0, false);
> >> args[2] = buffer_index;
> >> input = ac_emit_llvm_intrinsic(&ctx->ac,
> >> "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
> >> - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
> >> + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
> >> + AC_FUNC_ATTR_LEGACY);
> >>
> >> for (unsigned chan = 0; chan < 4; chan++) {
> >> LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32,
> >> chan, false);
> >> ctx->inputs[radeon_llvm_reg_index_soa(idx,
> chan)]
> >> =
> >> to_integer(ctx,
> >> LLVMBuildExtractElement(ctx->builder,
> >> input,
> llvm_chan,
> >> ""));
> >> }
> >> }
> >> }
> >>
> >> @@ -4257,22 +4270,23 @@ si_llvm_init_export_args(struct
> >> nir_to_llvm_context *ctx,
> >> args[4] = ctx->i32one;
> >>
> >> for (unsigned chan = 0; chan < 2; chan++) {
> >> LLVMValueRef pack_args[2] = {
> >> values[2 * chan],
> >> values[2 * chan + 1]
> >> };
> >> LLVMValueRef packed;
> >>
> >> packed =
> ac_emit_llvm_intrinsic(&ctx->ac,
> >> "llvm.SI.packf16",
> >> - ctx->i32,
> >> pack_args, 2,
> >> -
> >> AC_FUNC_ATTR_READNONE);
> >> +
> ctx->i32,
> >> pack_args, 2,
> >> +
> >> AC_FUNC_ATTR_READNONE |
> >> +
> >> AC_FUNC_ATTR_LEGACY);
> >> args[chan + 5] = packed;
> >> }
> >> break;
> >>
> >> case V_028714_SPI_SHADER_UNORM16_ABGR:
> >> for (unsigned chan = 0; chan < 4; chan++) {
> >> val[chan] = emit_float_saturate(ctx,
> >> values[chan], 0, 1);
> >> val[chan] = LLVMBuildFMul(ctx->builder,
> >> val[chan],
> >>
> >> LLVMConstReal(ctx->f32, 65535), "");
> >> val[chan] = LLVMBuildFAdd(ctx->builder,
> >> val[chan],
> >> @@ -4443,21 +4457,22 @@ handle_vs_outputs_post(struct
> nir_to_llvm_context
> >> *ctx)
> >> si_llvm_init_export_args(ctx, values, target, args);
> >>
> >> if (target >= V_008DFC_SQ_EXP_POS &&
> >> target <= (V_008DFC_SQ_EXP_POS + 3)) {
> >> memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
> >> args, sizeof(args));
> >> } else {
> >> ac_emit_llvm_intrinsic(&ctx->ac,
> >> "llvm.SI.export",
> >> ctx->voidt,
> >> - args, 9, 0);
> >> + args, 9,
> >> + AC_FUNC_ATTR_LEGACY);
> >> }
> >> }
> >>
> >> /* We need to add the position output manually if it's missing.
> */
> >> if (!pos_args[0][0]) {
> >> pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
> >> pos_args[0][1] = ctx->i32zero; /* EXEC mask */
> >> pos_args[0][2] = ctx->i32zero; /* last export? */
> >> pos_args[0][3] = LLVMConstInt(ctx->i32,
> >> V_008DFC_SQ_EXP_POS, false);
> >> pos_args[0][4] = ctx->i32zero; /* COMPR flag */
> >> @@ -4498,21 +4513,22 @@ handle_vs_outputs_post(struct
> nir_to_llvm_context
> >> *ctx)
> >> if (!pos_args[i][0])
> >> continue;
> >>
> >> /* Specify the target we are exporting */
> >> pos_args[i][3] = LLVMConstInt(ctx->i32,
> >> V_008DFC_SQ_EXP_POS + pos_idx++, false);
> >> if (pos_idx == num_pos_exports)
> >> pos_args[i][2] = ctx->i32one;
> >> ac_emit_llvm_intrinsic(&ctx->ac,
> >> "llvm.SI.export",
> >> ctx->voidt,
> >> - pos_args[i], 9, 0);
> >> + pos_args[i], 9,
> >> + AC_FUNC_ATTR_LEGACY);
> >> }
> >>
> >> ctx->shader_info->vs.pos_exports = num_pos_exports;
> >> ctx->shader_info->vs.param_exports = param_count;
> >> }
> >>
> >> static void
> >> handle_es_outputs_post(struct nir_to_llvm_context *ctx)
> >> {
> >> int j;
> >> @@ -4554,21 +4570,22 @@ si_export_mrt_color(struct nir_to_llvm_context
> >> *ctx,
> >> si_llvm_init_export_args(ctx, color, param,
> >> args);
> >>
> >> if (is_last) {
> >> args[1] = ctx->i32one; /* whether the EXEC mask is valid
> >> */
> >> args[2] = ctx->i32one; /* DONE bit */
> >> } else if (args[0] == ctx->i32zero)
> >> return; /* unnecessary NULL export */
> >>
> >> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
> >> - ctx->voidt, args, 9, 0);
> >> + ctx->voidt, args, 9,
> >> + AC_FUNC_ATTR_LEGACY);
> >> }
> >>
> >> static void
> >> si_export_mrt_z(struct nir_to_llvm_context *ctx,
> >> LLVMValueRef depth, LLVMValueRef stencil,
> >> LLVMValueRef samplemask)
> >> {
> >> LLVMValueRef args[9];
> >> unsigned mask = 0;
> >> args[1] = ctx->i32one; /* whether the EXEC mask is valid */
> >> @@ -4598,21 +4615,22 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
> >> }
> >>
> >> /* SI (except OLAND) has a bug that it only looks
> >> * at the X writemask component. */
> >> if (ctx->options->chip_class == SI &&
> >> ctx->options->family != CHIP_OLAND)
> >> mask |= 0x01;
> >>
> >> args[0] = LLVMConstInt(ctx->i32, mask, false);
> >> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
> >> - ctx->voidt, args, 9, 0);
> >> + ctx->voidt, args, 9,
> >> + AC_FUNC_ATTR_LEGACY);
> >> }
> >>
> >> static void
> >> handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
> >> {
> >> unsigned index = 0;
> >> LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
> >>
> >> for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
> >> LLVMValueRef values[4];
> >> @@ -5021,21 +5039,22 @@ ac_gs_copy_shader_emit(struct
> nir_to_llvm_context
> >> *ctx)
> >>
> >> for (unsigned j = 0; j < 4; j++) {
> >> LLVMValueRef value;
> >> args[2] = LLVMConstInt(ctx->i32,
> >> (idx * 4 + j) *
> >> ctx->gs_max_out_vertices
> *
> >> 16 * 4, false);
> >>
> >> value = ac_emit_llvm_intrinsic(&ctx->ac,
> >>
> >> "llvm.SI.buffer.load.dword.i32.i32",
> >> ctx->i32, args,
> 9,
> >> -
> >> AC_FUNC_ATTR_READONLY);
> >> +
> >> AC_FUNC_ATTR_READONLY |
> >> +
> >> AC_FUNC_ATTR_LEGACY);
> >>
> >> LLVMBuildStore(ctx->builder,
> >> to_float(ctx, value),
> >> ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
> >> }
> >> idx++;
> >> }
> >> handle_vs_outputs_post(ctx);
> >> }
> >>
> >> void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
> >> diff --git a/src/gallium/auxiliary/draw/draw_llvm.c
> >> b/src/gallium/auxiliary/draw/draw_llvm.c
> >> index 8952dc8..586a9be 100644
> >> --- a/src/gallium/auxiliary/draw/draw_llvm.c
> >> +++ b/src/gallium/auxiliary/draw/draw_llvm.c
> >> @@ -1588,21 +1588,22 @@ draw_llvm_generate(struct draw_llvm *llvm,
> struct
> >> draw_llvm_variant *variant)
> >>
> >> func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
> >> arg_types, num_arg_types, 0);
> >>
> >> variant_func = LLVMAddFunction(gallivm->module, func_name,
> func_type);
> >> variant->function = variant_func;
> >>
> >> LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
> >> for (i = 0; i < num_arg_types; ++i)
> >> if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
> >> - lp_add_function_attr(variant_func, i + 1,
> LP_FUNC_ATTR_NOALIAS);
> >> + lp_add_function_attr(context, variant_func, i + 1,
> >> + LP_FUNC_ATTR_NOALIAS);
> >>
> >> context_ptr = LLVMGetParam(variant_func, 0);
> >> io_ptr = LLVMGetParam(variant_func, 1);
> >> vbuffers_ptr = LLVMGetParam(variant_func, 2);
> >> count = LLVMGetParam(variant_func, 3);
> >> /*
> >> * XXX: the maxelt part is unused. Not really useful, since we
> cannot
> >> * get index buffer overflows due to vsplit (which provides its own
> >> * elts buffer, with a different size than what's passed in here).
> >> */
> >> @@ -2262,21 +2263,22 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
> >> func_type = LLVMFunctionType(int32_type, arg_types,
> >> ARRAY_SIZE(arg_types), 0);
> >>
> >> variant_func = LLVMAddFunction(gallivm->module, func_name,
> func_type);
> >>
> >> variant->function = variant_func;
> >>
> >> LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
> >>
> >> for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
> >> if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
> >> - lp_add_function_attr(variant_func, i + 1,
> LP_FUNC_ATTR_NOALIAS);
> >> + lp_add_function_attr(context, variant_func, i + 1,
> >> + LP_FUNC_ATTR_NOALIAS);
> >>
> >> context_ptr = LLVMGetParam(variant_func, 0);
> >> input_array = LLVMGetParam(variant_func, 1);
> >> io_ptr = LLVMGetParam(variant_func, 2);
> >> num_prims = LLVMGetParam(variant_func, 3);
> >> system_values.instance_id = LLVMGetParam(variant_func, 4);
> >> prim_id_ptr = LLVMGetParam(variant_func, 5);
> >> system_values.invocation_id = LLVMGetParam(variant_func, 6);
> >>
> >> lp_build_name(context_ptr, "context");
> >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> >> b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> >> index 049671a..1b50e68 100644
> >> --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
> >> @@ -152,83 +152,100 @@ static const char *attr_to_str(enum lp_func_attr
> >> attr)
> >> case LP_FUNC_ATTR_READONLY: return "readonly";
> >> default:
> >> _debug_printf("Unhandled function attribute: %x\n", attr);
> >> return 0;
> >> }
> >> }
> >>
> >> #endif
> >>
> >> void
> >> -lp_add_function_attr(LLVMValueRef function,
> >> - int attr_idx,
> >> - enum lp_func_attr attr)
> >> +lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
> >
> >
> > If function is not always a function, then it would be better to rename the
> > parameter to functionOrCall or something like that.
> >
> >> + int attr_idx, enum lp_func_attr attr)
> >> {
> >>
> >> #if HAVE_LLVM < 0x0400
> >> LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr);
> >> if (attr_idx == -1) {
> >> LLVMAddFunctionAttr(function, llvm_attr);
> >> } else {
> >> LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1),
> llvm_attr);
> >> }
> >> #else
> >> - LLVMContextRef context =
> >> LLVMGetModuleContext(LLVMGetGlobalParent(function));
> >
> >
> > Even when LLVMIsAFunction(function) is false, we could still get the
> > LLVMContextRef:
> >
> > LLVMModuleRef module;
> > if (LLVMIsAFunction(functionOrCall)) {
> > module = LLVMGetGlobalParent(functionOrCall);
> > } else {
> > LLVMBasicBlockRef bb = LLVMValueAsBasicBlock(functionOrCall);
>
> The correct function is LLVMGetInstructionParent.
>
> > LLVMValueRef function = LLVMGetBasicBlockParent(bb)
> > module = LLVMGetGlobalParent(function);
> >
> > }
> > LLVMContextRef context = LLVMGetModuleContext(module);
> >
> > This would allow the lp_add_function_attr prototype to be left unchanged.
> >
> > Otherwise looks good to me.
> >
> > Reviewed-by: Jose Fonseca <jfonseca at vmware.com>
>
> Thanks.
>
> Marek
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170301/04a7274f/attachment-0001.html>
More information about the mesa-dev mailing list