[Mesa-dev] [PATCH 2/2] gallivm, ac: add function attributes at call sites instead of declarations
Marek Olšák
maraeo at gmail.com
Thu Feb 23 00:01:51 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
Function attributes can vary at call sites if the intrinsic is NOT a legacy SI intrinsic.
We need this to force readnone or inaccessiblememonly on some amdgcn
intrinsics.
Call-site attributes are only used with LLVM 4.0 and later. Intrinsics used only
with LLVM <= 3.9 don't need the LEGACY flag.
The gallivm and ac changes are in the same patch, because splitting them would be
more complicated with the LEGACY flag uses spread all over the place.
---
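Illustration only, not part of the patch: a minimal sketch (assuming the LLVM >= 4.0
C API) of how the same enum attribute is attached either to a function declaration or
to an individual call site. The helper name is made up; ac_add_function_attr() /
lp_add_function_attr() in this patch do the equivalent.

    #include <string.h>
    #include <llvm-c/Core.h>

    /* Hypothetical helper mirroring ac_add_function_attr() in this patch.
     * "target" is either a function declaration or a call instruction. */
    static void add_enum_attr(LLVMContextRef ctx, LLVMValueRef target,
                              const char *name)
    {
        unsigned kind = LLVMGetEnumAttributeKindForName(name, strlen(name));
        LLVMAttributeRef attr = LLVMCreateEnumAttribute(ctx, kind, 0);

        if (LLVMIsAFunction(target))
            /* Declaration-level attribute: legacy SI intrinsics need it here,
             * and it must match LLVM's internal intrinsic definition. */
            LLVMAddAttributeAtIndex(target, LLVMAttributeFunctionIndex, attr);
        else
            /* Call-site attribute: can differ from call to call, which is
             * what allows forcing readnone on selected amdgcn intrinsics. */
            LLVMAddCallSiteAttribute(target, LLVMAttributeFunctionIndex, attr);
    }
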
src/amd/common/ac_llvm_build.c | 23 ++++----
src/amd/common/ac_llvm_util.c | 31 +++++++----
src/amd/common/ac_llvm_util.h | 17 +++---
src/amd/common/ac_nir_to_llvm.c | 63 ++++++++++++++--------
src/gallium/auxiliary/draw/draw_llvm.c | 6 ++-
src/gallium/auxiliary/gallivm/lp_bld_intr.c | 51 ++++++++++++------
src/gallium/auxiliary/gallivm/lp_bld_intr.h | 11 ++--
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 3 +-
src/gallium/drivers/llvmpipe/lp_state_fs.c | 3 +-
src/gallium/drivers/llvmpipe/lp_state_setup.c | 7 +--
src/gallium/drivers/radeonsi/si_shader.c | 64 ++++++++++++++---------
src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 14 +++--
12 files changed, 184 insertions(+), 109 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 2f25b14..5c8b7f7 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -75,47 +75,50 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
"amdgpu.uniform", 14);
ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
}
LLVMValueRef
ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
LLVMTypeRef return_type, LLVMValueRef *params,
unsigned param_count, unsigned attrib_mask)
{
- LLVMValueRef function;
+ LLVMValueRef function, call;
+ bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
+ !(attrib_mask & AC_FUNC_ATTR_LEGACY);
function = LLVMGetNamedFunction(ctx->module, name);
if (!function) {
LLVMTypeRef param_types[32], function_type;
unsigned i;
assert(param_count <= 32);
for (i = 0; i < param_count; ++i) {
assert(params[i]);
param_types[i] = LLVMTypeOf(params[i]);
}
function_type =
LLVMFunctionType(return_type, param_types, param_count, 0);
function = LLVMAddFunction(ctx->module, name, function_type);
LLVMSetFunctionCallConv(function, LLVMCCallConv);
LLVMSetLinkage(function, LLVMExternalLinkage);
- attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
- while (attrib_mask) {
- enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
- ac_add_function_attr(function, -1, attr);
- }
+ if (!set_callsite_attrs)
+ ac_add_func_attributes(ctx->context, function, attrib_mask);
}
- return LLVMBuildCall(ctx->builder, function, params, param_count, "");
+
+ call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
+ if (set_callsite_attrs)
+ ac_add_func_attributes(ctx->context, call, attrib_mask);
+ return call;
}
LLVMValueRef
ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count,
unsigned value_stride,
bool load)
{
LLVMBuilderRef builder = ctx->builder;
@@ -524,21 +527,22 @@ ac_build_tbuffer_store(struct ac_llvm_context *ctx,
/* The instruction offset field has 12 bits */
assert(offen || inst_offset < (1 << 12));
/* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
unsigned func = CLAMP(num_channels, 1, 3) - 1;
const char *types[] = {"i32", "v2i32", "v4i32"};
char name[256];
snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
- args, ARRAY_SIZE(args), 0);
+ args, ARRAY_SIZE(args),
+ AC_FUNC_ATTR_LEGACY);
}
void
ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vdata,
unsigned num_channels,
LLVMValueRef vaddr,
LLVMValueRef soffset,
unsigned inst_offset)
@@ -836,12 +840,13 @@ LLVMValueRef ac_emit_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." :
"llvm.AMDIL.clamp.";
LLVMValueRef args[3] = {
value,
LLVMConstReal(ctx->f32, 0),
LLVMConstReal(ctx->f32, 1),
};
return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3,
- AC_FUNC_ATTR_READNONE);
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_LEGACY);
}
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index be127c5..fb525dd 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -17,21 +17,21 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*/
/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
#include "ac_llvm_util.h"
-
+#include "util/bitscan.h"
#include <llvm-c/Core.h>
#include "c11/threads.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
static void ac_init_llvm_target()
{
@@ -173,40 +173,53 @@ static const char *attr_to_str(enum ac_func_attr attr)
case AC_FUNC_ATTR_READNONE: return "readnone";
case AC_FUNC_ATTR_READONLY: return "readonly";
default:
fprintf(stderr, "Unhandled function attribute: %x\n", attr);
return 0;
}
}
#endif
-void
-ac_add_function_attr(LLVMValueRef function,
- int attr_idx,
- enum ac_func_attr attr)
+static void
+ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
+ int attr_idx, enum ac_func_attr attr)
{
-
#if HAVE_LLVM < 0x0400
LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
if (attr_idx == -1) {
LLVMAddFunctionAttr(function, llvm_attr);
} else {
LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
}
#else
- LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
const char *attr_name = attr_to_str(attr);
unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
strlen(attr_name));
- LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
- LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+ LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
+
+ if (LLVMIsAFunction(function))
+ LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+ else
+ LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
#endif
}
+void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+ unsigned attrib_mask)
+{
+ attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
+ attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
+
+ while (attrib_mask) {
+ enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
+ ac_add_function_attr(ctx, function, -1, attr);
+ }
+}
+
void
ac_dump_module(LLVMModuleRef module)
{
char *str = LLVMPrintModuleToString(module);
fprintf(stderr, "%s", str);
LLVMDisposeMessage(str);
}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 93d3d27..4fe4ab4 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -34,28 +34,29 @@ extern "C" {
#endif
enum ac_func_attr {
AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
AC_FUNC_ATTR_BYVAL = (1 << 1),
AC_FUNC_ATTR_INREG = (1 << 2),
AC_FUNC_ATTR_NOALIAS = (1 << 3),
AC_FUNC_ATTR_NOUNWIND = (1 << 4),
AC_FUNC_ATTR_READNONE = (1 << 5),
AC_FUNC_ATTR_READONLY = (1 << 6),
+
+ /* Legacy intrinsic that needs attributes on function declarations
+ * and they must match the internal LLVM definition exactly, otherwise
+ * intrinsic selection fails.
+ */
+ AC_FUNC_ATTR_LEGACY = (1u << 31),
};
LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill);
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
bool ac_is_sgpr_param(LLVMValueRef param);
-
-void
-ac_add_function_attr(LLVMValueRef function,
- int attr_idx,
- enum ac_func_attr attr);
-
-void
-ac_dump_module(LLVMModuleRef module);
+void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+ unsigned attrib_mask);
+void ac_dump_module(LLVMModuleRef module);
#ifdef __cplusplus
}
#endif
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b3dc63c..4ec19d5 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1039,26 +1039,27 @@ static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
return result;
}
static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
- const char *intrin,
+ const char *intrin, unsigned attr_mask,
LLVMValueRef srcs[3])
{
LLVMValueRef result;
LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
- result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
+ result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3,
+ AC_FUNC_ATTR_READNONE | attr_mask);
result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
return result;
}
static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
LLVMValueRef src0, LLVMValueRef src1,
LLVMValueRef src2, LLVMValueRef src3)
{
LLVMValueRef bfi_args[3], result;
@@ -1418,24 +1419,26 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
break;
case nir_op_fmin:
result = emit_intrin_2f_param(ctx, "llvm.minnum",
to_float_type(ctx, def_type), src[0], src[1]);
break;
case nir_op_ffma:
result = emit_intrin_3f_param(ctx, "llvm.fma",
to_float_type(ctx, def_type), src[0], src[1], src[2]);
break;
case nir_op_ibitfield_extract:
- result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
+ result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32",
+ AC_FUNC_ATTR_LEGACY, src);
break;
case nir_op_ubitfield_extract:
- result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
+ result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32",
+ AC_FUNC_ATTR_LEGACY, src);
break;
case nir_op_bitfield_insert:
result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
break;
case nir_op_bitfield_reverse:
result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
break;
case nir_op_bit_count:
result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
break;
@@ -1635,22 +1638,23 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
txq_args[txq_arg_count++] = tinfo->args[1];
txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */
txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */
txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
- txq_args, txq_arg_count,
- AC_FUNC_ATTR_READNONE);
+ txq_args, txq_arg_count,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_LEGACY);
for (c = 0; c < 2; c++) {
half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
LLVMConstInt(ctx->i32, c, false), "");
half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
half_texel[c] = ac_emit_fdiv(&ctx->ac, ctx->f32one, half_texel[c]);
half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
LLVMConstReal(ctx->f32, -0.5), "");
}
}
@@ -1660,21 +1664,22 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
}
tinfo->args[0] = coord;
return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
- AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
+ AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
+ AC_FUNC_ATTR_LEGACY);
}
static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
nir_tex_instr *instr,
struct ac_tex_info *tinfo)
{
const char *name = "llvm.SI.image.sample";
const char *infix = "";
char intr_name[127];
@@ -1728,21 +1733,22 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
has_offset ? ".o" : "", type);
if (instr->op == nir_texop_tg4) {
enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name,
(int)has_offset + (int)is_shadow);
}
}
return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
- AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
+ AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
+ AC_FUNC_ATTR_LEGACY);
}
static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
nir_intrinsic_instr *instr)
{
LLVMValueRef index = get_src(ctx, instr->src[0]);
unsigned desc_set = nir_intrinsic_desc_set(instr);
unsigned binding = nir_intrinsic_binding(instr);
LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
@@ -2006,21 +2012,23 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
if (instr->dest.ssa.bit_size == 64)
num_components *= 2;
for (unsigned i = 0; i < num_components; ++i) {
LLVMValueRef params[] = {
rsrc,
LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
offset, "")
};
results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
- params, 2, AC_FUNC_ATTR_READNONE);
+ params, 2,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_LEGACY);
}
ret = ac_build_gather_values(&ctx->ac, results, instr->num_components);
return LLVMBuildBitCast(ctx->builder, ret,
get_def_type(ctx, &instr->dest.ssa), "");
}
static void
radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
@@ -2103,21 +2111,23 @@ load_gs_input(struct nir_to_llvm_context *ctx,
args[1] = vtx_offset;
args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false);
args[3] = ctx->i32zero;
args[4] = ctx->i32one; /* OFFEN */
args[5] = ctx->i32zero; /* IDXEN */
args[6] = ctx->i32one; /* GLC */
args[7] = ctx->i32zero; /* SLC */
args[8] = ctx->i32zero; /* TFE */
value[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
- ctx->i32, args, 9, AC_FUNC_ATTR_READONLY);
+ ctx->i32, args, 9,
+ AC_FUNC_ATTR_READONLY |
+ AC_FUNC_ATTR_LEGACY);
}
result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
return result;
}
static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
nir_intrinsic_instr *instr)
{
LLVMValueRef values[8];
@@ -2685,21 +2695,23 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
params[2] = LLVMConstInt(ctx->i32, 15, false);
params[3] = ctx->i32zero;
params[4] = ctx->i32zero;
params[5] = da ? ctx->i32one : ctx->i32zero;
params[6] = ctx->i32zero;
params[7] = ctx->i32zero;
params[8] = ctx->i32zero;
params[9] = ctx->i32zero;
res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
- params, 10, AC_FUNC_ATTR_READNONE);
+ params, 10,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_LEGACY);
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
glsl_sampler_type_is_array(type)) {
LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
z = LLVMBuildSDiv(ctx->builder, z, six, "");
res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
}
return res;
@@ -2729,21 +2741,21 @@ static void emit_discard_if(struct nir_to_llvm_context *ctx,
cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
get_src(ctx, instr->src[0]),
ctx->i32zero, "");
cond = LLVMBuildSelect(ctx->builder, cond,
LLVMConstReal(ctx->f32, -1.0f),
ctx->f32zero, "");
ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
ctx->voidt,
- &cond, 1, 0);
+ &cond, 1, AC_FUNC_ATTR_LEGACY);
}
static LLVMValueRef
visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
{
LLVMValueRef result;
LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
LLVMConstInt(ctx->i32, 0xfc0, false), "");
@@ -2984,21 +2996,21 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
* have any effect, and GS threads have no externally observable
* effects other than emitting vertices.
*/
can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
LLVMConstInt(ctx->i32, ctx->gs_max_out_vertices, false), "");
kill = LLVMBuildSelect(ctx->builder, can_emit,
LLVMConstReal(ctx->f32, 1.0f),
LLVMConstReal(ctx->f32, -1.0f), "");
ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
- ctx->voidt, &kill, 1, 0);
+ ctx->voidt, &kill, 1, AC_FUNC_ATTR_LEGACY);
/* loop num outputs */
idx = 0;
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
if (!(ctx->output_mask & (1ull << i)))
continue;
for (unsigned j = 0; j < 4; j++) {
LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
@@ -3144,21 +3156,21 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx,
case nir_intrinsic_image_atomic_comp_swap:
result = visit_image_atomic(ctx, instr);
break;
case nir_intrinsic_image_size:
result = visit_image_size(ctx, instr);
break;
case nir_intrinsic_discard:
ctx->shader_info->fs.can_discard = true;
ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
ctx->voidt,
- NULL, 0, 0);
+ NULL, 0, AC_FUNC_ATTR_LEGACY);
break;
case nir_intrinsic_discard_if:
emit_discard_if(ctx, instr);
break;
case nir_intrinsic_memory_barrier:
emit_waitcnt(ctx);
break;
case nir_intrinsic_barrier:
emit_barrier(ctx);
break;
@@ -3924,21 +3936,22 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
t_offset = LLVMConstInt(ctx->i32, index + i, false);
t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
args[0] = t_list;
args[1] = LLVMConstInt(ctx->i32, 0, false);
args[2] = buffer_index;
input = ac_emit_llvm_intrinsic(&ctx->ac,
"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
- AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
+ AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
+ AC_FUNC_ATTR_LEGACY);
for (unsigned chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
input, llvm_chan, ""));
}
}
}
@@ -4257,22 +4270,23 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
args[4] = ctx->i32one;
for (unsigned chan = 0; chan < 2; chan++) {
LLVMValueRef pack_args[2] = {
values[2 * chan],
values[2 * chan + 1]
};
LLVMValueRef packed;
packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16",
- ctx->i32, pack_args, 2,
- AC_FUNC_ATTR_READNONE);
+ ctx->i32, pack_args, 2,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_LEGACY);
args[chan + 5] = packed;
}
break;
case V_028714_SPI_SHADER_UNORM16_ABGR:
for (unsigned chan = 0; chan < 4; chan++) {
val[chan] = emit_float_saturate(ctx, values[chan], 0, 1);
val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
LLVMConstReal(ctx->f32, 65535), "");
val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
@@ -4443,21 +4457,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
si_llvm_init_export_args(ctx, values, target, args);
if (target >= V_008DFC_SQ_EXP_POS &&
target <= (V_008DFC_SQ_EXP_POS + 3)) {
memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
args, sizeof(args));
} else {
ac_emit_llvm_intrinsic(&ctx->ac,
"llvm.SI.export",
ctx->voidt,
- args, 9, 0);
+ args, 9,
+ AC_FUNC_ATTR_LEGACY);
}
}
/* We need to add the position output manually if it's missing. */
if (!pos_args[0][0]) {
pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
pos_args[0][1] = ctx->i32zero; /* EXEC mask */
pos_args[0][2] = ctx->i32zero; /* last export? */
pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false);
pos_args[0][4] = ctx->i32zero; /* COMPR flag */
@@ -4498,21 +4513,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
if (!pos_args[i][0])
continue;
/* Specify the target we are exporting */
pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false);
if (pos_idx == num_pos_exports)
pos_args[i][2] = ctx->i32one;
ac_emit_llvm_intrinsic(&ctx->ac,
"llvm.SI.export",
ctx->voidt,
- pos_args[i], 9, 0);
+ pos_args[i], 9,
+ AC_FUNC_ATTR_LEGACY);
}
ctx->shader_info->vs.pos_exports = num_pos_exports;
ctx->shader_info->vs.param_exports = param_count;
}
static void
handle_es_outputs_post(struct nir_to_llvm_context *ctx)
{
int j;
@@ -4554,21 +4570,22 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
si_llvm_init_export_args(ctx, color, param,
args);
if (is_last) {
args[1] = ctx->i32one; /* whether the EXEC mask is valid */
args[2] = ctx->i32one; /* DONE bit */
} else if (args[0] == ctx->i32zero)
return; /* unnecessary NULL export */
ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
- ctx->voidt, args, 9, 0);
+ ctx->voidt, args, 9,
+ AC_FUNC_ATTR_LEGACY);
}
static void
si_export_mrt_z(struct nir_to_llvm_context *ctx,
LLVMValueRef depth, LLVMValueRef stencil,
LLVMValueRef samplemask)
{
LLVMValueRef args[9];
unsigned mask = 0;
args[1] = ctx->i32one; /* whether the EXEC mask is valid */
@@ -4598,21 +4615,22 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
}
/* SI (except OLAND) has a bug that it only looks
* at the X writemask component. */
if (ctx->options->chip_class == SI &&
ctx->options->family != CHIP_OLAND)
mask |= 0x01;
args[0] = LLVMConstInt(ctx->i32, mask, false);
ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
- ctx->voidt, args, 9, 0);
+ ctx->voidt, args, 9,
+ AC_FUNC_ATTR_LEGACY);
}
static void
handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
{
unsigned index = 0;
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
LLVMValueRef values[4];
@@ -5021,21 +5039,22 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
for (unsigned j = 0; j < 4; j++) {
LLVMValueRef value;
args[2] = LLVMConstInt(ctx->i32,
(idx * 4 + j) *
ctx->gs_max_out_vertices * 16 * 4, false);
value = ac_emit_llvm_intrinsic(&ctx->ac,
"llvm.SI.buffer.load.dword.i32.i32",
ctx->i32, args, 9,
- AC_FUNC_ATTR_READONLY);
+ AC_FUNC_ATTR_READONLY |
+ AC_FUNC_ATTR_LEGACY);
LLVMBuildStore(ctx->builder,
to_float(ctx, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
}
idx++;
}
handle_vs_outputs_post(ctx);
}
void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 8952dc8..586a9be 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1588,21 +1588,22 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
arg_types, num_arg_types, 0);
variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
variant->function = variant_func;
LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
for (i = 0; i < num_arg_types; ++i)
if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
+ lp_add_function_attr(context, variant_func, i + 1,
+ LP_FUNC_ATTR_NOALIAS);
context_ptr = LLVMGetParam(variant_func, 0);
io_ptr = LLVMGetParam(variant_func, 1);
vbuffers_ptr = LLVMGetParam(variant_func, 2);
count = LLVMGetParam(variant_func, 3);
/*
* XXX: the maxelt part is unused. Not really useful, since we cannot
* get index buffer overflows due to vsplit (which provides its own
* elts buffer, with a different size than what's passed in here).
*/
@@ -2262,21 +2263,22 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
variant->function = variant_func;
LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
+ lp_add_function_attr(context, variant_func, i + 1,
+ LP_FUNC_ATTR_NOALIAS);
context_ptr = LLVMGetParam(variant_func, 0);
input_array = LLVMGetParam(variant_func, 1);
io_ptr = LLVMGetParam(variant_func, 2);
num_prims = LLVMGetParam(variant_func, 3);
system_values.instance_id = LLVMGetParam(variant_func, 4);
prim_id_ptr = LLVMGetParam(variant_func, 5);
system_values.invocation_id = LLVMGetParam(variant_func, 6);
lp_build_name(context_ptr, "context");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index 049671a..1b50e68 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -152,83 +152,100 @@ static const char *attr_to_str(enum lp_func_attr attr)
case LP_FUNC_ATTR_READONLY: return "readonly";
default:
_debug_printf("Unhandled function attribute: %x\n", attr);
return 0;
}
}
#endif
void
-lp_add_function_attr(LLVMValueRef function,
- int attr_idx,
- enum lp_func_attr attr)
+lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
+ int attr_idx, enum lp_func_attr attr)
{
#if HAVE_LLVM < 0x0400
LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr);
if (attr_idx == -1) {
LLVMAddFunctionAttr(function, llvm_attr);
} else {
LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
}
#else
- LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
const char *attr_name = attr_to_str(attr);
unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
strlen(attr_name));
- LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
- LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+ LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
+
+ if (LLVMIsAFunction(function))
+ LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+ else
+ LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
#endif
}
+static void
+lp_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+ unsigned attrib_mask)
+{
+ /* NoUnwind indicates that the intrinsic never raises a C++ exception.
+ * Set it for all intrinsics.
+ */
+ attrib_mask |= LP_FUNC_ATTR_NOUNWIND;
+ attrib_mask &= ~LP_FUNC_ATTR_LEGACY;
+
+ while (attrib_mask) {
+ enum lp_func_attr attr = 1u << u_bit_scan(&attrib_mask);
+ lp_add_function_attr(ctx, function, -1, attr);
+ }
+}
+
LLVMValueRef
lp_build_intrinsic(LLVMBuilderRef builder,
const char *name,
LLVMTypeRef ret_type,
LLVMValueRef *args,
unsigned num_args,
unsigned attr_mask)
{
LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
- LLVMValueRef function;
+ LLVMContextRef ctx = LLVMGetModuleContext(module);
+ LLVMValueRef function, call;
+ bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
+ !(attr_mask & LP_FUNC_ATTR_LEGACY);
function = LLVMGetNamedFunction(module, name);
if(!function) {
LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
unsigned i;
assert(num_args <= LP_MAX_FUNC_ARGS);
for(i = 0; i < num_args; ++i) {
assert(args[i]);
arg_types[i] = LLVMTypeOf(args[i]);
}
function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
- /* NoUnwind indicates that the intrinsic never raises a C++ exception.
- * Set it for all intrinsics.
- */
- attr_mask |= LP_FUNC_ATTR_NOUNWIND;
-
- while (attr_mask) {
- enum lp_func_attr attr = 1 << u_bit_scan(&attr_mask);
- lp_add_function_attr(function, -1, attr);
- }
+ if (!set_callsite_attrs)
+ lp_add_func_attributes(ctx, function, attr_mask);
if (gallivm_debug & GALLIVM_DEBUG_IR) {
lp_debug_dump_value(function);
}
}
- return LLVMBuildCall(builder, function, args, num_args, "");
+ call = LLVMBuildCall(builder, function, args, num_args, "");
+ if (set_callsite_attrs)
+ lp_add_func_attributes(ctx, call, attr_mask);
+ return call;
}
LLVMValueRef
lp_build_intrinsic_unary(LLVMBuilderRef builder,
const char *name,
LLVMTypeRef ret_type,
LLVMValueRef a)
{
return lp_build_intrinsic(builder, name, ret_type, &a, 1, 0);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index 039e9ab..d279911 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -47,39 +47,44 @@
#define LP_MAX_FUNC_ARGS 32
enum lp_func_attr {
LP_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
LP_FUNC_ATTR_BYVAL = (1 << 1),
LP_FUNC_ATTR_INREG = (1 << 2),
LP_FUNC_ATTR_NOALIAS = (1 << 3),
LP_FUNC_ATTR_NOUNWIND = (1 << 4),
LP_FUNC_ATTR_READNONE = (1 << 5),
LP_FUNC_ATTR_READONLY = (1 << 6),
+
+ /* Legacy intrinsic that needs attributes on function declarations
+ * and they must match the internal LLVM definition exactly, otherwise
+ * intrinsic selection fails.
+ */
+ LP_FUNC_ATTR_LEGACY = (1u << 31),
};
void
lp_format_intrinsic(char *name,
size_t size,
const char *name_root,
LLVMTypeRef type);
LLVMValueRef
lp_declare_intrinsic(LLVMModuleRef module,
const char *name,
LLVMTypeRef ret_type,
LLVMTypeRef *arg_types,
unsigned num_args);
void
-lp_add_function_attr(LLVMValueRef function,
- int attr_idx,
- enum lp_func_attr attr);
+lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
+ int attr_idx, enum lp_func_attr attr);
LLVMValueRef
lp_build_intrinsic(LLVMBuilderRef builder,
const char *name,
LLVMTypeRef ret_type,
LLVMValueRef *args,
unsigned num_args,
unsigned attr_mask);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 161a03f..a1e2601 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -3311,21 +3311,22 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
val_type[0] = val_type[1] = val_type[2] = val_type[3] =
lp_build_vec_type(gallivm, params->type);
ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
function = LLVMAddFunction(module, func_name, function_type);
for (i = 0; i < num_param; ++i) {
if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
- lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
+ lp_add_function_attr(gallivm->context, function, i + 1,
+ LP_FUNC_ATTR_NOALIAS);
}
}
LLVMSetFunctionCallConv(function, LLVMFastCallConv);
LLVMSetLinkage(function, LLVMInternalLinkage);
lp_build_sample_gen_func(gallivm,
static_texture_state,
static_sampler_state,
dynamic_state,
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index af47b52..70b0a67 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -2488,21 +2488,22 @@ generate_fragment(struct llvmpipe_context *lp,
function = LLVMAddFunction(gallivm->module, func_name, func_type);
LLVMSetFunctionCallConv(function, LLVMCCallConv);
variant->function[partial_mask] = function;
/* XXX: need to propagate noalias down into color param now we are
* passing a pointer-to-pointer?
*/
for(i = 0; i < ARRAY_SIZE(arg_types); ++i)
if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
+ lp_add_function_attr(gallivm->context, function, i + 1,
+ LP_FUNC_ATTR_NOALIAS);
context_ptr = LLVMGetParam(function, 0);
x = LLVMGetParam(function, 1);
y = LLVMGetParam(function, 2);
facing = LLVMGetParam(function, 3);
a0_ptr = LLVMGetParam(function, 4);
dadx_ptr = LLVMGetParam(function, 5);
dady_ptr = LLVMGetParam(function, 6);
color_ptr_ptr = LLVMGetParam(function, 7);
depth_ptr = LLVMGetParam(function, 8);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 6b0df21..66bc42c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -609,29 +609,29 @@ emit_tri_coef( struct gallivm_state *gallivm,
default:
assert(0);
}
}
}
/* XXX: generic code:
*/
static void
-set_noalias(LLVMBuilderRef builder,
+set_noalias(LLVMContextRef ctx,
LLVMValueRef function,
const LLVMTypeRef *arg_types,
int nr_args)
{
int i;
for(i = 0; i < nr_args; ++i)
if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
+ lp_add_function_attr(ctx, function, i + 1, LP_FUNC_ATTR_NOALIAS);
}
static void
init_args(struct gallivm_state *gallivm,
const struct lp_setup_variant_key *key,
struct lp_setup_args *args)
{
LLVMBuilderRef b = gallivm->builder;
LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
LLVMValueRef onef = lp_build_const_float(gallivm, 1.0);
@@ -783,21 +783,22 @@ generate_setup_variant(struct lp_setup_variant_key *key,
lp_build_name(args.dadx, "out_dadx");
lp_build_name(args.dady, "out_dady");
/*
* Function body
*/
block = LLVMAppendBasicBlockInContext(gallivm->context,
variant->function, "entry");
LLVMPositionBuilderAtEnd(builder, block);
- set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types));
+ set_noalias(gallivm->context, variant->function, arg_types,
+ ARRAY_SIZE(arg_types));
init_args(gallivm, &variant->key, &args);
emit_tri_coef(gallivm, &variant->key, &args);
LLVMBuildRetVoid(builder);
gallivm_verify_function(gallivm, variant->function);
gallivm_compile_module(gallivm);
variant->jit_function = (lp_jit_setup_triangle)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index f9eaea2..ea3f4fd 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -394,21 +394,22 @@ static void declare_input_vs(
}
args[0] = t_list;
args[2] = vertex_index;
for (unsigned i = 0; i < num_fetches; i++) {
args[1] = LLVMConstInt(ctx->i32, fetch_stride * i, 0);
input[i] = lp_build_intrinsic(gallivm->builder,
"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
- LP_FUNC_ATTR_READNONE);
+ LP_FUNC_ATTR_READNONE |
+ LP_FUNC_ATTR_LEGACY);
}
/* Break up the vec4 into individual components */
for (chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
out[chan] = LLVMBuildExtractElement(gallivm->builder,
input[0], llvm_chan, "");
}
switch (fix_fetch) {
@@ -1116,28 +1117,30 @@ static LLVMValueRef fetch_input_gs(
args[3] = uint->zero;
args[4] = uint->one; /* OFFEN */
args[5] = uint->zero; /* IDXEN */
args[6] = uint->one; /* GLC */
args[7] = uint->zero; /* SLC */
args[8] = uint->zero; /* TFE */
value = lp_build_intrinsic(gallivm->builder,
"llvm.SI.buffer.load.dword.i32.i32",
ctx->i32, args, 9,
- LP_FUNC_ATTR_READONLY);
+ LP_FUNC_ATTR_READONLY |
+ LP_FUNC_ATTR_LEGACY);
if (tgsi_type_is_64bit(type)) {
LLVMValueRef value2;
args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
value2 = lp_build_intrinsic(gallivm->builder,
"llvm.SI.buffer.load.dword.i32.i32",
ctx->i32, args, 9,
- LP_FUNC_ATTR_READONLY);
+ LP_FUNC_ATTR_READONLY |
+ LP_FUNC_ATTR_LEGACY);
return si_llvm_emit_fetch_64bit(bld_base, type,
value, value2);
}
return LLVMBuildBitCast(gallivm->builder,
value,
tgsi2llvmtype(bld_base, type), "");
}
static int lookup_interp_param_index(unsigned interpolate, unsigned location)
{
@@ -1361,21 +1364,22 @@ static LLVMValueRef get_sample_id(struct si_shader_context *radeon_bld)
* Load a dword from a constant buffer.
*/
static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
LLVMValueRef resource,
LLVMValueRef offset)
{
LLVMBuilderRef builder = ctx->gallivm.builder;
LLVMValueRef args[2] = {resource, offset};
return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2,
- LP_FUNC_ATTR_READNONE);
+ LP_FUNC_ATTR_READNONE |
+ LP_FUNC_ATTR_LEGACY);
}
static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, LLVMValueRef sample_id)
{
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->bld_base);
struct lp_build_context *uint_bld = &radeon_bld->bld_base.uint_bld;
struct gallivm_state *gallivm = &radeon_bld->gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef desc = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS);
@@ -1815,21 +1819,22 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
for (chan = 0; chan < 2; chan++) {
LLVMValueRef pack_args[2] = {
values[2 * chan],
values[2 * chan + 1]
};
LLVMValueRef packed;
packed = lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.packf16",
ctx->i32, pack_args, 2,
- LP_FUNC_ATTR_READNONE);
+ LP_FUNC_ATTR_READNONE |
+ LP_FUNC_ATTR_LEGACY);
args[chan + 5] =
LLVMBuildBitCast(base->gallivm->builder,
packed, ctx->f32, "");
}
break;
case V_028714_SPI_SHADER_UNORM16_ABGR:
for (chan = 0; chan < 4; chan++) {
val[chan] = ac_emit_clamp(&ctx->ac, values[chan]);
val[chan] = LLVMBuildFMul(builder, val[chan],
@@ -1947,24 +1952,24 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
lp_build_cmp(&bld_base->base,
ctx->shader->key.part.ps.epilog.alpha_func,
alpha, alpha_ref);
LLVMValueRef arg =
lp_build_select(&bld_base->base,
alpha_pass,
lp_build_const_float(gallivm, 1.0f),
lp_build_const_float(gallivm, -1.0f));
lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
- ctx->voidt, &arg, 1, 0);
+ ctx->voidt, &arg, 1, LP_FUNC_ATTR_LEGACY);
} else {
lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kilp",
- ctx->voidt, NULL, 0, 0);
+ ctx->voidt, NULL, 0, LP_FUNC_ATTR_LEGACY);
}
}
static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
LLVMValueRef alpha,
unsigned samplemask_param)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMValueRef coverage;
@@ -2295,21 +2300,21 @@ handle_semantic:
si_llvm_init_export_args(bld_base, outputs[i].values, target, args);
if (target >= V_008DFC_SQ_EXP_POS &&
target <= (V_008DFC_SQ_EXP_POS + 3)) {
memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
args, sizeof(args));
} else {
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export", ctx->voidt,
- args, 9, 0);
+ args, 9, LP_FUNC_ATTR_LEGACY);
}
if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
semantic_name = TGSI_SEMANTIC_GENERIC;
goto handle_semantic;
}
}
shader->info.nr_param_exports = param_count;
@@ -2381,21 +2386,22 @@ handle_semantic:
continue;
/* Specify the target we are exporting */
pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);
if (pos_idx == shader->info.nr_pos_exports)
/* Specify that this is the last export */
pos_args[i][2] = uint->one;
lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
- ctx->voidt, pos_args[i], 9, 0);
+ ctx->voidt, pos_args[i], 9,
+ LP_FUNC_ATTR_LEGACY);
}
}
/**
* Forward all outputs from the vertex shader to the TES. This is only used
* for the fixed function TCS.
*/
static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
@@ -2972,42 +2978,42 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
memcpy(exp->args[exp->num++], args, sizeof(args));
}
}
static void si_emit_ps_exports(struct si_shader_context *ctx,
struct si_ps_exports *exp)
{
for (unsigned i = 0; i < exp->num; i++)
lp_build_intrinsic(ctx->gallivm.builder,
"llvm.SI.export", ctx->voidt,
- exp->args[i], 9, 0);
+ exp->args[i], 9, LP_FUNC_ATTR_LEGACY);
}
static void si_export_null(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *base = &bld_base->base;
struct lp_build_context *uint = &bld_base->uint_bld;
LLVMValueRef args[9];
args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
args[1] = uint->one; /* whether the EXEC mask is valid */
args[2] = uint->one; /* DONE bit */
args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
args[5] = base->undef; /* R */
args[6] = base->undef; /* G */
args[7] = base->undef; /* B */
args[8] = base->undef; /* A */
lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
- ctx->voidt, args, 9, 0);
+ ctx->voidt, args, 9, LP_FUNC_ATTR_LEGACY);
}
/**
* Return PS outputs in this order:
*
* v[0:3] = color0.xyzw
* v[4:7] = color1.xyzw
* ...
* vN+0 = Depth
* vN+1 = Stencil
@@ -4082,21 +4088,21 @@ static void resq_emit(
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
out = LLVMBuildExtractElement(builder, emit_data->args[0],
lp_build_const_int32(gallivm, 2), "");
} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
out = get_buffer_size(bld_base, emit_data->args[0]);
} else {
out = lp_build_intrinsic(
builder, "llvm.SI.getresinfo.i32", emit_data->dst_type,
emit_data->args, emit_data->arg_count,
- LP_FUNC_ATTR_READNONE);
+ LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
/* Divide the number of layers by 6 to get the number of cubes. */
if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
LLVMValueRef imm2 = lp_build_const_int32(gallivm, 2);
LLVMValueRef imm6 = lp_build_const_int32(gallivm, 6);
LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, "");
z = LLVMBuildSDiv(builder, z, imm6, "");
out = LLVMBuildInsertElement(builder, out, z, imm2, "");
}
@@ -4319,21 +4325,21 @@ static void txq_emit(const struct lp_build_tgsi_action *action,
if (target == TGSI_TEXTURE_BUFFER) {
/* Just return the buffer size. */
emit_data->output[emit_data->chan] = emit_data->args[0];
return;
}
emit_data->output[emit_data->chan] = lp_build_intrinsic(
base->gallivm->builder, "llvm.SI.getresinfo.i32",
emit_data->dst_type, emit_data->args, emit_data->arg_count,
- LP_FUNC_ATTR_READNONE);
+ LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
/* Divide the number of layers by 6 to get the number of cubes. */
if (target == TGSI_TEXTURE_CUBE_ARRAY ||
target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);
LLVMValueRef v4 = emit_data->output[emit_data->chan];
LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
@@ -4728,21 +4734,21 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx,
tmp = LLVMBuildBitCast(builder, tmp, ctx->f32, "");
tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
coord = LLVMBuildInsertElement(builder, coord, tmp, index, "");
}
emit_data->args[0] = coord;
emit_data->output[emit_data->chan] =
lp_build_intrinsic(builder, intr_name, emit_data->dst_type,
emit_data->args, emit_data->arg_count,
- LP_FUNC_ATTR_READNONE);
+ LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
}
static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *base = &bld_base->base;
const struct tgsi_full_instruction *inst = emit_data->inst;
unsigned opcode = inst->Instruction.Opcode;
@@ -4752,21 +4758,21 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
bool is_shadow = tgsi_is_shadow_target(target);
char type[64];
const char *name = "llvm.SI.image.sample";
const char *infix = "";
if (target == TGSI_TEXTURE_BUFFER) {
emit_data->output[emit_data->chan] = lp_build_intrinsic(
base->gallivm->builder,
"llvm.SI.vs.load.input", emit_data->dst_type,
emit_data->args, emit_data->arg_count,
- LP_FUNC_ATTR_READNONE);
+ LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
return;
}
switch (opcode) {
case TGSI_OPCODE_TXF:
name = target == TGSI_TEXTURE_2D_MSAA ||
target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
"llvm.SI.image.load" :
"llvm.SI.image.load.mip";
is_shadow = false;
@@ -4829,21 +4835,21 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
*/
si_lower_gather4_integer(ctx, emit_data, intr_name,
(int)has_offset + (int)is_shadow);
return;
}
}
emit_data->output[emit_data->chan] = lp_build_intrinsic(
base->gallivm->builder, intr_name, emit_data->dst_type,
emit_data->args, emit_data->arg_count,
- LP_FUNC_ATTR_READNONE);
+ LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
}
static void si_llvm_emit_txqs(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
@@ -5118,21 +5124,21 @@ static void si_llvm_emit_vertex(
lp_build_const_int32(gallivm,
shader->selector->gs_max_out_vertices), "");
bool use_kill = !info->writes_memory;
if (use_kill) {
kill = lp_build_select(&bld_base->base, can_emit,
lp_build_const_float(gallivm, 1.0f),
lp_build_const_float(gallivm, -1.0f));
lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
- ctx->voidt, &kill, 1, 0);
+ ctx->voidt, &kill, 1, LP_FUNC_ATTR_LEGACY);
} else {
lp_build_if(&if_state, gallivm, can_emit);
}
offset = 0;
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr = ctx->outputs[i];
for (chan = 0; chan < 4; chan++) {
if (!(info->output_usagemask[i] & (1 << chan)) ||
@@ -5238,24 +5244,26 @@ static void si_create_function(struct si_shader_context *ctx,
LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
/* The combination of:
* - ByVal
* - dereferenceable
* - invariant.load
* allows the optimization passes to move loads and reduces
* SGPR spilling significantly.
*/
if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
- lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_BYVAL);
+ lp_add_function_attr(ctx->gallivm.context, ctx->main_fn,
+ i + 1, LP_FUNC_ATTR_BYVAL);
lp_add_attr_dereferenceable(P, UINT64_MAX);
} else
- lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
+ lp_add_function_attr(ctx->gallivm.context, ctx->main_fn,
+ i + 1, LP_FUNC_ATTR_INREG);
}
LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
"no-signed-zeros-fp-math",
"true");
if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) {
/* These were copied from some LLVM test. */
LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
"less-precise-fpmad",
@@ -5740,21 +5748,22 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
offset = LLVMBuildMul(builder, address[1],
LLVMConstInt(ctx->i32, 4, 0), "");
row = buffer_load_const(ctx, desc, offset);
row = LLVMBuildBitCast(builder, row, ctx->i32, "");
bit = LLVMBuildLShr(builder, row, address[0], "");
bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
/* The intrinsic kills the thread if arg < 0. */
bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0),
LLVMConstReal(ctx->f32, -1), "");
- lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, 0);
+ lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1,
+ LP_FUNC_ATTR_LEGACY);
}
void si_shader_binary_read_config(struct radeon_shader_binary *binary,
struct si_shader_config *conf,
unsigned symbol_offset)
{
unsigned i;
const unsigned char *config =
radeon_shader_binary_config_start(binary, symbol_offset);
bool really_needs_scratch = false;
@@ -6308,23 +6317,24 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
}
args[2] = lp_build_const_int32(
gallivm,
offset * gs_selector->gs_max_out_vertices * 16 * 4);
offset++;
outputs[i].values[chan] =
LLVMBuildBitCast(gallivm->builder,
lp_build_intrinsic(gallivm->builder,
- "llvm.SI.buffer.load.dword.i32.i32",
- ctx.i32, args, 9,
- LP_FUNC_ATTR_READONLY),
+ "llvm.SI.buffer.load.dword.i32.i32",
+ ctx.i32, args, 9,
+ LP_FUNC_ATTR_READONLY |
+ LP_FUNC_ATTR_LEGACY),
ctx.f32, "");
}
}
/* Streamout and exports. */
if (gs_selector->so.num_outputs) {
si_llvm_emit_streamout(&ctx, outputs,
gsinfo->num_outputs,
stream);
}
@@ -7047,21 +7057,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
LLVMValueRef out[48];
LLVMTypeRef function_type;
unsigned num_params;
unsigned num_out;
MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */
unsigned num_sgprs, num_vgprs;
unsigned last_sgpr_param;
unsigned gprs;
for (unsigned i = 0; i < num_parts; ++i) {
- lp_add_function_attr(parts[i], -1, LP_FUNC_ATTR_ALWAYSINLINE);
+ lp_add_function_attr(gallivm->context, parts[i], -1,
+ LP_FUNC_ATTR_ALWAYSINLINE);
LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
}
/* The parameters of the wrapper function correspond to those of the
* first part in terms of SGPRs and VGPRs, but we use the types of the
* main part to get the right types. This is relevant for the
* dereferenceable attribute on descriptor table pointers.
*/
num_sgprs = 0;
num_vgprs = 0;
@@ -7164,21 +7175,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
param_size = llvm_get_type_size(param_type) / 4;
is_sgpr = ac_is_sgpr_param(param);
if (is_sgpr) {
#if HAVE_LLVM < 0x0400
LLVMRemoveAttribute(param, LLVMByValAttribute);
#else
unsigned kind_id = LLVMGetEnumAttributeKindForName("byval", 5);
LLVMRemoveEnumAttributeAtIndex(parts[part], param_idx + 1, kind_id);
#endif
- lp_add_function_attr(parts[part], param_idx + 1, LP_FUNC_ATTR_INREG);
+ lp_add_function_attr(gallivm->context, parts[part],
+ param_idx + 1, LP_FUNC_ATTR_INREG);
}
assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));
assert(is_sgpr || out_idx >= num_out_sgpr);
if (param_size == 1)
arg = out[out_idx];
else
arg = lp_build_gather_values(gallivm, &out[out_idx], param_size);
@@ -7679,21 +7691,21 @@ static void si_build_vs_epilog_function(struct si_shader_context *ctx,
key->vs_epilog.prim_id_param_offset);
args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
args[5] = LLVMGetParam(ctx->main_fn,
VS_EPILOG_PRIMID_LOC); /* X */
args[6] = base->undef; /* Y */
args[7] = base->undef; /* Z */
args[8] = base->undef; /* W */
lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- args, 9, 0);
+ args, 9, LP_FUNC_ATTR_LEGACY);
}
LLVMBuildRetVoid(gallivm->builder);
}
/**
* Create & compile a vertex shader epilog. This a helper used by VS and TES.
*/
static bool si_get_vs_epilog(struct si_screen *sscreen,
LLVMTargetMachineRef tm,
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index 10268e9..ee59fed 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -51,27 +51,23 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
emit_data->arg_count = 1;
emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
lp_build_const_float(gallivm, -1.0f),
bld_base->base.zero, "");
}
static void kil_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- unsigned i;
- for (i = 0; i < emit_data->arg_count; i++) {
- emit_data->output[i] = lp_build_intrinsic_unary(
- bld_base->base.gallivm->builder,
- action->intr_name,
- emit_data->dst_type, emit_data->args[i]);
- }
+ lp_build_intrinsic(bld_base->base.gallivm->builder,
+ action->intr_name, emit_data->dst_type,
+ &emit_data->args[0], 1, LP_FUNC_ATTR_LEGACY);
}
static void emit_icmp(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
unsigned pred;
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMContextRef context = bld_base->base.gallivm->context;
@@ -500,21 +496,23 @@ static void emit_bfe(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef bfe_sm5;
LLVMValueRef cond;
bfe_sm5 = lp_build_intrinsic(builder, action->intr_name,
emit_data->dst_type, emit_data->args,
- emit_data->arg_count, LP_FUNC_ATTR_READNONE);
+ emit_data->arg_count,
+ LP_FUNC_ATTR_READNONE |
+ LP_FUNC_ATTR_LEGACY);
/* Correct for GLSL semantics. */
cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
lp_build_const_int32(gallivm, 32), "");
emit_data->output[emit_data->chan] =
LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
}
/* this is ffs in C */
static void emit_lsb(const struct lp_build_tgsi_action *action,
--
2.7.4