[Mesa-dev] [PATCH 10/11] radeonsi: fold *gallivm
Marek Olšák
maraeo at gmail.com
Fri Sep 29 14:49:54 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_shader.c | 71 +++++++++--------------
src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 12 ++--
2 files changed, 31 insertions(+), 52 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d80d10c..db9a0d7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1005,30 +1005,29 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
*
* \param type output value type
* \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
* \param dw_addr address in dwords
*/
static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type, unsigned swizzle,
LLVMValueRef dw_addr)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef value;
if (swizzle == ~0) {
LLVMValueRef values[TGSI_NUM_CHANNELS];
for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
values[chan] = lds_load(bld_base, type, chan, dw_addr);
- return lp_build_gather_values(gallivm, values,
+ return lp_build_gather_values(&ctx->gallivm, values,
TGSI_NUM_CHANNELS);
}
dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
LLVMConstInt(ctx->i32, swizzle, 0));
value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
if (tgsi_type_is_64bit(type)) {
LLVMValueRef value2;
dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
@@ -1136,21 +1135,20 @@ static LLVMValueRef fetch_input_tes(
return buffer_load(bld_base, type, swizzle, buffer, base, addr, true);
}
static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
const struct tgsi_opcode_info *info,
unsigned index,
LLVMValueRef dst[4])
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
const struct tgsi_full_dst_register *reg = &inst->Dst[index];
const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
unsigned chan_index;
LLVMValueRef dw_addr, stride;
LLVMValueRef buffer, base, buf_addr;
LLVMValueRef values[4];
bool skip_lds_store;
bool is_tess_factor = false, is_tess_inner = false;
/* Only handle per-patch and per-vertex outputs here.
@@ -1220,37 +1218,36 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
LLVMBuildStore(ctx->ac.builder, value, /* outer */
ctx->invoc0_tess_factors[chan_index]);
} else if (chan_index < 2) {
LLVMBuildStore(ctx->ac.builder, value, /* inner */
ctx->invoc0_tess_factors[4 + chan_index]);
}
}
}
if (reg->Register.WriteMask == 0xF && !is_tess_factor) {
- LLVMValueRef value = lp_build_gather_values(gallivm,
+ LLVMValueRef value = lp_build_gather_values(&ctx->gallivm,
values, 4);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
base, 0, 1, 0, true, false);
}
}
static LLVMValueRef fetch_input_gs(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct si_shader *shader = ctx->shader;
struct lp_build_context *uint = &ctx->bld_base.uint_bld;
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef vtx_offset, soffset;
struct tgsi_shader_info *info = &shader->selector->info;
unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
unsigned param;
LLVMValueRef value;
if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
return get_primitive_id(ctx, swizzle);
@@ -1286,21 +1283,21 @@ static LLVMValueRef fetch_input_gs(
return lds_load(bld_base, type, swizzle, vtx_offset);
}
/* GFX6: input load from the ESGS ring in memory. */
if (swizzle == ~0) {
LLVMValueRef values[TGSI_NUM_CHANNELS];
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
values[chan] = fetch_input_gs(bld_base, reg, type, chan);
}
- return lp_build_gather_values(gallivm, values,
+ return lp_build_gather_values(&ctx->gallivm, values,
TGSI_NUM_CHANNELS);
}
/* Get the vertex offset parameter on GFX6. */
unsigned vtx_offset_param = reg->Dimension.Index;
if (vtx_offset_param < 2) {
vtx_offset_param += ctx->param_gs_vtx0_offset;
} else {
assert(vtx_offset_param < 6);
vtx_offset_param += ctx->param_gs_vtx2_offset - 2;
@@ -1539,45 +1536,43 @@ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
LLVMValueRef resource,
LLVMValueRef offset)
{
return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
0, 0, 0, true, true);
}
static LLVMValueRef load_sample_position(struct si_shader_context *ctx, LLVMValueRef sample_id)
{
struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
LLVMValueRef buf_index = LLVMConstInt(ctx->i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, buf_index);
/* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8);
LLVMValueRef offset1 = LLVMBuildAdd(ctx->ac.builder, offset0, LLVMConstInt(ctx->i32, 4, 0), "");
LLVMValueRef pos[4] = {
buffer_load_const(ctx, resource, offset0),
buffer_load_const(ctx, resource, offset1),
LLVMConstReal(ctx->f32, 0),
LLVMConstReal(ctx->f32, 0)
};
- return lp_build_gather_values(gallivm, pos, 4);
+ return lp_build_gather_values(&ctx->gallivm, pos, 4);
}
void si_load_system_value(struct si_shader_context *ctx,
unsigned index,
const struct tgsi_full_declaration *decl)
{
struct lp_build_context *bld = &ctx->bld_base.base;
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef value = 0;
assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
switch (decl->Semantic.Name) {
case TGSI_SEMANTIC_INSTANCEID:
value = ctx->abi.instance_id;
break;
case TGSI_SEMANTIC_VERTEXID:
@@ -1630,21 +1625,21 @@ void si_load_system_value(struct si_shader_context *ctx,
case TGSI_SEMANTIC_POSITION:
{
LLVMValueRef pos[4] = {
LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT),
lp_build_emit_llvm_unary(&ctx->bld_base, TGSI_OPCODE_RCP,
LLVMGetParam(ctx->main_fn,
SI_PARAM_POS_W_FLOAT)),
};
- value = lp_build_gather_values(gallivm, pos, 4);
+ value = lp_build_gather_values(&ctx->gallivm, pos, 4);
break;
}
case TGSI_SEMANTIC_FACE:
value = ctx->abi.front_face;
break;
case TGSI_SEMANTIC_SAMPLEID:
value = get_sample_id(ctx);
break;
@@ -1653,21 +1648,21 @@ void si_load_system_value(struct si_shader_context *ctx,
LLVMValueRef pos[4] = {
LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
LLVMConstReal(ctx->f32, 0),
LLVMConstReal(ctx->f32, 0)
};
pos[0] = lp_build_emit_llvm_unary(&ctx->bld_base,
TGSI_OPCODE_FRC, pos[0]);
pos[1] = lp_build_emit_llvm_unary(&ctx->bld_base,
TGSI_OPCODE_FRC, pos[1]);
- value = lp_build_gather_values(gallivm, pos, 4);
+ value = lp_build_gather_values(&ctx->gallivm, pos, 4);
break;
}
case TGSI_SEMANTIC_SAMPLEMASK:
/* This can only occur with the OpenGL Core profile, which
* doesn't support smoothing.
*/
value = LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE);
break;
@@ -1679,21 +1674,21 @@ void si_load_system_value(struct si_shader_context *ctx,
bld->zero,
bld->zero
};
/* For triangles, the vector should be (u, v, 1-u-v). */
if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
PIPE_PRIM_TRIANGLES)
coord[2] = lp_build_sub(bld, bld->one,
lp_build_add(bld, coord[0], coord[1]));
- value = lp_build_gather_values(gallivm, coord, 4);
+ value = lp_build_gather_values(&ctx->gallivm, coord, 4);
break;
}
case TGSI_SEMANTIC_VERTICESIN:
if (ctx->type == PIPE_SHADER_TESS_CTRL)
value = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 26, 6);
else if (ctx->type == PIPE_SHADER_TESS_EVAL)
value = get_num_tcs_out_vertices(ctx);
else
assert(!"invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
@@ -1724,21 +1719,21 @@ void si_load_system_value(struct si_shader_context *ctx,
int i, offset;
slot = LLVMConstInt(ctx->i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
buf = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
buf = ac_build_indexed_load_const(&ctx->ac, buf, slot);
offset = decl->Semantic.Name == TGSI_SEMANTIC_DEFAULT_TESSINNER_SI ? 4 : 0;
for (i = 0; i < 4; i++)
val[i] = buffer_load_const(ctx, buf,
LLVMConstInt(ctx->i32, (offset + i) * 4, 0));
- value = lp_build_gather_values(gallivm, val, 4);
+ value = lp_build_gather_values(&ctx->gallivm, val, 4);
break;
}
case TGSI_SEMANTIC_PRIMID:
value = get_primitive_id(ctx, 0);
break;
case TGSI_SEMANTIC_GRID_SIZE:
value = LLVMGetParam(ctx->main_fn, ctx->param_grid_size);
break;
@@ -1752,39 +1747,39 @@ void si_load_system_value(struct si_shader_context *ctx,
if (properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] != 0) {
unsigned sizes[3] = {
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
};
for (i = 0; i < 3; ++i)
values[i] = LLVMConstInt(ctx->i32, sizes[i], 0);
- value = lp_build_gather_values(gallivm, values, 3);
+ value = lp_build_gather_values(&ctx->gallivm, values, 3);
} else {
value = LLVMGetParam(ctx->main_fn, ctx->param_block_size);
}
break;
}
case TGSI_SEMANTIC_BLOCK_ID:
{
LLVMValueRef values[3];
for (int i = 0; i < 3; i++) {
values[i] = ctx->i32_0;
if (ctx->param_block_id[i] >= 0) {
values[i] = LLVMGetParam(ctx->main_fn,
ctx->param_block_id[i]);
}
}
- value = lp_build_gather_values(gallivm, values, 3);
+ value = lp_build_gather_values(&ctx->gallivm, values, 3);
break;
}
case TGSI_SEMANTIC_THREAD_ID:
value = LLVMGetParam(ctx->main_fn, ctx->param_thread_id);
break;
case TGSI_SEMANTIC_HELPER_INVOCATION:
value = lp_build_intrinsic(ctx->ac.builder,
"llvm.amdgcn.ps.live",
@@ -1839,30 +1834,29 @@ void si_load_system_value(struct si_shader_context *ctx,
return;
}
ctx->system_values[index] = value;
}
void si_declare_compute_memory(struct si_shader_context *ctx,
const struct tgsi_full_declaration *decl)
{
struct si_shader_selector *sel = ctx->shader->selector;
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMTypeRef i8p = LLVMPointerType(ctx->i8, LOCAL_ADDR_SPACE);
LLVMValueRef var;
assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED);
assert(decl->Range.First == decl->Range.Last);
assert(!ctx->shared_memory);
- var = LLVMAddGlobalInAddressSpace(gallivm->module,
+ var = LLVMAddGlobalInAddressSpace(ctx->ac.module,
LLVMArrayType(ctx->i8, sel->local_size),
"compute_lds",
LOCAL_ADDR_SPACE);
LLVMSetAlignment(var, 4);
ctx->shared_memory = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
}
static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
{
@@ -2327,39 +2321,38 @@ static void emit_streamout_output(struct si_shader_context *ctx,
/**
* Write streamout data to buffers for vertex stream @p stream (different
* vertex streams can occur for GS copy shaders).
*/
static void si_llvm_emit_streamout(struct si_shader_context *ctx,
struct si_shader_output_values *outputs,
unsigned noutput, unsigned stream)
{
struct si_shader_selector *sel = ctx->shader->selector;
struct pipe_stream_output_info *so = &sel->so;
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMBuilderRef builder = ctx->ac.builder;
int i;
struct lp_build_if_state if_ctx;
/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
LLVMValueRef so_vtx_count =
unpack_param(ctx, ctx->param_streamout_config, 16, 7);
LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
/* can_emit = tid < so_vtx_count; */
LLVMValueRef can_emit =
LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
/* Emit the streamout code conditionally. This actually avoids
* out-of-bounds buffer access. The hw tells us via the SGPR
* (so_vtx_count) which threads are allowed to emit streamout data. */
- lp_build_if(&if_ctx, gallivm, can_emit);
+ lp_build_if(&if_ctx, &ctx->gallivm, can_emit);
{
/* The buffer offset is computed as follows:
* ByteOffset = streamout_offset[buffer_id]*4 +
* (streamout_write_index + thread_id)*stride[buffer_id] +
* attrib_offset
*/
LLVMValueRef so_write_index =
LLVMGetParam(ctx->main_fn,
ctx->param_streamout_write_index);
@@ -2658,39 +2651,38 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
}
static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
LLVMValueRef rel_patch_id,
LLVMValueRef invocation_id,
LLVMValueRef tcs_out_current_patch_data_offset,
LLVMValueRef invoc0_tf_outer[4],
LLVMValueRef invoc0_tf_inner[2])
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
struct si_shader *shader = ctx->shader;
unsigned tess_inner_index, tess_outer_index;
LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
unsigned stride, outer_comps, inner_comps, i, offset;
struct lp_build_if_state if_ctx, inner_if_ctx;
/* Add a barrier before loading tess factors from LDS. */
if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
si_llvm_emit_barrier(NULL, bld_base, NULL);
/* Do this only for invocation 0, because the tess levels are per-patch,
* not per-vertex.
*
* This can't jump, because invocation 0 executes this. It should
* at least mask out the loads and stores for other invocations.
*/
- lp_build_if(&if_ctx, gallivm,
+ lp_build_if(&if_ctx, &ctx->gallivm,
LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
invocation_id, ctx->i32_0, ""));
/* Determine the layout of one tess factor element in the buffer. */
switch (shader->key.part.tcs.epilog.prim_mode) {
case PIPE_PRIM_LINES:
stride = 2; /* 2 dwords, 1 vec2 store */
outer_comps = 2;
inner_comps = 0;
break;
@@ -2748,36 +2740,36 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
/* For isolines, the hardware expects tess factors in the
* reverse order from what GLSL / TGSI specify.
*/
LLVMValueRef tmp = out[0];
out[0] = out[1];
out[1] = tmp;
}
/* Convert the outputs to vectors for stores. */
- vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
+ vec0 = lp_build_gather_values(&ctx->gallivm, out, MIN2(stride, 4));
vec1 = NULL;
if (stride > 4)
- vec1 = lp_build_gather_values(gallivm, out+4, stride - 4);
+ vec1 = lp_build_gather_values(&ctx->gallivm, out+4, stride - 4);
/* Get the buffer. */
buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_factor_addr_base64k);
/* Get the offset. */
tf_base = LLVMGetParam(ctx->main_fn,
ctx->param_tcs_factor_offset);
byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
LLVMConstInt(ctx->i32, 4 * stride, 0), "");
- lp_build_if(&inner_if_ctx, gallivm,
+ lp_build_if(&inner_if_ctx, &ctx->gallivm,
LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
rel_patch_id, ctx->i32_0, ""));
/* Store the dynamic HS control word. */
offset = 0;
if (ctx->screen->b.chip_class <= VI) {
ac_build_buffer_store_dword(&ctx->ac, buffer,
LLVMConstInt(ctx->i32, 0x80000000, 0),
1, ctx->i32_0, tf_base,
offset, 1, 0, true, false);
@@ -2803,34 +2795,34 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
unsigned param_outer, param_inner;
buf = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
param_outer = si_shader_io_get_unique_index_patch(
TGSI_SEMANTIC_TESSOUTER, 0);
tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
LLVMConstInt(ctx->i32, param_outer, 0));
- outer_vec = lp_build_gather_values(gallivm, outer,
+ outer_vec = lp_build_gather_values(&ctx->gallivm, outer,
util_next_power_of_two(outer_comps));
ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
outer_comps, tf_outer_offset,
base, 0, 1, 0, true, false);
if (inner_comps) {
param_inner = si_shader_io_get_unique_index_patch(
TGSI_SEMANTIC_TESSINNER, 0);
tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
LLVMConstInt(ctx->i32, param_inner, 0));
inner_vec = inner_comps == 1 ? inner[0] :
- lp_build_gather_values(gallivm, inner, inner_comps);
+ lp_build_gather_values(&ctx->gallivm, inner, inner_comps);
ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
inner_comps, tf_inner_offset,
base, 0, 1, 0, true, false);
}
}
lp_build_endif(&if_ctx);
}
static LLVMValueRef
@@ -3156,21 +3148,20 @@ static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
if (ctx->screen->b.chip_class >= GFX9)
lp_build_endif(&ctx->merged_wrap_if_state);
}
static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
unsigned max_outputs,
LLVMValueRef *addrs)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- struct gallivm_state *gallivm = &ctx->gallivm;
struct tgsi_shader_info *info = &ctx->shader->selector->info;
struct si_shader_output_values *outputs = NULL;
int i,j;
assert(!ctx->shader->is_gs_copy_shader);
assert(info->num_outputs <= max_outputs);
outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
/* Vertex color clamping.
@@ -3189,21 +3180,21 @@ static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
info->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR)
continue;
/* We've found a color. */
if (!cond) {
/* The state is in the first bit of the user SGPR. */
cond = LLVMGetParam(ctx->main_fn,
ctx->param_vs_state_bits);
cond = LLVMBuildTrunc(ctx->ac.builder, cond,
ctx->i1, "");
- lp_build_if(&if_ctx, gallivm, cond);
+ lp_build_if(&if_ctx, &ctx->gallivm, cond);
}
for (j = 0; j < 4; j++) {
addr = addrs[4 * i + j];
val = LLVMBuildLoad(ctx->ac.builder, addr, "");
val = ac_build_clamp(&ctx->ac, val);
LLVMBuildStore(ctx->ac.builder, val, addr);
}
}
@@ -3626,40 +3617,38 @@ static void si_llvm_emit_ddxy(
/*
* this takes an I,J coordinate pair,
* and works out the X and Y derivatives.
* it returns DDX(I), DDX(J), DDY(I), DDY(J).
*/
static LLVMValueRef si_llvm_emit_ddxy_interp(
struct lp_build_tgsi_context *bld_base,
LLVMValueRef interp_ij)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef result[4], a;
unsigned i;
for (i = 0; i < 2; i++) {
a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
LLVMConstInt(ctx->i32, i, 0), "");
result[i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDX, a);
result[2+i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDY, a);
}
- return lp_build_gather_values(gallivm, result, 4);
+ return lp_build_gather_values(&ctx->gallivm, result, 4);
}
static void interp_fetch_args(
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
const struct tgsi_full_instruction *inst = emit_data->inst;
if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
/* offset is in second src, first two channels */
emit_data->args[0] = lp_build_emit_fetch(bld_base,
emit_data->inst, 1,
TGSI_CHAN_X);
emit_data->args[1] = lp_build_emit_fetch(bld_base,
emit_data->inst, 1,
TGSI_CHAN_Y);
@@ -3691,21 +3680,21 @@ static void interp_fetch_args(
* sample position doesn't work.
*/
if (ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center) {
LLVMValueRef center[4] = {
LLVMConstReal(ctx->f32, 0.5),
LLVMConstReal(ctx->f32, 0.5),
ctx->ac.f32_0,
ctx->ac.f32_0,
};
- sample_position = lp_build_gather_values(gallivm, center, 4);
+ sample_position = lp_build_gather_values(&ctx->gallivm, center, 4);
} else {
sample_position = load_sample_position(ctx, sample_id);
}
emit_data->args[0] = LLVMBuildExtractElement(ctx->ac.builder,
sample_position,
ctx->i32_0, "");
emit_data->args[0] = LLVMBuildFSub(ctx->ac.builder, emit_data->args[0], halfval, "");
emit_data->args[1] = LLVMBuildExtractElement(ctx->ac.builder,
@@ -3715,21 +3704,20 @@ static void interp_fetch_args(
emit_data->arg_count = 2;
}
}
static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct si_shader *shader = ctx->shader;
- struct gallivm_state *gallivm = &ctx->gallivm;
const struct tgsi_shader_info *info = &shader->selector->info;
LLVMValueRef interp_param;
const struct tgsi_full_instruction *inst = emit_data->inst;
const struct tgsi_full_src_register *input = &inst->Src[0];
int input_base, input_array_size;
int chan;
int i;
LLVMValueRef prim_mask = LLVMGetParam(ctx->main_fn, SI_PARAM_PRIM_MASK);
LLVMValueRef array_idx;
int interp_param_idx;
@@ -3800,21 +3788,21 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
interp_el = ac_to_float(&ctx->ac, interp_el);
temp1 = LLVMBuildFMul(ctx->ac.builder, ddx_el, emit_data->args[0], "");
temp1 = LLVMBuildFAdd(ctx->ac.builder, temp1, interp_el, "");
temp2 = LLVMBuildFMul(ctx->ac.builder, ddy_el, emit_data->args[1], "");
ij_out[i] = LLVMBuildFAdd(ctx->ac.builder, temp2, temp1, "");
}
- interp_param = lp_build_gather_values(gallivm, ij_out, 2);
+ interp_param = lp_build_gather_values(&ctx->gallivm, ij_out, 2);
}
if (interp_param)
interp_param = ac_to_float(&ctx->ac, interp_param);
for (chan = 0; chan < 4; chan++) {
LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->f32, input_array_size));
unsigned schan = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan);
for (unsigned idx = 0; idx < input_array_size; ++idx) {
@@ -3944,21 +3932,20 @@ static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
/* Emit one vertex from the geometry shader */
static void si_llvm_emit_vertex(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *uint = &bld_base->uint_bld;
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = &shader->selector->info;
- struct gallivm_state *gallivm = &ctx->gallivm;
struct lp_build_if_state if_state;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
ctx->param_gs2vs_offset);
LLVMValueRef gs_next_vertex;
LLVMValueRef can_emit, kill;
unsigned chan, offset;
int i;
unsigned stream;
stream = si_llvm_get_stream(bld_base, emit_data);
@@ -3981,21 +3968,21 @@ static void si_llvm_emit_vertex(
shader->selector->gs_max_out_vertices, 0), "");
bool use_kill = !info->writes_memory;
if (use_kill) {
kill = lp_build_select(&bld_base->base, can_emit,
LLVMConstReal(ctx->f32, 1.0f),
LLVMConstReal(ctx->f32, -1.0f));
ac_build_kill(&ctx->ac, kill);
} else {
- lp_build_if(&if_state, gallivm, can_emit);
+ lp_build_if(&if_state, &ctx->gallivm, can_emit);
}
offset = 0;
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr = ctx->outputs[i];
for (chan = 0; chan < 4; chan++) {
if (!(info->output_usagemask[i] & (1 << chan)) ||
((info->output_streams[i] >> (2 * chan)) & 3) != stream)
continue;
@@ -6058,21 +6045,20 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx,
/**
* Given a list of shader part functions, build a wrapper function that
* runs them in sequence to form a monolithic shader.
*/
static void si_build_wrapper_function(struct si_shader_context *ctx,
LLVMValueRef *parts,
unsigned num_parts,
unsigned main_part,
unsigned next_shader_first_part)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMBuilderRef builder = ctx->ac.builder;
/* PS epilog has one arg per color component; gfx9 merged shader
* prologs need to forward 32 user SGPRs.
*/
struct si_function_info fninfo;
LLVMValueRef initial[64], out[64];
LLVMTypeRef function_type;
unsigned num_first_params;
unsigned num_out, initial_num_out;
MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */
@@ -6215,21 +6201,21 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
#endif
lp_add_function_attr(parts[part], param_idx + 1, LP_FUNC_ATTR_INREG);
}
assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));
assert(is_sgpr || out_idx >= num_out_sgpr);
if (param_size == 1)
arg = out[out_idx];
else
- arg = lp_build_gather_values(gallivm, &out[out_idx], param_size);
+ arg = lp_build_gather_values(&ctx->gallivm, &out[out_idx], param_size);
if (LLVMTypeOf(arg) != param_type) {
if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
arg = LLVMBuildBitCast(builder, arg, ctx->i64, "");
arg = LLVMBuildIntToPtr(builder, arg, param_type, "");
} else {
arg = LLVMBuildBitCast(builder, arg, param_type, "");
}
}
@@ -6636,21 +6622,20 @@ si_get_shader_part(struct si_screen *sscreen,
return result;
}
}
/* Compile a new one. */
result = CALLOC_STRUCT(si_shader_part);
result->key = *key;
struct si_shader shader = {};
struct si_shader_context ctx;
- struct gallivm_state *gallivm = &ctx.gallivm;
si_init_shader_ctx(&ctx, sscreen, tm);
ctx.shader = &shader;
ctx.type = type;
switch (type) {
case PIPE_SHADER_VERTEX:
break;
case PIPE_SHADER_TESS_CTRL:
assert(!prolog);
@@ -6668,44 +6653,43 @@ si_get_shader_part(struct si_screen *sscreen,
default:
unreachable("bad shader part");
}
build(&ctx, key);
/* Compile. */
si_llvm_optimize_module(&ctx);
if (si_compile_llvm(sscreen, &result->binary, &result->config, tm,
- gallivm->module, debug, ctx.type, name)) {
+ ctx.ac.module, debug, ctx.type, name)) {
FREE(result);
result = NULL;
goto out;
}
result->next = *list;
*list = result;
out:
si_llvm_dispose(&ctx);
mtx_unlock(&sscreen->shader_parts_mutex);
return result;
}
static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef ptr[2], list;
/* Get the pointer to rw buffers. */
ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS);
ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI);
- list = lp_build_gather_values(gallivm, ptr, 2);
+ list = lp_build_gather_values(&ctx->gallivm, ptr, 2);
list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
list = LLVMBuildIntToPtr(ctx->ac.builder, list,
si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), "");
return list;
}
/**
* Build the vertex shader prolog function.
*
* The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values).
@@ -7042,21 +7026,20 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen,
* - overriding interpolation parameters for the API PS
* - polygon stippling
*
* All preloaded SGPRs and VGPRs are passed through unmodified unless they are
* overriden by other states. (e.g. per-sample interpolation)
* Interpolated colors are stored after the preloaded VGPRs.
*/
static void si_build_ps_prolog_function(struct si_shader_context *ctx,
union si_shader_part_key *key)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
struct si_function_info fninfo;
LLVMValueRef ret, func;
int num_returns, i, num_color_channels;
assert(si_need_ps_prolog(key));
si_init_function_info(&fninfo);
/* Declare inputs. */
for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
@@ -7227,21 +7210,21 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
/* If the interpolation qualifier is not CONSTANT (-1). */
if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
key->ps_prolog.color_interp_vgpr_index[i];
/* Get the (i,j) updated by bc_optimize handling. */
interp[0] = LLVMBuildExtractValue(ctx->ac.builder, ret,
interp_vgpr, "");
interp[1] = LLVMBuildExtractValue(ctx->ac.builder, ret,
interp_vgpr + 1, "");
- interp_ij = lp_build_gather_values(gallivm, interp, 2);
+ interp_ij = lp_build_gather_values(&ctx->gallivm, interp, 2);
}
/* Use the absolute location of the input. */
prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
if (key->ps_prolog.states.color_two_side) {
face = LLVMGetParam(func, face_vgpr);
face = ac_to_integer(&ctx->ac, face);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index d7ba5c4..7c2afe3 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -226,21 +226,20 @@ image_fetch_rsrc(
target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
dcc_off);
}
static LLVMValueRef image_fetch_coords(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
unsigned src, LLVMValueRef desc)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMBuilderRef builder = ctx->ac.builder;
unsigned target = inst->Memory.Texture;
unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
LLVMValueRef coords[4];
LLVMValueRef tmp;
int chan;
for (chan = 0; chan < num_coords; ++chan) {
tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
tmp = ac_to_integer(&ctx->ac, tmp);
@@ -276,21 +275,21 @@ static LLVMValueRef image_fetch_coords(
if (num_coords == 1)
return coords[0];
if (num_coords == 3) {
/* LLVM has difficulties lowering 3-element vectors. */
coords[3] = bld_base->uint_bld.undef;
num_coords = 4;
}
- return lp_build_gather_values(gallivm, coords, num_coords);
+ return lp_build_gather_values(&ctx->gallivm, coords, num_coords);
}
/**
* Append the extra mode bits that are used by image load and store.
*/
static void image_append_args(
struct si_shader_context *ctx,
struct lp_build_emit_data * emit_data,
unsigned target,
bool atomic,
@@ -467,38 +466,37 @@ static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), "");
return ptr;
}
static void load_emit_memory(
struct si_shader_context *ctx,
struct lp_build_emit_data *emit_data)
{
const struct tgsi_full_instruction *inst = emit_data->inst;
- struct gallivm_state *gallivm = &ctx->gallivm;
unsigned writemask = inst->Dst[0].Register.WriteMask;
LLVMValueRef channels[4], ptr, derived_ptr, index;
int chan;
ptr = get_memory_ptr(ctx, inst, ctx->f32, 1);
for (chan = 0; chan < 4; ++chan) {
if (!(writemask & (1 << chan))) {
channels[chan] = LLVMGetUndef(ctx->f32);
continue;
}
index = LLVMConstInt(ctx->i32, chan, 0);
derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
}
- emit_data->output[emit_data->chan] = lp_build_gather_values(gallivm, channels, 4);
+ emit_data->output[emit_data->chan] = lp_build_gather_values(&ctx->gallivm, channels, 4);
}
/**
* Return true if the memory accessed by a LOAD or STORE instruction is
* read-only or write-only, respectively.
*
* \param shader_buffers_reverse_access_mask
* For LOAD, set this to (store | atomic) slot usage in the shader.
* For STORE, set this to (load | atomic) slot usage in the shader.
* \param images_reverse_access_mask Same as above, but for images.
@@ -613,34 +611,33 @@ static void load_emit(
emit_data->args, emit_data->arg_count,
get_load_intr_attribs(can_speculate));
}
}
static void store_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
const struct tgsi_full_instruction * inst = emit_data->inst;
struct tgsi_full_src_register memory;
LLVMValueRef chans[4];
LLVMValueRef data;
LLVMValueRef rsrc;
unsigned chan;
emit_data->dst_type = ctx->voidt;
for (chan = 0; chan < 4; ++chan) {
chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
}
- data = lp_build_gather_values(gallivm, chans, 4);
+ data = lp_build_gather_values(&ctx->gallivm, chans, 4);
emit_data->args[emit_data->arg_count++] = data;
memory = tgsi_full_src_register_from_dst(&inst->Dst[0]);
if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
LLVMValueRef offset;
LLVMValueRef tmp;
rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false);
@@ -992,29 +989,28 @@ static void atomic_emit(
emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
}
static void set_tex_fetch_args(struct si_shader_context *ctx,
struct lp_build_emit_data *emit_data,
unsigned target,
LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
LLVMValueRef *param, unsigned count,
unsigned dmask)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
struct ac_image_args args = {};
/* Pad to power of two vector */
while (count < util_next_power_of_two(count))
param[count++] = LLVMGetUndef(ctx->i32);
if (count > 1)
- args.addr = lp_build_gather_values(gallivm, param, count);
+ args.addr = lp_build_gather_values(&ctx->gallivm, param, count);
else
args.addr = param[0];
args.resource = res_ptr;
args.sampler = samp_ptr;
args.dmask = dmask;
args.unorm = target == TGSI_TEXTURE_RECT ||
target == TGSI_TEXTURE_SHADOWRECT;
args.da = tgsi_is_array_sampler(target);
--
2.7.4
More information about the mesa-dev mailing list