[Mesa-dev] [PATCH 16/24] radeonsi: merge and simplify tbuffer_store functions
Marek Olšák
maraeo at gmail.com
Sat Feb 25 23:58:14 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
---
src/amd/common/ac_llvm_build.c | 62 +++++++++----------------
src/amd/common/ac_llvm_build.h | 34 ++++----------
src/amd/common/ac_nir_to_llvm.c | 16 +++----
src/gallium/drivers/radeonsi/si_shader.c | 79 ++++++++++++++++----------------
4 files changed, 77 insertions(+), 114 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 34085bb..cc1eaf1 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -536,87 +536,69 @@ ac_build_indexed_load_const(struct ac_llvm_context *ctx,
LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
return result;
}
/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
* The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
* or v4i32 (num_channels=3,4).
*/
void
-ac_build_tbuffer_store(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- unsigned num_channels,
- LLVMValueRef vaddr,
- LLVMValueRef soffset,
- unsigned inst_offset,
- unsigned dfmt,
- unsigned nfmt,
- unsigned offen,
- unsigned idxen,
- unsigned glc,
- unsigned slc,
- unsigned tfe)
+ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ unsigned num_channels,
+ LLVMValueRef vaddr,
+ LLVMValueRef soffset,
+ unsigned inst_offset,
+ bool offen,
+ bool glc,
+ bool slc)
{
+ static unsigned dfmt[] = {
+ V_008F0C_BUF_DATA_FORMAT_32,
+ V_008F0C_BUF_DATA_FORMAT_32_32,
+ V_008F0C_BUF_DATA_FORMAT_32_32_32,
+ V_008F0C_BUF_DATA_FORMAT_32_32_32_32
+ };
+ assert(num_channels >= 1 && num_channels <= 4);
+
LLVMValueRef args[] = {
rsrc,
vdata,
LLVMConstInt(ctx->i32, num_channels, 0),
vaddr,
soffset,
LLVMConstInt(ctx->i32, inst_offset, 0),
- LLVMConstInt(ctx->i32, dfmt, 0),
- LLVMConstInt(ctx->i32, nfmt, 0),
+ LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
+ LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
LLVMConstInt(ctx->i32, offen, 0),
- LLVMConstInt(ctx->i32, idxen, 0),
+ LLVMConstInt(ctx->i32, 0, 0), /* idxen */
LLVMConstInt(ctx->i32, glc, 0),
LLVMConstInt(ctx->i32, slc, 0),
- LLVMConstInt(ctx->i32, tfe, 0)
+ LLVMConstInt(ctx->i32, 0, 0), /* tfe */
};
/* The instruction offset field has 12 bits */
assert(offen || inst_offset < (1 << 12));
/* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
unsigned func = CLAMP(num_channels, 1, 3) - 1;
const char *types[] = {"i32", "v2i32", "v4i32"};
char name[256];
snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
args, ARRAY_SIZE(args),
AC_FUNC_ATTR_LEGACY);
}
-void
-ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- unsigned num_channels,
- LLVMValueRef vaddr,
- LLVMValueRef soffset,
- unsigned inst_offset)
-{
- static unsigned dfmt[] = {
- V_008F0C_BUF_DATA_FORMAT_32,
- V_008F0C_BUF_DATA_FORMAT_32_32,
- V_008F0C_BUF_DATA_FORMAT_32_32_32,
- V_008F0C_BUF_DATA_FORMAT_32_32_32_32
- };
- assert(num_channels >= 1 && num_channels <= 4);
-
- ac_build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset,
- inst_offset, dfmt[num_channels - 1],
- V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0);
-}
-
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
int num_channels,
LLVMValueRef vindex,
LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
unsigned glc,
unsigned slc,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index e7773d7..65a9a05 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -115,44 +115,30 @@ ac_build_indexed_store(struct ac_llvm_context *ctx,
LLVMValueRef
ac_build_indexed_load(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index,
bool uniform);
LLVMValueRef
ac_build_indexed_load_const(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index);
void
-ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- unsigned num_channels,
- LLVMValueRef vaddr,
- LLVMValueRef soffset,
- unsigned inst_offset);
-
-void
-ac_build_tbuffer_store(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- unsigned num_channels,
- LLVMValueRef vaddr,
- LLVMValueRef soffset,
- unsigned inst_offset,
- unsigned dfmt,
- unsigned nfmt,
- unsigned offen,
- unsigned idxen,
- unsigned glc,
- unsigned slc,
- unsigned tfe);
-
+ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ unsigned num_channels,
+ LLVMValueRef vaddr,
+ LLVMValueRef soffset,
+ unsigned inst_offset,
+ bool offen,
+ bool glc,
+ bool slc);
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
int num_channels,
LLVMValueRef vindex,
LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
unsigned glc,
unsigned slc,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 30d48aa..4143b3c 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3131,26 +3131,24 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
}
for (unsigned j = 0; j < length; j++) {
LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
out_ptr[j], "");
LLVMValueRef voffset = LLVMConstInt(ctx->i32, (slot * 4 + j + start) * ctx->gs_max_out_vertices, false);
voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, "");
voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->i32, 4, false), "");
out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
- ac_build_tbuffer_store(&ctx->ac, ctx->gsvs_ring,
- out_val, 1,
- voffset, ctx->gs2vs_offset, 0,
- V_008F0C_BUF_DATA_FORMAT_32,
- V_008F0C_BUF_NUM_FORMAT_UINT,
- 1, 0, 1, 1, 0);
+ ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
+ out_val, 1,
+ voffset, ctx->gs2vs_offset, 0,
+ 1, 1, 1);
}
idx += slot_inc;
}
gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex,
ctx->i32one, "");
LLVMBuildStore(ctx->builder, gs_next_vertex, ctx->gs_next_vertex);
ac_emit_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id);
}
@@ -4631,28 +4629,26 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx)
}
param_index = shader_io_get_unique_index(i);
if (param_index > max_output_written)
max_output_written = param_index;
for (j = 0; j < length; j++) {
LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], "");
out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
- ac_build_tbuffer_store(&ctx->ac,
+ ac_build_buffer_store_dword(&ctx->ac,
ctx->esgs_ring,
out_val, 1,
LLVMGetUndef(ctx->i32), ctx->es2gs_offset,
(4 * param_index + j + start) * 4,
- V_008F0C_BUF_DATA_FORMAT_32,
- V_008F0C_BUF_NUM_FORMAT_UINT,
- 0, 0, 1, 1, 0);
+ 0, 1, 1);
}
}
ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16;
}
static void
si_export_mrt_color(struct nir_to_llvm_context *ctx,
LLVMValueRef *color, unsigned param, bool is_last)
{
LLVMValueRef args[9];
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 699fefd..daaf9f1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1040,31 +1040,31 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
value = ac_emit_clamp(&ctx->ac, value);
/* Skip LDS stores if there is no LDS read of this output. */
if (!skip_lds_store)
lds_store(bld_base, chan_index, dw_addr, value);
value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
values[chan_index] = value;
if (inst->Dst[0].Register.WriteMask != 0xF && !is_tess_factor) {
- ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 1,
- buf_addr, base,
- 4 * chan_index);
+ ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
+ buf_addr, base,
+ 4 * chan_index, 1, 1, 1);
}
}
if (inst->Dst[0].Register.WriteMask == 0xF && !is_tess_factor) {
LLVMValueRef value = lp_build_gather_values(bld_base->base.gallivm,
values, 4);
- ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 4, buf_addr,
- base, 0);
+ ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
+ base, 0, 1, 1, 1);
}
}
static LLVMValueRef fetch_input_gs(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle)
{
struct lp_build_context *base = &bld_base->base;
@@ -2076,25 +2076,25 @@ static void emit_streamout_output(struct si_shader_context *ctx,
case 3: /* as v4i32 (aligned to 4) */
case 4: /* as v4i32 */
vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps)));
for (int j = 0; j < num_comps; j++) {
vdata = LLVMBuildInsertElement(builder, vdata, out[j],
LLVMConstInt(ctx->i32, j, 0), "");
}
break;
}
- ac_build_tbuffer_store_dwords(&ctx->ac, so_buffers[buf_idx],
- vdata, num_comps,
- so_write_offsets[buf_idx],
- LLVMConstInt(ctx->i32, 0, 0),
- stream_out->dst_offset * 4);
+ ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx],
+ vdata, num_comps,
+ so_write_offsets[buf_idx],
+ LLVMConstInt(ctx->i32, 0, 0),
+ stream_out->dst_offset * 4, 1, 1, 1);
}
/**
* Write streamout data to buffers for vertex stream @p stream (different
* vertex streams can occur for GS copy shaders).
*/
static void si_llvm_emit_streamout(struct si_shader_context *ctx,
struct si_shader_output_values *outputs,
unsigned noutput, unsigned stream)
{
@@ -2404,22 +2404,22 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
"");
LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(ctx,
get_rel_patch_id(ctx),
invocation_id,
lp_build_const_int32(gallivm, i));
LLVMValueRef value = lds_load(bld_base, TGSI_TYPE_SIGNED, ~0,
lds_ptr);
- ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 4, buffer_addr,
- buffer_offset, 0);
+ ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
+ buffer_offset, 0, 1, 1, 1);
}
}
static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
LLVMValueRef rel_patch_id,
LLVMValueRef invocation_id,
LLVMValueRef tcs_out_current_patch_data_offset)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -2517,65 +2517,68 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
tf_base = LLVMGetParam(ctx->main_fn,
SI_PARAM_TESS_FACTOR_OFFSET);
byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
lp_build_const_int32(gallivm, 4 * stride), "");
lp_build_if(&inner_if_ctx, gallivm,
LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
rel_patch_id, bld_base->uint_bld.zero, ""));
/* Store the dynamic HS control word. */
- ac_build_tbuffer_store_dwords(&ctx->ac, buffer,
- lp_build_const_int32(gallivm, 0x80000000),
- 1, lp_build_const_int32(gallivm, 0), tf_base, 0);
+ ac_build_buffer_store_dword(&ctx->ac, buffer,
+ lp_build_const_int32(gallivm, 0x80000000),
+ 1, lp_build_const_int32(gallivm, 0), tf_base,
+ 0, 1, 1, 1);
lp_build_endif(&inner_if_ctx);
/* Store the tessellation factors. */
- ac_build_tbuffer_store_dwords(&ctx->ac, buffer, vec0,
- MIN2(stride, 4), byteoffset, tf_base, 4);
+ ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
+ MIN2(stride, 4), byteoffset, tf_base,
+ 4, 1, 1, 1);
if (vec1)
- ac_build_tbuffer_store_dwords(&ctx->ac, buffer, vec1,
- stride - 4, byteoffset, tf_base, 20);
+ ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
+ stride - 4, byteoffset, tf_base,
+ 20, 1, 1, 1);
/* Store the tess factors into the offchip buffer if TES reads them. */
if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;
LLVMValueRef tf_inner_offset;
unsigned param_outer, param_inner;
buf = ac_build_indexed_load_const(&ctx->ac, rw_buffers,
LLVMConstInt(ctx->i32, SI_HS_RING_TESS_OFFCHIP, 0));
base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
param_outer = si_shader_io_get_unique_index(
TGSI_SEMANTIC_TESSOUTER, 0);
tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
LLVMConstInt(ctx->i32, param_outer, 0));
outer_vec = lp_build_gather_values(gallivm, outer,
util_next_power_of_two(outer_comps));
- ac_build_tbuffer_store_dwords(&ctx->ac, buf, outer_vec,
- outer_comps, tf_outer_offset,
- base, 0);
+ ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
+ outer_comps, tf_outer_offset,
+ base, 0, 1, 1, 1);
if (inner_comps) {
param_inner = si_shader_io_get_unique_index(
TGSI_SEMANTIC_TESSINNER, 0);
tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
LLVMConstInt(ctx->i32, param_inner, 0));
inner_vec = inner_comps == 1 ? inner[0] :
lp_build_gather_values(gallivm, inner, inner_comps);
- ac_build_tbuffer_store_dwords(&ctx->ac, buf, inner_vec,
- inner_comps, tf_inner_offset,
- base, 0);
+ ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
+ inner_comps, tf_inner_offset,
+ base, 0, 1, 1, 1);
}
}
lp_build_endif(&if_ctx);
}
/* This only writes the tessellation factor levels. */
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
@@ -2681,28 +2684,26 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
continue;
param_index = si_shader_io_get_unique_index(info->output_semantic_name[i],
info->output_semantic_index[i]);
for (chan = 0; chan < 4; chan++) {
LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, "");
- ac_build_tbuffer_store(&ctx->ac,
- ctx->esgs_ring,
- out_val, 1,
- LLVMGetUndef(ctx->i32), soffset,
- (4 * param_index + chan) * 4,
- V_008F0C_BUF_DATA_FORMAT_32,
- V_008F0C_BUF_NUM_FORMAT_UINT,
- 0, 0, 1, 1, 0);
+ ac_build_buffer_store_dword(&ctx->ac,
+ ctx->esgs_ring,
+ out_val, 1,
+ LLVMGetUndef(ctx->i32), soffset,
+ (4 * param_index + chan) * 4,
+ 0, 1, 1);
}
}
}
static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
ac_emit_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID));
@@ -5049,27 +5050,25 @@ static void si_llvm_emit_vertex(
LLVMValueRef voffset =
lp_build_const_int32(gallivm, offset *
shader->selector->gs_max_out_vertices);
offset++;
voffset = lp_build_add(uint, voffset, gs_next_vertex);
voffset = lp_build_mul_imm(uint, voffset, 4);
out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, "");
- ac_build_tbuffer_store(&ctx->ac,
- ctx->gsvs_ring[stream],
- out_val, 1,
- voffset, soffset, 0,
- V_008F0C_BUF_DATA_FORMAT_32,
- V_008F0C_BUF_NUM_FORMAT_UINT,
- 1, 0, 1, 1, 0);
+ ac_build_buffer_store_dword(&ctx->ac,
+ ctx->gsvs_ring[stream],
+ out_val, 1,
+ voffset, soffset, 0,
+ 1, 1, 1);
}
}
gs_next_vertex = lp_build_add(uint, gs_next_vertex,
lp_build_const_int32(gallivm, 1));
LLVMBuildStore(gallivm->builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
/* Signal vertex emission */
ac_emit_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
--
2.7.4
More information about the mesa-dev mailing list