[Mesa-dev] [PATCH 4/5] ac: LDS loads of TCS and GS inputs can be non-volatile and invariant
Marek Olšák
maraeo at gmail.com
Thu Nov 9 22:41:21 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
---
src/amd/common/ac_llvm_build.c | 6 ++++++
src/amd/common/ac_llvm_build.h | 2 ++
src/amd/common/ac_nir_to_llvm.c | 4 ++--
src/gallium/drivers/radeonsi/si_shader.c | 25 ++++++++++++++-----------
4 files changed, 24 insertions(+), 13 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 4c9beda..305abd3 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1765,20 +1765,26 @@ void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_LOCAL_ADDR_SPACE),
"lds");
}
LLVMValueRef ac_lds_load_volatile(struct ac_llvm_context *ctx,
LLVMValueRef dw_addr)
{
return ac_build_load_custom(ctx, ctx->lds, dw_addr, false, false, true);
}
+LLVMValueRef ac_lds_load_invariant(struct ac_llvm_context *ctx,
+ LLVMValueRef dw_addr)
+{
+ return ac_build_load_custom(ctx, ctx->lds, dw_addr, false, true, false);
+}
+
void ac_lds_store_volatile(struct ac_llvm_context *ctx,
LLVMValueRef dw_addr, LLVMValueRef value)
{
ac_build_store(ctx, ctx->lds, dw_addr, ac_to_integer(ctx, value), true);
}
void ac_lds_store_writeonly(struct ac_llvm_context *ctx,
LLVMValueRef dw_addr, LLVMValueRef value)
{
ac_build_store(ctx, ctx->lds, dw_addr, ac_to_integer(ctx, value), false);
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 25a540a..3bd085c 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -286,20 +286,22 @@ void ac_get_image_intr_name(const char *base_name,
void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
LLVMValueRef main_fn,
uint8_t *vs_output_param_offset,
uint32_t num_outputs,
uint8_t *num_param_exports);
void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
void ac_declare_lds_as_pointer(struct ac_llvm_context *ac);
LLVMValueRef ac_lds_load_volatile(struct ac_llvm_context *ctx,
LLVMValueRef dw_addr);
+LLVMValueRef ac_lds_load_invariant(struct ac_llvm_context *ctx,
+ LLVMValueRef dw_addr);
void ac_lds_store_volatile(struct ac_llvm_context *ctx,
LLVMValueRef dw_addr, LLVMValueRef value);
void ac_lds_store_writeonly(struct ac_llvm_context *ctx,
LLVMValueRef dw_addr, LLVMValueRef value);
LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
LLVMTypeRef dst_type,
LLVMValueRef src0);
#ifdef __cplusplus
}
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 3f41b9f..b4d840f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2719,21 +2719,21 @@ load_tcs_input(struct nir_to_llvm_context *ctx,
false, NULL, per_vertex ? &vertex_index : NULL,
&const_index, &indir_index);
stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8);
dw_addr = get_tcs_in_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
indir_index);
unsigned comp = instr->variables[0]->var->data.location_frac;
for (unsigned i = 0; i < instr->num_components + comp; i++) {
- value[i] = ac_lds_load_volatile(&ctx->ac, dw_addr);
+ value[i] = ac_lds_load_invariant(&ctx->ac, dw_addr);
dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
ctx->ac.i32_1, "");
}
result = build_varying_gather_values(&ctx->ac, value, instr->num_components, comp);
result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
return result;
}
static LLVMValueRef
load_tcs_output(struct nir_to_llvm_context *ctx,
@@ -2901,21 +2901,21 @@ load_gs_input(struct nir_to_llvm_context *ctx,
LLVMConstInt(ctx->ac.i32, 4, false), "");
param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
unsigned comp = instr->variables[0]->var->data.location_frac;
for (unsigned i = comp; i < instr->num_components + comp; i++) {
if (ctx->ac.chip_class >= GFX9) {
LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param];
dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
- value[i] = ac_lds_load_volatile(&ctx->ac, dw_addr);
+ value[i] = ac_lds_load_invariant(&ctx->ac, dw_addr);
} else {
args[0] = ctx->esgs_ring;
args[1] = vtx_offset;
args[2] = LLVMConstInt(ctx->ac.i32, (param * 4 + i + const_index) * 256, false);
args[3] = ctx->ac.i32_0;
args[4] = ctx->ac.i32_1; /* OFFEN */
args[5] = ctx->ac.i32_0; /* IDXEN */
args[6] = ctx->ac.i32_1; /* GLC */
args[7] = ctx->ac.i32_0; /* SLC */
args[8] = ctx->ac.i32_0; /* TFE */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 98ac914..05c95a6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1069,48 +1069,51 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
/**
* Load from LDS.
*
* \param type output value type
* \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
* \param dw_addr address in dwords
*/
static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type, unsigned swizzle,
- LLVMValueRef dw_addr)
+ LLVMValueRef dw_addr, bool Volatile)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef value;
if (swizzle == ~0) {
LLVMValueRef values[TGSI_NUM_CHANNELS];
for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
- values[chan] = lds_load(bld_base, type, chan, dw_addr);
+ values[chan] = lds_load(bld_base, type, chan, dw_addr, Volatile);
return lp_build_gather_values(&ctx->gallivm, values,
TGSI_NUM_CHANNELS);
}
/* Split 64-bit loads. */
if (tgsi_type_is_64bit(type)) {
LLVMValueRef lo, hi;
- lo = lds_load(bld_base, TGSI_TYPE_UNSIGNED, swizzle, dw_addr);
- hi = lds_load(bld_base, TGSI_TYPE_UNSIGNED, swizzle + 1, dw_addr);
+ lo = lds_load(bld_base, TGSI_TYPE_UNSIGNED, swizzle, dw_addr, Volatile);
+ hi = lds_load(bld_base, TGSI_TYPE_UNSIGNED, swizzle + 1, dw_addr, Volatile);
return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
}
dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
LLVMConstInt(ctx->i32, swizzle, 0));
- value = ac_lds_load_volatile(&ctx->ac, dw_addr);
+ if (Volatile)
+ value = ac_lds_load_volatile(&ctx->ac, dw_addr);
+ else
+ value = ac_lds_load_invariant(&ctx->ac, dw_addr);
return bitcast(bld_base, type, value);
}
/**
* Store to LDS.
*
* \param swizzle offset (typically 0..3)
* \param dw_addr address in dwords
* \param value value to store
@@ -1159,41 +1162,41 @@ static LLVMValueRef fetch_input_tcs(
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type, unsigned swizzle)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef dw_addr, stride;
stride = get_tcs_in_vertex_dw_stride(ctx);
dw_addr = get_tcs_in_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
- return lds_load(bld_base, type, swizzle, dw_addr);
+ return lds_load(bld_base, type, swizzle, dw_addr, false);
}
static LLVMValueRef fetch_output_tcs(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type, unsigned swizzle)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef dw_addr, stride;
if (reg->Register.Dimension) {
stride = get_tcs_out_vertex_dw_stride(ctx);
dw_addr = get_tcs_out_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
} else {
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, NULL, dw_addr);
}
- return lds_load(bld_base, type, swizzle, dw_addr);
+ return lds_load(bld_base, type, swizzle, dw_addr, true);
}
static LLVMValueRef fetch_input_tes(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type, unsigned swizzle)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef buffer, base, addr;
@@ -1343,21 +1346,21 @@ static LLVMValueRef fetch_input_gs(
vtx_offset = unpack_param(ctx, ctx->param_gs_vtx45_offset,
index % 2 ? 16 : 0, 16);
break;
default:
assert(0);
return NULL;
}
vtx_offset = LLVMBuildAdd(ctx->ac.builder, vtx_offset,
LLVMConstInt(ctx->i32, param * 4, 0), "");
- return lds_load(bld_base, type, swizzle, vtx_offset);
+ return lds_load(bld_base, type, swizzle, vtx_offset, false);
}
/* GFX6: input load from the ESGS ring in memory. */
if (swizzle == ~0) {
LLVMValueRef values[TGSI_NUM_CHANNELS];
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
values[chan] = fetch_input_gs(bld_base, reg, type, chan);
}
return lp_build_gather_values(&ctx->gallivm, values,
@@ -2754,21 +2757,21 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
LLVMValueRef lds_ptr = LLVMBuildAdd(ctx->ac.builder, lds_base,
LLVMConstInt(ctx->i32, 4 * i, 0),
"");
LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(ctx,
get_rel_patch_id(ctx),
invocation_id,
LLVMConstInt(ctx->i32, i, 0));
LLVMValueRef value = lds_load(bld_base, TGSI_TYPE_SIGNED, ~0,
- lds_ptr);
+ lds_ptr, false);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
buffer_offset, 0, 1, 0, true, false);
}
}
static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
LLVMValueRef rel_patch_id,
LLVMValueRef invocation_id,
LLVMValueRef tcs_out_current_patch_data_offset,
@@ -2840,25 +2843,25 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
lds_base = tcs_out_current_patch_data_offset;
lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
LLVMConstInt(ctx->i32,
tess_inner_index * 4, 0), "");
lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
LLVMConstInt(ctx->i32,
tess_outer_index * 4, 0), "");
for (i = 0; i < outer_comps; i++) {
outer[i] = out[i] =
- lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
+ lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer, true);
}
for (i = 0; i < inner_comps; i++) {
inner[i] = out[outer_comps+i] =
- lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner);
+ lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner, true);
}
}
if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
/* For isolines, the hardware expects tess factors in the
* reverse order from what GLSL / TGSI specify.
*/
LLVMValueRef tmp = out[0];
out[0] = out[1];
out[1] = tmp;
--
2.7.4
More information about the mesa-dev mailing list