[Mesa-dev] [PATCH 1/3] radeonsi: move building llvm.SI.load.const into ac_build_buffer_load
Marek Olšák
maraeo at gmail.com
Fri May 19 15:54:46 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
---
src/amd/common/ac_llvm_build.c | 50 ++++++++++++++++++++++++--------
src/amd/common/ac_llvm_build.h | 3 +-
src/amd/common/ac_nir_to_llvm.c | 2 +-
src/gallium/drivers/radeonsi/si_shader.c | 23 +++++++--------
4 files changed, 51 insertions(+), 27 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 87a1fb7..4b048ba 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -626,47 +626,73 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
int num_channels,
LLVMValueRef vindex,
LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
unsigned glc,
unsigned slc,
- bool readonly_memory)
+ bool readonly_memory,
+ bool coherent)
{
+ LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
+ if (voffset)
+ offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
+ if (soffset)
+ offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
+
+ /* Loads from a shader buffer that has no stores in the same shader
+ * and is non-coherent with other shader invocations can use SMEM.
+ */
+ if (readonly_memory && !coherent) {
+ assert(vindex == NULL);
+ assert(glc == 0);
+ assert(slc == 0);
+
+ LLVMValueRef result[4];
+
+ for (int i = 0; i < num_channels; i++) {
+ if (i) {
+ offset = LLVMBuildAdd(ctx->builder, offset,
+ LLVMConstInt(ctx->i32, 4, 0), "");
+ }
+ LLVMValueRef args[2] = {rsrc, offset};
+ result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32",
+ ctx->f32, args, 2,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_LEGACY);
+ }
+ if (num_channels == 1)
+ return result[0];
+
+ if (num_channels == 3)
+ result[num_channels++] = LLVMGetUndef(ctx->f32);
+ return ac_build_gather_values(ctx, result, num_channels);
+ }
+
unsigned func = CLAMP(num_channels, 1, 3) - 1;
LLVMValueRef args[] = {
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
- LLVMConstInt(ctx->i32, inst_offset, 0),
+ offset,
LLVMConstInt(ctx->i1, glc, 0),
LLVMConstInt(ctx->i1, slc, 0)
};
LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
ctx->v4f32};
const char *type_names[] = {"f32", "v2f32", "v4f32"};
char name[256];
- if (voffset) {
- args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
- "");
- }
-
- if (soffset) {
- args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
- "");
- }
-
snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
type_names[func]);
return ac_build_intrinsic(ctx, name, types[func], args,
ARRAY_SIZE(args),
/* READNONE means writes can't affect it, while
* READONLY means that writes can affect it. */
readonly_memory && HAVE_LLVM >= 0x0400 ?
AC_FUNC_ATTR_READNONE :
AC_FUNC_ATTR_READONLY);
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 0ecbc4a..754461b 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -136,21 +136,22 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
int num_channels,
LLVMValueRef vindex,
LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
unsigned glc,
unsigned slc,
- bool readonly_memory);
+ bool readonly_memory,
+ bool coherent);
LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
bool readonly_memory);
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 8ae0a75..9794c70 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2816,21 +2816,21 @@ load_tes_input(struct nir_to_llvm_context *ctx,
param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
is_compact && const_index > 3) {
const_index -= 3;
param++;
}
buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
is_compact, vertex_index, indir_index);
result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL,
- buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true);
+ buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, true);
result = trim_vector(ctx, result, instr->num_components);
result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
return result;
}
static LLVMValueRef
load_gs_input(struct nir_to_llvm_context *ctx,
nir_intrinsic_instr *instr)
{
LLVMValueRef indir_index, vtx_offset;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 61f1384..7dd121b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -826,39 +826,39 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
LLVMValueRef base, bool readonly_memory)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef value, value2;
LLVMTypeRef llvm_type = tgsi2llvmtype(bld_base, type);
LLVMTypeRef vec_type = LLVMVectorType(llvm_type, 4);
if (swizzle == ~0) {
value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
- 0, 1, 0, readonly_memory);
+ 0, 1, 0, readonly_memory, true);
return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
}
if (!tgsi_type_is_64bit(type)) {
value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
- 0, 1, 0, readonly_memory);
+ 0, 1, 0, readonly_memory, true);
value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
return LLVMBuildExtractElement(gallivm->builder, value,
LLVMConstInt(ctx->i32, swizzle, 0), "");
}
value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
- swizzle * 4, 1, 0, readonly_memory);
+ swizzle * 4, 1, 0, readonly_memory, true);
value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
- swizzle * 4 + 4, 1, 0, readonly_memory);
+ swizzle * 4 + 4, 1, 0, readonly_memory, true);
return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
}
/**
* Load from LDS.
*
* \param type output value type
* \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
* \param dw_addr address in dwords
@@ -1147,28 +1147,28 @@ static LLVMValueRef fetch_input_gs(
vtx_offset_param += ctx->param_gs_vtx2_offset - 2;
}
vtx_offset = lp_build_mul_imm(uint,
LLVMGetParam(ctx->main_fn,
vtx_offset_param),
4);
soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle) * 256, 0);
value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0,
- vtx_offset, soffset, 0, 1, 0, true);
+ vtx_offset, soffset, 0, 1, 0, true, true);
if (tgsi_type_is_64bit(type)) {
LLVMValueRef value2;
soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle + 1) * 256, 0);
value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
ctx->i32_0, vtx_offset, soffset,
- 0, 1, 0, true);
+ 0, 1, 0, true, true);
return si_llvm_emit_fetch_64bit(bld_base, type,
value, value2);
}
return LLVMBuildBitCast(gallivm->builder,
value,
tgsi2llvmtype(bld_base, type), "");
}
static int lookup_interp_param_index(unsigned interpolate, unsigned location)
{
@@ -1382,26 +1382,22 @@ static LLVMValueRef get_sample_id(struct si_shader_context *ctx)
}
/**
* Load a dword from a constant buffer.
*/
static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
LLVMValueRef resource,
LLVMValueRef offset)
{
- LLVMBuilderRef builder = ctx->gallivm.builder;
- LLVMValueRef args[2] = {resource, offset};
-
- return lp_build_intrinsic(builder, "llvm.SI.load.const.v4i32", ctx->f32, args, 2,
- LP_FUNC_ATTR_READNONE |
- LP_FUNC_ATTR_LEGACY);
+ return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
+ 0, 0, 0, true, false);
}
static LLVMValueRef load_sample_position(struct si_shader_context *ctx, LLVMValueRef sample_id)
{
struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
LLVMValueRef buf_index = LLVMConstInt(ctx->i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, buf_index);
@@ -5192,21 +5188,22 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
}
LLVMValueRef soffset = LLVMConstInt(ctx.i32,
offset * gs_selector->gs_max_out_vertices * 16 * 4, 0);
offset++;
outputs[i].values[chan] =
ac_build_buffer_load(&ctx.ac,
ctx.gsvs_ring[0], 1,
ctx.i32_0, voffset,
- soffset, 0, 1, 1, true);
+ soffset, 0, 1, 1,
+ true, true);
}
}
/* Streamout and exports. */
if (gs_selector->so.num_outputs) {
si_llvm_emit_streamout(&ctx, outputs,
gsinfo->num_outputs,
stream);
}
--
2.7.4
More information about the mesa-dev
mailing list