[Mesa-dev] [PATCH] radeonsi: Use build_buffer_load helper function for geometry shaders
Tom Stellard
thomas.stellard at amd.com
Thu Dec 15 20:35:55 UTC 2016
Also add a need_range_checks parameter to this function, which can be
set to false to enable some additional optimizations. Currently, this
will cause the compiler to emit the llvm.SI.buffer.load.dword intrinsic
instead of llvm.amdgcn.buffer.load. Eventually, this information
will be passed to LLVM to enable more aggressive addressing mode optimizations.
---
src/gallium/drivers/radeonsi/si_shader.c | 79 ++++++++++++--------------------
1 file changed, 29 insertions(+), 50 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 72cf827..5b15ad4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -825,12 +825,13 @@ static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
LLVMValueRef soffset,
unsigned inst_offset,
unsigned glc,
- unsigned slc)
+ unsigned slc,
+ bool need_range_checks)
{
struct gallivm_state *gallivm = &ctx->gallivm;
unsigned func = CLAMP(num_channels, 1, 3) - 1;
- if (HAVE_LLVM >= 0x309) {
+ if (need_range_checks && HAVE_LLVM >= 0x309) {
LLVMValueRef args[] = {
LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, ""),
vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
@@ -896,7 +897,7 @@ static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type, unsigned swizzle,
LLVMValueRef buffer, LLVMValueRef offset,
- LLVMValueRef base)
+ LLVMValueRef base, bool need_range_checks)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -906,14 +907,14 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
if (swizzle == ~0) {
value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
- 0, 1, 0);
+ 0, 1, 0, need_range_checks);
return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
}
if (!tgsi_type_is_64bit(type)) {
value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
- 0, 1, 0);
+ 0, 1, 0, need_range_checks);
value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
return LLVMBuildExtractElement(gallivm->builder, value,
@@ -921,10 +922,10 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
}
value = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
- swizzle * 4, 1, 0);
+ swizzle * 4, 1, 0, need_range_checks);
value2 = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
- swizzle * 4 + 4, 1, 0);
+ swizzle * 4 + 4, 1, 0, need_range_checks);
return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
}
@@ -1044,7 +1045,7 @@ static LLVMValueRef fetch_input_tes(
base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);
- return buffer_load(bld_base, type, swizzle, buffer, base, addr);
+ return buffer_load(bld_base, type, swizzle, buffer, base, addr, true);
}
static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
@@ -1125,13 +1126,12 @@ static LLVMValueRef fetch_input_gs(
struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld;
struct gallivm_state *gallivm = base->gallivm;
LLVMValueRef vtx_offset;
- LLVMValueRef args[9];
unsigned vtx_offset_param;
struct tgsi_shader_info *info = &shader->selector->info;
unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
unsigned param;
- LLVMValueRef value;
+ LLVMValueRef soffset, value;
if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
return get_primitive_id(bld_base, swizzle);
@@ -1163,27 +1163,15 @@ static LLVMValueRef fetch_input_gs(
4);
param = si_shader_io_get_unique_index(semantic_name, semantic_index);
- args[0] = ctx->esgs_ring;
- args[1] = vtx_offset;
- args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle) * 256);
- args[3] = uint->zero;
- args[4] = uint->one; /* OFFEN */
- args[5] = uint->zero; /* IDXEN */
- args[6] = uint->one; /* GLC */
- args[7] = uint->zero; /* SLC */
- args[8] = uint->zero; /* TFE */
-
- value = lp_build_intrinsic(gallivm->builder,
- "llvm.SI.buffer.load.dword.i32.i32",
- ctx->i32, args, 9,
- LP_FUNC_ATTR_READONLY);
+ soffset = lp_build_const_int32(gallivm, (param * 4 + swizzle) * 256);
+
+ value = build_buffer_load(ctx, ctx->esgs_ring, 1, NULL,
+ vtx_offset, soffset, 0, 1, 0, false);
if (tgsi_type_is_64bit(type)) {
LLVMValueRef value2;
- args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
- value2 = lp_build_intrinsic(gallivm->builder,
- "llvm.SI.buffer.load.dword.i32.i32",
- ctx->i32, args, 9,
- LP_FUNC_ATTR_READONLY);
+ soffset = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
+ value2 = build_buffer_load(ctx, ctx->esgs_ring, 1, NULL,
+ vtx_offset, soffset, 0, 1, 0, false);
return si_llvm_emit_fetch_64bit(bld_base, type,
value, value2);
}
@@ -1709,7 +1697,7 @@ static void declare_system_value(
lp_build_const_int32(gallivm, param));
value = buffer_load(&radeon_bld->soa.bld_base, TGSI_TYPE_FLOAT,
- ~0, buffer, base, addr);
+ ~0, buffer, base, addr, true);
break;
}
@@ -6420,7 +6408,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
struct lp_build_context *uint = &bld_base->uint_bld;
struct si_shader_output_values *outputs;
struct tgsi_shader_info *gsinfo = &gs_selector->info;
- LLVMValueRef args[9];
+ LLVMValueRef voffset;
int i, r;
outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
@@ -6447,18 +6435,6 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
create_function(&ctx);
preload_ring_buffers(&ctx);
- args[0] = ctx.gsvs_ring[0];
- args[1] = lp_build_mul_imm(uint,
- LLVMGetParam(ctx.main_fn,
- ctx.param_vertex_id),
- 4);
- args[3] = uint->zero;
- args[4] = uint->one; /* OFFEN */
- args[5] = uint->zero; /* IDXEN */
- args[6] = uint->one; /* GLC */
- args[7] = uint->one; /* SLC */
- args[8] = uint->zero; /* TFE */
-
/* Fetch the vertex stream ID.*/
LLVMValueRef stream_id;
@@ -6468,6 +6444,9 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
stream_id = uint->zero;
/* Fill in output information. */
+ voffset = lp_build_mul_imm(uint, LLVMGetParam(ctx.main_fn,
+ ctx.param_vertex_id), 4);
+ /* Fetch vertex data from GSVS ring */
for (i = 0; i < gsinfo->num_outputs; ++i) {
outputs[i].semantic_name = gsinfo->output_semantic_name[i];
outputs[i].semantic_index = gsinfo->output_semantic_index[i];
@@ -6502,24 +6481,24 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
offset = 0;
for (i = 0; i < gsinfo->num_outputs; ++i) {
for (unsigned chan = 0; chan < 4; chan++) {
+ LLVMValueRef load, soffset;
if (!(gsinfo->output_usagemask[i] & (1 << chan)) ||
outputs[i].vertex_stream[chan] != stream) {
outputs[i].values[chan] = ctx.soa.bld_base.base.undef;
continue;
}
- args[2] = lp_build_const_int32(
- gallivm,
+ soffset = lp_build_const_int32(gallivm,
offset * gs_selector->gs_max_out_vertices * 16 * 4);
offset++;
+ load = build_buffer_load(&ctx, ctx.gsvs_ring[0], 1,
+ NULL, voffset, soffset,
+ 0, 1, 1, false);
+
outputs[i].values[chan] =
LLVMBuildBitCast(gallivm->builder,
- lp_build_intrinsic(gallivm->builder,
- "llvm.SI.buffer.load.dword.i32.i32",
- ctx.i32, args, 9,
- LP_FUNC_ATTR_READONLY),
- ctx.f32, "");
+ load, ctx.f32, "");
}
}
--
2.7.4
More information about the mesa-dev
mailing list