[Mesa-dev] [PATCH 6/9] radeonsi: don't read the LS output vertex stride from an SGPR in LS
Marek Olšák
maraeo at gmail.com
Wed Sep 6 17:03:57 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
Because the stride is now a compile-time constant in the LS, the compiler is able to generate ds_write2_b64 instead of ds_write2_b32.
This saves 20 bytes in one shader binary (a shader having only 1 output).
---
src/gallium/drivers/radeonsi/si_shader.c | 25 +++++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 32a6fa0..7c3bd8b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -391,20 +391,38 @@ static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
ctx->shader->selector ?
ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : 0;
/* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices)
return LLVMConstInt(ctx->i32, tcs_out_vertices, 0);
return unpack_param(ctx, ctx->param_tcs_offchip_layout, 6, 6);
}
+static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
+{
+ unsigned stride;
+
+ switch (ctx->type) {
+ case PIPE_SHADER_VERTEX:
+ stride = util_last_bit64(ctx->shader->selector->outputs_written);
+ return LLVMConstInt(ctx->i32, stride * 4, 0);
+
+ case PIPE_SHADER_TESS_CTRL:
+ return unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+
+ default:
+ assert(0);
+ return NULL;
+ }
+}
+
static LLVMValueRef get_instance_index_for_fetch(
struct si_shader_context *ctx,
unsigned param_start_instance, LLVMValueRef divisor)
{
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef result = ctx->abi.instance_id;
/* The division must be done before START_INSTANCE is added. */
if (divisor != ctx->i32_1)
@@ -1040,21 +1058,21 @@ static LLVMValueRef desc_from_addr_base64k(struct si_shader_context *ctx,
}
static LLVMValueRef fetch_input_tcs(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type, unsigned swizzle)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef dw_addr, stride;
- stride = unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+ stride = get_tcs_in_vertex_dw_stride(ctx);
dw_addr = get_tcs_in_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
return lds_load(bld_base, type, swizzle, dw_addr);
}
static LLVMValueRef fetch_output_tcs(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type, unsigned swizzle)
@@ -2603,21 +2621,21 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef invocation_id, buffer, buffer_offset;
LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base;
uint64_t inputs;
invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
buffer_offset = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
- lds_vertex_stride = unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+ lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
lds_vertex_offset = LLVMBuildMul(gallivm->builder, invocation_id,
lds_vertex_stride, "");
lds_base = get_tcs_in_current_patch_offset(ctx);
lds_base = LLVMBuildAdd(gallivm->builder, lds_base, lds_vertex_offset, "");
inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;
while (inputs) {
unsigned i = u_bit_scan64(&inputs);
LLVMValueRef lds_ptr = LLVMBuildAdd(gallivm->builder, lds_base,
@@ -3014,22 +3032,21 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = &shader->selector->info;
struct gallivm_state *gallivm = &ctx->gallivm;
unsigned i, chan;
LLVMValueRef vertex_id = LLVMGetParam(ctx->main_fn,
ctx->param_rel_auto_id);
- LLVMValueRef vertex_dw_stride =
- unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+ LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx);
LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id,
vertex_dw_stride, "");
/* Write outputs to LDS. The next shader (TCS aka HS) will read
* its inputs from it. */
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr = ctx->outputs[i];
unsigned name = info->output_semantic_name[i];
unsigned index = info->output_semantic_index[i];
--
2.7.4
More information about the mesa-dev
mailing list