<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Thu, Mar 8, 2018 at 9:08 AM, Samuel Pitoiset <span dir="ltr">&lt;<a href="mailto:samuel.pitoiset@gmail.com" target="_blank">samuel.pitoiset@gmail.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">RadeonSI does something similar; the VGPR decrease is a win,<br>
but I'm not sure if we really want to implement that.<br>
<br>
Polaris10:<br>
Totals from affected shaders:<br>
SGPRS: 116376 -> 116768 (0.34 %)<br>
VGPRS: 76556 -> 74868 (-2.20 %)<br>
Spilled SGPRs: 10347 -> 10466 (1.15 %)<br>
Code Size: 5555072 -> 5569024 (0.25 %) bytes<br>
Max Waves: 9854 -> 9951 (0.98 %)<br>
<br>
Signed-off-by: Samuel Pitoiset &lt;<a href="mailto:samuel.pitoiset@gmail.com">samuel.pitoiset@gmail.com</a>&gt;<br>
---<br>
src/amd/common/ac_nir_to_llvm.<wbr>c | 118 ++++++++++++++++++++++++++++++<wbr>+---------<br>
src/amd/common/ac_shader_abi.h | 7 +++<br>
2 files changed, 98 insertions(+), 27 deletions(-)<br>
<br>
diff --git a/src/amd/common/ac_nir_to_<wbr>llvm.c b/src/amd/common/ac_nir_to_<wbr>llvm.c<br>
index 644c85e2eb..eb0935972d 100644<br>
--- a/src/amd/common/ac_nir_to_<wbr>llvm.c<br>
+++ b/src/amd/common/ac_nir_to_<wbr>llvm.c<br>
@@ -3131,6 +3131,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,<br>
nir_intrinsic_instr *instr)<br>
{<br>
LLVMValueRef values[8];<br>
+ int location = instr->variables[0]->var-><wbr>data.location;<br>
int idx = instr->variables[0]->var-><wbr>data.driver_location;<br>
int ve = instr->dest.ssa.num_<wbr>components;<br>
unsigned comp = instr->variables[0]->var-><wbr>data.location_frac;<br>
@@ -3167,6 +3168,19 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,<br>
instr->num_components, vertex_index, const_index, type);<br>
}<br>
<br>
+ LLVMValueRef inputs[4];<br>
+<br>
+ if (ctx->stage == MESA_SHADER_FRAGMENT) {<br>
+ ctx->abi->load_fs_inputs(ctx-><wbr>abi, location,<br>
+ indir_index, const_index,<br>
+ stride, inputs);<br></blockquote><div><br></div><div>load_fs_inputs is NULL for radeonsi. Are you sure that radeonsi doesn't get here?<br><br></div><div>Marek<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ } else {<br>
+ unsigned index = idx +<br>
+ (indir_index ? 0 : const_index * stride);<br>
+<br>
+ memcpy(inputs, &ctx->abi->inputs[index], sizeof(inputs));<br>
+ }<br>
+<br>
for (unsigned chan = comp; chan < ve + comp; chan++) {<br>
if (indir_index) {<br>
unsigned count = glsl_count_attribute_slots(<br>
@@ -3174,14 +3188,15 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,<br>
ctx->stage == MESA_SHADER_VERTEX);<br>
count -= chan / 4;<br>
LLVMValueRef tmp_vec = ac_build_gather_values_<wbr>extended(<br>
- &ctx->ac, ctx->abi->inputs + idx + chan, count,<br>
+ &ctx->ac, inputs + chan, count,<br>
stride, false, true);<br>
<br>
values[chan] = LLVMBuildExtractElement(ctx-><wbr>ac.builder,<br>
tmp_vec,<br>
indir_index, "");<br>
- } else<br>
- values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];<br>
+ } else {<br>
+ values[chan] = inputs[chan];<br>
+ }<br>
}<br>
break;<br>
case nir_var_local:<br>
@@ -5556,45 +5571,93 @@ prepare_interp_optimize(struct radv_shader_context *ctx,<br>
}<br>
}<br>
<br>
+static unsigned<br>
+get_input_hw_index(struct radv_shader_context *ctx, unsigned idx)<br>
+{<br>
+ struct ac_shader_info *info = &ctx->shader_info->info;<br>
+ uint64_t mask = info->input_mask & ((1ull << idx) - 1);<br>
+<br>
+ mask &= ~(1ull << VARYING_SLOT_POS);<br>
+<br>
+ return util_bitcount64(mask);<br>
+}<br>
+<br>
+/* If this is true, preload FS inputs at the beginning of shaders. Otherwise,<br>
+ * reload them at each use. This must be true if the shader is using<br>
+ * derivatives and KILL, because KILL can leave the WQM and then a lazy<br>
+ * input load isn't in the WQM anymore.<br>
+ */<br>
+static bool<br>
+radv_preload_fs_inputs(struct radv_shader_context *ctx)<br>
+{<br>
+ return ctx->shader_info-><a href="http://info.ps">info.ps</a>.<wbr>uses_derivatives &&<br>
+ ctx->shader_info-><a href="http://info.ps">info.ps</a>.<wbr>uses_kill;<br>
+}<br>
+<br>
static void<br>
-handle_fs_inputs(struct radv_shader_context *ctx,<br>
- struct nir_shader *nir)<br>
+radv_load_fs_inputs(struct radv_shader_context *ctx, unsigned idx,<br>
+ LLVMValueRef out[4])<br>
{<br>
struct ac_shader_info *info = &ctx->shader_info->info;<br>
<br>
+ if (idx >= VARYING_SLOT_VAR0 ||<br>
+ idx == VARYING_SLOT_PNTC ||<br>
+ idx == VARYING_SLOT_PRIMITIVE_ID ||<br>
+ idx == VARYING_SLOT_LAYER) {<br>
+ unsigned interp_mode = info->ps.input_interp_mode[<wbr>idx];<br>
+ unsigned interp_loc = info->ps.input_interp_loc[idx]<wbr>;<br>
+ unsigned hw_index = get_input_hw_index(ctx, idx);<br>
+ LLVMValueRef interp_param =<br>
+ lookup_interp_param(&ctx->abi, interp_mode, interp_loc);<br>
+<br>
+ interp_fs_input(ctx, hw_index, interp_param, ctx->abi.prim_mask,<br>
+ &out[0]);<br>
+ } else if (idx == VARYING_SLOT_POS) {<br>
+ for (int i = 0; i < 3; ++i)<br>
+ out[i] = ctx->abi.frag_pos[i];<br>
+<br>
+ out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,<br>
+ ctx->abi.frag_pos[3]);<br>
+ }<br>
+}<br>
+<br>
+static void<br>
+load_fs_inputs(struct ac_shader_abi *abi,<br>
+ unsigned location,<br>
+ LLVMValueRef indir_index,<br>
+ unsigned const_index,<br>
+ unsigned stride,<br>
+ LLVMValueRef out[4])<br>
+{<br>
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(<wbr>abi);<br>
+<br>
+ if (!radv_preload_fs_inputs(ctx)) {<br>
+ radv_load_fs_inputs(ctx, location, out);<br>
+ } else {<br>
+ unsigned index = radeon_llvm_reg_index_soa(<wbr>location, 0);<br>
+<br>
+ index += (indir_index ? 0 : const_index * stride);<br>
+<br>
+ memcpy(out, &abi->inputs[index], sizeof(out[0]) * 4);<br>
+ }<br>
+}<br>
+<br>
+static void<br>
+handle_fs_inputs(struct radv_shader_context *ctx,<br>
+ struct nir_shader *nir)<br>
+{<br>
prepare_interp_optimize(ctx, nir);<br>
<br>
nir_foreach_variable(variable, &nir->inputs)<br>
handle_fs_input_decl(ctx, variable);<br>
<br>
- unsigned index = 0;<br>
-<br>
for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {<br>
- LLVMValueRef interp_param;<br>
LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0);<br>
<br>
if (!(ctx->shader_info->info.<wbr>input_mask & (1ull << i)))<br>
continue;<br>
<br>
- if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||<br>
- i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {<br>
- unsigned interp_mode = info->ps.input_interp_mode[i];<br>
- unsigned interp_loc = info->ps.input_interp_loc[i];<br>
-<br>
- interp_param = lookup_interp_param(&ctx->abi, interp_mode,<br>
- interp_loc);<br>
-<br>
- interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,<br>
- inputs);<br>
-<br>
- ++index;<br>
- } else if (i == VARYING_SLOT_POS) {<br>
- for(int i = 0; i < 3; ++i)<br>
- inputs[i] = ctx->abi.frag_pos[i];<br>
-<br>
- inputs[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,<br>
- ctx->abi.frag_pos[3]);<br>
- }<br>
+ radv_load_fs_inputs(ctx, i, inputs);<br>
}<br>
<br>
if (ctx->shader_info->info.needs_<wbr>multiview_view_index)<br>
@@ -6924,6 +6987,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(<wbr>LLVMTargetMachineRef tm,<br>
ctx.abi.load_base_vertex = radv_load_base_vertex;<br>
} else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {<br>
shader_info->fs.can_discard = shaders[i]->info.fs.uses_<wbr>discard;<br>
+ ctx.abi.load_fs_inputs = load_fs_inputs;<br>
ctx.abi.lookup_interp_param = lookup_interp_param;<br>
ctx.abi.load_sample_position = load_sample_position;<br>
ctx.abi.load_sample_mask_in = load_sample_mask_in;<br>
diff --git a/src/amd/common/ac_shader_<wbr>abi.h b/src/amd/common/ac_shader_<wbr>abi.h<br>
index 901e49b1f9..8e51ce9fdd 100644<br>
--- a/src/amd/common/ac_shader_<wbr>abi.h<br>
+++ b/src/amd/common/ac_shader_<wbr>abi.h<br>
@@ -97,6 +97,13 @@ struct ac_shader_abi {<br>
unsigned const_index,<br>
LLVMTypeRef type);<br>
<br>
+ void (*load_fs_inputs)(struct ac_shader_abi *abi,<br>
+ unsigned location,<br>
+ LLVMValueRef indir_index,<br>
+ unsigned const_index,<br>
+ unsigned stride,<br>
+ LLVMValueRef out[4]);<br>
+<br>
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi,<br>
LLVMTypeRef type,<br>
LLVMValueRef vertex_index,<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.16.2<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>