<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Jul 18, 2016 at 1:26 PM, Kenneth Graunke <span dir="ltr"><<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Currently, i965 interpolates all FS inputs at the top of the program.<br>
This has advantages and disadvantages, but I'd like to keep that policy<br>
while reworking this code.  We can consider changing it independently.<br>
<br>
The next patch will make the compiler generate PLN instructions "on the<br>
fly", when it encounters an input load intrinsic, rather than doing it<br>
for all inputs at the start of the program.<br>
<br>
To emulate this behavior, we introduce an ugly pass to move all NIR<br>
load_interpolated_input and payload-based (not interpolator message)<br>
load_barycentric_* intrinsics to the shader's start block.<br>
<br>
This helps avoid regressions in shader-db for cases such as:<br>
<br>
   if (...) {<br>
      ...load some input...<br>
   } else {<br>
      ...load that same input...<br>
   }<br>
<br>
which CSE can't handle, because there's no dominance relationship<br>
between the two loads.  Because the start block dominates all others,<br>
we can CSE all inputs and emit PLNs exactly once, as we did before.<br>
<br>
Ideally, global value numbering would eliminate these redundant loads,<br>
while not forcing them all the way to the start block.  When that lands,<br>
we should consider dropping this hacky pass.<br></blockquote><div><br></div><div>Ugh... You're probably right that we need to do this but it's ugly...  I look forward to deleting this code :)<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Again, this pass currently does nothing, as i965 doesn't generate these<br>
intrinsics yet.  But it will shortly, and I figured I'd separate this<br>
code as it's relatively self-contained.<br>
<br>
Signed-off-by: Kenneth Graunke <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>><br>
---<br>
 src/mesa/drivers/dri/i965/brw_fs.cpp | 78 ++++++++++++++++++++++++++++++++++++<br>
 1 file changed, 78 insertions(+)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
index ea6616b..94127bc 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
@@ -6400,6 +6400,83 @@ computed_depth_mode(const nir_shader *shader)<br>
 }<br>
<br>
 /**<br>
+ * Move load_interpolated_input with simple (payload-based) barycentric modes<br>
+ * to the top of the program so we don't emit multiple PLNs for the same input.<br>
+ *<br>
+ * This works around CSE not being able to handle non-dominating cases<br>
+ * such as:<br>
+ *<br>
+ *    if (...) {<br>
+ *       interpolate input<br>
+ *    } else {<br>
+ *       interpolate the same exact input<br>
+ *    }<br>
+ *<br>
+ * This should be replaced by global value numbering someday.<br>
+ */<br>
+void<br>
+move_interpolation_to_top(nir_shader *nir)<br>
+{<br>
+   nir_foreach_function(f, nir) {<br>
+      if (!f->impl)<br>
+         continue;<br>
+<br>
+      nir_builder b;<br>
+      nir_builder_init(&b, f->impl);<br>
+      b.cursor = nir_before_block(nir_start_block(f->impl));<br>
+<br>
+      nir_foreach_block(block, f->impl) {<br>
+         nir_foreach_instr_safe(instr, block) {<br>
+            if (instr->type != nir_instr_type_intrinsic)<br>
+               continue;<br>
+<br>
+            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);<br>
+            if (load->intrinsic != nir_intrinsic_load_interpolated_input)<br>
+               continue;<br>
+<br>
+            nir_intrinsic_instr *bary =<br>
+               nir_instr_as_intrinsic(load->src[0].ssa->parent_instr);<br>
+<br>
+            /* Leave interpolateAtSample/Offset() where it is. */<br>
+            if (bary->intrinsic == nir_intrinsic_load_barycentric_at_sample ||<br>
+                bary->intrinsic == nir_intrinsic_load_barycentric_at_offset)<br>
+               continue;<br>
+<br>
+            /* Make a new load_barycentric_* intrinsic at the top */<br>
+            nir_ssa_def *top_bary =<br>
+               nir_load_barycentric(&b, bary->intrinsic,<br>
+                                    nir_intrinsic_interp_mode(bary));<br>
+<br>
+            /* Make a new load_intrinsic_input at the top */<br>
+            nir_intrinsic_instr *top_load = nir_intrinsic_instr_create(nir,<br>
+               nir_intrinsic_load_interpolated_input);<br>
+            top_load->num_components = load->num_components;<br>
+            top_load->src[0] = nir_src_for_ssa(top_bary);<br>
+            /* We don't support indirects today - otherwise we might not<br>
+             * be able to move this to the top. add_const_offset_to_base<br>
+             * guarantees the offset will be 0.<br>
+             */<br>
+            assert(nir_src_as_const_value(load->src[1]) &&<br>
+                   nir_src_as_const_value(load->src[1])->u32[0] == 0);<br>
+            top_load->src[1] = nir_src_for_ssa(nir_imm_int(&b, 0));<br>
+            top_load->const_index[0] = load->const_index[0];<br>
+            top_load->const_index[1] = load->const_index[1];<br>
+            nir_ssa_dest_init(&top_load->instr, &top_load->dest,<br>
+                              load->dest.ssa.num_components,<br>
+                              load->dest.ssa.bit_size, NULL);<br>
+<br>
+            nir_ssa_def_rewrite_uses(&load->dest.ssa,<br>
+                                     nir_src_for_ssa(&top_load->dest.ssa));<br>
+            nir_builder_instr_insert(&b, &top_load->instr);<br></blockquote><div><br></div><div>You're doing way more work than you need to here.  Just save off the start block at the top of the pass and do exec_node_remove and exec_list_insert to put it in the top block.<br><br></div><div>--Jason<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+         }<br>
+      }<br>
+      nir_metadata_preserve(f->impl, (nir_metadata)<br>
+                            ((unsigned) nir_metadata_block_index |<br>
+                             (unsigned) nir_metadata_dominance));<br>
+   }<br>
+}<br>
+<br>
+/**<br>
  * Apply default interpolation settings to FS inputs which don't specify any.<br>
  */<br>
 static void<br>
@@ -6506,6 +6583,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,<br>
    brw_nir_lower_fs_outputs(shader);<br>
    if (!key->multisample_fbo)<br>
       NIR_PASS_V(shader, demote_sample_qualifiers);<br>
+   NIR_PASS_V(shader, move_interpolation_to_top);<br>
    shader = brw_postprocess_nir(shader, compiler->devinfo, true);<br>
<br>
    /* key->alpha_test_func means simulating alpha testing via discards,<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.9.0<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>