<div dir="ltr"><div>Also, could we rename this to nir_lower_alu_to_scalar?  That's more descriptive.<br></div>--Jason<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Jan 22, 2015 at 4:27 PM, Jason Ekstrand <span dir="ltr"><<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote"><div><div class="h5">On Thu, Jan 22, 2015 at 2:52 PM, Eric Anholt <span dir="ltr"><<a href="mailto:eric@anholt.net" target="_blank">eric@anholt.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span>This is the equivalent of brw_fs_channel_expressions.cpp, which I wanted<br>
for vc4.<br>
<br>
</span>v2: Use the nir_src_for_ssa() helper, and another instance of<br>
    nir_alu_src_copy().<br>
v3: Drop the non-SSA support.  All intended callers will have SSA-only ALU<br>
    ops.<br>
v4: Use insert_before, drop stale bcsel/fcsel comment, drop now-unused<br>
    unsupported() function, drop lower_context struct.<br>
---<br>
<br>
This can also be found on my nir-scalarize-2 branch.<br>
<span><br>
src/glsl/Makefile.sources           |   1 +<br>
 src/glsl/nir/nir.h                  |   1 +<br>
</span> src/glsl/nir/nir_lower_alu_scalar.c | 182 ++++++++++++++++++++++++++++++++++++<br>
 3 files changed, 184 insertions(+)<br>
<span> create mode 100644 src/glsl/nir/nir_lower_alu_scalar.c<br>
<br>
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources<br>
index 6237627..9cd1a6a 100644<br>
--- a/src/glsl/Makefile.sources<br>
+++ b/src/glsl/Makefile.sources<br>
@@ -24,6 +24,7 @@ NIR_FILES = \<br>
        $(GLSL_SRCDIR)/nir/nir_intrinsics.c \<br>
        $(GLSL_SRCDIR)/nir/nir_intrinsics.h \<br>
        $(GLSL_SRCDIR)/nir/nir_live_variables.c \<br>
+       $(GLSL_SRCDIR)/nir/nir_lower_alu_scalar.c \<br>
        $(GLSL_SRCDIR)/nir/nir_lower_atomics.c \<br>
        $(GLSL_SRCDIR)/nir/nir_lower_global_vars_to_local.c \<br>
        $(GLSL_SRCDIR)/nir/nir_lower_locals_to_regs.c \<br>
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h<br>
</span>index 0dbe000..aa0927a 100644<br>
--- a/src/glsl/nir/nir.h<br>
+++ b/src/glsl/nir/nir.h<br>
@@ -1520,6 +1520,7 @@ void nir_lower_vars_to_ssa(nir_shader *shader);<br>
<span> void nir_remove_dead_variables(nir_shader *shader);<br>
<br>
 void nir_lower_vec_to_movs(nir_shader *shader);<br>
+void nir_lower_ops_scalar(nir_shader *shader);<br>
<br>
 void nir_lower_samplers(nir_shader *shader,<br>
                         struct gl_shader_program *shader_program,<br>
diff --git a/src/glsl/nir/nir_lower_alu_scalar.c b/src/glsl/nir/nir_lower_alu_scalar.c<br>
new file mode 100644<br>
</span>index 0000000..64552be<br>
--- /dev/null<br>
+++ b/src/glsl/nir/nir_lower_alu_scalar.c<br>
@@ -0,0 +1,182 @@<br>
<div><div>+/*<br>
+ * Copyright © 2014-2015 Broadcom<br>
+ *<br>
+ * Permission is hereby granted, free of charge, to any person obtaining a<br>
+ * copy of this software and associated documentation files (the "Software"),<br>
+ * to deal in the Software without restriction, including without limitation<br>
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,<br>
+ * and/or sell copies of the Software, and to permit persons to whom the<br>
+ * Software is furnished to do so, subject to the following conditions:<br>
+ *<br>
+ * The above copyright notice and this permission notice (including the next<br>
+ * paragraph) shall be included in all copies or substantial portions of the<br>
+ * Software.<br>
+ *<br>
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br>
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br>
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL<br>
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br>
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING<br>
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS<br>
+ * IN THE SOFTWARE.<br>
+ */<br>
+<br>
+#include "nir.h"<br>
+<br>
+/** @file nir_lower_alu_scalar.c<br>
+ *<br>
+ * Replaces nir_alu_instr operations with more than one channel used in the<br>
+ * arguments with individual per-channel operations.<br>
+ */<br>
+<br>
+static void<br>
</div></div><span>+nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components)<br>
+{<br>
+   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);<br>
+   instr->dest.write_mask = (1 << num_components) - 1;<br>
+}<br>
+<br>
+static void<br>
</span><span>+lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,<br>
+                void *mem_ctx)<br>
+{<br>
+   unsigned num_components = nir_op_infos[instr->op].input_sizes[0];<br>
+<br>
+   nir_ssa_def *last = NULL;<br>
+   for (unsigned i = 0; i < num_components; i++) {<br>
+      nir_alu_instr *chan = nir_alu_instr_create(mem_ctx, chan_op);<br>
+      nir_alu_ssa_dest_init(chan, 1);<br>
+      nir_alu_src_copy(&chan->src[0], &instr->src[0], mem_ctx);<br>
+      chan->src[0].swizzle[0] = chan->src[0].swizzle[i];<br>
+      if (nir_op_infos[chan_op].num_inputs > 1) {<br>
</span>+         assert(nir_op_infos[chan_op].num_inputs == 2);<br>
<span>+         nir_alu_src_copy(&chan->src[1], &instr->src[1], mem_ctx);<br>
+         chan->src[1].swizzle[0] = chan->src[1].swizzle[i];<br>
+      }<br>
+<br>
+      nir_instr_insert_before(&instr->instr, &chan->instr);<br>
+<br>
+      if (i == 0) {<br>
+         last = &chan->dest.dest.ssa;<br>
+      } else {<br>
+         nir_alu_instr *merge = nir_alu_instr_create(mem_ctx, merge_op);<br>
+         nir_alu_ssa_dest_init(merge, 1);<br>
+         merge->dest.write_mask = 1;<br>
</span>+         merge->src[0].src = nir_src_for_ssa(last);<br>
+         merge->src[1].src = nir_src_for_ssa(&chan->dest.dest.ssa);<br>
<span>+         nir_instr_insert_before(&instr->instr, &merge->instr);<br>
+         last = &merge->dest.dest.ssa;<br>
</span>+      }<br>
+   }<br>
<span>+<br>
+   assert(instr->dest.write_mask == 1);<br>
</span>+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last),<br>
+                            mem_ctx);<br>
<span>+   nir_instr_remove(&instr->instr);<br>
+}<br>
+<br>
+static void<br>
</span>+lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)<br>
+{<br>
+   unsigned num_src = nir_op_infos[instr->op].num_inputs;<br>
+   unsigned i, chan;<br>
+<br>
+   assert(instr->dest.dest.is_ssa);<br>
<div><div>+   assert(instr->dest.write_mask != 0);<br>
+<br>
+#define LOWER_REDUCTION(name, chan, merge) \<br>
+   case name##2: \<br>
+   case name##3: \<br>
+   case name##4: \<br>
+      lower_reduction(instr, chan, merge, mem_ctx); \<br>
+      break;<br>
+<br>
+   switch (instr->op) {<br>
+   case nir_op_vec4:<br>
+   case nir_op_vec3:<br>
+   case nir_op_vec2:<br>
+      /* We don't need to scalarize these ops, they're the ones generated to<br>
+       * group up outputs into a value that can be SSAed.<br>
+       */<br>
+      return;<br>
+<br>
+      LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);<br>
+      LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);<br>
+      LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);<br>
+      LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior);<br>
+      LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior);<br>
+      LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand);<br>
+      LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for);<br>
+      LOWER_REDUCTION(nir_op_ball, nir_op_imov, nir_op_iand);<br>
+      LOWER_REDUCTION(nir_op_bany, nir_op_imov, nir_op_ior);<br>
+      LOWER_REDUCTION(nir_op_fall, nir_op_fmov, nir_op_fand);<br>
+      LOWER_REDUCTION(nir_op_fany, nir_op_fmov, nir_op_for);<br>
+<br>
+   default:<br>
+      break;<br>
+   }<br>
+<br>
</div></div>+   if (instr->dest.dest.ssa.num_components == 1)<br>
+      return;<br>
+<br>
+   unsigned num_components = instr->dest.dest.ssa.num_components;<br>
<span>+   static const nir_op nir_op_map[] = {nir_op_vec2, nir_op_vec3, nir_op_vec4};<br>
</span>+   nir_alu_instr *vec_instr =<br>
<span>+      nir_alu_instr_create(mem_ctx, nir_op_map[num_components - 2]);<br>
+   nir_alu_ssa_dest_init(vec_instr, num_components);<br>
+<br>
</span>+   for (chan = 0; chan < 4; chan++) {<br>
+      if (!(instr->dest.write_mask & (1 << chan)))<br>
+         continue;<br>
<span>+<br>
+      nir_alu_instr *lower = nir_alu_instr_create(mem_ctx, instr->op);<br>
+      for (i = 0; i < num_src; i++) {<br>
</span>+         unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?<br>
+                              0 : chan);<br></blockquote><div><br></div></div></div>If we're going to do input_sizes == 1 rather than input_sizes != 0, can we at least put in an assert?  While I can't really imagine a vectorized instruction with an explicit vec2 source, it is possible in the IR.  If we ever add one, this is going to be a really annoying bug to find.<br><br></div><div class="gmail_quote">Other than that,<br>Reviewed-by: Jason Ekstrand <<a href="mailto:jason.ekstrand@iastate.edu" target="_blank">jason.ekstrand@iastate.edu</a>><br></div><div><div class="h5"><div class="gmail_quote"><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+         nir_alu_src_copy(&lower->src[i], &instr->src[i], mem_ctx);<br>
<span>+         for (int j = 0; j < 4; j++)<br>
</span>+            lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];<br>
+      }<br>
+<br>
+      nir_alu_ssa_dest_init(lower, 1);<br>
<span>+      lower->dest.saturate = instr->dest.saturate;<br>
</span>+      vec_instr->src[chan].src = nir_src_for_ssa(&lower->dest.dest.ssa);<br>
<span>+<br>
+      nir_instr_insert_before(&instr->instr, &lower->instr);<br>
+   }<br>
+<br>
</span>+   nir_instr_insert_before(&instr->instr, &vec_instr->instr);<br>
+<br>
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,<br>
+                            nir_src_for_ssa(&vec_instr->dest.dest.ssa),<br>
<span>+                            mem_ctx);<br>
+<br>
+   nir_instr_remove(&instr->instr);<br>
+}<br>
</span><span>+<br>
+static bool<br>
+lower_ops_scalar_block(nir_block *block, void *data)<br>
+{<br>
</span>+   nir_foreach_instr_safe(block, instr) {<br>
<span>+      if (instr->type == nir_instr_type_alu)<br>
</span>+         lower_alu_instr_scalar((nir_alu_instr *)instr, data);<br>
<span>+   }<br>
+<br>
+   return true;<br>
+}<br>
+<br>
+static void<br>
+nir_lower_ops_scalar_impl(nir_function_impl *impl)<br>
+{<br>
</span>+   nir_foreach_block(impl, lower_ops_scalar_block, ralloc_parent(impl));<br>
<div><div>+}<br>
+<br>
+void<br>
+nir_lower_ops_scalar(nir_shader *shader)<br>
+{<br>
+   nir_foreach_overload(shader, overload) {<br>
+      if (overload->impl)<br>
+         nir_lower_ops_scalar_impl(overload->impl);<br>
+   }<br>
+}<br>
--<br>
2.1.3<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</div></div></blockquote></div><br></div></div></div></div>
</blockquote></div><br></div>