[Mesa-dev] [PATCH v2 3/3] i965/fs_nir: Get rid of get_alu_src

Fri Jan 23 22:00:13 PST 2015

Originally, get_alu_src was supposed to handle resolving swizzles and
things like that.  However, now that basically every instruction we have
only takes scalar sources, we don't really need it anymore.  The only case
where it's still marginally useful is for the mov and vecN operations that
are left over from SSA form.  We can handle those cases as a special case
easily enough.  As a side-effect, we don't need the vec_to_movs pass
anymore.
---
 src/mesa/drivers/dri/i965/brw_fs.h       |   1 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 153 +++++++++++++++++++------------
 2 files changed, 95 insertions(+), 59 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 25197cd..b95e2c0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -589,7 +589,6 @@ public:
    void nir_emit_texture(nir_tex_instr *instr);
    void nir_emit_jump(nir_jump_instr *instr);
    fs_reg get_nir_src(nir_src src);
-   fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src);
    fs_reg get_nir_dest(nir_dest dest);
    void emit_percomp(fs_inst *inst, unsigned wr_mask);
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index a722660..d3916d6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -138,8 +138,6 @@ fs_visitor::emit_nir_code()
 
    nir_convert_from_ssa(nir);
    nir_validate_shader(nir);
-   nir_lower_vec_to_movs(nir);
-   nir_validate_shader(nir);
 
    /* emit the arrays used for inputs and outputs - load/store intrinsics will
     * be converted to reads/writes of these arrays
@@ -417,6 +415,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
 void
 fs_visitor::nir_emit_cf_list(exec_list *list)
 {
+   exec_list_validate(list);
    foreach_list_typed(nir_cf_node, node, node, list) {
       switch (node->type) {
       case nir_cf_node_if:
@@ -538,34 +537,117 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
 {
    struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
 
-   fs_reg op[3];
    fs_reg result = get_nir_dest(instr->dest.dest);
    result.type = brw_type_for_nir_type(nir_op_infos[instr->op].output_type);
 
-   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
-      op[i] = get_nir_alu_src(instr, i);
+   fs_reg op[4];
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      op[i] = get_nir_src(instr->src[i].src);
+      op[i].type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]);
+      op[i].abs = instr->src[i].abs;
+      op[i].negate = instr->src[i].negate;
+   }
+
+   /* We get a bunch of mov's out of the from_ssa pass and they may still
+    * be vectorized.  We'll handle them as a special-case.  We'll also
+    * handle vecN here because it's basically the same thing.
+    */
+   bool need_extra_copy = false;
+   switch (instr->op) {
+   case nir_op_vec4:
+      if (!instr->src[3].src.is_ssa &&
+          instr->dest.dest.reg.reg == instr->src[3].src.reg.reg)
+         need_extra_copy = true;
+      /* fall through */
+   case nir_op_vec3:
+      if (!instr->src[2].src.is_ssa &&
+          instr->dest.dest.reg.reg == instr->src[2].src.reg.reg)
+         need_extra_copy = true;
+      /* fall through */
+   case nir_op_vec2:
+      if (!instr->src[1].src.is_ssa &&
+          instr->dest.dest.reg.reg == instr->src[1].src.reg.reg)
+         need_extra_copy = true;
+      /* fall through */
+   case nir_op_imov:
+   case nir_op_fmov: {
+      if (!instr->src[0].src.is_ssa &&
+          instr->dest.dest.reg.reg == instr->src[0].src.reg.reg)
+         need_extra_copy = true;
+
+      fs_reg temp;
+      if (need_extra_copy) {
+         temp = retype(vgrf(4), result.type);
+      } else {
+         temp = result;
+      }
+
+      if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
+         for (unsigned i = 0; i < 4; i++) {
+            if (!(instr->dest.write_mask & (1 << i)))
+               continue;
+
+            emit(MOV(offset(temp, i),
+                 offset(op[0], instr->src[0].swizzle[i])))
+                ->saturate = instr->dest.saturate;
+         }
+      } else {
+         for (unsigned i = 0; i < 4; i++) {
+            if (!(instr->dest.write_mask & (1 << i)))
+               continue;
+
+            emit(MOV(offset(temp, i),
+                 offset(op[i], instr->src[i].swizzle[0])))
+                ->saturate = instr->dest.saturate;
+         }
+      }
+
+      /* In this case the source and destination registers were the same,
+       * so we need to insert an extra set of moves in order to deal with
+       * any swizzling.
+       */
+      if (need_extra_copy) {
+         for (unsigned i = 0; i < 4; i++) {
+            if (!(instr->dest.write_mask & (1 << i)))
+               continue;
+
+            emit(MOV(offset(result, i), offset(temp, i)));
+         }
+      }
+      return;
+   }
+   default:
+      break;
+   }
 
+   /*
+    * At this point, we have dealt with any instruction that operates on
+    * more than a single channel.  Therefore, we can just adjust the source
+    * and destination registers for that channel and emit the instruction.
+    */
+   unsigned channel = 0;
    if (nir_op_infos[instr->op].output_size == 0) {
-      /* We've already scalarized, so we know that we only have one
-       * channel.  The only question is which channel.
+      /* Since NIR is doing the scalarizing for us, we should only ever see
+       * vectorized operations with a single channel.
        */
       assert(_mesa_bitcount(instr->dest.write_mask) == 1);
-      unsigned off = ffs(instr->dest.write_mask) - 1;
-      result = offset(result, off);
+      channel = ffs(instr->dest.write_mask) - 1;
+
+      result = offset(result, channel);
+   }
 
-      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
-         op[i] = offset(op[i], off);
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      assert(nir_op_infos[instr->op].input_sizes[i] < 2);
+      op[i] = offset(op[i], instr->src[i].swizzle[channel]);
    }
 
    switch (instr->op) {
-   case nir_op_fmov:
    case nir_op_i2f:
    case nir_op_u2f:
       emit(MOV(result, op[0]))
           ->saturate = instr->dest.saturate;
       break;
 
-   case nir_op_imov:
    case nir_op_f2i:
    case nir_op_f2u:
       emit(MOV(result, op[0]));
@@ -816,11 +898,6 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
    case nir_op_fnoise4_4:
       unreachable("not reached: should be handled by lower_noise");
 
-   case nir_op_vec2:
-   case nir_op_vec3:
-   case nir_op_vec4:
-      unreachable("not reached: should be handled by lower_quadop_vector");
-
    case nir_op_ldexp:
       unreachable("not reached: should be handled by ldexp_to_arith()");
 
@@ -1043,46 +1120,6 @@ fs_visitor::get_nir_src(nir_src src)
 }
 
 fs_reg
-fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src)
-{
-   fs_reg reg = get_nir_src(instr->src[src].src);
-
-   reg.type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[src]);
-   reg.abs = instr->src[src].abs;
-   reg.negate = instr->src[src].negate;
-
-   bool needs_swizzle = false;
-   unsigned num_components = 0;
-   for (unsigned i = 0; i < 4; i++) {
-      if (!nir_alu_instr_channel_used(instr, src, i))
-         continue;
-
-      if (instr->src[src].swizzle[i] != i)
-         needs_swizzle = true;
-
-      num_components = i + 1;
-   }
-
-   if (needs_swizzle) {
-      /* resolve the swizzle through MOV's */
-      fs_reg new_reg = vgrf(num_components);
-      new_reg.type = reg.type;
-
-      for (unsigned i = 0; i < 4; i++) {
-         if (!nir_alu_instr_channel_used(instr, src, i))
-            continue;
-
-         emit(MOV(offset(new_reg, i),
-                  offset(reg, instr->src[src].swizzle[i])));
-      }
-
-      return new_reg;
-   }
-
-   return reg;
-}
-
-fs_reg
 fs_visitor::get_nir_dest(nir_dest dest)
 {
    fs_reg reg;
-- 
2.2.1