[Mesa-dev] [PATCH 2/2] nir/lower_vec_to_movs: Coalesce reg writes when there are no hazards

Jason Ekstrand jason at jlekstrand.net
Wed Apr 4 05:23:37 UTC 2018


Instead of just giving up on coalescing if the destination of the vecN
is a register, we look to see if there are any hazards that would
prevent us from moving the write earlier.  This allows us to handle a
few more cases.

Shader-db results on Haswell:

    total instructions in shared programs: 13659101 -> 13658993 (<.01%)
    instructions in affected programs: 29438 -> 29330 (-0.37%)
    helped: 36
    HURT: 0

Cc: Matt Turner <mattst88 at gmail.com>
---
 src/compiler/nir/nir_lower_vec_to_movs.c | 69 ++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/src/compiler/nir/nir_lower_vec_to_movs.c b/src/compiler/nir/nir_lower_vec_to_movs.c
index 610a362..c38052a 100644
--- a/src/compiler/nir/nir_lower_vec_to_movs.c
+++ b/src/compiler/nir/nir_lower_vec_to_movs.c
@@ -112,13 +112,25 @@ has_replicated_dest(nir_alu_instr *alu)
           alu->op == nir_op_fdph_replicated;
 }
 
+static bool
+src_does_not_read_reg(nir_src *src, void *void_reg)
+{
+   return src->is_ssa || src->reg.reg != void_reg;
+}
+
+static bool
+dest_does_not_write_reg(nir_dest *dest, void *void_reg)
+{
+   return dest->is_ssa || dest->reg.reg != void_reg;
+}
+
 /* Attempts to coalesce the "move" from the given source of the vec to the
  * destination of the instruction generating the value. If, for whatever
  * reason, we cannot coalesce the mmove, it does nothing and returns 0.  We
  * can then call insert_mov as normal.
  */
 static unsigned
-try_coalesce(nir_alu_instr *vec, unsigned start_idx)
+try_coalesce(nir_alu_instr *vec, unsigned start_idx, bool vec_had_ssa_dest)
 {
    assert(start_idx < nir_op_infos[vec->op].num_inputs);
 
@@ -183,6 +195,57 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx)
       write_mask |= 1 << i;
    }
 
+   if (!vec_had_ssa_dest) {
+      /* If the vec instruction had a register destination, then we need to be
+       * careful about moving writes to the source instruction.  Otherwise, we
+       * may end up trying to coalesce in a case such as this:
+       *
+       *    ssa_1 = fadd r1, r2
+       *    r3.x = fneg(r2);
+       *    r3 = vec4(ssa_1, ssa_1.y, ...)
+       *
+       * To deal with this, we walk the instructions between the vec and the
+       * ALU op we're going to coalesce it into and ensure that there are no
+       * accesses of the destination register of the vec.
+       */
+
+      /* If they're not in the same block, there's not much we can do */
+      if (src_alu->instr.block != vec->instr.block)
+         return 0;
+
+      /* Since we know that src_alu dominates vec, we can just walk from
+       * one to the other.
+       */
+      for (nir_instr *instr = nir_instr_next(&src_alu->instr);
+           instr != &vec->instr; instr = nir_instr_next(instr)) {
+         if (instr->type == nir_instr_type_alu) {
+            nir_alu_instr *alu = nir_instr_as_alu(instr);
+            /* Only count this instruction's write as a hazard if its write
+             * mask overlaps with the write mask we are going to give src_alu
+             * if we can coalesce into it.
+             */
+            if (!alu->dest.dest.is_ssa &&
+                alu->dest.dest.reg.reg == vec->dest.dest.reg.reg &&
+                (alu->dest.write_mask & write_mask))
+               return 0;
+
+            for (unsigned j = 0; j < nir_op_infos[alu->op].num_inputs; j++) {
+               if (!alu->src[j].src.is_ssa &&
+                   alu->src[j].src.reg.reg == vec->dest.dest.reg.reg)
+                  return 0;
+            }
+         } else {
+            if (!nir_foreach_dest(instr, dest_does_not_write_reg,
+                                  vec->dest.dest.reg.reg))
+               return 0;
+
+            if (!nir_foreach_src(instr, src_does_not_read_reg,
+                                 vec->dest.dest.reg.reg))
+               return 0;
+         }
+      }
+   }
+
    /* Stash off all of the ALU instruction's swizzles. */
    uint8_t swizzles[4][4];
    for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
@@ -274,8 +337,8 @@ lower_vec_to_movs_block(nir_block *block, nir_function_impl *impl)
           * instruction in the source.  We can only do this if the original
           * vecN had an SSA destination.
           */
-         if (vec_had_ssa_dest && !(finished_write_mask & (1 << i)))
-            finished_write_mask |= try_coalesce(vec, i);
+         if (!(finished_write_mask & (1 << i)))
+            finished_write_mask |= try_coalesce(vec, i, vec_had_ssa_dest);
 
          if (!(finished_write_mask & (1 << i)))
             finished_write_mask |= insert_mov(vec, i, shader);
-- 
2.5.0.400.gff86faf



More information about the mesa-dev mailing list