[Mesa-dev] [PATCH 6/6] i965/vec4: Propagate conditional modifiers from more compares to other compares

Ian Romanick idr at freedesktop.org
Mon Jun 25 17:13:37 UTC 2018


From: Ian Romanick <ian.d.romanick at intel.com>

If there is a CMP.NZ that compares a single component (via a .zzzz
swizzle, for example) with 0, it can propagate its conditional modifier
back to a previous CMP that writes only that component.  The specific
case that I saw was:

    cmp.l.f0(8)     g42<1>.xF       g61<4>.xF       (abs)g18<4>.zF
    ...
    cmp.nz.f0(8)    null<1>D        g42<4>.xD       0D

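In this case we can just delete the second CMP.  If the two instructions
have different destination writemasks, the first CMP is rewritten to
write a new temporary using the second CMP's writemask, with its source
swizzles replicated from the channel it originally wrote, and a MOV is
inserted after it to copy the result to the original destination.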

No changes on Broadwell or Skylake.  Also no changes on GM45 or Iron
Lake.

Sandy Bridge, Ivy Bridge, and Haswell had similar results. (Sandy Bridge shown)
total instructions in shared programs: 10427834 -> 10423577 (-0.04%)
instructions in affected programs: 226882 -> 222625 (-1.88%)
helped: 1305
HURT: 0
helped stats (abs) min: 1 max: 7 x̄: 3.26 x̃: 4
helped stats (rel) min: 0.11% max: 6.67% x̄: 1.94% x̃: 1.86%
95% mean confidence interval for instructions value: -3.37 -3.16
95% mean confidence interval for instructions %-change: -1.99% -1.89%
Instructions are helped.

total cycles in shared programs: 146154725 -> 146097503 (-0.04%)
cycles in affected programs: 2487836 -> 2430614 (-2.30%)
helped: 1098
HURT: 53
helped stats (abs) min: 2 max: 134 x̄: 52.27 x̃: 64
helped stats (rel) min: 0.12% max: 12.70% x̄: 3.46% x̃: 2.23%
HURT stats (abs)   min: 2 max: 16 x̄: 3.13 x̃: 2
HURT stats (rel)   min: 0.18% max: 0.83% x̄: 0.63% x̃: 0.71%
95% mean confidence interval for cycles value: -51.63 -47.80
95% mean confidence interval for cycles %-change: -3.44% -3.11%
Cycles are helped.

Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
 src/intel/compiler/brw_vec4_cmod_propagation.cpp | 110 +++++++++++++++++++++--
 1 file changed, 104 insertions(+), 6 deletions(-)

diff --git a/src/intel/compiler/brw_vec4_cmod_propagation.cpp b/src/intel/compiler/brw_vec4_cmod_propagation.cpp
index 5205da4983c..9560cc3b6f7 100644
--- a/src/intel/compiler/brw_vec4_cmod_propagation.cpp
+++ b/src/intel/compiler/brw_vec4_cmod_propagation.cpp
@@ -36,7 +36,7 @@
 namespace brw {
 
 static bool
-opt_cmod_propagation_local(bblock_t *block)
+opt_cmod_propagation_local(bblock_t *block, vec4_visitor *v)
 {
    bool progress = false;
    int ip = block->end_ip + 1;
@@ -132,13 +132,111 @@ opt_cmod_propagation_local(bblock_t *block)
                              scan_inst->dst, scan_inst->size_written)) {
             if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) ||
                 scan_inst->dst.offset != inst->src[0].offset ||
-                (scan_inst->dst.writemask != WRITEMASK_X &&
+                scan_inst->exec_size != inst->exec_size ||
+                scan_inst->group != inst->group) {
+               break;
+            }
+
+            /* If scan_inst is a CMP that produces a single value and inst is
+             * a CMP.NZ or MOV.NZ that consumes only that value, remove inst.
+             */
+            if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+                (inst->src[0].type == BRW_REGISTER_TYPE_D ||
+                 inst->src[0].type == BRW_REGISTER_TYPE_UD) &&
+                (inst->opcode == BRW_OPCODE_CMP ||
+                 inst->opcode == BRW_OPCODE_MOV) &&
+                scan_inst->opcode == BRW_OPCODE_CMP &&
+                ((inst->src[0].swizzle == BRW_SWIZZLE_XXXX &&
+                  scan_inst->dst.writemask == WRITEMASK_X) ||
+                 (inst->src[0].swizzle == BRW_SWIZZLE_YYYY &&
+                  scan_inst->dst.writemask == WRITEMASK_Y) ||
+                 (inst->src[0].swizzle == BRW_SWIZZLE_ZZZZ &&
+                  scan_inst->dst.writemask == WRITEMASK_Z) ||
+                 (inst->src[0].swizzle == BRW_SWIZZLE_WWWW &&
+                  scan_inst->dst.writemask == WRITEMASK_W))) {
+               if (inst->dst.writemask != scan_inst->dst.writemask) {
+                  src_reg temp(v, glsl_type::vec4_type, 1);
+
+                  /* Given a sequence like:
+                   *
+                   *    cmp.ge.f0(8)  g21<1>.xF      g20<4>.xF      g18<4>.xF
+                   *    ...
+                   *    cmp.nz.f0(8)  null<1>D       g21<4>.xD      0D
+                   *
+                   * Replace it with something like:
+                   *
+                   *    cmp.ge.f0(8)  g22<1>F        g20<4>.xF      g18<4>.xF
+                   *    mov(8)        g21<1>.xF      g22<1>.xxxxF
+                   *
+                   * The added MOV will most likely be removed later.  In the
+                   * worst case, it should be cheaper to schedule.
+                   */
+                  temp.swizzle = inst->src[0].swizzle;
+                  temp.type = scan_inst->src[0].type;
+
+                  vec4_instruction *mov = v->MOV(scan_inst->dst, temp);
+
+                  /* Modify the source swizzles on scan_inst.  If scan_inst
+                   * was
+                   *
+                   *    cmp.ge.f0(8)  g21<1>.zF      g20<4>.wzyxF   g18<4>.yxwzF
+                   *
+                   * replace it with
+                   *
+                   *    cmp.ge.f0(8)  g21<1>.zF      g20<4>.yyyyF   g18<4>.wwwwF
+                   */
+                  unsigned src0_chan;
+                  unsigned src1_chan;
+                  switch (scan_inst->dst.writemask) {
+                  case WRITEMASK_X:
+                     src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 0);
+                     src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 0);
+                     break;
+                  case WRITEMASK_Y:
+                     src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 1);
+                     src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 1);
+                     break;
+                  case WRITEMASK_Z:
+                     src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 2);
+                     src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 2);
+                     break;
+                  case WRITEMASK_W:
+                     src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 3);
+                     src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 3);
+                     break;
+                  default:
+                     unreachable("Impossible writemask");
+                  }
+
+                  scan_inst->src[0].swizzle = BRW_SWIZZLE4(src0_chan,
+                                                           src0_chan,
+                                                           src0_chan,
+                                                           src0_chan);
+
+                  /* There's no swizzle on immediate value sources. */
+                  if (scan_inst->src[1].file != IMM) {
+                     scan_inst->src[1].swizzle = BRW_SWIZZLE4(src1_chan,
+                                                              src1_chan,
+                                                              src1_chan,
+                                                              src1_chan);
+                  }
+
+                  scan_inst->dst = dst_reg(temp);
+                  scan_inst->dst.writemask = inst->dst.writemask;
+
+                  scan_inst->insert_after(block, mov);
+               }
+
+               inst->remove(block);
+               progress = true;
+               break;
+            }
+
+            if ((scan_inst->dst.writemask != WRITEMASK_X &&
                  scan_inst->dst.writemask != WRITEMASK_XYZW) ||
                 (scan_inst->dst.writemask == WRITEMASK_XYZW &&
                  inst->src[0].swizzle != BRW_SWIZZLE_XYZW) ||
-                (inst->dst.writemask & ~scan_inst->dst.writemask) != 0 ||
-                scan_inst->exec_size != inst->exec_size ||
-                scan_inst->group != inst->group) {
+                (inst->dst.writemask & ~scan_inst->dst.writemask) != 0) {
                break;
             }
 
@@ -246,7 +344,7 @@ vec4_visitor::opt_cmod_propagation()
    bool progress = false;
 
    foreach_block_reverse(block, cfg) {
-      progress = opt_cmod_propagation_local(block) || progress;
+      progress = opt_cmod_propagation_local(block, this) || progress;
    }
 
    if (progress)
-- 
2.14.4



More information about the mesa-dev mailing list