[Mesa-dev] [PATCH 4/5] i965/vs: Allow CSE to handle MULs with negated arguments.

Ian Romanick idr at freedesktop.org
Wed Apr 8 16:38:30 PDT 2015


From: Ian Romanick <ian.d.romanick at intel.com>

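This is similar to commit 47c4b38 ("i965/fs: Allow CSE to handle MULs
with negated arguments."), but it uses a slightly different approach.
Here, operands_match() reports through a new "negate" out-parameter
when two MULs match except for the sign of the product, and the MOV
that reuses the earlier result then negates its source.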

Shader-db results:

GM45:
total instructions in shared programs: 4060813 -> 4060151 (-0.02%)
instructions in affected programs:     13448 -> 12786 (-4.92%)
helped:                                62
HURT:                                  9

All other results, except Broadwell, were identical to the GM45 results
without NIR.  Since NIR isn't used for VEC4, this makes sense.

Broadwell:
total instructions in shared programs: 7284561 -> 7284540 (-0.00%)
instructions in affected programs:     1272 -> 1251 (-1.65%)
helped:                                12

Broadwell NIR:
total instructions in shared programs: 7500487 -> 7500487 (0.00%)
instructions in affected programs:     0 -> 0

Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 32 +++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 100e511..49b50a7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -90,15 +90,34 @@ is_expression(const vec4_instruction *const inst)
 }
 
 static bool
-operands_match(const vec4_instruction *a, const vec4_instruction *b)
+operands_match(const vec4_instruction *a, const vec4_instruction *b,
+               bool *negate)
 {
    const src_reg *xs = a->src;
    const src_reg *ys = b->src;
 
+   *negate = false;
+
    if (a->opcode == BRW_OPCODE_MAD) {
       return xs[0].equals(ys[0]) &&
              ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) ||
               (xs[2].equals(ys[1]) && xs[1].equals(ys[2])));
+   } else if (a->opcode == BRW_OPCODE_MUL) {
+      if ((xs[0].equals(ys[0]) && xs[1].equals(ys[1])) ||
+          (xs[1].equals(ys[0]) && xs[0].equals(ys[1])) ||
+          (xs[0].negative_equals(ys[0]) && xs[1].negative_equals(ys[1])) ||
+          (xs[1].negative_equals(ys[0]) && xs[0].negative_equals(ys[1])))
+         return true;
+
+      if ((xs[0].equals(ys[0]) && xs[1].negative_equals(ys[1])) ||
+          (xs[1].equals(ys[0]) && xs[0].negative_equals(ys[1])) ||
+          (xs[0].negative_equals(ys[0]) && xs[1].equals(ys[1])) ||
+          (xs[1].negative_equals(ys[0]) && xs[0].equals(ys[1]))) {
+         *negate = true;
+         return true;
+      }
+
+      return false;
    } else if (!a->is_commutative()) {
       return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
    } else {
@@ -108,7 +127,7 @@ operands_match(const vec4_instruction *a, const vec4_instruction *b)
 }
 
 static bool
-instructions_match(vec4_instruction *a, vec4_instruction *b)
+instructions_match(vec4_instruction *a, vec4_instruction *b, bool *negate)
 {
    return a->opcode == b->opcode &&
           a->saturate == b->saturate &&
@@ -117,7 +136,7 @@ instructions_match(vec4_instruction *a, vec4_instruction *b)
           a->dst.writemask == b->dst.writemask &&
           a->force_writemask_all == b->force_writemask_all &&
           a->regs_written == b->regs_written &&
-          operands_match(a, b);
+          operands_match(a, b, negate);
 }
 
 bool
@@ -135,11 +154,12 @@ vec4_visitor::opt_cse_local(bblock_t *block)
           (inst->dst.file != HW_REG || inst->dst.is_null()))
       {
          bool found = false;
+         bool negate;
 
          foreach_in_list_use_after(aeb_entry, entry, &aeb) {
             /* Match current instruction's expression against those in AEB. */
             if (!(entry->generator->dst.is_null() && !inst->dst.is_null()) &&
-                instructions_match(inst, entry->generator)) {
+                instructions_match(inst, entry->generator, &negate)) {
                found = true;
                progress = true;
                break;
@@ -186,6 +206,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
                   vec4_instruction *copy = MOV(offset(inst->dst, i),
                                                offset(entry->tmp, i));
                   copy->force_writemask_all = inst->force_writemask_all;
+                  copy->src[0].negate = negate;
                   inst->insert_before(block, copy);
                }
             }
@@ -206,9 +227,10 @@ vec4_visitor::opt_cse_local(bblock_t *block)
           * the flag register if we just wrote it.
           */
          if (inst->writes_flag()) {
+            bool negate; /* dummy */
             if (entry->generator->reads_flag() ||
                 (entry->generator->writes_flag() &&
-                 !instructions_match(inst, entry->generator))) {
+                 !instructions_match(inst, entry->generator, &negate))) {
                entry->remove();
                ralloc_free(entry);
                continue;
-- 
2.1.0



More information about the mesa-dev mailing list