Mesa (master): i965/vs: Add support for simple algebraic optimizations.

Eric Anholt anholt at kemper.freedesktop.org
Tue Sep 20 19:07:55 UTC 2011


Module: Mesa
Branch: master
Commit: f0c04e6c22babf2aee2ad1ee85dbd6f996be3712
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f0c04e6c22babf2aee2ad1ee85dbd6f996be3712

Author: Eric Anholt <eric at anholt.net>
Date:   Fri Sep  2 15:18:29 2011 -0700

i965/vs: Add support for simple algebraic optimizations.

We generate silly code for array access, and it's easier to generally
support the cleanup than to specifically avoid the bad code in each
place we might generate it.

Removes 4.6% of instructions from 41.6% of shaders in shader-db,
particularly savage2/hon and unigine.

v2: Fixes by Ken: Make is_zero/one member functions, and fix a
    progress flag.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

---

 src/mesa/drivers/dri/i965/brw_vec4.cpp      |   92 +++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4.h        |    3 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |    1 +
 3 files changed, 96 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 436de2f..1f2cebe 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -306,6 +306,98 @@ vec4_visitor::pack_uniform_registers()
    }
 }
 
+bool
+src_reg::is_zero() const /* true only for an immediate whose value is zero */
+{
+   if (file != IMM) /* non-immediate sources are never reported as zero */
+      return false;
+
+   if (type == BRW_REGISTER_TYPE_F) {
+      return imm.f == 0.0; /* float immediates are compared through imm.f */
+   } else {
+      return imm.i == 0; /* every other register type is compared through imm.i */
+   }
+}
+
+bool
+src_reg::is_one() const /* true only for an immediate whose value is one */
+{
+   if (file != IMM) /* non-immediate sources are never reported as one */
+      return false;
+
+   if (type == BRW_REGISTER_TYPE_F) {
+      return imm.f == 1.0; /* float immediates are compared through imm.f */
+   } else {
+      return imm.i == 1; /* every other register type is compared through imm.i */
+   }
+}
+
+/**
+ * Does algebraic optimizations (a * 0 = 0, a * 1 = a, a + 0 = a).
+ *
+ * While GLSL IR also performs this optimization, we end up with it in
+ * our instruction stream for a couple of reasons.  One is that we
+ * sometimes generate silly instructions, for example in array access
+ * where we'll generate "ADD offset, index, base" even if base is 0.
+ * The other is that GLSL IR's constant propagation doesn't track the
+ * components of aggregates, so some VS patterns (initialize matrix to
+ * 0, accumulate in vertex blending factors) end up breaking down to
+ * instructions involving 0.  Only src[1] is tested for the constant.
+ */
+bool
+vec4_visitor::opt_algebraic()
+{
+   bool progress = false; /* becomes true once any instruction is rewritten */
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_ADD:
+	 if (inst->src[1].is_zero()) { /* a + 0 => MOV a */
+	    inst->opcode = BRW_OPCODE_MOV;
+	    inst->src[1] = src_reg(); /* reset the now-unused second operand */
+	    progress = true;
+	 }
+	 break;
+
+      case BRW_OPCODE_MUL:
+	 if (inst->src[1].is_zero()) { /* a * 0 => MOV 0 */
+	    inst->opcode = BRW_OPCODE_MOV;
+	    switch (inst->src[0].type) { /* zero immediate chosen by src[0]'s type */
+	    case BRW_REGISTER_TYPE_F:
+	       inst->src[0] = src_reg(0.0f);
+	       break;
+	    case BRW_REGISTER_TYPE_D:
+	       inst->src[0] = src_reg(0);
+	       break;
+	    case BRW_REGISTER_TYPE_UD:
+	       inst->src[0] = src_reg(0u);
+	       break;
+	    default:
+	       assert(!"not reached");
+	       inst->src[0] = src_reg(0.0f); /* float zero if the assert is compiled out */
+	       break;
+	    }
+	    inst->src[1] = src_reg(); /* reset the now-unused second operand */
+	    progress = true;
+	 } else if (inst->src[1].is_one()) { /* a * 1 => MOV a */
+	    inst->opcode = BRW_OPCODE_MOV;
+	    inst->src[1] = src_reg(); /* reset the now-unused second operand */
+	    progress = true;
+	 }
+	 break;
+      default: /* all other opcodes are left untouched */
+	 break;
+      }
+   }
+
+   if (progress) /* instructions changed, so flag the live intervals as not valid */
+      this->live_intervals_valid = false;
+
+   return progress; /* true when at least one instruction was rewritten */
+}
+
 /**
  * Only a limited number of hardware registers may be used for push
  * constants, so this turns access to the overflowed constants into
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 7739a15..058615f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -163,6 +163,8 @@ public:
    }
 
    bool equals(src_reg *r);
+   bool is_zero() const;
+   bool is_one() const;
 
    src_reg(class vec4_visitor *v, const struct glsl_type *type);
 
@@ -401,6 +403,7 @@ public:
    bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
    bool opt_copy_propagation();
+   bool opt_algebraic();
 
    vec4_instruction *emit(vec4_instruction *inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index c40c41f..7031d2a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -615,6 +615,7 @@ vec4_visitor::run()
       progress = false;
       progress = dead_code_eliminate() || progress;
       progress = opt_copy_propagation() || progress;
+      progress = opt_algebraic() || progress;
    } while (progress);
 
 




More information about the mesa-commit mailing list