[Mesa-dev] [PATCH 01/10] i965/vs: Add support for simple algebraic optimizations.

Thu Sep 8 23:32:18 PDT 2011

We generate silly code for array access, and it's easier to generally
support the cleanup than to specifically avoid the bad code in each
place we might generate it.

Removes 4.6% of instructions from 41.6% of shaders in shader-db,
particularly savage2/hon and unigine.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp      |   91 +++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4.h        |    1 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |    1 +
 3 files changed, 93 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 436de2f..5fd4756 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -306,6 +306,97 @@ vec4_visitor::pack_uniform_registers()
    }
 }
 
+/** Returns true if reg is an immediate (IMM file) whose value is zero. */
+static bool
+src_reg_is_zero(src_reg *reg)
+{
+   /* Only immediates can match. */
+   if (reg->file != IMM)
+      return false;
+
+   /* Float immediates use imm.f; every other type is read through imm.i. */
+   const bool is_float = reg->type == BRW_REGISTER_TYPE_F;
+   return is_float ? reg->imm.f == 0.0 : reg->imm.i == 0;
+}
+
+/** Returns true if reg is an immediate (IMM file) whose value is one. */
+static bool
+src_reg_is_one(src_reg *reg)
+{
+   /* Only immediates can match. */
+   if (reg->file != IMM)
+      return false;
+
+   /* Float immediates use imm.f; every other type is read through imm.i. */
+   const bool is_float = reg->type == BRW_REGISTER_TYPE_F;
+   return is_float ? reg->imm.f == 1.0 : reg->imm.i == 1;
+}
+
+/**
+ * Does algebraic optimizations (a * 0 = 0, a * 1 = a, a + 0 = a), rewriting
+ * a matching ADD/MUL as a MOV.  Returns true if any instruction was changed.
+ *
+ * While GLSL IR also performs this optimization, we end up with it in our
+ * instruction stream for a couple of reasons.  One is that we sometimes
+ * generate silly instructions, for example in array access where we'll
+ * generate "ADD offset, index, base" even if base is 0.  The other is that
+ * GLSL IR's constant propagation doesn't track the components of aggregates,
+ * so some VS patterns (initialize matrix to 0, accumulate in vertex blending
+ * factors) end up breaking down to instructions involving 0.
+ */
+bool
+vec4_visitor::opt_algebraic()
+{
+   bool progress = false;
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_ADD:
+	 if (src_reg_is_zero(&inst->src[1])) {
+	    inst->opcode = BRW_OPCODE_MOV;
+	    inst->src[1] = src_reg();
+	    progress = true;
+	 }
+	 break;
+      case BRW_OPCODE_MUL:
+	 if (src_reg_is_zero(&inst->src[1])) {
+	    inst->opcode = BRW_OPCODE_MOV;
+	    switch (inst->src[0].type) {
+	    case BRW_REGISTER_TYPE_F:
+	       inst->src[0] = src_reg(0.0f);
+	       break;
+	    case BRW_REGISTER_TYPE_D:
+	       inst->src[0] = src_reg(0);
+	       break;
+	    case BRW_REGISTER_TYPE_UD:
+	       inst->src[0] = src_reg(0u);
+	       break;
+	    default:
+	       assert(!"not reached");
+	       inst->src[0] = src_reg(0.0f);
+	       break;
+	    }
+	    inst->src[1] = src_reg();
+	    progress = true;
+	 } else if (src_reg_is_one(&inst->src[1])) {
+	    inst->opcode = BRW_OPCODE_MOV;
+	    inst->src[1] = src_reg();
+	    progress = true;
+	 }
+	 break;
+      default:
+	 break;
+      }
+   }
+
+   if (progress)
+      this->live_intervals_valid = false;
+
+   return progress;
+}
+
 /**
  * Only a limited number of hardware registers may be used for push
  * constants, so this turns access to the overflowed constants into
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 7739a15..3f116ee 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -401,6 +401,7 @@ public:
    bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
    bool opt_copy_propagation();
+   bool opt_algebraic();
 
    vec4_instruction *emit(vec4_instruction *inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index c40c41f..7031d2a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -615,6 +615,7 @@ vec4_visitor::run()
       progress = false;
       progress = dead_code_eliminate() || progress;
       progress = opt_copy_propagation() || progress;
+      progress = opt_algebraic() || progress;
    } while (progress);
 
 
-- 
1.7.5.4