[Mesa-dev] [PATCH] i965/vs: Add constant propagation to a few opcodes.

Eric Anholt eric at anholt.net
Tue Aug 30 15:05:09 PDT 2011


This differs from the FS in that we track constants in each
destination channel, and we have to look at all the swizzled source
channels.  Also, the instruction stream walk is done in an O(n) manner
instead of O(n^2).

Across shader-db, this reduces 5.3% of the instructions from 51.4% of
the vertex shaders compared to the old backend, leaving us now behind
by 14.7% overall.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp         |  157 ++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4.h           |    3 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    |    1 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   15 +++
 4 files changed, 176 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 9d64a40..1d9f2e1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -277,4 +277,161 @@ vec4_visitor::pack_uniform_registers()
    }
 }
 
+bool
+vec4_visitor::opt_constant_propagation()
+{
+   bool progress = false;
+   src_reg *cur_value[virtual_grf_count][4]; /* [reg][channel] */
+
+   memset(&cur_value, 0, sizeof(cur_value));
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      /* Record an unpredicated, same-type MOV of an immediate to a GRF. */
+      if (inst->opcode == BRW_OPCODE_MOV &&
+	  !inst->predicate &&
+	  inst->dst.file == GRF &&
+	  inst->dst.reg_offset == 0 &&
+	  !inst->dst.reladdr &&
+	  inst->dst.type == inst->src[0].type &&
+	  inst->src[0].file == IMM) {
+	 for (int i = 0; i < 4; i++) {
+	    if (inst->dst.writemask & (1 << i)) {
+	       cur_value[inst->dst.reg][i] = &inst->src[0];
+	    }
+	 }
+	 continue;
+      }
+
+      /* Constants are only tracked within a basic block.  On DO,
+       * WHILE, ELSE and ENDIF, forget everything and start over.
+       */
+      if (inst->opcode == BRW_OPCODE_DO ||
+	  inst->opcode == BRW_OPCODE_WHILE ||
+	  inst->opcode == BRW_OPCODE_ELSE ||
+	  inst->opcode == BRW_OPCODE_ENDIF) {
+	 memset(cur_value, 0, sizeof(cur_value));
+	 continue;
+      }
+
+      /* Check source args for usage of constant values, and try
+       * to replace them with an immediate.
+       */
+      for (int i = 2; i >= 0; i--) { /* src[1] is visited before src[0] */
+	 /* Only a plain GRF source (no reg_offset, no reladdr) is tracked. */
+	 if (inst->src[i].file != GRF ||
+	     inst->src[i].reg_offset ||
+	     inst->src[i].reladdr)
+	    continue;
+
+	 /* Don't bother with cases where we should have had the
+	  * operation on the constant folded in GLSL already.
+	  */
+	 if (inst->src[i].negate || inst->src[i].abs)
+	    continue;
+
+	 /* Every channel the swizzle selects must hold the same
+	  * tracked constant for the operand to be replaceable.
+	  */
+	 src_reg *value = NULL;
+	 for (int c = 0; c < 4; c++) {
+	    /* We could be more aggressive here -- some channels might
+	     * not get used based on the destination writemask.
+	     */
+	    int src_chan = BRW_GET_SWZ(inst->src[i].swizzle, c);
+	    src_reg *chan_value = cur_value[inst->src[i].reg][src_chan];
+
+	    if (!chan_value || (value && !value->equals(chan_value))) {
+	       value = NULL;
+	       break;
+	    }
+
+	    value = chan_value;
+	 }
+
+	 if (!value)
+	    continue;
+
+	 switch (inst->opcode) {
+	 case BRW_OPCODE_MOV:
+	    inst->src[i] = *value;
+	    progress = true;
+	    break;
+
+	 case BRW_OPCODE_MUL:
+	 case BRW_OPCODE_ADD:
+	    if (i == 1) {
+	       inst->src[i] = *value;
+	       progress = true;
+	    } else if (i == 0 && inst->src[1].file != IMM) {
+	       /* Fit this constant in by commuting the operands */
+	       inst->src[0] = inst->src[1];
+	       inst->src[1] = *value;
+	       progress = true;
+	    }
+	    break;
+
+	 case BRW_OPCODE_CMP:
+	    if (i == 1) {
+	       inst->src[i] = *value;
+	       progress = true;
+	    } else if (i == 0 && inst->src[1].file != IMM) {
+	       uint32_t new_cmod;
+
+	       new_cmod = brw_swap_cmod(inst->conditional_mod);
+	       if (new_cmod != ~0u) {
+		  /* Fit this constant in by swapping the operands and
+		   * flipping the test
+		   */
+		  inst->src[0] = inst->src[1];
+		  inst->src[1] = *value;
+		  inst->conditional_mod = new_cmod;
+		  progress = true;
+	       }
+	    }
+	    break;
+
+	 case BRW_OPCODE_SEL:
+	    if (i == 1) {
+	       inst->src[i] = *value;
+	       progress = true;
+	    } else if (i == 0 && inst->src[1].file != IMM) {
+	       inst->src[0] = inst->src[1];
+	       inst->src[1] = *value;
+
+	       /* If this was predicated, flipping operands means
+		* we also need to flip the predicate.
+		*/
+	       if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
+		  inst->predicate_inverse = !inst->predicate_inverse;
+	       }
+	       progress = true;
+	    }
+	    break;
+
+	 default:
+	    break;
+	 }
+      }
+
+      /* Forget tracked constants for whatever this instruction wrote. */
+      if (inst->dst.file == GRF) {
+	 if (inst->dst.reladdr)
+	    memset(cur_value, 0, sizeof(cur_value));
+	 else {
+	    for (int i = 0; i < 4; i++) {
+	       if (inst->dst.writemask & (1 << i))
+		  cur_value[inst->dst.reg][i] = NULL;
+	    }
+	 }
+      }
+   }
+
+   if (progress)
+      live_intervals_valid = false;
+
+   return progress;
+}
+
 } /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 3279773..e25af25 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -162,6 +162,8 @@ public:
       this->imm.i = i;
    }
 
+   bool equals(src_reg *r);
+
    src_reg(class vec4_visitor *v, const struct glsl_type *type);
 
    explicit src_reg(dst_reg reg);
@@ -393,6 +395,7 @@ public:
    void calculate_live_intervals();
    bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
+   bool opt_constant_propagation();
 
    vec4_instruction *emit(vec4_instruction *inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 067f1c9..886c3e1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -601,6 +601,7 @@ vec4_visitor::run()
    do {
       progress = false;
       progress = dead_code_eliminate() || progress;
+      progress = opt_constant_propagation() || progress;
    } while (progress);
 
    pack_uniform_registers();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 4babc56..d01f070 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -59,6 +59,21 @@ src_reg::src_reg(dst_reg reg)
 				swizzles[2], swizzles[3]);
 }
 
+bool
+src_reg::equals(src_reg *r)
+{
+   return (file == r->file &&
+	   reg == r->reg &&
+	   reg_offset == r->reg_offset &&
+	   type == r->type &&
+	   negate == r->negate &&
+	   abs == r->abs &&
+	   memcmp(&fixed_hw_reg, &r->fixed_hw_reg,
+		  sizeof(fixed_hw_reg)) == 0 &&
+	   smear == r->smear &&
+	   imm.u == r->imm.u); /* NB: swizzle and reladdr are not compared */
+}
+
 dst_reg::dst_reg(src_reg reg)
 {
    init();
-- 
1.7.5.4



More information about the mesa-dev mailing list