[Mesa-dev] [PATCH 1/4] i965/vs: Add constant propagation to a few opcodes.

Eric Anholt eric at anholt.net
Fri Sep 2 11:32:10 PDT 2011


This differs from the FS in that we track constants in each
destination channel, and we have to look at all the swizzled source
channels.  Also, the instruction stream walk is done in an O(n) manner
instead of O(n^2).

Across shader-db, this reduces 8.0% of the instructions from 60.0% of
the vertex shaders, leaving us now behind the old backend by 11.1%
overall.
---
 src/mesa/drivers/dri/i965/Makefile.sources         |    1 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp             |   15 ++
 src/mesa/drivers/dri/i965/brw_vec4.h               |    3 +
 .../drivers/dri/i965/brw_vec4_copy_propagation.cpp |  260 ++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp        |    1 +
 5 files changed, 280 insertions(+), 0 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index e9bd707..ecac395 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -119,6 +119,7 @@ i965_CXX_SOURCES := \
 	brw_shader.cpp \
 	brw_vec4.cpp \
 	brw_vec4_emit.cpp \
+	brw_vec4_copy_propagation.cpp \
 	brw_vec4_reg_allocate.cpp \
 	brw_vec4_visitor.cpp
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 9d64a40..492e579 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -31,6 +31,21 @@ extern "C" {
 
 namespace brw {
 
+bool
+src_reg::equals(src_reg *r)
+{
+   /* Equal only if every field agrees: location, type, modifiers,
+    * swizzle, immediate bits and the raw fixed_hw_reg bytes.
+    */
+   if (file != r->file || reg != r->reg || reg_offset != r->reg_offset)
+      return false;
+   if (type != r->type || negate != r->negate || abs != r->abs)
+      return false;
+   if (swizzle != r->swizzle || imm.u != r->imm.u)
+      return false;
+   return memcmp(&fixed_hw_reg, &r->fixed_hw_reg, sizeof(fixed_hw_reg)) == 0;
+}
+
 void
 vec4_visitor::calculate_live_intervals()
 {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index b9a5396..865f7d1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -162,6 +162,8 @@ public:
       this->imm.i = i;
    }
 
+   bool equals(src_reg *r);
+
    src_reg(class vec4_visitor *v, const struct glsl_type *type);
 
    explicit src_reg(dst_reg reg);
@@ -394,6 +396,7 @@ public:
    void calculate_live_intervals();
    bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
+   bool opt_copy_propagation();
 
    vec4_instruction *emit(vec4_instruction *inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
new file mode 100644
index 0000000..1e24e2e
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file brw_vec4_copy_propagation.cpp
+ *
+ * Implements tracking of values copied between registers, and
+ * optimizations based on that: copy propagation and constant
+ * propagation.
+ */
+
+#include "brw_vec4.h"
+extern "C" {
+#include "main/macros.h"
+}
+
+namespace brw {
+
+static bool
+is_direct_copy(vec4_instruction *inst)
+{
+   /* An unpredicated, unsaturated, same-type MOV into a GRF, no reladdr. */
+   if (inst->opcode != BRW_OPCODE_MOV || inst->dst.file != GRF)
+      return false;
+   if (inst->predicate || inst->saturate)
+      return false;
+   const bool indirect = inst->dst.reladdr || inst->src[0].reladdr;
+   return !indirect && inst->dst.type == inst->src[0].type;
+}
+
+static bool
+is_dominated_by_previous_instruction(vec4_instruction *inst)
+{
+   /* Everything except DO, WHILE, ELSE and ENDIF counts as dominated. */
+   const int op = inst->opcode;
+   return !(op == BRW_OPCODE_DO || op == BRW_OPCODE_WHILE ||
+	    op == BRW_OPCODE_ELSE || op == BRW_OPCODE_ENDIF);
+}
+
+static bool
+try_constant_propagation(vec4_instruction *inst, int arg, src_reg *values[4])
+{
+   /* Try to replace source 'arg' of 'inst' with an immediate.
+    *
+    * 'values' holds the tracked copy source for each of the four
+    * swizzled channels.  Only the same constant across all 4
+    * channels is handled.  Some day, we should handle the 8-bit
+    * float vector format, which would let us constant propagate
+    * vectors better.
+    *
+    * Returns true if the instruction was rewritten.
+    */
+   src_reg constant = *values[0];
+   for (int chan = 1; chan < 4; chan++) {
+      if (!constant.equals(values[chan]))
+	 return false;
+   }
+
+   if (constant.file != IMM)
+      return false;
+
+   /* Fold the source modifiers into the immediate itself: abs is
+    * applied first, then negate.
+    */
+   const bool is_float = (constant.type == BRW_REGISTER_TYPE_F);
+   if (inst->src[arg].abs) {
+      if (is_float) {
+	 constant.imm.f = fabs(constant.imm.f);
+      } else if (constant.type == BRW_REGISTER_TYPE_D &&
+		 constant.imm.i < 0) {
+	 constant.imm.i = -constant.imm.i;
+      }
+   }
+
+   if (inst->src[arg].negate) {
+      if (is_float)
+	 constant.imm.f = -constant.imm.f;
+      else
+	 constant.imm.u = -constant.imm.u;
+   }
+
+   /* MOV accepts the immediate in whichever source was asked for. */
+   if (inst->opcode == BRW_OPCODE_MOV) {
+      inst->src[arg] = constant;
+      return true;
+   }
+
+   /* Everything else that is supported is a two-source operation. */
+   switch (inst->opcode) {
+   case BRW_OPCODE_MUL:
+   case BRW_OPCODE_ADD:
+   case BRW_OPCODE_CMP:
+   case BRW_OPCODE_SEL:
+      break;
+   default:
+      return false;
+   }
+
+   /* The second source takes the immediate directly. */
+   if (arg == 1) {
+      inst->src[1] = constant;
+      return true;
+   }
+
+   /* For the first source the operands have to trade places, which
+    * is not attempted when the second one is already an immediate.
+    */
+   if (arg != 0 || inst->src[1].file == IMM)
+      return false;
+
+   if (inst->opcode == BRW_OPCODE_CMP) {
+      /* Swapping the operands means flipping the test; give up when
+       * brw_swap_cmod() returns ~0u for this condition.
+       */
+      uint32_t swapped_cmod = brw_swap_cmod(inst->conditional_mod);
+      if (swapped_cmod == ~0u)
+	 return false;
+      inst->conditional_mod = swapped_cmod;
+   }
+
+   /* Fit this constant in by commuting the operands */
+   inst->src[0] = inst->src[1];
+   inst->src[1] = constant;
+
+   /* If a SEL was predicated, flipping operands means we also need
+    * to flip the predicate.
+    */
+   if (inst->opcode == BRW_OPCODE_SEL &&
+       inst->conditional_mod == BRW_CONDITIONAL_NONE) {
+      inst->predicate_inverse = !inst->predicate_inverse;
+   }
+
+   return true;
+}
+
+bool
+vec4_visitor::opt_copy_propagation()
+{
+   /* Walks the instruction list once, tracking for each GRF channel
+    * the source of the last direct copy into it, and tries constant
+    * propagation on GRF reads.  Returns true if anything changed.
+    */
+   bool progress = false;
+   src_reg *cur_value[virtual_grf_reg_count][4];
+
+   memset(cur_value, 0, sizeof(cur_value));
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      /* This pass only works on basic blocks.  If there's flow
+       * control, throw out all our information and start from
+       * scratch.  This should really be fixed by using a structure
+       * like in src/glsl/opt_copy_propagation.cpp to track available
+       * copies.
+       */
+      if (!is_dominated_by_previous_instruction(inst)) {
+	 memset(cur_value, 0, sizeof(cur_value));
+	 continue;
+      }
+
+      /* For each source arg, see if each component comes from a copy
+       * from the same type file (IMM, GRF, UNIFORM), and try
+       * optimizing out access to the copy result
+       */
+      for (int i = 2; i >= 0; i--) {
+	 /* Copied values end up in GRFs, and we don't track reladdr
+	  * accesses.
+	  */
+	 if (inst->src[i].file != GRF || inst->src[i].reladdr)
+	    continue;
+
+	 /* Look the register up only after the GRF check: for other
+	  * files, .reg need not be a valid virtual_grf_reg_map index.
+	  */
+	 int reg = (virtual_grf_reg_map[inst->src[i].reg] +
+		    inst->src[i].reg_offset);
+
+	 /* Find the regs that each swizzle component came from. */
+	 src_reg *values[4];
+	 int c;
+	 for (c = 0; c < 4; c++) {
+	    values[c] = cur_value[reg][BRW_GET_SWZ(inst->src[i].swizzle, c)];
+	    /* If there's no available copy for this channel, bail.
+	     * We could be more aggressive here -- some channels might
+	     * not get used based on the destination writemask.
+	     */
+	    if (!values[c])
+	       break;
+
+	    /* We'll only be able to copy propagate if the sources are
+	     * all from the same file -- there's no ability to swizzle
+	     * 0 or 1 constants in with source registers like in i915.
+	     */
+	    if (c > 0 && values[c - 1]->file != values[c]->file)
+	       break;
+	 }
+
+	 if (c != 4)
+	    continue;
+
+	 if (try_constant_propagation(inst, i, values))
+	    progress = true;
+      }
+
+      /* Track available source registers. */
+      if (is_direct_copy(inst)) {
+	 int reg = virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset;
+	 for (int i = 0; i < 4; i++) {
+	    if (inst->dst.writemask & (1 << i))
+	       cur_value[reg][i] = &inst->src[0];
+	 }
+	 continue;
+      }
+
+      /* For any updated channels, clear tracking of them as a source
+       * or destination.
+       * FINISHME: Sources aren't handled, which will need to be done
+       * for copy propagation.
+       */
+      if (inst->dst.file == GRF) {
+	 if (inst->dst.reladdr)
+	    memset(cur_value, 0, sizeof(cur_value));
+	 else {
+	    int reg = virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset;
+	    for (int i = 0; i < 4; i++) {
+	       if (inst->dst.writemask & (1 << i))
+		  cur_value[reg][i] = NULL;
+	    }
+	 }
+      }
+   }
+
+   if (progress)
+      live_intervals_valid = false;
+
+   return progress;
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 58eeda2..d93a9e0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -617,6 +617,7 @@ vec4_visitor::run()
    do {
       progress = false;
       progress = dead_code_eliminate() || progress;
+      progress = opt_copy_propagation() || progress;
    } while (progress);
 
    pack_uniform_registers();
-- 
1.7.5.4



More information about the mesa-dev mailing list