[Mesa-dev] [PATCH 09/10] i965/fs: Move constant propagation to the same codebase as copy prop.

Eric Anholt eric at anholt.net
Sat Sep 22 14:04:49 PDT 2012


This means that we don't get constant propagation into the first block after a
BRW_OPCODE_IF or a BRW_OPCODE_DO, but there is hope of doing it properly
across control flow at some point.  More importantly, it avoids the O(n^2)
runtime (in instruction count) for shaders that have many constant moves.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp               |  163 --------------------
 src/mesa/drivers/dri/i965/brw_fs.h                 |    2 +-
 .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |  124 ++++++++++++++-
 3 files changed, 124 insertions(+), 165 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 2701413..0545a74 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1249,168 +1249,6 @@ fs_visitor::setup_pull_constants()
    c->prog_data.nr_pull_params = pull_uniform_count;
 }
 
-/**
- * Attempts to move immediate constants into the immediate
- * constant slot of following instructions.
- *
- * Immediate constants are a bit tricky -- they have to be in the last
- * operand slot, you can't do abs/negate on them,
- */
-
-bool
-fs_visitor::propagate_constants()
-{
-   bool progress = false;
-
-   calculate_live_intervals();
-
-   foreach_list(node, &this->instructions) {
-      fs_inst *inst = (fs_inst *)node;
-
-      if (inst->opcode != BRW_OPCODE_MOV ||
-	  inst->predicated ||
-	  inst->dst.file != GRF || inst->src[0].file != IMM ||
-	  inst->dst.type != inst->src[0].type ||
-	  (c->dispatch_width == 16 &&
-	   (inst->force_uncompressed || inst->force_sechalf)))
-	 continue;
-
-      /* Don't bother with cases where we should have had the
-       * operation on the constant folded in GLSL already.
-       */
-      if (inst->saturate)
-	 continue;
-
-      /* Found a move of a constant to a GRF.  Find anything else using the GRF
-       * before it's written, and replace it with the constant if we can.
-       */
-      for (fs_inst *scan_inst = (fs_inst *)inst->next;
-	   !scan_inst->is_tail_sentinel();
-	   scan_inst = (fs_inst *)scan_inst->next) {
-	 if (scan_inst->opcode == BRW_OPCODE_DO ||
-	     scan_inst->opcode == BRW_OPCODE_WHILE ||
-	     scan_inst->opcode == BRW_OPCODE_ELSE ||
-	     scan_inst->opcode == BRW_OPCODE_ENDIF) {
-	    break;
-	 }
-
-	 for (int i = 2; i >= 0; i--) {
-	    if (scan_inst->src[i].file != GRF ||
-		scan_inst->src[i].reg != inst->dst.reg ||
-		scan_inst->src[i].reg_offset != inst->dst.reg_offset)
-	       continue;
-
-	    /* Don't bother with cases where we should have had the
-	     * operation on the constant folded in GLSL already.
-	     */
-	    if (scan_inst->src[i].negate || scan_inst->src[i].abs)
-	       continue;
-
-	    switch (scan_inst->opcode) {
-	    case BRW_OPCODE_MOV:
-	       scan_inst->src[i] = inst->src[0];
-	       progress = true;
-	       break;
-
-	    case BRW_OPCODE_MUL:
-	    case BRW_OPCODE_ADD:
-	       if (i == 1) {
-		  scan_inst->src[i] = inst->src[0];
-		  progress = true;
-	       } else if (i == 0 && scan_inst->src[1].file != IMM) {
-		  /* Fit this constant in by commuting the operands.
-		   * Exception: we can't do this for 32-bit integer MUL
-		   * because it's asymmetric.
-		   */
-		  if (scan_inst->opcode == BRW_OPCODE_MUL &&
-		      (scan_inst->src[1].type == BRW_REGISTER_TYPE_D ||
-		       scan_inst->src[1].type == BRW_REGISTER_TYPE_UD))
-		     break;
-		  scan_inst->src[0] = scan_inst->src[1];
-		  scan_inst->src[1] = inst->src[0];
-		  progress = true;
-	       }
-	       break;
-
-	    case BRW_OPCODE_CMP:
-	    case BRW_OPCODE_IF:
-	       if (i == 1) {
-		  scan_inst->src[i] = inst->src[0];
-		  progress = true;
-	       } else if (i == 0 && scan_inst->src[1].file != IMM) {
-		  uint32_t new_cmod;
-
-		  new_cmod = brw_swap_cmod(scan_inst->conditional_mod);
-		  if (new_cmod != ~0u) {
-		     /* Fit this constant in by swapping the operands and
-		      * flipping the test
-		      */
-		     scan_inst->src[0] = scan_inst->src[1];
-		     scan_inst->src[1] = inst->src[0];
-		     scan_inst->conditional_mod = new_cmod;
-		     progress = true;
-		  }
-	       }
-	       break;
-
-	    case BRW_OPCODE_SEL:
-	       if (i == 1) {
-		  scan_inst->src[i] = inst->src[0];
-		  progress = true;
-	       } else if (i == 0 && scan_inst->src[1].file != IMM) {
-		  scan_inst->src[0] = scan_inst->src[1];
-		  scan_inst->src[1] = inst->src[0];
-
-		  /* If this was predicated, flipping operands means
-		   * we also need to flip the predicate.
-		   */
-		  if (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) {
-		     scan_inst->predicate_inverse =
-			!scan_inst->predicate_inverse;
-		  }
-		  progress = true;
-	       }
-	       break;
-
-	    case SHADER_OPCODE_RCP:
-	       /* The hardware doesn't do math on immediate values
-		* (because why are you doing that, seriously?), but
-		* the correct answer is to just constant fold it
-		* anyway.
-		*/
-	       assert(i == 0);
-	       if (inst->src[0].imm.f != 0.0f) {
-		  scan_inst->opcode = BRW_OPCODE_MOV;
-		  scan_inst->src[0] = inst->src[0];
-		  scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
-		  progress = true;
-	       }
-	       break;
-
-            case FS_OPCODE_PULL_CONSTANT_LOAD:
-	       scan_inst->src[i] = inst->src[0];
-	       progress = true;
-	       break;
-
-	    default:
-	       break;
-	    }
-	 }
-
-	 if (scan_inst->dst.file == GRF &&
-             scan_inst->overwrites_reg(inst->dst)) {
-	    break;
-	 }
-      }
-   }
-
-   if (progress)
-       this->live_intervals_valid = false;
-
-   return progress;
-}
-
-
 bool
 fs_visitor::opt_algebraic()
 {
@@ -2025,7 +1863,6 @@ fs_visitor::run()
 
 	 progress = remove_duplicate_mrf_writes() || progress;
 
-	 progress = propagate_constants() || progress;
 	 progress = opt_algebraic() || progress;
 	 progress = opt_cse() || progress;
 	 progress = opt_copy_propagate() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 9fbb8e5..59a0e50 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -240,12 +240,12 @@ public:
    void split_virtual_grfs();
    void setup_pull_constants();
    void calculate_live_intervals();
-   bool propagate_constants();
    bool opt_algebraic();
    bool opt_cse();
    bool opt_cse_local(fs_bblock *block, exec_list *aeb);
    bool opt_copy_propagate();
    bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
+   bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
    bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
 				 exec_list *acp);
    bool register_coalesce();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 1870f43..ad34657 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -34,6 +34,9 @@ struct acp_entry : public exec_node {
 bool
 fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
 {
+   if (entry->src.file == IMM)
+      return false;
+
    if (inst->src[arg].file != entry->dst.file ||
        inst->src[arg].reg != entry->dst.reg ||
        inst->src[arg].reg_offset != entry->dst.reg_offset) {
@@ -64,6 +67,121 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    return true;
 }
 
+
+/**
+ * Replaces sources of inst that read entry->dst with the immediate held in
+ * entry->src, commuting two-source operands where needed so the immediate
+ * ends up in src[1].  Returns true if inst was changed.
+ */
+bool
+fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
+{
+   bool progress = false;
+
+   if (entry->src.file != IMM)
+      return false;
+
+   for (int i = 2; i >= 0; i--) {
+      if (inst->src[i].file != entry->dst.file ||
+          inst->src[i].reg != entry->dst.reg ||
+          inst->src[i].reg_offset != entry->dst.reg_offset)
+         continue;
+
+      /* Don't bother with cases where we should have had the
+       * operation on the constant folded in GLSL already.
+       */
+      if (inst->src[i].negate || inst->src[i].abs)
+         continue;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MOV:
+         inst->src[i] = entry->src;
+         progress = true;
+         break;
+
+      case BRW_OPCODE_MUL:
+      case BRW_OPCODE_ADD:
+         if (i == 1) {
+            inst->src[i] = entry->src;
+            progress = true;
+         } else if (i == 0 && inst->src[1].file != IMM) {
+            /* Fit this constant in by commuting the operands.
+             * Exception: we can't do this for 32-bit integer MUL
+             * because it's asymmetric.
+             */
+            if (inst->opcode == BRW_OPCODE_MUL &&
+                (inst->src[1].type == BRW_REGISTER_TYPE_D ||
+                 inst->src[1].type == BRW_REGISTER_TYPE_UD))
+               break;
+            inst->src[0] = inst->src[1];
+            inst->src[1] = entry->src;
+            progress = true;
+         }
+         break;
+
+      case BRW_OPCODE_CMP:
+      case BRW_OPCODE_IF:
+         if (i == 1) {
+            inst->src[i] = entry->src;
+            progress = true;
+         } else if (i == 0 && inst->src[1].file != IMM) {
+            uint32_t new_cmod = brw_swap_cmod(inst->conditional_mod);
+            if (new_cmod != ~0u) {
+               /* Fit this constant in by swapping the operands and
+                * flipping the test
+                */
+               inst->src[0] = inst->src[1];
+               inst->src[1] = entry->src;
+               inst->conditional_mod = new_cmod;
+               progress = true;
+            }
+         }
+         break;
+
+      case BRW_OPCODE_SEL:
+         if (i == 1) {
+            inst->src[i] = entry->src;
+            progress = true;
+         } else if (i == 0 && inst->src[1].file != IMM) {
+            inst->src[0] = inst->src[1];
+            inst->src[1] = entry->src;
+
+            /* If this was predicated, flipping operands means
+             * we also need to flip the predicate.
+             */
+            if (inst->conditional_mod == BRW_CONDITIONAL_NONE)
+               inst->predicate_inverse = !inst->predicate_inverse;
+            progress = true;
+         }
+         break;
+
+      case SHADER_OPCODE_RCP:
+         /* The hardware doesn't do math on immediate values, so just
+          * constant fold it.  Test the propagated constant for zero:
+          * src[0] of inst is still a register here, not an immediate.
+          */
+         assert(i == 0);
+         if (entry->src.imm.f != 0.0f) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[0] = entry->src;
+            inst->src[0].imm.f = 1.0f / inst->src[0].imm.f;
+            progress = true;
+         }
+         break;
+
+      case FS_OPCODE_PULL_CONSTANT_LOAD:
+         inst->src[i] = entry->src;
+         progress = true;
+         break;
+
+      default:
+         break;
+      }
+   }
+
+   return progress;
+}
+
 /** @file brw_fs_copy_propagation.cpp
  *
  * Support for local copy propagation by walking the list of instructions
@@ -90,6 +208,9 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
       foreach_list(entry_node, acp) {
 	 acp_entry *entry = (acp_entry *)entry_node;
 
+         if (try_constant_propagate(inst, entry))
+            progress = true;
+
 	 for (int i = 0; i < 3; i++) {
 	    if (try_copy_propagate(inst, i, entry))
 	       progress = true;
@@ -114,7 +235,8 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
 	  ((inst->src[0].file == GRF &&
 	    (inst->src[0].reg != inst->dst.reg ||
 	     inst->src[0].reg_offset != inst->dst.reg_offset)) ||
-	   inst->src[0].file == UNIFORM) &&
+           inst->src[0].file == UNIFORM ||
+           inst->src[0].file == IMM) &&
 	  inst->src[0].type == inst->dst.type &&
 	  !inst->saturate &&
 	  !inst->predicated &&
-- 
1.7.10.4



More information about the mesa-dev mailing list