[Mesa-dev] [PATCH 10/10] i965/fs: Improve performance of copy/constant propagation.

Eric Anholt eric at anholt.net
Sat Sep 22 14:04:50 PDT 2012


Use a simple chaining hash table for the ACP.  This is not really very good,
because we still do a full walk of the tree per destination write, but it
still reduces fp-long-alu runtime from 5.3 to 3.9s.
---
 src/mesa/drivers/dri/i965/brw_fs.h                 |    2 +-
 .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |   46 +++++++++++++-------
 2 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 59a0e50..3ea4e00 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -247,7 +247,7 @@ public:
    bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
    bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
    bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
-				 exec_list *acp);
+				 exec_list *acp, int acp_count);
    bool register_coalesce();
    bool register_coalesce_2();
    bool compute_to_mrf();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index ad34657..0f6dbd4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -196,7 +196,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
  */
 bool
 fs_visitor::opt_copy_propagate_local(void *mem_ctx,
-				     fs_bblock *block, exec_list *acp)
+				     fs_bblock *block, exec_list *acp,
+                                     int acp_count)
 {
    bool progress = false;
 
@@ -205,28 +206,41 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
 	inst = (fs_inst *)inst->next) {
 
       /* Try propagating into this instruction. */
-      foreach_list(entry_node, acp) {
-	 acp_entry *entry = (acp_entry *)entry_node;
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file != GRF)
+            continue;
 
-         if (try_constant_propagate(inst, entry))
-            progress = true;
+         foreach_list(entry_node, &acp[inst->src[i].reg % acp_count]) {
+            acp_entry *entry = (acp_entry *)entry_node;
 
-	 for (int i = 0; i < 3; i++) {
-	    if (try_copy_propagate(inst, i, entry))
-	       progress = true;
-	 }
+            if (try_constant_propagate(inst, entry))
+               progress = true;
+
+            if (try_copy_propagate(inst, i, entry))
+               progress = true;
+         }
       }
 
       /* kill the destination from the ACP */
       if (inst->dst.file == GRF) {
-	 foreach_list_safe(entry_node, acp) {
+	 foreach_list_safe(entry_node, &acp[inst->dst.reg % acp_count]) {
 	    acp_entry *entry = (acp_entry *)entry_node;
 
-	    if (inst->overwrites_reg(entry->dst) ||
-                inst->overwrites_reg(entry->src)) {
+	    if (inst->overwrites_reg(entry->dst)) {
 	       entry->remove();
 	    }
 	 }
+
+         /* Oops, we only have the chaining hash based on the destination, not
+          * the source, so walk across the entire table.
+          */
+         for (int i = 0; i < acp_count; i++) {
+            foreach_list_safe(entry_node, &acp[i]) {
+               acp_entry *entry = (acp_entry *)entry_node;
+               if (inst->overwrites_reg(entry->src))
+                  entry->remove();
+            }
+	 }
       }
 
       /* If this instruction is a raw copy, add it to the ACP. */
@@ -246,7 +260,7 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
 	 acp_entry *entry = ralloc(mem_ctx, acp_entry);
 	 entry->dst = inst->dst;
 	 entry->src = inst->src[0];
-	 acp->push_tail(entry);
+	 acp[entry->dst.reg % acp_count].push_tail(entry);
       }
    }
 
@@ -263,9 +277,11 @@ fs_visitor::opt_copy_propagate()
 
    for (int b = 0; b < cfg.num_blocks; b++) {
       fs_bblock *block = cfg.blocks[b];
-      exec_list acp;
+      int acp_count = 16;
+      exec_list acp[acp_count];
 
-      progress = opt_copy_propagate_local(mem_ctx, block, &acp) || progress;
+      progress = opt_copy_propagate_local(mem_ctx, block,
+                                          acp, acp_count) || progress;
    }
 
    ralloc_free(mem_ctx);
-- 
1.7.10.4



More information about the mesa-dev mailing list