Mesa (master): i965/fs: Make virtual grf live intervals actually cover their used range.

Eric Anholt anholt at kemper.freedesktop.org
Thu May 9 21:38:19 UTC 2013


Module: Mesa
Branch: master
Commit: e290372542d0475e612e4d10a27b22eae3158ecd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e290372542d0475e612e4d10a27b22eae3158ecd

Author: Eric Anholt <eric at anholt.net>
Date:   Tue Apr 30 15:00:40 2013 -0700

i965/fs: Make virtual grf live intervals actually cover their used range.

Previously, we would sometimes not consider a write to a register to
extend the end of the interval, nor would we consider a read before a
write to extend the start.  This made for a bunch of complicated logic
related to how to treat the results when dead code might be present.
Instead, just extend the interval and fix dead code elimination to know
how to remove it.

Interestingly, this actually results in a tiny bit more optimization:
total instructions in shared programs: 1391220 -> 1390799 (-0.03%)
instructions in affected programs:     14037 -> 13616 (-3.00%)

v2: Fix a theoretical problem with the simd16 workaround if dst == src,
    where we would revert the bump of the live range.

Reviewed-by: Ian Romanick <ian.d.romanick at intel.com> (v1)

---

 src/mesa/drivers/dri/i965/brw_fs.cpp               |   21 +++---
 src/mesa/drivers/dri/i965/brw_fs.h                 |    4 +-
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp           |    2 +-
 .../drivers/dri/i965/brw_fs_live_variables.cpp     |   76 ++++++--------------
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |    3 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       |    4 +-
 6 files changed, 38 insertions(+), 72 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 778a69e..baaa25c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1456,8 +1456,8 @@ fs_visitor::compact_virtual_grfs()
          remap_table[i] = new_index;
          virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
          if (live_intervals_valid) {
-            virtual_grf_use[new_index] = virtual_grf_use[i];
-            virtual_grf_def[new_index] = virtual_grf_def[i];
+            virtual_grf_start[new_index] = virtual_grf_start[i];
+            virtual_grf_end[new_index] = virtual_grf_end[i];
          }
          ++new_index;
       }
@@ -1771,10 +1771,8 @@ fs_visitor::opt_algebraic()
 }
 
 /**
- * Must be called after calculate_live_intervales() to remove unused
- * writes to registers -- register allocation will fail otherwise
- * because something deffed but not used won't be considered to
- * interfere with other regs.
+ * Removes any instructions writing a VGRF where that VGRF is not used by any
+ * later instruction.
  */
 bool
 fs_visitor::dead_code_eliminate()
@@ -1787,9 +1785,12 @@ fs_visitor::dead_code_eliminate()
    foreach_list_safe(node, &this->instructions) {
       fs_inst *inst = (fs_inst *)node;
 
-      if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
-	 inst->remove();
-	 progress = true;
+      if (inst->dst.file == GRF) {
+         assert(this->virtual_grf_end[inst->dst.reg] >= pc);
+         if (this->virtual_grf_end[inst->dst.reg] == pc) {
+            inst->remove();
+            progress = true;
+         }
       }
 
       pc++;
@@ -2201,7 +2202,7 @@ fs_visitor::compute_to_mrf()
       /* Can't compute-to-MRF this GRF if someone else was going to
        * read it later.
        */
-      if (this->virtual_grf_use[inst->src[0].reg] > ip)
+      if (this->virtual_grf_end[inst->src[0].reg] > ip)
 	 continue;
 
       /* Found a move of a GRF to a MRF.  Let's see if we can go
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 9a2bcc0..3d44daf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -430,8 +430,8 @@ public:
    int *virtual_grf_sizes;
    int virtual_grf_count;
    int virtual_grf_array_size;
-   int *virtual_grf_def;
-   int *virtual_grf_use;
+   int *virtual_grf_start;
+   int *virtual_grf_end;
    bool live_intervals_valid;
 
    /* This is the map from UNIFORM hw_reg + reg_offset as generated by
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index b5c2200..9b60d9b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -194,7 +194,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
             /* Kill any AEB entries using registers that don't get reused any
              * more -- a sure sign they'll fail operands_match().
              */
-            if (src_reg->file == GRF && virtual_grf_use[src_reg->reg] < ip) {
+            if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) {
                entry->remove();
                ralloc_free(entry);
 	       break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index fdcfac6..3daf8fa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -167,16 +167,16 @@ fs_visitor::calculate_live_intervals()
    if (this->live_intervals_valid)
       return;
 
-   int *def = ralloc_array(mem_ctx, int, num_vars);
-   int *use = ralloc_array(mem_ctx, int, num_vars);
-   ralloc_free(this->virtual_grf_def);
-   ralloc_free(this->virtual_grf_use);
-   this->virtual_grf_def = def;
-   this->virtual_grf_use = use;
+   int *start = ralloc_array(mem_ctx, int, num_vars);
+   int *end = ralloc_array(mem_ctx, int, num_vars);
+   ralloc_free(this->virtual_grf_start);
+   ralloc_free(this->virtual_grf_end);
+   this->virtual_grf_start = start;
+   this->virtual_grf_end = end;
 
    for (int i = 0; i < num_vars; i++) {
-      def[i] = MAX_INSTRUCTION;
-      use[i] = -1;
+      start[i] = MAX_INSTRUCTION;
+      end[i] = -1;
    }
 
    /* Start by setting up the intervals with no knowledge of control
@@ -189,8 +189,7 @@ fs_visitor::calculate_live_intervals()
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF) {
 	    int reg = inst->src[i].reg;
-
-	    use[reg] = ip;
+            int end_ip = ip;
 
             /* In most cases, a register can be written over safely by the
              * same instruction that is its last use.  For a single
@@ -220,15 +219,19 @@ fs_visitor::calculate_live_intervals()
             if (dispatch_width == 16 && (inst->src[i].smear ||
                                          (this->pixel_x.reg == reg ||
                                           this->pixel_y.reg == reg))) {
-               use[reg]++;
+               end_ip++;
             }
+
+            start[reg] = MIN2(start[reg], ip);
+            end[reg] = MAX2(end[reg], end_ip);
 	 }
       }
 
       if (inst->dst.file == GRF) {
          int reg = inst->dst.reg;
 
-         def[reg] = MIN2(def[reg], ip);
+         start[reg] = MIN2(start[reg], ip);
+         end[reg] = MAX2(end[reg], ip);
       }
 
       ip++;
@@ -241,60 +244,23 @@ fs_visitor::calculate_live_intervals()
    for (int b = 0; b < cfg.num_blocks; b++) {
       for (int i = 0; i < num_vars; i++) {
 	 if (BITSET_TEST(livevars.bd[b].livein, i)) {
-	    def[i] = MIN2(def[i], cfg.blocks[b]->start_ip);
-	    use[i] = MAX2(use[i], cfg.blocks[b]->start_ip);
+	    start[i] = MIN2(start[i], cfg.blocks[b]->start_ip);
+	    end[i] = MAX2(end[i], cfg.blocks[b]->start_ip);
 	 }
 
 	 if (BITSET_TEST(livevars.bd[b].liveout, i)) {
-	    def[i] = MIN2(def[i], cfg.blocks[b]->end_ip);
-	    use[i] = MAX2(use[i], cfg.blocks[b]->end_ip);
+	    start[i] = MIN2(start[i], cfg.blocks[b]->end_ip);
+	    end[i] = MAX2(end[i], cfg.blocks[b]->end_ip);
 	 }
       }
    }
 
    this->live_intervals_valid = true;
-
-   /* Note in the non-control-flow code above, that we only take def[] as the
-    * first store, and use[] as the last use.  We use this in dead code
-    * elimination, to determine when a store never gets used.  However, we
-    * also use these arrays to answer the virtual_grf_interferes() question
-    * (live interval analysis), which is used for register coalescing and
-    * register allocation.
-    *
-    * So, there's a conflict over what the array should mean: if use[]
-    * considers a def after the last use, then the dead code elimination pass
-    * never does anything (and it's an important pass!).  But if we don't
-    * include dead code, then virtual_grf_interferes() lies and we'll do
-    * horrible things like coalesce the register that is dead-code-written
-    * into another register that was live across the dead write (causing the
-    * use of the second register to take the dead write's source value instead
-    * of the coalesced MOV's source value).
-    *
-    * To resolve the conflict, immediately after calculating live intervals,
-    * detect dead code, nuke it, and if we changed anything, calculate again
-    * before returning to the caller.  Now we happen to produce def[] and
-    * use[] arrays that will work for virtual_grf_interferes().
-    */
-   if (dead_code_eliminate())
-      calculate_live_intervals();
 }
 
 bool
 fs_visitor::virtual_grf_interferes(int a, int b)
 {
-   int a_def = this->virtual_grf_def[a], a_use = this->virtual_grf_use[a];
-   int b_def = this->virtual_grf_def[b], b_use = this->virtual_grf_use[b];
-
-   /* If there's dead code (def but not use), it would break our test
-    * unless we consider it used.
-    */
-   if ((a_use == -1 && a_def != MAX_INSTRUCTION) ||
-       (b_use == -1 && b_def != MAX_INSTRUCTION)) {
-      return true;
-   }
-
-   int start = MAX2(a_def, b_def);
-   int end = MIN2(a_use, b_use);
-
-   return start < end;
+   return !(virtual_grf_end[a] <= virtual_grf_start[b] ||
+            virtual_grf_end[b] <= virtual_grf_start[a]);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index fa1a938..acd9846 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -316,8 +316,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
           * in order to not have to worry about the uniform issue described in
           * calculate_live_intervals().
           */
-         if (this->virtual_grf_def[j] <= payload_last_use_ip[i] ||
-             this->virtual_grf_use[j] <= payload_last_use_ip[i]) {
+         if (this->virtual_grf_start[j] <= payload_last_use_ip[i]) {
             ra_add_node_interference(g, first_payload_node + i, j);
          }
       }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index d2bac2a..b7bbaab 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2451,8 +2451,8 @@ fs_visitor::fs_visitor(struct brw_context *brw,
    this->virtual_grf_sizes = NULL;
    this->virtual_grf_count = 0;
    this->virtual_grf_array_size = 0;
-   this->virtual_grf_def = NULL;
-   this->virtual_grf_use = NULL;
+   this->virtual_grf_start = NULL;
+   this->virtual_grf_end = NULL;
    this->live_intervals_valid = false;
 
    this->force_uncompressed_stack = 0;




More information about the mesa-commit mailing list