Mesa (master): i965: First cut at register allocation using graph coloring.

Wed Sep 29 23:45:41 UTC 2010

Module: Mesa
Branch: master
Commit: e1261d3c493ff48348483a0084f3017c7e663dc0
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1261d3c493ff48348483a0084f3017c7e663dc0

Author: Eric Anholt <eric at anholt.net>
Date:   Wed Sep 29 12:08:11 2010 -0700

i965: First cut at register allocation using graph coloring.

The interference is totally bogus (maximal), so this is equivalent to
our trivial register assignment before.  As in, passes the same set of
piglit tests.

---

 src/mesa/drivers/dri/i965/brw_fs.cpp |  158 ++++++++++++++++++++++++++++++++--
 1 files changed, 151 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e91351c..4de64f0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -35,6 +35,7 @@ extern "C" {
 #include "program/prog_parameter.h"
 #include "program/prog_print.h"
 #include "program/prog_optimize.h"
+#include "program/register_allocate.h"
 #include "program/sampler.h"
 #include "program/hash_table.h"
 #include "brw_context.h"
@@ -448,6 +449,7 @@ public:
    void assign_curb_setup();
    void assign_urb_setup();
    void assign_regs();
+   void assign_regs_trivial();
    void generate_code();
    void generate_fb_write(fs_inst *inst);
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
@@ -1997,7 +1999,7 @@ fs_visitor::assign_urb_setup()
 }
 
 static void
-trivial_assign_reg(int *reg_hw_locations, fs_reg *reg)
+assign_reg(int *reg_hw_locations, fs_reg *reg)
 {
    if (reg->file == GRF && reg->reg != 0) {
       reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
@@ -2006,7 +2008,7 @@ trivial_assign_reg(int *reg_hw_locations, fs_reg *reg)
 }
 
 void
-fs_visitor::assign_regs()
+fs_visitor::assign_regs_trivial()
 {
    int last_grf = 0;
    int hw_reg_mapping[this->virtual_grf_next];
@@ -2020,18 +2022,157 @@ fs_visitor::assign_regs()
    }
    last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];
 
-   /* FINISHME: trivial assignment of register numbers */
    foreach_iter(exec_list_iterator, iter, this->instructions) {
       fs_inst *inst = (fs_inst *)iter.get();
 
-      trivial_assign_reg(hw_reg_mapping, &inst->dst);
-      trivial_assign_reg(hw_reg_mapping, &inst->src[0]);
-      trivial_assign_reg(hw_reg_mapping, &inst->src[1]);
+      assign_reg(hw_reg_mapping, &inst->dst);
+      assign_reg(hw_reg_mapping, &inst->src[0]);
+      assign_reg(hw_reg_mapping, &inst->src[1]);
    }
 
    this->grf_used = last_grf + 1;
 }
 
+void
+fs_visitor::assign_regs()
+{
+   int last_grf = 0;
+   int hw_reg_mapping[this->virtual_grf_next + 1];
+   int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
+   int class_sizes[base_reg_count];
+   int class_count = 0;
+
+   /* Set up the register classes.
+    *
+    * The base registers store a scalar value.  For texture samples,
+    * we get virtual GRFs composed of 4 contiguous hw register.  For
+    * structures and arrays, we store them as contiguous larger things
+    * than that, though we should be able to do better most of the
+    * time.
+    */
+   class_sizes[class_count++] = 1;
+   for (int r = 1; r < this->virtual_grf_next; r++) {
+      int i;
+
+      for (i = 0; i < class_count; i++) {
+	 if (class_sizes[i] == this->virtual_grf_sizes[r])
+	    break;
+      }
+      if (i == class_count) {
+	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
+      }
+   }
+
+   int ra_reg_count = 0;
+   int class_base_reg[class_count];
+   int class_reg_count[class_count];
+   int classes[class_count];
+
+   for (int i = 0; i < class_count; i++) {
+      class_base_reg[i] = ra_reg_count;
+      class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
+      ra_reg_count += class_reg_count[i];
+   }
+
+   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
+   for (int i = 0; i < class_count; i++) {
+      classes[i] = ra_alloc_reg_class(regs);
+
+      for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
+	 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
+      }
+
+      /* Add conflicts between our contiguous registers aliasing
+       * base regs and other register classes' contiguous registers
+       * that alias base regs, or the base regs themselves for classes[0].
+       */
+      for (int c = 0; c <= i; c++) {
+	 for (int i_r = 0; i_r < class_reg_count[i] - 1; i_r++) {
+	    for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
+		 c_r <= MIN2(class_reg_count[c] - 1, i_r + class_sizes[i] - 1);
+		 c_r++) {
+
+	       if (0) {
+		  printf("%d/%d conflicts %d/%d\n",
+			 class_sizes[i], i_r,
+			 class_sizes[c], c_r);
+	       }
+
+	       ra_add_reg_conflict(regs,
+				   class_base_reg[i] + i_r,
+				   class_base_reg[c] + c_r);
+	    }
+	 }
+      }
+   }
+
+   ra_set_finalize(regs);
+
+   struct ra_graph *g = ra_alloc_interference_graph(regs,
+						    this->virtual_grf_next);
+   /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
+    * with nodes.
+    */
+   ra_set_node_class(g, 0, classes[0]);
+
+   /* FINISHME: Proper interference (live interval analysis) */
+   for (int i = 1; i < this->virtual_grf_next; i++) {
+      for (int c = 0; c < class_count; c++) {
+	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
+	    ra_set_node_class(g, i, classes[c]);
+	    break;
+	 }
+      }
+
+      for (int j = 1; j < i; j++) {
+	 ra_add_node_interference(g, i, j);
+      }
+   }
+
+   /* FINISHME: Handle spilling */
+   if (!ra_allocate_no_spills(g)) {
+      fprintf(stderr, "Failed to allocate registers.\n");
+      this->fail = true;
+      return;
+   }
+
+   /* Get the chosen virtual registers for each node, and map virtual
+    * regs in the register classes back down to real hardware reg
+    * numbers.
+    */
+   hw_reg_mapping[0] = 0; /* unused */
+   for (int i = 1; i < this->virtual_grf_next; i++) {
+      int reg = ra_get_node_reg(g, i);
+      int hw_reg = -1;
+
+      for (int c = 0; c < class_count; c++) {
+	 if (reg >= class_base_reg[c] &&
+	     reg < class_base_reg[c] + class_reg_count[c] - 1) {
+	    hw_reg = reg - class_base_reg[c];
+	    break;
+	 }
+      }
+
+      assert(hw_reg != -1);
+      hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
+      last_grf = MAX2(last_grf,
+		      hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
+   }
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      fs_inst *inst = (fs_inst *)iter.get();
+
+      assign_reg(hw_reg_mapping, &inst->dst);
+      assign_reg(hw_reg_mapping, &inst->src[0]);
+      assign_reg(hw_reg_mapping, &inst->src[1]);
+   }
+
+   this->grf_used = last_grf + 1;
+
+   talloc_free(g);
+   talloc_free(regs);
+}
+
 static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
 {
    struct brw_reg brw_reg;
@@ -2319,7 +2460,10 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
       v.emit_fb_writes();
       v.assign_curb_setup();
       v.assign_urb_setup();
-      v.assign_regs();
+      if (0)
+	 v.assign_regs_trivial();
+      else
+	 v.assign_regs();
    }
 
    v.generate_code();