[Mesa-dev] [PATCH 4/4] i965/vs: Implement proper register allocation instead of 1:1 mapping.

Thu Aug 18 11:38:45 PDT 2011

Fixes vs-atan-* and several others.  This is not the real solution we
eventually want, which will pack floats, vec2s, and vec3s into vec4
registers, but this code should provide the framework for that.
---
 src/mesa/drivers/dri/i965/brw_context.h            |   17 +++
 .../drivers/dri/i965/brw_vec4_reg_allocate.cpp     |  139 +++++++++++++++++++-
 2 files changed, 155 insertions(+), 1 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index add8c56..df63fe1 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -685,6 +685,23 @@ struct brw_context
 
       uint32_t push_const_offset; /* Offset in the batchbuffer */
       int push_const_size; /* in 256-bit register increments */
+
+      /** @{ register allocator */
+
+      struct ra_regs *regs;
+
+      /**
+       * Array of the ra classes for the unaligned contiguous register
+       * block sizes used.
+       */
+      int *classes;
+
+      /**
+       * Mapping for register-allocated objects in *regs to the first
+       * GRF for that object.
+      */
+      uint8_t *ra_reg_to_grf;
+      /** @} */
    } vs;
 
    struct {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 7039553..3f052ff 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -21,6 +21,11 @@
  * IN THE SOFTWARE.
  */
 
+extern "C" {
+#include "main/macros.h"
+#include "program/register_allocate.h"
+} /* extern "C" */
+
 #include "brw_vec4.h"
 #include "../glsl/ir_print_visitor.h"
 
@@ -88,10 +93,142 @@ vec4_visitor::reg_allocate_trivial()
    }
 }
 
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+			      int *class_sizes,
+			      int class_count,
+			      int base_reg_count)
+{
+   /* Compute the total number of registers across all classes. */
+   int ra_reg_count = 0;
+   for (int i = 0; i < class_count; i++) {
+      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
+   }
+
+   ralloc_free(brw->vs.ra_reg_to_grf);
+   brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+   ralloc_free(brw->vs.regs);
+   brw->vs.regs = ra_alloc_reg_set(ra_reg_count);
+   ralloc_free(brw->vs.classes);
+   brw->vs.classes = ralloc_array(brw, int, class_count + 1);
+
+   /* Now, add the registers to their classes, and add the conflicts
+    * between them and the base GRF registers (and also each other).
+    */
+   int reg = 0;
+   for (int i = 0; i < class_count; i++) {
+      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+      brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);
+
+      for (int j = 0; j < class_reg_count; j++) {
+	 ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);
+
+	 brw->vs.ra_reg_to_grf[reg] = j;
+
+	 for (int base_reg = j;
+	      base_reg < j + class_sizes[i];
+	      base_reg++) {
+	    ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
+	 }
+
+	 reg++;
+      }
+   }
+   assert(reg == ra_reg_count);
+
+   ra_set_finalize(brw->vs.regs);
+}
+
 void
 vec4_visitor::reg_allocate()
 {
-   reg_allocate_trivial();
+   int hw_reg_mapping[virtual_grf_count];
+   int first_assigned_grf = this->first_non_payload_grf;
+   int base_reg_count = BRW_MAX_GRF - first_assigned_grf;
+   int class_sizes[base_reg_count];
+   int class_count = 0;
+
+   /* Using the trivial allocator can be useful in debugging undefined
+    * register access as a result of broken optimization passes.
+    */
+   if (0) {
+      reg_allocate_trivial();
+      return;
+   }
+
+   calculate_live_intervals();
+
+   /* Set up the register classes.
+    *
+    * The base registers store a vec4.  However, we'll need larger
+    * storage for arrays, structures, and matrices, which will be sets
+    * of contiguous registers.
+    */
+   class_sizes[class_count++] = 1;
+
+   for (int r = 0; r < virtual_grf_count; r++) {
+      int i;
+
+      for (i = 0; i < class_count; i++) {
+	 if (class_sizes[i] == this->virtual_grf_sizes[r])
+	    break;
+      }
+      if (i == class_count) {
+	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
+	    fail("Object too large to register allocate.\n");
+	 }
+
+	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
+      }
+   }
+
+   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);
+
+   struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
+						    virtual_grf_count);
+
+   for (int i = 0; i < virtual_grf_count; i++) {
+      for (int c = 0; c < class_count; c++) {
+	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
+	    ra_set_node_class(g, i, brw->vs.classes[c]);
+	    break;
+	 }
+      }
+
+      for (int j = 0; j < i; j++) {
+	 if (virtual_grf_interferes(i, j)) {
+	    ra_add_node_interference(g, i, j);
+	 }
+      }
+   }
+
+   if (!ra_allocate_no_spills(g)) {
+      ralloc_free(g);
+      fail("No register spilling support yet\n");
+   }
+
+   /* Get the chosen virtual registers for each node, and map virtual
+    * regs in the register classes back down to real hardware reg
+    * numbers.
+    */
+   prog_data->total_grf = first_assigned_grf;
+   for (int i = 0; i < virtual_grf_count; i++) {
+      int reg = ra_get_node_reg(g, i);
+
+      hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
+      prog_data->total_grf = MAX2(prog_data->total_grf, hw_reg_mapping[i] + 1);
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      assign(hw_reg_mapping, &inst->dst);
+      assign(hw_reg_mapping, &inst->src[0]);
+      assign(hw_reg_mapping, &inst->src[1]);
+      assign(hw_reg_mapping, &inst->src[2]);
+   }
+
+   ralloc_free(g);
 }
 
 } /* namespace brw */
-- 
1.7.5.4