[Mesa-dev] [PATCH 4/6] i965/fs: Reduce the interference between payload regs and virtual GRFs.

Eric Anholt eric at anholt.net
Tue Oct 2 19:52:03 PDT 2012


Improves performance of the Lightsmark penumbra shadows scene by 15.7% +/-
1.0% (n=15) by eliminating register spilling.  (Tested by editing the list
of scenes so that all other scenes have 0 duration; the measurement includes
additional rendering of scene description text that normally doesn't appear
in that scene.)
---
 src/mesa/drivers/dri/i965/brw_fs.h                |    2 +
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |   86 +++++++++++++++++----
 2 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 34747d3..56c5a27 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -235,6 +235,8 @@ public:
    void assign_urb_setup();
    bool assign_regs();
    void assign_regs_trivial();
+   void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
+                                   int first_payload_node);
    int choose_spill_reg(struct ra_graph *g);
    void spill_reg(int spill_reg);
    void split_virtual_grfs();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 7b778d6..0510977 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -164,12 +164,78 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width, int base_reg_count)
  * Sets up interference between thread payload registers and the virtual GRFs
  * to be allocated for program temporaries.
  */
-static void
-brw_setup_payload_interference(struct ra_graph *g,
-                               int payload_reg_count,
-                               int first_payload_node,
-                               int reg_node_count)
+void
+fs_visitor::setup_payload_interference(struct ra_graph *g,
+                                       int payload_reg_count,
+                                       int first_payload_node)
 {
+   int reg_width = c->dispatch_width / 8;
+   int last_loop_end = 0;
+   int first_loop_start = -1; /* -1: no DO seen yet (ip 0 is a valid DO) */
+
+   /* We don't track live intervals for payload regs in our live interval
+    * analysis.  Do a really cheesy version in this function: payload regs are
+    * live from the start of the program (always true) until their last use,
+    * stretched to the last WHILE when that use is at or after the first DO.
+    */
+   int ip = 0;
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      if (inst->opcode == BRW_OPCODE_DO && first_loop_start == -1)
+         first_loop_start = ip;
+      else if (inst->opcode == BRW_OPCODE_WHILE)
+         last_loop_end = ip;
+
+      ip++;
+   }
+
+   int payload_use_ip[payload_reg_count];
+   memset(payload_use_ip, 0, sizeof(payload_use_ip));
+   ip = 0;
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      /* Note that UNIFORM args have been turned into FIXED_HW_REG by
+       * assign_curbe_setup(), and interpolation uses fixed hardware regs from
+       * the start (see interp_reg()).
+       */
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file == FIXED_HW_REG &&
+             inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+            int reg_nr = inst->src[i].fixed_hw_reg.nr / reg_width;
+            if (reg_nr < payload_reg_count) {
+               if (ip < first_loop_start)
+                  payload_use_ip[reg_nr] = ip;
+               else
+                  payload_use_ip[reg_nr] = MAX2(ip, last_loop_end);
+            }
+         }
+      }
+      ip++;
+   }
+
+   /* g0/g1 are implicitly used by the FB_WRITE messages, but not present as
+    * regs in the various instructions.  Similarly, other weird payload bits
+    * up until the first push constant might be used, so conservatively mark
+    * all of them as used through the end of the program (ip is now past it).
+    */
+   for (unsigned int i = 0; i < c->nr_payload_regs; i++) {
+      payload_use_ip[i / reg_width] = ip;
+   }
+
+   for (int i = 0; i < payload_reg_count; i++) {
+      /* Mark the payload reg as interfering with any virtual grf that is live
+       * between the start of the program and our last use of the payload reg.
+       */
+      for (int j = 0; j < this->virtual_grf_count; j++) {
+         if (this->virtual_grf_def[j] <= payload_use_ip[i] ||
+             this->virtual_grf_use[j] <= payload_use_ip[i]) {
+            ra_add_node_interference(g, first_payload_node + i, j);
+         }
+      }
+   }
+
    for (int i = 0; i < payload_reg_count; i++) {
       /* Mark each payload reg node as being allocated to its physical register.
        *
@@ -177,13 +243,6 @@ brw_setup_payload_interference(struct ra_graph *g,
        * would just be silly.
        */
       ra_set_node_reg(g, first_payload_node + i, i);
-
-      /* For now, just mark each payload node as interfering with every other
-       * node to be allocated.
-       */
-      for (int j = 0; j < reg_node_count; j++) {
-         ra_add_node_interference(g, first_payload_node + i, j);
-      }
    }
 }
 
@@ -240,8 +299,7 @@ fs_visitor::assign_regs()
       }
    }
 
-   brw_setup_payload_interference(g, payload_reg_count, first_payload_node,
-                                  this->virtual_grf_count);
+   setup_payload_interference(g, payload_reg_count, first_payload_node);
 
    if (!ra_allocate_no_spills(g)) {
       /* Failed to allocate registers.  Spill a reg, and the caller will
-- 
1.7.10.4



More information about the mesa-dev mailing list