[Mesa-dev] [PATCH 03/12] i965/fs: Rewrite fs_visitor::split_virtual_grfs

Jason Ekstrand jason at jlekstrand.net
Fri Sep 19 13:10:21 PDT 2014


The original vgrf splitting code was written with the assumption that
vgrfs came in two types: those that can be split into single registers
and those that can't be split at all.  It was very
conservative and bailed as soon as more than one element of a register was
read or written.  This won't work once we start allowing a regular MOV or
ADD operation to operate on multiple registers.  This rewrite allows for
the case where a vgrf of size 5 may appropriately be split into one
register of size 1 and two registers of size 2.

Signed-off-by: Jason Ekstrand <jason.ekstrand at intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 132 ++++++++++++++++++++++-------------
 1 file changed, 85 insertions(+), 47 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 3fb1545..10a3a20 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1627,15 +1627,39 @@ void
 fs_visitor::split_virtual_grfs()
 {
    int num_vars = this->virtual_grf_count;
-   bool split_grf[num_vars];
-   int new_virtual_grf[num_vars];
 
-   /* Try to split anything > 0 sized. */
+   /* Count the total number of registers */
+   int reg_count = 0;
+   int vgrf_to_reg[num_vars];
    for (int i = 0; i < num_vars; i++) {
-      if (this->virtual_grf_sizes[i] != 1)
-	 split_grf[i] = true;
-      else
-	 split_grf[i] = false;
+      vgrf_to_reg[i] = reg_count;
+      reg_count += virtual_grf_sizes[i];
+   }
+
+   /* An array of "split points".  For each register slot, this indicates
+    * if this slot can be separated from the previous slot.  Every time an
+    * instruction uses multiple elements of a register (as a source or
+    * destination), we mark the used slots as inseparable.  Then we go
+    * through and split the registers into the smallest pieces we can.
+    */
+   bool split_points[reg_count];
+   memset(split_points, 0, sizeof(split_points));
+
+   /* Mark all used registers as fully splittable */
+   foreach_in_list(fs_inst, inst, &instructions) {
+      if (inst->dst.file == GRF) {
+         int reg = vgrf_to_reg[inst->dst.reg];
+         for (int j = 1; j < this->virtual_grf_sizes[inst->dst.reg]; j++)
+            split_points[reg + j] = true;
+      }
+
+      for (int i = 0; i < inst->sources; i++) {
+         if (inst->src[i].file == GRF) {
+            int reg = vgrf_to_reg[inst->src[i].reg];
+            for (int j = 1; j < this->virtual_grf_sizes[inst->src[i].reg]; j++)
+               split_points[reg + j] = true;
+         }
+      }
    }
 
    if (brw->has_pln &&
@@ -1645,61 +1669,75 @@ fs_visitor::split_virtual_grfs()
        * Gen6, that was the only supported interpolation mode, and since Gen6,
        * delta_x and delta_y are in fixed hardware registers.
        */
-      split_grf[this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg] =
-         false;
+      int vgrf = this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg;
+      split_points[vgrf_to_reg[vgrf] + 1] = false;
    }
 
    foreach_in_list(fs_inst, inst, &instructions) {
-      /* If there's a SEND message that requires contiguous destination
-       * registers, no splitting is allowed.
-       */
-      if (inst->regs_written > 1) {
-	 split_grf[inst->dst.reg] = false;
+      if (inst->dst.file == GRF) {
+         int reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
+         for (int j = 1; j < inst->regs_written; j++)
+            split_points[reg + j] = false;
       }
-
-      /* If we're sending from a GRF, don't split it, on the assumption that
-       * the send is reading the whole thing.
-       */
-      if (inst->is_send_from_grf()) {
-         for (int i = 0; i < inst->sources; i++) {
-            if (inst->src[i].file == GRF) {
-               split_grf[inst->src[i].reg] = false;
-            }
+      for (int i = 0; i < inst->sources; i++) {
+         if (inst->src[i].file == GRF) {
+            int reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
+            for (int j = 1; j < inst->regs_read(this, i); j++)
+               split_points[reg + j] = false;
          }
       }
    }
 
-   /* Allocate new space for split regs.  Note that the virtual
-    * numbers will be contiguous.
-    */
+   int new_virtual_grf[reg_count];
+   int new_reg_offset[reg_count];
+
+   int reg = 0;
    for (int i = 0; i < num_vars; i++) {
-      if (split_grf[i]) {
-	 new_virtual_grf[i] = virtual_grf_alloc(1);
-	 for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
-	    int reg = virtual_grf_alloc(1);
-	    assert(reg == new_virtual_grf[i] + j - 1);
-	    (void) reg;
-	 }
-	 this->virtual_grf_sizes[i] = 1;
+      /* The first one should always be 0 as a quick sanity check. */
+      assert(split_points[reg] == false);
+
+      /* j = 0 case */
+      new_reg_offset[reg] = 0;
+      reg++;
+      int offset = 1;
+
+      /* j > 0 case */
+      for (int j = 1; j < virtual_grf_sizes[i]; j++) {
+         /* If this is a split point, reset the offset to 0 and allocate a
+          * new virtual GRF for the previous offset many registers
+          */
+         if (split_points[reg]) {
+            int grf = virtual_grf_alloc(offset);
+            for (int k = reg - offset; k < reg; k++)
+               new_virtual_grf[k] = grf;
+            offset = 0;
+         }
+         new_reg_offset[reg] = offset;
+         offset++;
+         reg++;
       }
+
+      /* The last one gets the original register number */
+      virtual_grf_sizes[i] = offset;
+      for (int k = reg - offset; k < reg; k++)
+         new_virtual_grf[k] = i;
    }
+   assert(reg == reg_count);
 
    foreach_in_list(fs_inst, inst, &instructions) {
-      if (inst->dst.file == GRF &&
-	  split_grf[inst->dst.reg] &&
-	  inst->dst.reg_offset != 0) {
-	 inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
-			  inst->dst.reg_offset - 1);
-	 inst->dst.reg_offset = 0;
+      if (inst->dst.file == GRF) {
+         reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
+         inst->dst.reg = new_virtual_grf[reg];
+         inst->dst.reg_offset = new_reg_offset[reg];
+         assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]);
       }
       for (int i = 0; i < inst->sources; i++) {
-	 if (inst->src[i].file == GRF &&
-	     split_grf[inst->src[i].reg] &&
-	     inst->src[i].reg_offset != 0) {
-	    inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
-				inst->src[i].reg_offset - 1);
-	    inst->src[i].reg_offset = 0;
-	 }
+	 if (inst->src[i].file == GRF) {
+            reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
+            inst->src[i].reg = new_virtual_grf[reg];
+            inst->src[i].reg_offset = new_reg_offset[reg];
+            assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]);
+         }
       }
    }
    invalidate_live_intervals(false);
-- 
2.1.0



More information about the mesa-dev mailing list