Mesa (master): i965/fs: Allocate more register classes on gen7.

Thu Oct 10 23:30:15 UTC 2013

Module: Mesa
Branch: master
Commit: ee21c8b1e6d3a506fa04d8f86e99b2afe9fca841
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ee21c8b1e6d3a506fa04d8f86e99b2afe9fca841

Author: Eric Anholt <eric at anholt.net>
Date:   Wed May 22 11:26:03 2013 -0700

i965/fs: Allocate more register classes on gen7.

For texturing from GRFs, we now have payloads of arbitrary sizes up to the
message length limit.

v2 (Kenneth Graunke): Rebase on intel_context -> brw_context change.
v3: Add some comment text.
v4: Change some magic 16s to BRW_MAX_MRF (noted by Ken).  Leave the 11,
    which is the magic "max sampler message length".  BRW_MAX_MRF sizing
    on the little int arrays is retained because I could see us needing to
    extend in the future if we move to GRFs for FB writes (those go to at
    least 12 long in a quick scan of the specs)

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org> (v2)
Acked-by: Matt Turner <mattst88 at gmail.com>

---

 src/mesa/drivers/dri/i965/brw_context.h           |    7 ++--
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |   41 +++++++++++++--------
 2 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index c6e6655..fee4e1a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1300,10 +1300,11 @@ struct brw_context
       struct {
          struct ra_regs *regs;
 
-         /** Array of the ra classes for the unaligned contiguous
-          * register block sizes used.
+         /**
+          * Array of the ra classes for the unaligned contiguous register
+          * block sizes used, indexed by register size.
           */
-         int *classes;
+         int classes[16];
 
          /**
           * Mapping for register-allocated objects in *regs to the first
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 6850102..f0f4ad9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -83,9 +83,9 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width)
     * aggregates of scalar values at the GLSL level were split to scalar
     * values by split_virtual_grfs().
     *
-    * However, texture SEND messages return a series of contiguous registers.
-    * We currently always ask for 4 registers, but we may convert that to use
-    * less some day.
+    * However, texture SEND messages return a series of contiguous registers
+    * to write into.  We currently always ask for 4 registers, but we may
+    * convert that to use less some day.
     *
     * Additionally, on gen5 we need aligned pairs of registers for the PLN
     * instruction, and on gen4 we need 8 contiguous regs for workaround simd16
@@ -94,9 +94,22 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width)
     * So we have a need for classes for 1, 2, 4, and 8 registers currently,
     * and we add in '3' to make indexing the array easier for the common case
     * (since we'll probably want it for texturing later).
+    *
+    * And, on gen7 and newer, we do texturing SEND messages from GRFs, which
+    * means that we may need any size up to the sampler message size limit (11
+    * regs).
     */
-   const int class_count = 5;
-   const int class_sizes[class_count] = {1, 2, 3, 4, 8};
+   int class_count;
+   int class_sizes[BRW_MAX_MRF];
+
+   if (brw->gen >= 7) {
+      for (class_count = 0; class_count < 11; class_count++)
+         class_sizes[class_count] = class_count + 1;
+   } else {
+      for (class_count = 0; class_count < 4; class_count++)
+         class_sizes[class_count] = class_count + 1;
+      class_sizes[class_count++] = 8;
+   }
 
    /* Compute the total number of registers across all classes. */
    int ra_reg_count = 0;
@@ -159,7 +172,10 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width)
    ra_set_finalize(regs, NULL);
 
    brw->wm.reg_sets[index].regs = regs;
-   brw->wm.reg_sets[index].classes = classes;
+   for (unsigned i = 0; i < ARRAY_SIZE(brw->wm.reg_sets[index].classes); i++)
+      brw->wm.reg_sets[index].classes[i] = -1;
+   for (int i = 0; i < class_count; i++)
+      brw->wm.reg_sets[index].classes[class_sizes[i] - 1] = classes[i];
    brw->wm.reg_sets[index].ra_reg_to_grf = ra_reg_to_grf;
    brw->wm.reg_sets[index].aligned_pairs_class = aligned_pairs_class;
 }
@@ -411,17 +427,12 @@ fs_visitor::assign_regs()
                                                     node_count);
 
    for (int i = 0; i < this->virtual_grf_count; i++) {
-      int size = this->virtual_grf_sizes[i];
+      unsigned size = this->virtual_grf_sizes[i];
       int c;
 
-      if (size == 8) {
-         c = 4;
-      } else {
-         assert(size >= 1 &&
-                size <= 4 &&
-                "Register allocation relies on split_virtual_grfs()");
-         c = brw->wm.reg_sets[rsi].classes[size - 1];
-      }
+      assert(size <= ARRAY_SIZE(brw->wm.reg_sets[rsi].classes) &&
+             "Register allocation relies on split_virtual_grfs()");
+      c = brw->wm.reg_sets[rsi].classes[size - 1];
 
       /* Special case: on pre-GEN6 hardware that supports PLN, the
        * second operand of a PLN instruction needs to be an