[Mesa-dev] [PATCH 14/18] util/register_allocate: Compute transitive conflicts using 2-passes

Chris Wilson chris at chris-wilson.co.uk
Mon Jul 6 03:33:19 PDT 2015


Avoid frequent use of reralloc() for tracking the conflicts list, and
walking that list every time we add a transitive conflict, by making the
observation we apply the indirect conflicts by combining the conflicts
of a conflicting register in a second pass.

Reduces brw_compiler_create() from 18351.5us to 4787.1us on my ivb
i7-3720QM (in context that 18ms represents about 50% of the time it takes
to start X, though why X instantiates an intel_screen at all remains a
mystery).

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Matt Turner <mattst88 at gmail.com>
Cc: Jason Ekstrand <jason.ekstrand at intel.com>
Cc: Martin Peres <martin.peres at linux.intel.com
---
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  | 18 +++++++-
 .../drivers/dri/i965/brw_vec4_reg_allocate.cpp     | 16 ++++++-
 src/util/register_allocate.c                       | 53 +++++++++++++---------
 src/util/register_allocate.h                       |  2 +
 4 files changed, 64 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 8e5621d..7f87221 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -223,7 +223,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
             for (int base_reg = j;
                  base_reg < j + (class_sizes[i] + 1) / 2;
                  base_reg++) {
-               ra_add_transitive_reg_conflict(regs, base_reg, reg);
+               ra_mark_transitive_reg_conflict(regs, base_reg, reg);
             }
 
             reg++;
@@ -237,7 +237,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
             for (int base_reg = j;
                  base_reg < j + class_sizes[i];
                  base_reg++) {
-               ra_add_transitive_reg_conflict(regs, base_reg, reg);
+               ra_mark_transitive_reg_conflict(regs, base_reg, reg);
             }
 
             reg++;
@@ -246,6 +246,20 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
    }
    assert(reg == ra_reg_count);
 
+   reg = 0;
+   for (int i = 0; i < class_count; i++) {
+      int class_size = class_sizes[i];
+      int class_reg_count = base_reg_count - (class_size - 1);
+      if (devinfo->gen <= 5 && reg_width == 2)
+         class_size = (class_size + 1) / 2;
+      for (int j = 0; j < class_reg_count; j++) {
+	 for (int base_reg = j; base_reg < j + class_size; base_reg++)
+	    ra_add_transitive_reg_conflict(regs, base_reg, reg);
+	 reg++;
+      }
+   }
+   assert(reg == ra_reg_count);
+
    /* Add a special class for aligned pairs, which we'll put delta_xy
     * in on Gen <= 6 so that we can do PLN.
     */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 555c42e..93b7297 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -140,7 +140,7 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler)
 	 for (int base_reg = j;
 	      base_reg < j + class_sizes[i];
 	      base_reg++) {
-	    ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
+	    ra_mark_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
 	 }
 
 	 reg++;
@@ -158,6 +158,20 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler)
    }
    assert(reg == ra_reg_count);
 
+   reg = 0;
+   for (int i = 0; i < class_count; i++) {
+      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+      for (int j = 0; j < class_reg_count; j++) {
+	 for (int base_reg = j;
+	      base_reg < j + class_sizes[i];
+	      base_reg++) {
+	    ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
+	 }
+	 reg++;
+      }
+   }
+   assert(reg == ra_reg_count);
+
    ra_set_finalize(compiler->vec4_reg_set.regs, q_values);
 
    for (int i = 0; i < MAX_VGRF_SIZE; i++)
diff --git a/src/util/register_allocate.c b/src/util/register_allocate.c
index f5f7c04..2bbab7f 100644
--- a/src/util/register_allocate.c
+++ b/src/util/register_allocate.c
@@ -83,19 +83,17 @@
 
 struct ra_reg {
    BITSET_WORD *conflicts;
-   unsigned int *conflict_list;
-   unsigned int conflict_list_size;
-   unsigned int num_conflicts;
+   unsigned int conflict_range[2];
 };
 
 struct ra_regs {
    struct ra_reg *regs;
-   unsigned int count;
 
    struct ra_class **classes;
    unsigned int class_count;
 
    bool round_robin;
+   unsigned int count;
 };
 
 struct ra_class {
@@ -200,11 +198,8 @@ ra_alloc_reg_set(void *mem_ctx, unsigned int count)
       conflicts += bitset_count;
 
       BITSET_SET(regs->regs[i].conflicts, i);
-
-      regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4);
-      regs->regs[i].conflict_list_size = 4;
-      regs->regs[i].conflict_list[0] = i;
-      regs->regs[i].num_conflicts = 1;
+      regs->regs[i].conflict_range[0] = i;
+      regs->regs[i].conflict_range[1] = i;
    }
 
    return regs;
@@ -231,13 +226,11 @@ ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2)
 {
    struct ra_reg *reg1 = &regs->regs[r1];
 
-   if (reg1->conflict_list_size == reg1->num_conflicts) {
-      reg1->conflict_list_size *= 2;
-      reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list,
-				     unsigned int, reg1->conflict_list_size);
-   }
-   reg1->conflict_list[reg1->num_conflicts++] = r2;
    BITSET_SET(reg1->conflicts, r2);
+   if (r2 < reg1->conflict_range[0])
+      reg1->conflict_range[0] = r2;
+   else if (r2 > reg1->conflict_range[1])
+      reg1->conflict_range[1] = r2;
 }
 
 void
@@ -261,13 +254,27 @@ void
 ra_add_transitive_reg_conflict(struct ra_regs *regs,
 			       unsigned int base_reg, unsigned int reg)
 {
+   struct ra_reg *b = &regs->regs[base_reg];
+   struct ra_reg *r = &regs->regs[reg];
    unsigned int i;
 
-   ra_add_reg_conflict(regs, reg, base_reg);
+   if (b->conflict_range[0] < r->conflict_range[0])
+      r->conflict_range[0] = b->conflict_range[0];
 
-   for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) {
-      ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]);
-   }
+   if (b->conflict_range[1] > r->conflict_range[1])
+      r->conflict_range[1] = b->conflict_range[1];
+
+   for (i = BITSET_BITWORD(b->conflict_range[0]);
+	i <= BITSET_BITWORD(b->conflict_range[1]);
+	i++)
+	   r->conflicts[i] |= b->conflicts[i];
+}
+
+void
+ra_mark_transitive_reg_conflict(struct ra_regs *regs,
+			       unsigned int base_reg, unsigned int reg)
+{
+   ra_add_conflict_list(regs, base_reg, reg);
 }
 
 unsigned int
@@ -343,9 +350,11 @@ ra_set_finalize(struct ra_regs *regs, unsigned int **q_values)
             if (!reg_belongs_to_class(rc, regs->classes[c]))
 	       continue;
 
-	    for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
-	       unsigned int rb = regs->regs[rc].conflict_list[i];
-	       if (reg_belongs_to_class(rb, regs->classes[b]))
+	    for (i = regs->regs[rc].conflict_range[0];
+                 i <= regs->regs[rc].conflict_range[1];
+                 i++) {
+               if (BITSET_TEST(regs->regs[rc].conflicts, i) &&
+                   reg_belongs_to_class(i, regs->classes[b]))
 		  conflicts++;
 	    }
 	    max_conflicts = MAX2(max_conflicts, conflicts);
diff --git a/src/util/register_allocate.h b/src/util/register_allocate.h
index 61f182e..1ceea79 100644
--- a/src/util/register_allocate.h
+++ b/src/util/register_allocate.h
@@ -51,6 +51,8 @@ void ra_add_reg_conflict(struct ra_regs *regs,
 			 unsigned int r1, unsigned int r2);
 void ra_add_transitive_reg_conflict(struct ra_regs *regs,
 				    unsigned int base_reg, unsigned int reg);
+void ra_mark_transitive_reg_conflict(struct ra_regs *regs,
+				     unsigned int base_reg, unsigned int reg);
 void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg);
 void ra_set_num_conflicts(struct ra_regs *regs, unsigned int class_a,
                           unsigned int class_b, unsigned int num_conflicts);
-- 
2.1.4



More information about the mesa-dev mailing list