[Mesa-dev] [PATCH 14/18] util/register_allocate: Compute transitive conflicts using 2-passes
Chris Wilson
chris at chris-wilson.co.uk
Mon Jul 6 03:33:19 PDT 2015
Avoid frequent use of reralloc() for tracking the conflicts list, and
walking that list every time we add a transitive conflict, by observing
that the indirect conflicts can be applied in a second pass that merges
in the conflicts of each conflicting register.
Reduces brw_compiler_create() from 18351.5us to 4787.1us on my ivb
i7-3720QM (for context, that 18ms represents about 50% of the time it
takes to start X, though why X instantiates an intel_screen at all
remains a mystery).
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Matt Turner <mattst88 at gmail.com>
Cc: Jason Ekstrand <jason.ekstrand at intel.com>
Cc: Martin Peres <martin.peres at linux.intel.com>
---
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 18 +++++++-
.../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 16 ++++++-
src/util/register_allocate.c | 53 +++++++++++++---------
src/util/register_allocate.h | 2 +
4 files changed, 64 insertions(+), 25 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 8e5621d..7f87221 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -223,7 +223,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
for (int base_reg = j;
base_reg < j + (class_sizes[i] + 1) / 2;
base_reg++) {
- ra_add_transitive_reg_conflict(regs, base_reg, reg);
+ ra_mark_transitive_reg_conflict(regs, base_reg, reg);
}
reg++;
@@ -237,7 +237,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
for (int base_reg = j;
base_reg < j + class_sizes[i];
base_reg++) {
- ra_add_transitive_reg_conflict(regs, base_reg, reg);
+ ra_mark_transitive_reg_conflict(regs, base_reg, reg);
}
reg++;
@@ -246,6 +246,20 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
}
assert(reg == ra_reg_count);
+ reg = 0;
+ for (int i = 0; i < class_count; i++) {
+ int class_size = class_sizes[i];
+ int class_reg_count = base_reg_count - (class_size - 1);
+ if (devinfo->gen <= 5 && reg_width == 2)
+ class_size = (class_size + 1) / 2;
+ for (int j = 0; j < class_reg_count; j++) {
+ for (int base_reg = j; base_reg < j + class_size; base_reg++)
+ ra_add_transitive_reg_conflict(regs, base_reg, reg);
+ reg++;
+ }
+ }
+ assert(reg == ra_reg_count);
+
/* Add a special class for aligned pairs, which we'll put delta_xy
* in on Gen <= 6 so that we can do PLN.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 555c42e..93b7297 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -140,7 +140,7 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler)
for (int base_reg = j;
base_reg < j + class_sizes[i];
base_reg++) {
- ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
+ ra_mark_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
}
reg++;
@@ -158,6 +158,20 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler)
}
assert(reg == ra_reg_count);
+ reg = 0;
+ for (int i = 0; i < class_count; i++) {
+ int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+ for (int j = 0; j < class_reg_count; j++) {
+ for (int base_reg = j;
+ base_reg < j + class_sizes[i];
+ base_reg++) {
+ ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
+ }
+ reg++;
+ }
+ }
+ assert(reg == ra_reg_count);
+
ra_set_finalize(compiler->vec4_reg_set.regs, q_values);
for (int i = 0; i < MAX_VGRF_SIZE; i++)
diff --git a/src/util/register_allocate.c b/src/util/register_allocate.c
index f5f7c04..2bbab7f 100644
--- a/src/util/register_allocate.c
+++ b/src/util/register_allocate.c
@@ -83,19 +83,17 @@
struct ra_reg {
BITSET_WORD *conflicts;
- unsigned int *conflict_list;
- unsigned int conflict_list_size;
- unsigned int num_conflicts;
+ unsigned int conflict_range[2];
};
struct ra_regs {
struct ra_reg *regs;
- unsigned int count;
struct ra_class **classes;
unsigned int class_count;
bool round_robin;
+ unsigned int count;
};
struct ra_class {
@@ -200,11 +198,8 @@ ra_alloc_reg_set(void *mem_ctx, unsigned int count)
conflicts += bitset_count;
BITSET_SET(regs->regs[i].conflicts, i);
-
- regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4);
- regs->regs[i].conflict_list_size = 4;
- regs->regs[i].conflict_list[0] = i;
- regs->regs[i].num_conflicts = 1;
+ regs->regs[i].conflict_range[0] = i;
+ regs->regs[i].conflict_range[1] = i;
}
return regs;
@@ -231,13 +226,11 @@ ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2)
{
struct ra_reg *reg1 = &regs->regs[r1];
- if (reg1->conflict_list_size == reg1->num_conflicts) {
- reg1->conflict_list_size *= 2;
- reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list,
- unsigned int, reg1->conflict_list_size);
- }
- reg1->conflict_list[reg1->num_conflicts++] = r2;
BITSET_SET(reg1->conflicts, r2);
+ if (r2 < reg1->conflict_range[0])
+ reg1->conflict_range[0] = r2;
+ else if (r2 > reg1->conflict_range[1])
+ reg1->conflict_range[1] = r2;
}
void
@@ -261,13 +254,27 @@ void
ra_add_transitive_reg_conflict(struct ra_regs *regs,
unsigned int base_reg, unsigned int reg)
{
+ struct ra_reg *b = &regs->regs[base_reg];
+ struct ra_reg *r = &regs->regs[reg];
unsigned int i;
- ra_add_reg_conflict(regs, reg, base_reg);
+ if (b->conflict_range[0] < r->conflict_range[0])
+ r->conflict_range[0] = b->conflict_range[0];
- for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) {
- ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]);
- }
+ if (b->conflict_range[1] > r->conflict_range[1])
+ r->conflict_range[1] = b->conflict_range[1];
+
+ for (i = BITSET_BITWORD(b->conflict_range[0]);
+ i <= BITSET_BITWORD(b->conflict_range[1]);
+ i++)
+ r->conflicts[i] |= b->conflicts[i];
+}
+
+void
+ra_mark_transitive_reg_conflict(struct ra_regs *regs,
+ unsigned int base_reg, unsigned int reg)
+{
+ ra_add_conflict_list(regs, base_reg, reg);
}
unsigned int
@@ -343,9 +350,11 @@ ra_set_finalize(struct ra_regs *regs, unsigned int **q_values)
if (!reg_belongs_to_class(rc, regs->classes[c]))
continue;
- for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
- unsigned int rb = regs->regs[rc].conflict_list[i];
- if (reg_belongs_to_class(rb, regs->classes[b]))
+ for (i = regs->regs[rc].conflict_range[0];
+ i <= regs->regs[rc].conflict_range[1];
+ i++) {
+ if (BITSET_TEST(regs->regs[rc].conflicts, i) &&
+ reg_belongs_to_class(i, regs->classes[b]))
conflicts++;
}
max_conflicts = MAX2(max_conflicts, conflicts);
diff --git a/src/util/register_allocate.h b/src/util/register_allocate.h
index 61f182e..1ceea79 100644
--- a/src/util/register_allocate.h
+++ b/src/util/register_allocate.h
@@ -51,6 +51,8 @@ void ra_add_reg_conflict(struct ra_regs *regs,
unsigned int r1, unsigned int r2);
void ra_add_transitive_reg_conflict(struct ra_regs *regs,
unsigned int base_reg, unsigned int reg);
+void ra_mark_transitive_reg_conflict(struct ra_regs *regs,
+ unsigned int base_reg, unsigned int reg);
void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg);
void ra_set_num_conflicts(struct ra_regs *regs, unsigned int class_a,
unsigned int class_b, unsigned int num_conflicts);
--
2.1.4
More information about the mesa-dev
mailing list