[Mesa-dev] [PATCH 14/18] util/register_allocate: Compute transitive conflicts using 2-passes
Jason Ekstrand
jason at jlekstrand.net
Fri Jul 31 09:30:36 PDT 2015
This patch looks really sketchy to me. First, you fundamentally
changed some of register_allocate's internal data structures with no
explanation as to what the change is and why it still works. Maybe
the difference should be obvious to me, but it isn't. Second, it
appears that your changes assume some intel-specific details of
register allocation. The register_allocate code is *not* Intel
specific and we are *not* the only ones using it.
--Jason
On Mon, Jul 6, 2015 at 3:33 AM, Chris Wilson <chris at chris-wilson.co.uk> wrote:
> Avoid frequent use of reralloc() for tracking the conflicts list, and
> walking that list every time we add a transitive conflict, by making the
> observation we apply the indirect conflicts by combining the conflicts
> of a conflicting register in a second pass.
>
> Reduces brw_compiler_create() from 18351.5us to 4787.1us on my ivb
> i7-3720QM (in context that 18ms represents about 50% of the time it takes
> to start X, though why X instantiates an intel_screen at all remains a
> mystery).
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Matt Turner <mattst88 at gmail.com>
> Cc: Jason Ekstrand <jason.ekstrand at intel.com>
> Cc: Martin Peres <martin.peres at linux.intel.com
> ---
> src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 18 +++++++-
> .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 16 ++++++-
> src/util/register_allocate.c | 53 +++++++++++++---------
> src/util/register_allocate.h | 2 +
> 4 files changed, 64 insertions(+), 25 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> index 8e5621d..7f87221 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> @@ -223,7 +223,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
> for (int base_reg = j;
> base_reg < j + (class_sizes[i] + 1) / 2;
> base_reg++) {
> - ra_add_transitive_reg_conflict(regs, base_reg, reg);
> + ra_mark_transitive_reg_conflict(regs, base_reg, reg);
> }
>
> reg++;
> @@ -237,7 +237,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
> for (int base_reg = j;
> base_reg < j + class_sizes[i];
> base_reg++) {
> - ra_add_transitive_reg_conflict(regs, base_reg, reg);
> + ra_mark_transitive_reg_conflict(regs, base_reg, reg);
> }
>
> reg++;
> @@ -246,6 +246,20 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
> }
> assert(reg == ra_reg_count);
>
> + reg = 0;
> + for (int i = 0; i < class_count; i++) {
> + int class_size = class_sizes[i];
> + int class_reg_count = base_reg_count - (class_size - 1);
> + if (devinfo->gen <= 5 && reg_width == 2)
> + class_size = (class_size + 1) / 2;
> + for (int j = 0; j < class_reg_count; j++) {
> + for (int base_reg = j; base_reg < j + class_size; base_reg++)
> + ra_add_transitive_reg_conflict(regs, base_reg, reg);
> + reg++;
> + }
> + }
> + assert(reg == ra_reg_count);
> +
> /* Add a special class for aligned pairs, which we'll put delta_xy
> * in on Gen <= 6 so that we can do PLN.
> */
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> index 555c42e..93b7297 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> @@ -140,7 +140,7 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler)
> for (int base_reg = j;
> base_reg < j + class_sizes[i];
> base_reg++) {
> - ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
> + ra_mark_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
> }
>
> reg++;
> @@ -158,6 +158,20 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler)
> }
> assert(reg == ra_reg_count);
>
> + reg = 0;
> + for (int i = 0; i < class_count; i++) {
> + int class_reg_count = base_reg_count - (class_sizes[i] - 1);
> + for (int j = 0; j < class_reg_count; j++) {
> + for (int base_reg = j;
> + base_reg < j + class_sizes[i];
> + base_reg++) {
> + ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
> + }
> + reg++;
> + }
> + }
> + assert(reg == ra_reg_count);
> +
> ra_set_finalize(compiler->vec4_reg_set.regs, q_values);
>
> for (int i = 0; i < MAX_VGRF_SIZE; i++)
> diff --git a/src/util/register_allocate.c b/src/util/register_allocate.c
> index f5f7c04..2bbab7f 100644
> --- a/src/util/register_allocate.c
> +++ b/src/util/register_allocate.c
> @@ -83,19 +83,17 @@
>
> struct ra_reg {
> BITSET_WORD *conflicts;
> - unsigned int *conflict_list;
> - unsigned int conflict_list_size;
> - unsigned int num_conflicts;
> + unsigned int conflict_range[2];
> };
>
> struct ra_regs {
> struct ra_reg *regs;
> - unsigned int count;
>
> struct ra_class **classes;
> unsigned int class_count;
>
> bool round_robin;
> + unsigned int count;
> };
>
> struct ra_class {
> @@ -200,11 +198,8 @@ ra_alloc_reg_set(void *mem_ctx, unsigned int count)
> conflicts += bitset_count;
>
> BITSET_SET(regs->regs[i].conflicts, i);
> -
> - regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4);
> - regs->regs[i].conflict_list_size = 4;
> - regs->regs[i].conflict_list[0] = i;
> - regs->regs[i].num_conflicts = 1;
> + regs->regs[i].conflict_range[0] = i;
> + regs->regs[i].conflict_range[1] = i;
> }
>
> return regs;
> @@ -231,13 +226,11 @@ ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2)
> {
> struct ra_reg *reg1 = ®s->regs[r1];
>
> - if (reg1->conflict_list_size == reg1->num_conflicts) {
> - reg1->conflict_list_size *= 2;
> - reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list,
> - unsigned int, reg1->conflict_list_size);
> - }
> - reg1->conflict_list[reg1->num_conflicts++] = r2;
> BITSET_SET(reg1->conflicts, r2);
> + if (r2 < reg1->conflict_range[0])
> + reg1->conflict_range[0] = r2;
> + else if (r2 > reg1->conflict_range[1])
> + reg1->conflict_range[1] = r2;
> }
>
> void
> @@ -261,13 +254,27 @@ void
> ra_add_transitive_reg_conflict(struct ra_regs *regs,
> unsigned int base_reg, unsigned int reg)
> {
> + struct ra_reg *b = ®s->regs[base_reg];
> + struct ra_reg *r = ®s->regs[reg];
> unsigned int i;
>
> - ra_add_reg_conflict(regs, reg, base_reg);
> + if (b->conflict_range[0] < r->conflict_range[0])
> + r->conflict_range[0] = b->conflict_range[0];
>
> - for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) {
> - ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]);
> - }
> + if (b->conflict_range[1] > r->conflict_range[1])
> + r->conflict_range[1] = b->conflict_range[1];
> +
> + for (i = BITSET_BITWORD(b->conflict_range[0]);
> + i <= BITSET_BITWORD(b->conflict_range[1]);
> + i++)
> + r->conflicts[i] |= b->conflicts[i];
> +}
> +
> +void
> +ra_mark_transitive_reg_conflict(struct ra_regs *regs,
> + unsigned int base_reg, unsigned int reg)
> +{
> + ra_add_conflict_list(regs, base_reg, reg);
> }
>
> unsigned int
> @@ -343,9 +350,11 @@ ra_set_finalize(struct ra_regs *regs, unsigned int **q_values)
> if (!reg_belongs_to_class(rc, regs->classes[c]))
> continue;
>
> - for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
> - unsigned int rb = regs->regs[rc].conflict_list[i];
> - if (reg_belongs_to_class(rb, regs->classes[b]))
> + for (i = regs->regs[rc].conflict_range[0];
> + i <= regs->regs[rc].conflict_range[1];
> + i++) {
> + if (BITSET_TEST(regs->regs[rc].conflicts, i) &&
> + reg_belongs_to_class(i, regs->classes[b]))
> conflicts++;
> }
> max_conflicts = MAX2(max_conflicts, conflicts);
> diff --git a/src/util/register_allocate.h b/src/util/register_allocate.h
> index 61f182e..1ceea79 100644
> --- a/src/util/register_allocate.h
> +++ b/src/util/register_allocate.h
> @@ -51,6 +51,8 @@ void ra_add_reg_conflict(struct ra_regs *regs,
> unsigned int r1, unsigned int r2);
> void ra_add_transitive_reg_conflict(struct ra_regs *regs,
> unsigned int base_reg, unsigned int reg);
> +void ra_mark_transitive_reg_conflict(struct ra_regs *regs,
> + unsigned int base_reg, unsigned int reg);
> void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg);
> void ra_set_num_conflicts(struct ra_regs *regs, unsigned int class_a,
> unsigned int class_b, unsigned int num_conflicts);
> --
> 2.1.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list