[Mesa-dev] [PATCH 1/2] i965/fs: Make virtual grf live intervals actually cover their used range.
Ian Romanick
idr at freedesktop.org
Tue May 7 11:03:10 PDT 2013
On 04/30/2013 05:21 PM, Eric Anholt wrote:
> Previously, we would sometimes not consider a write to a register to
> extend the end of the interval, nor would we consider a read before a
> write to extend the start. This made for a bunch of complicated logic
> related to how to treat the results when dead code might be present.
> Instead, just extend the interval and fix dead code elimination to know
> how to remove it.
>
> Interestingly, this actually results in a tiny bit more optimization:
> total instructions in shared programs: 1391220 -> 1390799 (-0.03%)
> instructions in affected programs: 14037 -> 13616 (-3.00%)
Both patches are
Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 21 +++---
> src/mesa/drivers/dri/i965/brw_fs.h | 4 +-
> src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 +-
> .../drivers/dri/i965/brw_fs_live_variables.cpp | 76 ++++++----------------
> src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 3 +-
> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 +-
> 6 files changed, 38 insertions(+), 72 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index a8610ee..0821c05 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1449,8 +1449,8 @@ fs_visitor::compact_virtual_grfs()
> remap_table[i] = new_index;
> virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
> if (live_intervals_valid) {
> - virtual_grf_use[new_index] = virtual_grf_use[i];
> - virtual_grf_def[new_index] = virtual_grf_def[i];
> + virtual_grf_start[new_index] = virtual_grf_start[i];
> + virtual_grf_end[new_index] = virtual_grf_end[i];
> }
> ++new_index;
> }
> @@ -1764,10 +1764,8 @@ fs_visitor::opt_algebraic()
> }
>
> /**
> - * Must be called after calculate_live_intervales() to remove unused
> - * writes to registers -- register allocation will fail otherwise
> - * because something deffed but not used won't be considered to
> - * interfere with other regs.
> + * Removes any instructions writing a VGRF where that VGRF is not used by any
> + * later instruction.
> */
> bool
> fs_visitor::dead_code_eliminate()
> @@ -1780,9 +1778,12 @@ fs_visitor::dead_code_eliminate()
> foreach_list_safe(node, &this->instructions) {
> fs_inst *inst = (fs_inst *)node;
>
> - if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
> - inst->remove();
> - progress = true;
> + if (inst->dst.file == GRF) {
> + assert(this->virtual_grf_end[inst->dst.reg] >= pc);
> + if (this->virtual_grf_end[inst->dst.reg] == pc) {
> + inst->remove();
> + progress = true;
> + }
> }
>
> pc++;
> @@ -2194,7 +2195,7 @@ fs_visitor::compute_to_mrf()
> /* Can't compute-to-MRF this GRF if someone else was going to
> * read it later.
> */
> - if (this->virtual_grf_use[inst->src[0].reg] > ip)
> + if (this->virtual_grf_end[inst->src[0].reg] > ip)
> continue;
>
> /* Found a move of a GRF to a MRF. Let's see if we can go
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index c9c9856..3df2ce1 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -434,8 +434,8 @@ public:
> int *virtual_grf_sizes;
> int virtual_grf_count;
> int virtual_grf_array_size;
> - int *virtual_grf_def;
> - int *virtual_grf_use;
> + int *virtual_grf_start;
> + int *virtual_grf_end;
> bool live_intervals_valid;
>
> /* This is the map from UNIFORM hw_reg + reg_offset as generated by
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> index b5c2200..9b60d9b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> @@ -194,7 +194,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
> /* Kill any AEB entries using registers that don't get reused any
> * more -- a sure sign they'll fail operands_match().
> */
> - if (src_reg->file == GRF && virtual_grf_use[src_reg->reg] < ip) {
> + if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) {
> entry->remove();
> ralloc_free(entry);
> break;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> index fdcfac6..dd8923e 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> @@ -167,16 +167,16 @@ fs_visitor::calculate_live_intervals()
> if (this->live_intervals_valid)
> return;
>
> - int *def = ralloc_array(mem_ctx, int, num_vars);
> - int *use = ralloc_array(mem_ctx, int, num_vars);
> - ralloc_free(this->virtual_grf_def);
> - ralloc_free(this->virtual_grf_use);
> - this->virtual_grf_def = def;
> - this->virtual_grf_use = use;
> + int *start = ralloc_array(mem_ctx, int, num_vars);
> + int *end = ralloc_array(mem_ctx, int, num_vars);
> + ralloc_free(this->virtual_grf_start);
> + ralloc_free(this->virtual_grf_end);
> + this->virtual_grf_start = start;
> + this->virtual_grf_end = end;
>
> for (int i = 0; i < num_vars; i++) {
> - def[i] = MAX_INSTRUCTION;
> - use[i] = -1;
> + start[i] = MAX_INSTRUCTION;
> + end[i] = -1;
> }
>
> /* Start by setting up the intervals with no knowledge of control
> @@ -189,8 +189,7 @@ fs_visitor::calculate_live_intervals()
> for (unsigned int i = 0; i < 3; i++) {
> if (inst->src[i].file == GRF) {
> int reg = inst->src[i].reg;
> -
> - use[reg] = ip;
> + int end_ip = ip;
>
> /* In most cases, a register can be written over safely by the
> * same instruction that is its last use. For a single
> @@ -220,15 +219,19 @@ fs_visitor::calculate_live_intervals()
> if (dispatch_width == 16 && (inst->src[i].smear ||
> (this->pixel_x.reg == reg ||
> this->pixel_y.reg == reg))) {
> - use[reg]++;
> + end_ip++;
> }
> +
> + start[reg] = MIN2(start[reg], ip);
> + end[reg] = end_ip;
> }
> }
>
> if (inst->dst.file == GRF) {
> int reg = inst->dst.reg;
>
> - def[reg] = MIN2(def[reg], ip);
> + start[reg] = MIN2(start[reg], ip);
> + end[reg] = ip;
> }
>
> ip++;
> @@ -241,60 +244,23 @@ fs_visitor::calculate_live_intervals()
> for (int b = 0; b < cfg.num_blocks; b++) {
> for (int i = 0; i < num_vars; i++) {
> if (BITSET_TEST(livevars.bd[b].livein, i)) {
> - def[i] = MIN2(def[i], cfg.blocks[b]->start_ip);
> - use[i] = MAX2(use[i], cfg.blocks[b]->start_ip);
> + start[i] = MIN2(start[i], cfg.blocks[b]->start_ip);
> + end[i] = MAX2(end[i], cfg.blocks[b]->start_ip);
> }
>
> if (BITSET_TEST(livevars.bd[b].liveout, i)) {
> - def[i] = MIN2(def[i], cfg.blocks[b]->end_ip);
> - use[i] = MAX2(use[i], cfg.blocks[b]->end_ip);
> + start[i] = MIN2(start[i], cfg.blocks[b]->end_ip);
> + end[i] = MAX2(end[i], cfg.blocks[b]->end_ip);
> }
> }
> }
>
> this->live_intervals_valid = true;
> -
> - /* Note in the non-control-flow code above, that we only take def[] as the
> - * first store, and use[] as the last use. We use this in dead code
> - * elimination, to determine when a store never gets used. However, we
> - * also use these arrays to answer the virtual_grf_interferes() question
> - * (live interval analysis), which is used for register coalescing and
> - * register allocation.
> - *
> - * So, there's a conflict over what the array should mean: if use[]
> - * considers a def after the last use, then the dead code elimination pass
> - * never does anything (and it's an important pass!). But if we don't
> - * include dead code, then virtual_grf_interferes() lies and we'll do
> - * horrible things like coalesce the register that is dead-code-written
> - * into another register that was live across the dead write (causing the
> - * use of the second register to take the dead write's source value instead
> - * of the coalesced MOV's source value).
> - *
> - * To resolve the conflict, immediately after calculating live intervals,
> - * detect dead code, nuke it, and if we changed anything, calculate again
> - * before returning to the caller. Now we happen to produce def[] and
> - * use[] arrays that will work for virtual_grf_interferes().
> - */
> - if (dead_code_eliminate())
> - calculate_live_intervals();
> }
>
> bool
> fs_visitor::virtual_grf_interferes(int a, int b)
> {
> - int a_def = this->virtual_grf_def[a], a_use = this->virtual_grf_use[a];
> - int b_def = this->virtual_grf_def[b], b_use = this->virtual_grf_use[b];
> -
> - /* If there's dead code (def but not use), it would break our test
> - * unless we consider it used.
> - */
> - if ((a_use == -1 && a_def != MAX_INSTRUCTION) ||
> - (b_use == -1 && b_def != MAX_INSTRUCTION)) {
> - return true;
> - }
> -
> - int start = MAX2(a_def, b_def);
> - int end = MIN2(a_use, b_use);
> -
> - return start < end;
> + return !(virtual_grf_end[a] <= virtual_grf_start[b] ||
> + virtual_grf_end[b] <= virtual_grf_start[a]);
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> index b9b0303..3b2197f 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> @@ -316,8 +316,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
> * in order to not have to worry about the uniform issue described in
> * calculate_live_intervals().
> */
> - if (this->virtual_grf_def[j] <= payload_last_use_ip[i] ||
> - this->virtual_grf_use[j] <= payload_last_use_ip[i]) {
> + if (this->virtual_grf_start[j] <= payload_last_use_ip[i]) {
> ra_add_node_interference(g, first_payload_node + i, j);
> }
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 55ae689..a61363c 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -2408,8 +2408,8 @@ fs_visitor::fs_visitor(struct brw_context *brw,
> this->virtual_grf_sizes = NULL;
> this->virtual_grf_count = 0;
> this->virtual_grf_array_size = 0;
> - this->virtual_grf_def = NULL;
> - this->virtual_grf_use = NULL;
> + this->virtual_grf_start = NULL;
> + this->virtual_grf_end = NULL;
> this->live_intervals_valid = false;
>
> this->force_uncompressed_stack = 0;
>
More information about the mesa-dev
mailing list