[Mesa-dev] [PATCH 1/2] i965/fs: Make virtual grf live intervals actually cover their used range.

Ian Romanick idr at freedesktop.org
Tue May 7 11:03:10 PDT 2013


On 04/30/2013 05:21 PM, Eric Anholt wrote:
> Previously, we would sometimes not consider a write to a register to
> extend the end of the interval, nor would we consider a read before a
> write to extend the start.  This made for a bunch of complicated logic
> related to how to treat the results when dead code might be present.
> Instead, just extend the interval and fix dead code elimination to know
> how to remove it.
>
> Interestingly, this actually results in a tiny bit more optimization:
> total instructions in shared programs: 1391220 -> 1390799 (-0.03%)
> instructions in affected programs:     14037 -> 13616 (-3.00%)

Both patches are

Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>

> ---
>   src/mesa/drivers/dri/i965/brw_fs.cpp               | 21 +++---
>   src/mesa/drivers/dri/i965/brw_fs.h                 |  4 +-
>   src/mesa/drivers/dri/i965/brw_fs_cse.cpp           |  2 +-
>   .../drivers/dri/i965/brw_fs_live_variables.cpp     | 76 ++++++----------------
>   src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |  3 +-
>   src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       |  4 +-
>   6 files changed, 38 insertions(+), 72 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index a8610ee..0821c05 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1449,8 +1449,8 @@ fs_visitor::compact_virtual_grfs()
>            remap_table[i] = new_index;
>            virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
>            if (live_intervals_valid) {
> -            virtual_grf_use[new_index] = virtual_grf_use[i];
> -            virtual_grf_def[new_index] = virtual_grf_def[i];
> +            virtual_grf_start[new_index] = virtual_grf_start[i];
> +            virtual_grf_end[new_index] = virtual_grf_end[i];
>            }
>            ++new_index;
>         }
> @@ -1764,10 +1764,8 @@ fs_visitor::opt_algebraic()
>   }
>
>   /**
> - * Must be called after calculate_live_intervales() to remove unused
> - * writes to registers -- register allocation will fail otherwise
> - * because something deffed but not used won't be considered to
> - * interfere with other regs.
> + * Removes any instructions writing a VGRF where that VGRF is not used by any
> + * later instruction.
>    */
>   bool
>   fs_visitor::dead_code_eliminate()
> @@ -1780,9 +1778,12 @@ fs_visitor::dead_code_eliminate()
>      foreach_list_safe(node, &this->instructions) {
>         fs_inst *inst = (fs_inst *)node;
>
> -      if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
> -	 inst->remove();
> -	 progress = true;
> +      if (inst->dst.file == GRF) {
> +         assert(this->virtual_grf_end[inst->dst.reg] >= pc);
> +         if (this->virtual_grf_end[inst->dst.reg] == pc) {
> +            inst->remove();
> +            progress = true;
> +         }
>         }
>
>         pc++;
> @@ -2194,7 +2195,7 @@ fs_visitor::compute_to_mrf()
>         /* Can't compute-to-MRF this GRF if someone else was going to
>          * read it later.
>          */
> -      if (this->virtual_grf_use[inst->src[0].reg] > ip)
> +      if (this->virtual_grf_end[inst->src[0].reg] > ip)
>   	 continue;
>
>         /* Found a move of a GRF to a MRF.  Let's see if we can go
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index c9c9856..3df2ce1 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -434,8 +434,8 @@ public:
>      int *virtual_grf_sizes;
>      int virtual_grf_count;
>      int virtual_grf_array_size;
> -   int *virtual_grf_def;
> -   int *virtual_grf_use;
> +   int *virtual_grf_start;
> +   int *virtual_grf_end;
>      bool live_intervals_valid;
>
>      /* This is the map from UNIFORM hw_reg + reg_offset as generated by
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> index b5c2200..9b60d9b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> @@ -194,7 +194,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
>               /* Kill any AEB entries using registers that don't get reused any
>                * more -- a sure sign they'll fail operands_match().
>                */
> -            if (src_reg->file == GRF && virtual_grf_use[src_reg->reg] < ip) {
> +            if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) {
>                  entry->remove();
>                  ralloc_free(entry);
>   	       break;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> index fdcfac6..dd8923e 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> @@ -167,16 +167,16 @@ fs_visitor::calculate_live_intervals()
>      if (this->live_intervals_valid)
>         return;
>
> -   int *def = ralloc_array(mem_ctx, int, num_vars);
> -   int *use = ralloc_array(mem_ctx, int, num_vars);
> -   ralloc_free(this->virtual_grf_def);
> -   ralloc_free(this->virtual_grf_use);
> -   this->virtual_grf_def = def;
> -   this->virtual_grf_use = use;
> +   int *start = ralloc_array(mem_ctx, int, num_vars);
> +   int *end = ralloc_array(mem_ctx, int, num_vars);
> +   ralloc_free(this->virtual_grf_start);
> +   ralloc_free(this->virtual_grf_end);
> +   this->virtual_grf_start = start;
> +   this->virtual_grf_end = end;
>
>      for (int i = 0; i < num_vars; i++) {
> -      def[i] = MAX_INSTRUCTION;
> -      use[i] = -1;
> +      start[i] = MAX_INSTRUCTION;
> +      end[i] = -1;
>      }
>
>      /* Start by setting up the intervals with no knowledge of control
> @@ -189,8 +189,7 @@ fs_visitor::calculate_live_intervals()
>         for (unsigned int i = 0; i < 3; i++) {
>   	 if (inst->src[i].file == GRF) {
>   	    int reg = inst->src[i].reg;
> -
> -	    use[reg] = ip;
> +            int end_ip = ip;
>
>               /* In most cases, a register can be written over safely by the
>                * same instruction that is its last use.  For a single
> @@ -220,15 +219,19 @@ fs_visitor::calculate_live_intervals()
>               if (dispatch_width == 16 && (inst->src[i].smear ||
>                                            (this->pixel_x.reg == reg ||
>                                             this->pixel_y.reg == reg))) {
> -               use[reg]++;
> +               end_ip++;
>               }
> +
> +            start[reg] = MIN2(start[reg], ip);
> +            end[reg] = end_ip;
>   	 }
>         }
>
>         if (inst->dst.file == GRF) {
>            int reg = inst->dst.reg;
>
> -         def[reg] = MIN2(def[reg], ip);
> +         start[reg] = MIN2(start[reg], ip);
> +         end[reg] = ip;
>         }
>
>         ip++;
> @@ -241,60 +244,23 @@ fs_visitor::calculate_live_intervals()
>      for (int b = 0; b < cfg.num_blocks; b++) {
>         for (int i = 0; i < num_vars; i++) {
>   	 if (BITSET_TEST(livevars.bd[b].livein, i)) {
> -	    def[i] = MIN2(def[i], cfg.blocks[b]->start_ip);
> -	    use[i] = MAX2(use[i], cfg.blocks[b]->start_ip);
> +	    start[i] = MIN2(start[i], cfg.blocks[b]->start_ip);
> +	    end[i] = MAX2(end[i], cfg.blocks[b]->start_ip);
>   	 }
>
>   	 if (BITSET_TEST(livevars.bd[b].liveout, i)) {
> -	    def[i] = MIN2(def[i], cfg.blocks[b]->end_ip);
> -	    use[i] = MAX2(use[i], cfg.blocks[b]->end_ip);
> +	    start[i] = MIN2(start[i], cfg.blocks[b]->end_ip);
> +	    end[i] = MAX2(end[i], cfg.blocks[b]->end_ip);
>   	 }
>         }
>      }
>
>      this->live_intervals_valid = true;
> -
> -   /* Note in the non-control-flow code above, that we only take def[] as the
> -    * first store, and use[] as the last use.  We use this in dead code
> -    * elimination, to determine when a store never gets used.  However, we
> -    * also use these arrays to answer the virtual_grf_interferes() question
> -    * (live interval analysis), which is used for register coalescing and
> -    * register allocation.
> -    *
> -    * So, there's a conflict over what the array should mean: if use[]
> -    * considers a def after the last use, then the dead code elimination pass
> -    * never does anything (and it's an important pass!).  But if we don't
> -    * include dead code, then virtual_grf_interferes() lies and we'll do
> -    * horrible things like coalesce the register that is dead-code-written
> -    * into another register that was live across the dead write (causing the
> -    * use of the second register to take the dead write's source value instead
> -    * of the coalesced MOV's source value).
> -    *
> -    * To resolve the conflict, immediately after calculating live intervals,
> -    * detect dead code, nuke it, and if we changed anything, calculate again
> -    * before returning to the caller.  Now we happen to produce def[] and
> -    * use[] arrays that will work for virtual_grf_interferes().
> -    */
> -   if (dead_code_eliminate())
> -      calculate_live_intervals();
>   }
>
>   bool
>   fs_visitor::virtual_grf_interferes(int a, int b)
>   {
> -   int a_def = this->virtual_grf_def[a], a_use = this->virtual_grf_use[a];
> -   int b_def = this->virtual_grf_def[b], b_use = this->virtual_grf_use[b];
> -
> -   /* If there's dead code (def but not use), it would break our test
> -    * unless we consider it used.
> -    */
> -   if ((a_use == -1 && a_def != MAX_INSTRUCTION) ||
> -       (b_use == -1 && b_def != MAX_INSTRUCTION)) {
> -      return true;
> -   }
> -
> -   int start = MAX2(a_def, b_def);
> -   int end = MIN2(a_use, b_use);
> -
> -   return start < end;
> +   return !(virtual_grf_end[a] <= virtual_grf_start[b] ||
> +            virtual_grf_end[b] <= virtual_grf_start[a]);
>   }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> index b9b0303..3b2197f 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> @@ -316,8 +316,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
>             * in order to not have to worry about the uniform issue described in
>             * calculate_live_intervals().
>             */
> -         if (this->virtual_grf_def[j] <= payload_last_use_ip[i] ||
> -             this->virtual_grf_use[j] <= payload_last_use_ip[i]) {
> +         if (this->virtual_grf_start[j] <= payload_last_use_ip[i]) {
>               ra_add_node_interference(g, first_payload_node + i, j);
>            }
>         }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 55ae689..a61363c 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -2408,8 +2408,8 @@ fs_visitor::fs_visitor(struct brw_context *brw,
>      this->virtual_grf_sizes = NULL;
>      this->virtual_grf_count = 0;
>      this->virtual_grf_array_size = 0;
> -   this->virtual_grf_def = NULL;
> -   this->virtual_grf_use = NULL;
> +   this->virtual_grf_start = NULL;
> +   this->virtual_grf_end = NULL;
>      this->live_intervals_valid = false;
>
>      this->force_uncompressed_stack = 0;
>



More information about the mesa-dev mailing list