[Mesa-dev] [PATCH 2/2] i965/vec4: Rewrite dead code elimination to use live in/out.

Kenneth Graunke kenneth at whitecape.org
Sat Nov 8 22:18:12 PST 2014


On Monday, November 03, 2014 01:34:49 PM Matt Turner wrote:
> Improves 359 shaders by >=10%
>          114 shaders by >=20%
>           91 shaders by >=30%
>           82 shaders by >=40%
>           22 shaders by >=50%
>            4 shaders by >=60%
>            2 shaders by >=80%
> 
> total instructions in shared programs: 5505182 -> 5482260 (-0.42%)
> instructions in affected programs:     364629 -> 341707 (-6.29%)
> ---
>  src/mesa/drivers/dri/i965/Makefile.sources         |   1 +
>  src/mesa/drivers/dri/i965/brw_vec4.cpp             | 155 -------------------
>  .../dri/i965/brw_vec4_dead_code_eliminate.cpp      | 169 +++++++++++++++++++++
>  3 files changed, 170 insertions(+), 155 deletions(-)
>  create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
> 
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
> index 711aabe..10be4f1 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -102,6 +102,7 @@ i965_FILES = \
>  	brw_vec4.cpp \
>  	brw_vec4_copy_propagation.cpp \
>  	brw_vec4_cse.cpp \
> +	brw_vec4_dead_code_eliminate.cpp \
>  	brw_vec4_generator.cpp \
>  	brw_vec4_gs_visitor.cpp \
>  	brw_vec4_live_variables.cpp \
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index df589b8..6560351 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -411,161 +411,6 @@ vec4_visitor::opt_reduce_swizzle()
>     return progress;
>  }
>  
> -static bool
> -try_eliminate_instruction(vec4_instruction *inst, int new_writemask,
> -                          const struct brw_context *brw)
> -{
> -   if (inst->has_side_effects())
> -      return false;
> -
> -   if (new_writemask == 0) {
> -      /* Don't dead code eliminate instructions that write to the
> -       * accumulator as a side-effect. Instead just set the destination
> -       * to the null register to free it.
> -       */
> -      if (inst->writes_accumulator || inst->writes_flag()) {
> -         inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
> -      } else {
> -         inst->opcode = BRW_OPCODE_NOP;
> -      }
> -
> -      return true;
> -   } else if (inst->dst.writemask != new_writemask) {
> -      switch (inst->opcode) {
> -      case SHADER_OPCODE_TXF_CMS:
> -      case SHADER_OPCODE_GEN4_SCRATCH_READ:
> -      case VS_OPCODE_PULL_CONSTANT_LOAD:
> -      case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
> -         break;
> -      default:
> -         /* Do not set a writemask on Gen6 for math instructions, those are
> -          * executed using align1 mode that does not support a destination mask.
> -          */
> -         if (!(brw->gen == 6 && inst->is_math()) && !inst->is_tex()) {
> -            inst->dst.writemask = new_writemask;
> -            return true;
> -         }
> -      }
> -   }
> -
> -   return false;
> -}
> -
> -/**
> - * Must be called after calculate_live_intervals() to remove unused
> - * writes to registers -- register allocation will fail otherwise
> - * because something deffed but not used won't be considered to
> - * interfere with other regs.
> - */
> -bool
> -vec4_visitor::dead_code_eliminate()
> -{
> -   bool progress = false;
> -   int pc = -1;
> -
> -   calculate_live_intervals();
> -
> -   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
> -      pc++;
> -
> -      bool inst_writes_flag = false;
> -      if (inst->dst.file != GRF) {
> -         if (inst->dst.is_null() && inst->writes_flag()) {
> -            inst_writes_flag = true;
> -         } else {
> -            continue;
> -         }
> -      }
> -
> -      if (inst->dst.file == GRF) {
> -         int write_mask = inst->dst.writemask;
> -
> -         for (int c = 0; c < 4; c++) {
> -            if (write_mask & (1 << c)) {
> -               assert(this->virtual_grf_end[inst->dst.reg * 4 + c] >= pc);
> -               if (this->virtual_grf_end[inst->dst.reg * 4 + c] == pc) {
> -                  write_mask &= ~(1 << c);
> -               }
> -            }
> -         }
> -
> -         progress = try_eliminate_instruction(inst, write_mask, brw) ||
> -                    progress;
> -      }
> -
> -      if (inst->predicate || inst->prev == NULL)
> -         continue;
> -
> -      int dead_channels;
> -      if (inst_writes_flag) {
> -/* Arbitrarily chosen, other than not being an xyzw writemask. */
> -#define FLAG_WRITEMASK (1 << 5)
> -         dead_channels = inst->reads_flag() ? 0 : FLAG_WRITEMASK;
> -      } else {
> -         dead_channels = inst->dst.writemask;
> -
> -         for (int i = 0; i < 3; i++) {
> -            if (inst->src[i].file != GRF ||
> -                inst->src[i].reg != inst->dst.reg)
> -                  continue;
> -
> -            for (int j = 0; j < 4; j++) {
> -               int swiz = BRW_GET_SWZ(inst->src[i].swizzle, j);
> -               dead_channels &= ~(1 << swiz);
> -            }
> -         }
> -      }
> -
> -      foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst,
> -                                                  inst, block) {
> -         if (dead_channels == 0)
> -            break;
> -
> -         if (inst_writes_flag) {
> -            if (scan_inst->dst.is_null() && scan_inst->writes_flag()) {
> -               scan_inst->opcode = BRW_OPCODE_NOP;
> -               progress = true;
> -               continue;
> -            } else if (scan_inst->reads_flag()) {
> -               break;
> -            }
> -         }
> -
> -         if (inst->dst.file == scan_inst->dst.file &&
> -             inst->dst.reg == scan_inst->dst.reg &&
> -             inst->dst.reg_offset == scan_inst->dst.reg_offset) {
> -            int new_writemask = scan_inst->dst.writemask & ~dead_channels;
> -
> -            progress = try_eliminate_instruction(scan_inst, new_writemask, brw) ||
> -                       progress;
> -         }
> -
> -         for (int i = 0; i < 3; i++) {
> -            if (scan_inst->src[i].file != inst->dst.file ||
> -                scan_inst->src[i].reg != inst->dst.reg)
> -               continue;
> -
> -            for (int j = 0; j < 4; j++) {
> -               int swiz = BRW_GET_SWZ(scan_inst->src[i].swizzle, j);
> -               dead_channels &= ~(1 << swiz);
> -            }
> -         }
> -      }
> -   }
> -
> -   if (progress) {
> -      foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) {
> -         if (inst->opcode == BRW_OPCODE_NOP) {
> -            inst->remove(block);
> -         }
> -      }
> -
> -      invalidate_live_intervals();
> -   }
> -
> -   return progress;
> -}
> -
>  void
>  vec4_visitor::split_uniform_registers()
>  {
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
> new file mode 100644
> index 0000000..b8370ba
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
> @@ -0,0 +1,169 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "brw_vec4.h"
> +#include "brw_vec4_live_variables.h"
> +#include "brw_cfg.h"
> +
> +/** @file brw_vec4_dead_code_eliminate.cpp
> + *
> + * Dataflow-aware dead code elimination.
> + *
> + * Walks the instruction list from the bottom, removing instructions that
> + * have results that both aren't used in later blocks and haven't been read
> + * yet in the tail end of this block.
> + */
> +
> +using namespace brw;
> +
> +static bool
> +can_do_writemask(const struct brw_context *brw,
> +                 const vec4_instruction *inst)
> +{
> +   switch (inst->opcode) {
> +   case SHADER_OPCODE_GEN4_SCRATCH_READ:
> +   case VS_OPCODE_PULL_CONSTANT_LOAD:
> +   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
> +      return false;
> +   default:
> +      /* The MATH instruction on Gen6 only executes in align1 mode, which does
> +       * not support writemasking.
> +       */
> +      if (brw->gen == 6 && inst->is_math())
> +         return false;
> +
> +      if (inst->is_tex())
> +         return false;

I'd feel a lot more confident in this function if it were:

{
   /* The MATH instruction on Gen6 only executes in align1 mode, which does
    * not support writemasking.
    */
   if (brw->gen == 6 && inst->is_math())
      return false;

   return inst->mlen == 0;
}

> +
> +      return true;
> +   }
> +}
> +
> +bool
> +vec4_visitor::dead_code_eliminate()
> +{
> +   bool progress = false;
> +
> +   calculate_live_intervals();
> +
> +   int num_vars = live_intervals->num_vars;
> +   BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
> +   BITSET_WORD *flag_live = ralloc_array(NULL, BITSET_WORD, 1);
> +
> +   foreach_block(block, cfg) {
> +      memcpy(live, live_intervals->block_data[block->num].liveout,
> +             sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
> +      memcpy(flag_live, live_intervals->block_data[block->num].flag_liveout,
> +             sizeof(BITSET_WORD));
> +
> +      foreach_inst_in_block_reverse(vec4_instruction, inst, block) {
> +         if (inst->dst.file == GRF && !inst->has_side_effects()) {
> +            bool result_live[4] = { false };
> +
> +            for (int c = 0; c < 4; c++) {
> +               int var = inst->dst.reg * 4 + c;
> +               result_live[c] = BITSET_TEST(live, var);
> +            }
> +
> +            /* If the instruction can't do writemasking, then it's all or
> +             * nothing.
> +             */
> +            if (!can_do_writemask(brw, inst)) {
> +               bool result = result_live[0] | result_live[1] |
> +                             result_live[2] | result_live[3];

A little strange to be doing bitwise-or on bools, but...it's not wrong...

> +               result_live[0] = result;
> +               result_live[1] = result;
> +               result_live[2] = result;
> +               result_live[3] = result;
> +            }
> +
> +            for (int c = 0; c < 4; c++) {
> +               if (!result_live[c] && inst->dst.writemask & (1 << c)) {
> +                  inst->dst.writemask &= ~(1 << c);
> +                  progress = true;
> +
> +                  if (inst->dst.writemask == 0) {
> +                     if (inst->writes_accumulator) {

Same comment I had about the FS pass.  I think you want:

   if (inst->writes_accumulator || inst->writes_flag())

Otherwise, we stop generating the flag, which may still be necessary (we haven't checked).

With can_do_writemask changed, and this flag bug fixed, the patch would be:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

Great work as always.

> +                        inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
> +                     } else {
> +                        inst->opcode = BRW_OPCODE_NOP;
> +                        continue;
> +                     }
> +                  }
> +               }
> +            }
> +         }
> +
> +         if (inst->dst.is_null() && inst->writes_flag()) {
> +            if (!BITSET_TEST(flag_live, 0)) {
> +               inst->opcode = BRW_OPCODE_NOP;
> +               progress = true;
> +               continue;
> +            }
> +         }
> +
> +         if (inst->dst.file == GRF && !inst->predicate) {
> +            for (int c = 0; c < 4; c++) {
> +               if (inst->dst.writemask & (1 << c)) {
> +                  int var = inst->dst.reg * 4 + c;
> +                  BITSET_CLEAR(live, var);
> +               }
> +            }
> +         }
> +
> +         if (inst->writes_flag()) {
> +            BITSET_CLEAR(flag_live, 0);
> +         }
> +
> +         for (int i = 0; i < 3; i++) {
> +            if (inst->src[i].file == GRF) {
> +               for (int c = 0; c < 4; c++) {
> +                  int swiz = BRW_GET_SWZ(inst->src[i].swizzle, c);
> +                  int var = inst->src[i].reg * 4 + swiz;
> +
> +                  BITSET_SET(live, var);
> +               }
> +            }
> +         }
> +
> +         if (inst->reads_flag()) {
> +            BITSET_SET(flag_live, 0);
> +         }
> +      }
> +   }
> +
> +   ralloc_free(live);
> +   ralloc_free(flag_live);
> +
> +   if (progress) {
> +      foreach_block_and_inst_safe(block, backend_instruction, inst, cfg) {
> +         if (inst->opcode == BRW_OPCODE_NOP) {
> +            inst->remove(block);
> +         }
> +      }
> +
> +      invalidate_live_intervals();
> +   }
> +
> +   return progress;
> +}
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20141108/6c32a1c1/attachment-0001.sig>


More information about the mesa-dev mailing list