[Mesa-dev] [PATCH 09/10] i965/fs: Move constant propagation to the same codebase as copy prop.

Kenneth Graunke kenneth at whitecape.org
Sun Oct 7 18:52:25 PDT 2012


On 09/22/2012 02:04 PM, Eric Anholt wrote:
> This means that we don't get constant prop across into the first block after a
> BRW_OPCODE_IF or a BRW_OPCODE_DO, but we have hope for properly doing it
> across control flow at some point.  More importantly, it avoids the O(n^2)
> with instruction count runtime for shaders that have many constant moves.
> ---
>   src/mesa/drivers/dri/i965/brw_fs.cpp               |  163 --------------------
>   src/mesa/drivers/dri/i965/brw_fs.h                 |    2 +-
>   .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |  124 ++++++++++++++-
>   3 files changed, 124 insertions(+), 165 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 2701413..0545a74 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1249,168 +1249,6 @@ fs_visitor::setup_pull_constants()
>      c->prog_data.nr_pull_params = pull_uniform_count;
>   }
>
> -/**
> - * Attempts to move immediate constants into the immediate
> - * constant slot of following instructions.
> - *
> - * Immediate constants are a bit tricky -- they have to be in the last
> - * operand slot, you can't do abs/negate on them,
> - */
> -
> -bool
> -fs_visitor::propagate_constants()
> -{
> -   bool progress = false;
> -
> -   calculate_live_intervals();
> -
> -   foreach_list(node, &this->instructions) {
> -      fs_inst *inst = (fs_inst *)node;
> -
> -      if (inst->opcode != BRW_OPCODE_MOV ||
> -	  inst->predicated ||
> -	  inst->dst.file != GRF || inst->src[0].file != IMM ||
> -	  inst->dst.type != inst->src[0].type ||
> -	  (c->dispatch_width == 16 &&
> -	   (inst->force_uncompressed || inst->force_sechalf)))
> -	 continue;
> -
> -      /* Don't bother with cases where we should have had the
> -       * operation on the constant folded in GLSL already.
> -       */
> -      if (inst->saturate)
> -	 continue;
> -
> -      /* Found a move of a constant to a GRF.  Find anything else using the GRF
> -       * before it's written, and replace it with the constant if we can.
> -       */
> -      for (fs_inst *scan_inst = (fs_inst *)inst->next;
> -	   !scan_inst->is_tail_sentinel();
> -	   scan_inst = (fs_inst *)scan_inst->next) {
> -	 if (scan_inst->opcode == BRW_OPCODE_DO ||
> -	     scan_inst->opcode == BRW_OPCODE_WHILE ||
> -	     scan_inst->opcode == BRW_OPCODE_ELSE ||
> -	     scan_inst->opcode == BRW_OPCODE_ENDIF) {
> -	    break;
> -	 }
> -
> -	 for (int i = 2; i >= 0; i--) {
> -	    if (scan_inst->src[i].file != GRF ||
> -		scan_inst->src[i].reg != inst->dst.reg ||
> -		scan_inst->src[i].reg_offset != inst->dst.reg_offset)
> -	       continue;
> -
> -	    /* Don't bother with cases where we should have had the
> -	     * operation on the constant folded in GLSL already.
> -	     */
> -	    if (scan_inst->src[i].negate || scan_inst->src[i].abs)
> -	       continue;
> -
> -	    switch (scan_inst->opcode) {
> -	    case BRW_OPCODE_MOV:
> -	       scan_inst->src[i] = inst->src[0];
> -	       progress = true;
> -	       break;
> -
> -	    case BRW_OPCODE_MUL:
> -	    case BRW_OPCODE_ADD:
> -	       if (i == 1) {
> -		  scan_inst->src[i] = inst->src[0];
> -		  progress = true;
> -	       } else if (i == 0 && scan_inst->src[1].file != IMM) {
> -		  /* Fit this constant in by commuting the operands.
> -		   * Exception: we can't do this for 32-bit integer MUL
> -		   * because it's asymmetric.
> -		   */
> -		  if (scan_inst->opcode == BRW_OPCODE_MUL &&
> -		      (scan_inst->src[1].type == BRW_REGISTER_TYPE_D ||
> -		       scan_inst->src[1].type == BRW_REGISTER_TYPE_UD))
> -		     break;
> -		  scan_inst->src[0] = scan_inst->src[1];
> -		  scan_inst->src[1] = inst->src[0];
> -		  progress = true;
> -	       }
> -	       break;
> -
> -	    case BRW_OPCODE_CMP:
> -	    case BRW_OPCODE_IF:
> -	       if (i == 1) {
> -		  scan_inst->src[i] = inst->src[0];
> -		  progress = true;
> -	       } else if (i == 0 && scan_inst->src[1].file != IMM) {
> -		  uint32_t new_cmod;
> -
> -		  new_cmod = brw_swap_cmod(scan_inst->conditional_mod);
> -		  if (new_cmod != ~0u) {
> -		     /* Fit this constant in by swapping the operands and
> -		      * flipping the test
> -		      */
> -		     scan_inst->src[0] = scan_inst->src[1];
> -		     scan_inst->src[1] = inst->src[0];
> -		     scan_inst->conditional_mod = new_cmod;
> -		     progress = true;
> -		  }
> -	       }
> -	       break;
> -
> -	    case BRW_OPCODE_SEL:
> -	       if (i == 1) {
> -		  scan_inst->src[i] = inst->src[0];
> -		  progress = true;
> -	       } else if (i == 0 && scan_inst->src[1].file != IMM) {
> -		  scan_inst->src[0] = scan_inst->src[1];
> -		  scan_inst->src[1] = inst->src[0];
> -
> -		  /* If this was predicated, flipping operands means
> -		   * we also need to flip the predicate.
> -		   */
> -		  if (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) {
> -		     scan_inst->predicate_inverse =
> -			!scan_inst->predicate_inverse;
> -		  }
> -		  progress = true;
> -	       }
> -	       break;
> -
> -	    case SHADER_OPCODE_RCP:
> -	       /* The hardware doesn't do math on immediate values
> -		* (because why are you doing that, seriously?), but
> -		* the correct answer is to just constant fold it
> -		* anyway.
> -		*/
> -	       assert(i == 0);
> -	       if (inst->src[0].imm.f != 0.0f) {
> -		  scan_inst->opcode = BRW_OPCODE_MOV;
> -		  scan_inst->src[0] = inst->src[0];
> -		  scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
> -		  progress = true;
> -	       }
> -	       break;
> -
> -            case FS_OPCODE_PULL_CONSTANT_LOAD:
> -	       scan_inst->src[i] = inst->src[0];
> -	       progress = true;
> -	       break;
> -
> -	    default:
> -	       break;
> -	    }
> -	 }
> -
> -	 if (scan_inst->dst.file == GRF &&
> -             scan_inst->overwrites_reg(inst->dst)) {
> -	    break;
> -	 }
> -      }
> -   }
> -
> -   if (progress)
> -       this->live_intervals_valid = false;
> -
> -   return progress;
> -}
> -
> -
>   bool
>   fs_visitor::opt_algebraic()
>   {
> @@ -2025,7 +1863,6 @@ fs_visitor::run()
>
>   	 progress = remove_duplicate_mrf_writes() || progress;
>
> -	 progress = propagate_constants() || progress;
>   	 progress = opt_algebraic() || progress;
>   	 progress = opt_cse() || progress;
>   	 progress = opt_copy_propagate() || progress;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 9fbb8e5..59a0e50 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -240,12 +240,12 @@ public:
>      void split_virtual_grfs();
>      void setup_pull_constants();
>      void calculate_live_intervals();
> -   bool propagate_constants();
>      bool opt_algebraic();
>      bool opt_cse();
>      bool opt_cse_local(fs_bblock *block, exec_list *aeb);
>      bool opt_copy_propagate();
>      bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
> +   bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
>      bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
>   				 exec_list *acp);
>      bool register_coalesce();
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> index 1870f43..ad34657 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> @@ -34,6 +34,9 @@ struct acp_entry : public exec_node {
>   bool
>   fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
>   {
> +   if (entry->src.file == IMM)
> +      return false;
> +
>      if (inst->src[arg].file != entry->dst.file ||
>          inst->src[arg].reg != entry->dst.reg ||
>          inst->src[arg].reg_offset != entry->dst.reg_offset) {
> @@ -64,6 +67,121 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
>      return true;
>   }
>
> +
> +bool
> +fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
> +{
> +   bool progress = false;
> +
> +   if (entry->src.file != IMM)
> +      return false;
> +
> +   for (int i = 2; i >= 0; i--) {
> +      if (inst->src[i].file != entry->dst.file ||
> +          inst->src[i].reg != entry->dst.reg ||
> +          inst->src[i].reg_offset != entry->dst.reg_offset)
> +         continue;
> +
> +      /* Don't bother with cases where we should have had the
> +       * operation on the constant folded in GLSL already.
 > +       */

Your English doesn't quite flow here.  Also, I don't like the 
overloading of "GLSL"...it's a language defined by Khronos.  (Our 
compiler and IR really could use names...)

What about:

       /* Don't bother with cases which should have been taken care of
        * by the GLSL compiler's constant folding pass.
        */

Tiny nits aside, I like this.  It makes sense to share code here...it's 
very straightforward, and ought to help us out in the future.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

> +      if (inst->src[i].negate || inst->src[i].abs)
> +         continue;
> +
> +      switch (inst->opcode) {
> +      case BRW_OPCODE_MOV:
> +         inst->src[i] = entry->src;
> +         progress = true;
> +         break;
> +
> +      case BRW_OPCODE_MUL:
> +      case BRW_OPCODE_ADD:
> +         if (i == 1) {
> +            inst->src[i] = entry->src;
> +            progress = true;
> +         } else if (i == 0 && inst->src[1].file != IMM) {
> +            /* Fit this constant in by commuting the operands.
> +             * Exception: we can't do this for 32-bit integer MUL
> +             * because it's asymmetric.
> +             */
> +            if (inst->opcode == BRW_OPCODE_MUL &&
> +                (inst->src[1].type == BRW_REGISTER_TYPE_D ||
> +                 inst->src[1].type == BRW_REGISTER_TYPE_UD))
> +               break;
> +            inst->src[0] = inst->src[1];
> +            inst->src[1] = entry->src;
> +            progress = true;
> +         }
> +         break;
> +
> +      case BRW_OPCODE_CMP:
> +      case BRW_OPCODE_IF:
> +         if (i == 1) {
> +            inst->src[i] = entry->src;
> +            progress = true;
> +         } else if (i == 0 && inst->src[1].file != IMM) {
> +            uint32_t new_cmod;
> +
> +            new_cmod = brw_swap_cmod(inst->conditional_mod);
> +            if (new_cmod != ~0u) {
> +               /* Fit this constant in by swapping the operands and
> +                * flipping the test
> +                */
> +               inst->src[0] = inst->src[1];
> +               inst->src[1] = entry->src;
> +               inst->conditional_mod = new_cmod;
> +               progress = true;
> +            }
> +         }
> +         break;
> +
> +      case BRW_OPCODE_SEL:
> +         if (i == 1) {
> +            inst->src[i] = entry->src;
> +            progress = true;
> +         } else if (i == 0 && inst->src[1].file != IMM) {
> +            inst->src[0] = inst->src[1];
> +            inst->src[1] = entry->src;
> +
> +            /* If this was predicated, flipping operands means
> +             * we also need to flip the predicate.
> +             */
> +            if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
> +               inst->predicate_inverse =
> +                  !inst->predicate_inverse;
> +            }
> +            progress = true;
> +         }
> +         break;
> +
> +      case SHADER_OPCODE_RCP:
> +         /* The hardware doesn't do math on immediate values
> +          * (because why are you doing that, seriously?), but
> +          * the correct answer is to just constant fold it
> +          * anyway.
> +          */
> +         assert(i == 0);
> +         if (inst->src[0].imm.f != 0.0f) {
> +            inst->opcode = BRW_OPCODE_MOV;
> +            inst->src[0] = entry->src;
> +            inst->src[0].imm.f = 1.0f / inst->src[0].imm.f;
> +            progress = true;
> +         }
> +         break;
> +
> +      case FS_OPCODE_PULL_CONSTANT_LOAD:
> +         inst->src[i] = entry->src;
> +         progress = true;
> +         break;
> +
> +      default:
> +         break;
> +      }
> +   }
> +
> +   return progress;
> +}
> +
>   /** @file brw_fs_copy_propagation.cpp
>    *
>    * Support for local copy propagation by walking the list of instructions
> @@ -90,6 +208,9 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
>         foreach_list(entry_node, acp) {
>   	 acp_entry *entry = (acp_entry *)entry_node;
>
> +         if (try_constant_propagate(inst, entry))
> +            progress = true;
> +
>   	 for (int i = 0; i < 3; i++) {
>   	    if (try_copy_propagate(inst, i, entry))
>   	       progress = true;
> @@ -114,7 +235,8 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
>   	  ((inst->src[0].file == GRF &&
>   	    (inst->src[0].reg != inst->dst.reg ||
>   	     inst->src[0].reg_offset != inst->dst.reg_offset)) ||
> -	   inst->src[0].file == UNIFORM) &&
> +           inst->src[0].file == UNIFORM ||
> +           inst->src[0].file == IMM) &&
>   	  inst->src[0].type == inst->dst.type &&
>   	  !inst->saturate &&
>   	  !inst->predicated &&
>



More information about the mesa-dev mailing list