[Mesa-dev] [PATCH 13/20] i965: Use basic-block aware insertion/removal functions.

Pohjolainen, Topi topi.pohjolainen at intel.com
Thu Aug 7 07:55:46 PDT 2014


On Thu, Jul 24, 2014 at 07:54:20PM -0700, Matt Turner wrote:
> To avoid invalidating and recreating the control flow graph. Also stop
> invalidating the CFG in places we didn't add or remove an instruction.
> 
> cfg calculations:     202951 -> 80307 (-60.43%)
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp               | 45 +++++++++++++---------
>  .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |  2 +-
>  src/mesa/drivers/dri/i965/brw_fs_cse.cpp           |  8 ++--
>  .../dri/i965/brw_fs_dead_code_eliminate.cpp        |  6 +--
>  .../drivers/dri/i965/brw_fs_register_coalesce.cpp  |  7 ++--
>  .../dri/i965/brw_fs_saturate_propagation.cpp       |  2 +-
>  src/mesa/drivers/dri/i965/brw_vec4.cpp             | 10 ++---
>  .../drivers/dri/i965/brw_vec4_copy_propagation.cpp |  2 +-
>  src/mesa/drivers/dri/i965/brw_vec4_cse.cpp         |  8 ++--
>  9 files changed, 50 insertions(+), 40 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 59d46e8..30fc137 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -46,6 +46,7 @@ extern "C" {
>  #include "brw_wm.h"
>  }
>  #include "brw_fs.h"
> +#include "brw_cfg.h"
>  #include "brw_dead_control_flow.h"
>  #include "main/uniforms.h"
>  #include "brw_fs_live_variables.h"
> @@ -1695,7 +1696,7 @@ fs_visitor::split_virtual_grfs()
>  	 }
>        }
>     }
> -   invalidate_live_intervals();
> +   invalidate_live_intervals(false);
>  }
>  
>  /**
> @@ -1733,7 +1734,7 @@ fs_visitor::compact_virtual_grfs()
>        if (remap_table[i] != -1) {
>           remap_table[i] = new_index;
>           virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
> -         invalidate_live_intervals();
> +         invalidate_live_intervals(false);
>           ++new_index;
>        }
>     }
> @@ -1902,7 +1903,9 @@ fs_visitor::assign_constant_locations()
>  void
>  fs_visitor::demote_pull_constants()
>  {
> -   foreach_in_list(fs_inst, inst, &instructions) {
> +   calculate_cfg();
> +
> +   foreach_block_and_inst (block, fs_inst, inst, cfg) {
>        for (int i = 0; i < inst->sources; i++) {
>  	 if (inst->src[i].file != UNIFORM)
>  	    continue;
> @@ -1925,14 +1928,14 @@ fs_visitor::demote_pull_constants()
>                                                          surf_index,
>                                                          *inst->src[i].reladdr,
>                                                          pull_index);
> -            inst->insert_before(&list);
> +            inst->insert_before(block, &list);
>              inst->src[i].reladdr = NULL;
>           } else {
>              fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
>              fs_inst *pull =
>                 new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
>                                      dst, surf_index, offset);
> -            inst->insert_before(pull);
> +            inst->insert_before(block, pull);
>              inst->src[i].set_smear(pull_index & 3);
>           }
>  
> @@ -1942,7 +1945,7 @@ fs_visitor::demote_pull_constants()
>           inst->src[i].reg_offset = 0;
>        }
>     }
> -   invalidate_live_intervals();
> +   invalidate_live_intervals(false);
>  }
>  
>  bool
> @@ -2232,7 +2235,9 @@ fs_visitor::remove_duplicate_mrf_writes()
>  
>     memset(last_mrf_move, 0, sizeof(last_mrf_move));
>  
> -   foreach_in_list_safe(fs_inst, inst, &instructions) {
> +   calculate_cfg();
> +
> +   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
>        if (inst->is_control_flow()) {
>  	 memset(last_mrf_move, 0, sizeof(last_mrf_move));
>        }
> @@ -2241,7 +2246,7 @@ fs_visitor::remove_duplicate_mrf_writes()
>  	  inst->dst.file == MRF) {
>  	 fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
>  	 if (prev_inst && inst->equals(prev_inst)) {
> -	    inst->remove();
> +	    inst->remove(block);
>  	    progress = true;
>  	    continue;
>  	 }
> @@ -2280,7 +2285,7 @@ fs_visitor::remove_duplicate_mrf_writes()
>     }
>  
>     if (progress)
> -      invalidate_live_intervals();
> +      invalidate_live_intervals(false);
>  
>     return progress;
>  }
> @@ -2515,7 +2520,9 @@ fs_visitor::insert_gen4_send_dependency_workarounds()
>  void
>  fs_visitor::lower_uniform_pull_constant_loads()
>  {
> -   foreach_in_list(fs_inst, inst, &instructions) {
> +   calculate_cfg();
> +
> +   foreach_block_and_inst (block, fs_inst, inst, cfg) {
>        if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
>           continue;
>  
> @@ -2540,7 +2547,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
>  
>           setup->ir = inst->ir;
>           setup->annotation = inst->annotation;
> -         inst->insert_before(setup);
> +         inst->insert_before(block, setup);
>  
>           /* Similarly, this will only populate the first 4 channels of the
>            * result register (since we only use smear values from 0-3), but we
> @@ -2549,7 +2556,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
>           inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7;
>           inst->src[1] = payload;
>  
> -         invalidate_live_intervals();
> +         invalidate_live_intervals(false);
>        } else {
>           /* Before register allocation, we didn't tell the scheduler about the
>            * MRF we use.  We know it's safe to use this MRF because nothing
> @@ -2567,28 +2574,30 @@ fs_visitor::lower_load_payload()
>  {
>     bool progress = false;
>  
> -   foreach_in_list_safe(fs_inst, inst, &instructions) {
> +   calculate_cfg();
> +
> +   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
>        if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
>           fs_reg dst = inst->dst;
>  
>           /* src[0] represents the (optional) message header. */
>           if (inst->src[0].file != BAD_FILE) {
> -            inst->insert_before(MOV(dst, inst->src[0]));
> +            inst->insert_before(block, MOV(dst, inst->src[0]));
>           }
>           dst.reg_offset++;
>  
>           for (int i = 1; i < inst->sources; i++) {
> -            inst->insert_before(MOV(dst, inst->src[i]));
> +            inst->insert_before(block, MOV(dst, inst->src[i]));
>              dst.reg_offset++;
>           }
>  
> -         inst->remove();
> +         inst->remove(block);
>           progress = true;
>        }
>     }
>  
>     if (progress)
> -      invalidate_live_intervals();
> +      invalidate_live_intervals(false);
>  
>     return progress;
>  }
> @@ -2930,7 +2939,7 @@ fs_visitor::assign_binding_table_offsets()
>  void
>  fs_visitor::calculate_register_pressure()
>  {
> -   invalidate_live_intervals();
> +   invalidate_live_intervals(false);
>     calculate_live_intervals();

With a quick glance this looked a little odd, we don't invalidate but
calculate nevertheless. But we actually do invalidate, the intervals, but
not the cfg. One just needs to remember what the argument actually stands for.

>  
>     unsigned num_instructions = instructions.length();
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> index 587e231..bfc3794 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> @@ -628,7 +628,7 @@ fs_visitor::opt_copy_propagate()
>     ralloc_free(copy_prop_ctx);
>  
>     if (progress)
> -      invalidate_live_intervals();
> +      invalidate_live_intervals(false);
>  
>     return progress;
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> index 6ec209e..e63e0dc 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> @@ -223,7 +223,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
>                    copy->force_writemask_all =
>                       entry->generator->force_writemask_all;
>                 }
> -               entry->generator->insert_after(copy);
> +               entry->generator->insert_after(block, copy);
>              }
>  
>              /* dest <- temp */
> @@ -245,7 +245,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
>                    copy = MOV(dst, tmp);
>                    copy->force_writemask_all = inst->force_writemask_all;
>                 }
> -               inst->insert_before(copy);
> +               inst->insert_before(block, copy);
>              }
>  
>              /* Set our iterator so that next time through the loop inst->next
> @@ -254,7 +254,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
>               */
>              fs_inst *prev = (fs_inst *)inst->prev;
>  
> -            inst->remove();
> +            inst->remove(block);
>  
>              /* Appending an instruction may have changed our bblock end. */
>              if (inst == block->end) {
> @@ -322,7 +322,7 @@ fs_visitor::opt_cse()
>     }
>  
>     if (progress)
> -      invalidate_live_intervals();
> +      invalidate_live_intervals(false);
>  
>     return progress;
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
> index 2506b46..697b44a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
> @@ -100,13 +100,13 @@ fs_visitor::dead_code_eliminate()
>     ralloc_free(live);
>  
>     if (progress) {
> -      foreach_in_list_safe(fs_inst, inst, &instructions) {
> +      foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) {
>           if (inst->opcode == BRW_OPCODE_NOP) {
> -            inst->remove();
> +            inst->remove(block);
>           }
>        }
>  
> -      invalidate_live_intervals();
> +      invalidate_live_intervals(false);
>     }
>  
>     return progress;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
> index 3e3aeca..c1b785b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
> @@ -41,6 +41,7 @@
>   */
>  
>  #include "brw_fs.h"
> +#include "brw_cfg.h"
>  #include "brw_fs_live_variables.h"
>  
>  static bool
> @@ -273,13 +274,13 @@ fs_visitor::register_coalesce()
>     }
>  
>     if (progress) {
> -      foreach_in_list_safe(fs_inst, inst, &instructions) {
> +      foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) {
>           if (inst->opcode == BRW_OPCODE_NOP) {
> -            inst->remove();
> +            inst->remove(block);
>           }
>        }
>  
> -      invalidate_live_intervals();
> +      invalidate_live_intervals(false);
>     }
>  
>     return progress;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
> index d65b2f1..3412ce0 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
> @@ -100,7 +100,7 @@ fs_visitor::opt_saturate_propagation()
>     }
>  
>     if (progress)
> -      invalidate_live_intervals();
> +      invalidate_live_intervals(false);
>  
>     return progress;
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 5d276eb..99bc1ba 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -633,7 +633,7 @@ vec4_visitor::opt_algebraic()
>     }
>  
>     if (progress)
> -      invalidate_live_intervals();
> +      invalidate_live_intervals(false);
>  
>     return progress;
>  }
> @@ -926,7 +926,7 @@ vec4_visitor::opt_register_coalesce()
>  
>     calculate_live_intervals();
>  
> -   foreach_in_list_safe(vec4_instruction, inst, &instructions) {
> +   foreach_block_and_inst_safe (block, vec4_instruction, inst, cfg) {
>        int ip = next_ip;
>        next_ip++;
>  
> @@ -1096,13 +1096,13 @@ vec4_visitor::opt_register_coalesce()
>  	    }
>  	    scan_inst = (vec4_instruction *)scan_inst->next;
>  	 }
> -	 inst->remove();
> +	 inst->remove(block);
>  	 progress = true;
>        }
>     }
>  
>     if (progress)
> -      invalidate_live_intervals();
> +      invalidate_live_intervals(false);
>  
>     return progress;
>  }
> @@ -1181,7 +1181,7 @@ vec4_visitor::split_virtual_grfs()
>           }
>        }
>     }
> -   invalidate_live_intervals();
> +   invalidate_live_intervals(false);
>  }
>  
>  void
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
> index 390448a..ebee42f 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
> @@ -403,7 +403,7 @@ vec4_visitor::opt_copy_propagation()
>     }
>  
>     if (progress)
> -      invalidate_live_intervals();
> +      invalidate_live_intervals(false);

The patch either explicitly changes an iteration to use blocks and block-aware
manipulation routines, or relies on the fact that the iteration already
operates with blocks.
This is the only occurence that initially looked to be using the flat
instruction list (even though the comment in the existing code says that it
operates on basic blocks). Closer inspection reveals that it actually doesn't
add or remove instructions, it just modifies existing, right?

Somebody else should have a look as well, but

Reviewed-by: Topi Pohjolainen <topi.pohjolainen at intel.com>

And just as a thought, splitting this to three parts might assist bisecting
later on. One patch just changing the cases where iteration already works
with blocks, another dealing with those cases where the iteration needs to be
modified as well and one patch dealing with the case here.

>  
>     return progress;
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
> index 1f24af6..a0f6e77 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
> @@ -165,7 +165,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
>                 entry->tmp.swizzle = BRW_SWIZZLE_XYZW;
>  
>                 vec4_instruction *copy = MOV(entry->generator->dst, entry->tmp);
> -               entry->generator->insert_after(copy);
> +               entry->generator->insert_after(block, copy);
>                 entry->generator->dst = dst_reg(entry->tmp);
>              }
>  
> @@ -174,7 +174,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
>                 assert(inst->dst.type == entry->tmp.type);
>                 vec4_instruction *copy = MOV(inst->dst, entry->tmp);
>                 copy->force_writemask_all = inst->force_writemask_all;
> -               inst->insert_before(copy);
> +               inst->insert_before(block, copy);
>              }
>  
>              /* Set our iterator so that next time through the loop inst->next
> @@ -183,7 +183,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
>               */
>              vec4_instruction *prev = (vec4_instruction *)inst->prev;
>  
> -            inst->remove();
> +            inst->remove(block);
>  
>              /* Appending an instruction may have changed our bblock end. */
>              if (inst == block->end) {
> @@ -256,7 +256,7 @@ vec4_visitor::opt_cse()
>     }
>  
>     if (progress)
> -      invalidate_live_intervals();
> +      invalidate_live_intervals(false);
>  
>     return progress;
>  }
> -- 
> 1.8.5.5
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list