[Mesa-dev] [PATCH 06/20] i965: Preserve the CFG in instruction scheduling.

Mon Sep 8 05:43:08 PDT 2014

On Tue, Sep 02, 2014 at 09:34:17PM -0700, Matt Turner wrote:
> ---
>  .../drivers/dri/i965/brw_schedule_instructions.cpp | 74 ++++++++++++----------
>  1 file changed, 42 insertions(+), 32 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> index 04ac242..bac0d55 100644
> --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> @@ -27,6 +27,8 @@
>  
>  #include "brw_fs.h"
>  #include "brw_vec4.h"
> +#include "brw_cfg.h"
> +#include "brw_shader.h"
>  #include "glsl/glsl_types.h"
>  #include "glsl/ir_optimization.h"
>  
> @@ -411,6 +413,7 @@ public:
>           this->remaining_grf_uses = NULL;
>           this->grf_active = NULL;
>        }
> +      v->calculate_cfg();
>     }
>  
>     ~instruction_scheduler()
> @@ -421,8 +424,8 @@ public:
>     void add_dep(schedule_node *before, schedule_node *after, int latency);
>     void add_dep(schedule_node *before, schedule_node *after);
>  
> -   void run(exec_list *instructions);
> -   void add_inst(backend_instruction *inst);
> +   void run(cfg_t *cfg);
> +   void add_insts_from_block(bblock_t *block);
>     void compute_delay(schedule_node *node);
>     virtual void calculate_deps() = 0;
>     virtual schedule_node *choose_instruction_to_schedule() = 0;
> @@ -440,7 +443,7 @@ public:
>     virtual void update_register_pressure(backend_instruction *inst) = 0;
>     virtual int get_register_pressure_benefit(backend_instruction *inst) = 0;
>  
> -   void schedule_instructions(backend_instruction *next_block_header);
> +   void schedule_instructions(bblock_t *block);
>  
>     void *mem_ctx;
>  
> @@ -624,17 +627,28 @@ schedule_node::schedule_node(backend_instruction *inst,
>  }
>  
>  void
> -instruction_scheduler::add_inst(backend_instruction *inst)
> +instruction_scheduler::add_insts_from_block(bblock_t *block)
>  {
> -   schedule_node *n = new(mem_ctx) schedule_node(inst, this);
> +   /* Removing the last instruction from a basic block removes the block as
> +    * well, so put a NOP at the end to keep it alive.
> +    */

Initially using the nop-instruction to keep the block "non-empty" seemed a
little artifical. Then I started thinking alternatives.

I started wondering why we couldn't simply copy the incoming block::start and
block::end into the instance of instruction_scheduler here instead of manually
creating a new instruction list. And then just set the original block::start
and block::end to NULL. If we named the new members of instruction_scheduler
the same as in bblock_t (start, end), we could even use the existing macros
for iterating (the macros are not concerned about the data type as long as it
has members 'start' and 'end').
I understood that the idea is to store the instructions represented by the
block somewhere, set the block empty and re-insert the instructions in possibly
different order back into the block, right?

> +   if (!block->end->is_control_flow()) {
> +      backend_instruction *nop = new(mem_ctx) backend_instruction();
> +      nop->opcode = BRW_OPCODE_NOP;
> +      block->end->insert_after(block, nop);
> +   }
>  
> -   assert(!inst->is_head_sentinel());
> -   assert(!inst->is_tail_sentinel());
> +   foreach_inst_in_block_safe(backend_instruction, inst, block) {
> +      if (inst->opcode == BRW_OPCODE_NOP || inst->is_control_flow())
> +         continue;

Shouldn't this simply return? Original logic below simply quits iterating
when it finds a control flow instruction - there can be in the maximum one
control flow instruction per block, and it is always the last, right?
I also thought that it is not possible to have nop-instructions at this point
anymore other than those inserted above.

>  
> -   this->instructions_to_schedule++;
> +      schedule_node *n = new(mem_ctx) schedule_node(inst, this);
>  
> -   inst->remove();
> -   instructions.push_tail(n);
> +      this->instructions_to_schedule++;
> +
> +      inst->remove(block);
> +      instructions.push_tail(n);
> +   }
>  }
>  
>  /** Recursive computation of the delay member of a node. */
> @@ -1354,9 +1368,10 @@ vec4_instruction_scheduler::issue_time(backend_instruction *inst)
>  }
>  
>  void
> -instruction_scheduler::schedule_instructions(backend_instruction *next_block_header)
> +instruction_scheduler::schedule_instructions(bblock_t *block)
>  {
>     struct brw_context *brw = bv->brw;
> +   backend_instruction *inst = block->end;
>     time = 0;
>  
>     /* Remove non-DAG heads from the list. */
> @@ -1372,7 +1387,7 @@ instruction_scheduler::schedule_instructions(backend_instruction *next_block_hea
>        /* Schedule this instruction. */
>        assert(chosen);
>        chosen->remove();
> -      next_block_header->insert_before(chosen->inst);
> +      inst->insert_before(block, chosen->inst);
>        instructions_to_schedule--;
>        update_register_pressure(chosen->inst);
>  
> @@ -1434,15 +1449,14 @@ instruction_scheduler::schedule_instructions(backend_instruction *next_block_hea
>        }
>     }
>  
> +   if (block->end->opcode == BRW_OPCODE_NOP)
> +      block->end->remove(block);
>     assert(instructions_to_schedule == 0);
>  }
>  
>  void
> -instruction_scheduler::run(exec_list *all_instructions)
> +instruction_scheduler::run(cfg_t *cfg)
>  {
> -   backend_instruction *next_block_header =
> -      (backend_instruction *)all_instructions->head;
> -
>     if (debug) {
>        fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n",
>                post_reg_alloc);
> @@ -1453,28 +1467,24 @@ instruction_scheduler::run(exec_list *all_instructions)
>      * scheduling.
>      */
>     if (remaining_grf_uses) {
> -      foreach_in_list(schedule_node, node, all_instructions) {
> -         count_remaining_grf_uses((backend_instruction *)node);
> +      foreach_block_and_inst(block, backend_instruction, inst, cfg) {
> +         count_remaining_grf_uses(inst);
>        }
>     }
>  
> -   while (!next_block_header->is_tail_sentinel()) {
> -      /* Add things to be scheduled until we get to a new BB. */
> -      while (!next_block_header->is_tail_sentinel()) {
> -	 backend_instruction *inst = next_block_header;
> -	 next_block_header = (backend_instruction *)next_block_header->next;
> +   foreach_block(block, cfg) {
> +      if (block->end_ip - block->start_ip <= 1)
> +         continue;
> +
> +      add_insts_from_block(block);
>  
> -	 add_inst(inst);
> -         if (inst->is_control_flow())
> -	    break;
> -      }
>        calculate_deps();
>  
>        foreach_in_list(schedule_node, n, &instructions) {
>           compute_delay(n);
>        }
>  
> -      schedule_instructions(next_block_header);
> +      schedule_instructions(block);
>     }
>  
>     if (debug) {
> @@ -1494,25 +1504,25 @@ fs_visitor::schedule_instructions(instruction_scheduler_mode mode)
>        grf_count = virtual_grf_count;
>  
>     fs_instruction_scheduler sched(this, grf_count, mode);
> -   sched.run(&instructions);
> +   sched.run(cfg);
>  
>     if (unlikely(INTEL_DEBUG & DEBUG_WM) && mode == SCHEDULE_POST) {
>        fprintf(stderr, "fs%d estimated execution time: %d cycles\n",
>               dispatch_width, sched.time);
>     }
>  
> -   invalidate_live_intervals();
> +   invalidate_live_intervals(false);
>  }
>  
>  void
>  vec4_visitor::opt_schedule_instructions()
>  {
>     vec4_instruction_scheduler sched(this, prog_data->total_grf);
> -   sched.run(&instructions);
> +   sched.run(cfg);
>  
>     if (unlikely(debug_flag)) {
>        fprintf(stderr, "vec4 estimated execution time: %d cycles\n", sched.time);
>     }
>  
> -   invalidate_live_intervals();
> +   invalidate_live_intervals(false);
>  }
> -- 
> 1.8.5.5
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev