[Mesa-dev] [PATCH v2] i965: Relax accumulator dependency scheduling on Gen < 6

Iago Toral itoral at igalia.com
Tue May 13 01:36:29 PDT 2014


Hi Matt, are you okay with excluding FS_OPCODE_CINTERP from the list of
virtual instructions to consider in Gen < 6? As far as I can see it is
implemented as a single MOV instruction and the accumulator does not
seem to be used but maybe I am missing something.

Iago

On Wed, 2014-05-07 at 09:58 +0200, Iago Toral Quiroga wrote:
> Many instructions implicitly update the accumulator on Gen < 6. The instruction
> scheduling code just calls add_barrier_deps() for each accumulator access on
> these platforms, but a large class of operations don't actually update the
> accumulator -- mostly move and logical instructions. Teaching the scheduling
> code about this would allow more flexibility to schedule instructions.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77740
> ---
> 
> This version properly identifies virtual instructions that write to the
> accumulator in Gen < 6 as indicated by Matt. FS_OPCODE_CINTERP is excluded
> because it seems to be implemented as a MOV.
> 
> Passes piglit tests on IronLake.
> 
>  .../drivers/dri/i965/brw_schedule_instructions.cpp | 84 +++++++---------------
>  src/mesa/drivers/dri/i965/brw_shader.cpp           | 10 +++
>  src/mesa/drivers/dri/i965/brw_shader.h             |  1 +
>  3 files changed, 36 insertions(+), 59 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> index 8cc6908..6f8f405 100644
> --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> @@ -742,8 +742,6 @@ fs_instruction_scheduler::is_compressed(fs_inst *inst)
>  void
>  fs_instruction_scheduler::calculate_deps()
>  {
> -   const bool gen6plus = v->brw->gen >= 6;
> -
>     /* Pre-register-allocation, this tracks the last write per VGRF (so
>      * different reg_offsets within it can interfere when they shouldn't).
>      * After register allocation, reg_offsets are gone and we track individual
> @@ -803,7 +801,7 @@ fs_instruction_scheduler::calculate_deps()
>              } else {
>                 add_dep(last_fixed_grf_write, n);
>              }
> -         } else if (inst->src[i].is_accumulator() && gen6plus) {
> +         } else if (inst->src[i].is_accumulator()) {
>              add_dep(last_accumulator_write, n);
>  	 } else if (inst->src[i].file != BAD_FILE &&
>  		    inst->src[i].file != IMM &&
> @@ -828,11 +826,7 @@ fs_instruction_scheduler::calculate_deps()
>        }
>  
>        if (inst->reads_accumulator_implicitly()) {
> -         if (gen6plus) {
> -            add_dep(last_accumulator_write, n);
> -         } else {
> -            add_barrier_deps(n);
> -         }
> +         add_dep(last_accumulator_write, n);
>        }
>  
>        /* write-after-write deps. */
> @@ -867,7 +861,7 @@ fs_instruction_scheduler::calculate_deps()
>           } else {
>              last_fixed_grf_write = n;
>           }
> -      } else if (inst->dst.is_accumulator() && gen6plus) {
> +      } else if (inst->dst.is_accumulator()) {
>           add_dep(last_accumulator_write, n);
>           last_accumulator_write = n;
>        } else if (inst->dst.file != BAD_FILE &&
> @@ -887,13 +881,10 @@ fs_instruction_scheduler::calculate_deps()
>  	 last_conditional_mod[inst->flag_subreg] = n;
>        }
>  
> -      if (inst->writes_accumulator) {
> -         if (gen6plus) {
> -            add_dep(last_accumulator_write, n);
> -            last_accumulator_write = n;
> -         } else {
> -            add_barrier_deps(n);
> -         }
> +      if (inst->writes_accumulator_implicitly(v->brw->gen) &&
> +          !inst->dst.is_accumulator()) {
> +         add_dep(last_accumulator_write, n);
> +         last_accumulator_write = n;
>        }
>     }
>  
> @@ -933,7 +924,7 @@ fs_instruction_scheduler::calculate_deps()
>              } else {
>                 add_dep(n, last_fixed_grf_write);
>              }
> -         } else if (inst->src[i].is_accumulator() && gen6plus) {
> +         } else if (inst->src[i].is_accumulator()) {
>              add_dep(n, last_accumulator_write);
>           } else if (inst->src[i].file != BAD_FILE &&
>  		    inst->src[i].file != IMM &&
> @@ -958,11 +949,7 @@ fs_instruction_scheduler::calculate_deps()
>        }
>  
>        if (inst->reads_accumulator_implicitly()) {
> -         if (gen6plus) {
> -            add_dep(n, last_accumulator_write);
> -         } else {
> -            add_barrier_deps(n);
> -         }
> +         add_dep(n, last_accumulator_write);
>        }
>  
>        /* Update the things this instruction wrote, so earlier reads
> @@ -996,7 +983,7 @@ fs_instruction_scheduler::calculate_deps()
>           } else {
>              last_fixed_grf_write = n;
>           }
> -      } else if (inst->dst.is_accumulator() && gen6plus) {
> +      } else if (inst->dst.is_accumulator()) {
>           last_accumulator_write = n;
>        } else if (inst->dst.file != BAD_FILE &&
>                   !inst->dst.is_null()) {
> @@ -1013,12 +1000,8 @@ fs_instruction_scheduler::calculate_deps()
>  	 last_conditional_mod[inst->flag_subreg] = n;
>        }
>  
> -      if (inst->writes_accumulator) {
> -         if (gen6plus) {
> -            last_accumulator_write = n;
> -         } else {
> -            add_barrier_deps(n);
> -         }
> +      if (inst->writes_accumulator_implicitly(v->brw->gen)) {
> +         last_accumulator_write = n;
>        }
>     }
>  }
> @@ -1026,8 +1009,6 @@ fs_instruction_scheduler::calculate_deps()
>  void
>  vec4_instruction_scheduler::calculate_deps()
>  {
> -   const bool gen6plus = v->brw->gen >= 6;
> -
>     schedule_node *last_grf_write[grf_count];
>     schedule_node *last_mrf_write[BRW_MAX_MRF];
>     schedule_node *last_conditional_mod = NULL;
> @@ -1067,7 +1048,7 @@ vec4_instruction_scheduler::calculate_deps()
>                      (inst->src[i].fixed_hw_reg.file ==
>                       BRW_GENERAL_REGISTER_FILE)) {
>              add_dep(last_fixed_grf_write, n);
> -         } else if (inst->src[i].is_accumulator() && gen6plus) {
> +         } else if (inst->src[i].is_accumulator()) {
>              assert(last_accumulator_write);
>              add_dep(last_accumulator_write, n);
>           } else if (inst->src[i].file != BAD_FILE &&
> @@ -1094,12 +1075,8 @@ vec4_instruction_scheduler::calculate_deps()
>        }
>  
>        if (inst->reads_accumulator_implicitly()) {
> -         if (gen6plus) {
> -            assert(last_accumulator_write);
> -            add_dep(last_accumulator_write, n);
> -         } else {
> -            add_barrier_deps(n);
> -         }
> +         assert(last_accumulator_write);
> +         add_dep(last_accumulator_write, n);
>        }
>  
>        /* write-after-write deps. */
> @@ -1112,7 +1089,7 @@ vec4_instruction_scheduler::calculate_deps()
>       } else if (inst->dst.file == HW_REG &&
>                   inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
>           last_fixed_grf_write = n;
> -      } else if (inst->dst.is_accumulator() && gen6plus) {
> +      } else if (inst->dst.is_accumulator()) {
>           add_dep(last_accumulator_write, n);
>           last_accumulator_write = n;
>        } else if (inst->dst.file != BAD_FILE &&
> @@ -1132,13 +1109,10 @@ vec4_instruction_scheduler::calculate_deps()
>           last_conditional_mod = n;
>        }
>  
> -      if (inst->writes_accumulator) {
> -         if (gen6plus) {
> -            add_dep(last_accumulator_write, n);
> -            last_accumulator_write = n;
> -         } else {
> -            add_barrier_deps(n);
> -         }
> +      if (inst->writes_accumulator_implicitly(v->brw->gen) &&
> +          !inst->dst.is_accumulator()) {
> +         add_dep(last_accumulator_write, n);
> +         last_accumulator_write = n;
>        }
>     }
>  
> @@ -1165,7 +1139,7 @@ vec4_instruction_scheduler::calculate_deps()
>                      (inst->src[i].fixed_hw_reg.file ==
>                       BRW_GENERAL_REGISTER_FILE)) {
>              add_dep(n, last_fixed_grf_write);
> -         } else if (inst->src[i].is_accumulator() && gen6plus) {
> +         } else if (inst->src[i].is_accumulator()) {
>              add_dep(n, last_accumulator_write);
>           } else if (inst->src[i].file != BAD_FILE &&
>                      inst->src[i].file != IMM &&
> @@ -1189,11 +1163,7 @@ vec4_instruction_scheduler::calculate_deps()
>        }
>  
>        if (inst->reads_accumulator_implicitly()) {
> -         if (gen6plus) {
> -            add_dep(n, last_accumulator_write);
> -         } else {
> -            add_barrier_deps(n);
> -         }
> +         add_dep(n, last_accumulator_write);
>        }
>  
>        /* Update the things this instruction wrote, so earlier reads
> @@ -1206,7 +1176,7 @@ vec4_instruction_scheduler::calculate_deps()
>        } else if (inst->dst.file == HW_REG &&
>                   inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
>           last_fixed_grf_write = n;
> -      } else if (inst->dst.is_accumulator() && gen6plus) {
> +      } else if (inst->dst.is_accumulator()) {
>           last_accumulator_write = n;
>        } else if (inst->dst.file != BAD_FILE &&
>                   !inst->dst.is_null()) {
> @@ -1223,12 +1193,8 @@ vec4_instruction_scheduler::calculate_deps()
>           last_conditional_mod = n;
>        }
>  
> -      if (inst->writes_accumulator) {
> -         if (gen6plus) {
> -            last_accumulator_write = n;
> -         } else {
> -            add_barrier_deps(n);
> -         }
> +      if (inst->writes_accumulator_implicitly(v->brw->gen)) {
> +         last_accumulator_write = n;
>        }
>     }
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 6e74803..714c603 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -676,6 +676,16 @@ backend_instruction::reads_accumulator_implicitly() const
>  }
>  
>  bool
> +backend_instruction::writes_accumulator_implicitly(int gen) const
> +{
> +   return writes_accumulator ||
> +          (gen < 6 &&
> +           ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
> +            (opcode >= FS_OPCODE_DDX && opcode <= FS_OPCODE_LINTERP &&
> +             opcode != FS_OPCODE_CINTERP)));
> +}
> +
> +bool
>  backend_instruction::has_side_effects() const
>  {
>     switch (opcode) {
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
> index e730ed0..5ae4092 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.h
> +++ b/src/mesa/drivers/dri/i965/brw_shader.h
> @@ -48,6 +48,7 @@ public:
>     bool can_do_source_mods() const;
>     bool can_do_saturate() const;
>     bool reads_accumulator_implicitly() const;
> +   bool writes_accumulator_implicitly(int gen) const;
>  
>     /**
>      * True if the instruction has side effects other than writing to




More information about the mesa-dev mailing list