[Mesa-dev] [PATCH] i965/fs: Make the first pre-allocation heuristic be the post heuristic.

Wed Nov 20 00:49:08 PST 2013

Making madness (e9daead7) legit? :)

Reviewed-by: Jordan Justen <jordan.l.justen at intel.com>

On Tue, Nov 19, 2013 at 11:34 PM, Eric Anholt <eric at anholt.net> wrote:
> I recently made us try two different things that tried to reduce register
> pressure so that we would be more likely to allocate successfully.  But
> now that we have the logic for trying two, we can make the first thing we
> try be the normal, not-prioritizing-register-pressure heuristic.
>
> This means one less scheduling pass in the common case of that heuristic
> not producing spills, plus the best schedule we know how to produce, if
> that one happens to succeed.  This is important, because our register
> allocation produces a lot of possibly avoidable dependencies for the
> post-register-allocation schedule, despite ra_set_allocate_round_robin().
>
> GLB2.7: 1.04127% +/- 0.732461% fps improvement (n=31)
> nexuiz: No difference (n=5)
> lightsmark: 0.838512% +/- 0.300147% fps improvement (n=86)
> minecraft apitrace: No difference (n=15)
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp               | 62 ++++++++++++++--------
>  .../drivers/dri/i965/brw_schedule_instructions.cpp |  4 +-
>  src/mesa/drivers/dri/i965/brw_shader.h             |  1 +
>  3 files changed, 44 insertions(+), 23 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 8b3f8df..dbe7503 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -3190,6 +3190,7 @@ fs_visitor::run()
>  {
>     sanity_param_count = fp->Base.Parameters->NumParameters;
>     uint32_t orig_nr_params = c->prog_data.nr_params;
> +   bool allocated_without_spills;
>
>     assign_binding_table_offsets();
>
> @@ -3273,27 +3274,45 @@ fs_visitor::run()
>        assign_curb_setup();
>        assign_urb_setup();
>
> -      schedule_instructions(SCHEDULE_PRE_NON_LIFO);
> +      static enum instruction_scheduler_mode pre_modes[] = {
> +         SCHEDULE_PRE,
> +         SCHEDULE_PRE_NON_LIFO,
> +         SCHEDULE_PRE_LIFO,
> +      };
>
> -      if (0)
> -        assign_regs_trivial();
> -      else {
> -         if (!assign_regs(false)) {
> -            /* Try a non-spilling register allocation again with a different
> -             * scheduling heuristic.
> -             */
> -            schedule_instructions(SCHEDULE_PRE_LIFO);
> -            if (!assign_regs(false)) {
> -               if (dispatch_width == 16) {
> -                  fail("Failure to register allocate.  Reduce number of "
> -                       "live scalar values to avoid this.");
> -               } else {
> -                  while (!assign_regs(true)) {
> -                     if (failed)
> -                        break;
> -                  }
> -               }
> -            }
> +      /* Try each scheduling heuristic to see if it can successfully register
> +       * allocate without spilling.  They should be ordered by decreasing
> +       * performance but increasing likelihood of allocating.
> +       */
> +      for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
> +         schedule_instructions(pre_modes[i]);
> +
> +         if (0) {
> +            assign_regs_trivial();
> +            allocated_without_spills = true;
> +         } else {
> +            allocated_without_spills = assign_regs(false);
> +         }
> +         if (allocated_without_spills)
> +            break;
> +      }
> +
> +      if (!allocated_without_spills) {
> +         /* We assume that any spilling is worse than just dropping back to
> +          * SIMD8.  There's probably actually some intermediate point where
> +          * SIMD16 with a couple of spills is still better.
> +          */
> +         if (dispatch_width == 16) {
> +            fail("Failure to register allocate.  Reduce number of "
> +                 "live scalar values to avoid this.");
> +         }
> +
> +         /* Since we're out of heuristics, just go spill registers until we
> +          * get an allocation.
> +          */
> +         while (!assign_regs(true)) {
> +            if (failed)
> +               break;
>           }
>        }
>     }
> @@ -3308,7 +3327,8 @@ fs_visitor::run()
>     if (failed)
>        return false;
>
> -   schedule_instructions(SCHEDULE_POST);
> +   if (!allocated_without_spills)
> +      schedule_instructions(SCHEDULE_POST);
>
>     if (dispatch_width == 8) {
>        c->prog_data.reg_blocks = brw_register_blocks(grf_used);
> diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> index a4fae0d..a40a46a 100644
> --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> @@ -1140,10 +1140,10 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
>  {
>     schedule_node *chosen = NULL;
>
> -   if (post_reg_alloc) {
> +   if (mode == SCHEDULE_PRE || mode == SCHEDULE_POST) {
>        int chosen_time = 0;
>
> -      /* Of the instructions closest ready to execute or the closest to
> +      /* Of the instructions ready to execute or the closest to
>         * being ready, choose the oldest one.
>         */
>        foreach_list(node, &instructions) {
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
> index aba24c5..34779ff 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.h
> +++ b/src/mesa/drivers/dri/i965/brw_shader.h
> @@ -60,6 +60,7 @@ public:
>  };
>
>  enum instruction_scheduler_mode {
> +   SCHEDULE_PRE,
>     SCHEDULE_PRE_NON_LIFO,
>     SCHEDULE_PRE_LIFO,
>     SCHEDULE_POST,
> --
> 1.8.4.rc3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev