[Mesa-dev] [PATCH v2 13/16] i965: Move fs_visitor ra pass to new fs_visitor::allocate_registers()

Tue Nov 25 12:31:49 PST 2014

On Thu, Nov 13, 2014 at 6:38 PM, Connor Abbott <cwabbott0 at gmail.com> wrote:
> On Thu, Nov 13, 2014 at 7:28 PM, Kristian Høgsberg <krh at bitplanet.net> wrote:
>> This will be reused for the scalar VS pass.
>>
>> Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs.cpp | 132 +++++++++++++++++++----------------
>>  src/mesa/drivers/dri/i965/brw_fs.h   |   1 +
>>  2 files changed, 71 insertions(+), 62 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index cb73b9f..4dce0a2 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -3538,11 +3538,79 @@ fs_visitor::optimize()
>>     lower_uniform_pull_constant_loads();
>>  }
>>
>> +void
>> +fs_visitor::allocate_registers()
>> +{
>> +   bool allocated_without_spills;
>> +
>> +   static enum instruction_scheduler_mode pre_modes[] = {
>> +      SCHEDULE_PRE,
>> +      SCHEDULE_PRE_NON_LIFO,
>> +      SCHEDULE_PRE_LIFO,
>> +   };
>> +
>> +   /* Try each scheduling heuristic to see if it can successfully register
>> +    * allocate without spilling.  They should be ordered by decreasing
>> +    * performance but increasing likelihood of allocating.
>> +    */
>> +   for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
>> +      schedule_instructions(pre_modes[i]);
>> +
>> +      if (0) {
>> +         assign_regs_trivial();
>> +         allocated_without_spills = true;
>> +      } else {
>> +         allocated_without_spills = assign_regs(false);
>> +      }
>> +      if (allocated_without_spills)
>> +         break;
>> +   }
>> +
>> +   if (!allocated_without_spills) {
>> +      /* We assume that any spilling is worse than just dropping back to
>> +       * SIMD8.  There's probably actually some intermediate point where
>> +       * SIMD16 with a couple of spills is still better.
>> +       */
>> +      if (dispatch_width == 16) {
>> +         fail("Failure to register allocate.  Reduce number of "
>> +              "live scalar values to avoid this.");
>> +      } else {
>> +         perf_debug("Fragment shader triggered register spilling.  "
>> +                    "Try reducing the number of live scalar values to "
>> +                    "improve performance.\n");
>
> Hmm, this warning hard-codes "Fragment shader", so it will be pretty
> confusing once we start hitting this path for vertex shaders as well...

Right, I'll put the actual stage name in the perf_debug message instead of the hard-coded "Fragment shader".

Kristian

>> +      }
>> +
>> +      /* Since we're out of heuristics, just go spill registers until we
>> +       * get an allocation.
>> +       */
>> +      while (!assign_regs(true)) {
>> +         if (failed)
>> +            break;
>> +      }
>> +   }
>> +
>> +   assert(force_uncompressed_stack == 0);
>> +
>> +   /* This must come after all optimization and register allocation, since
>> +    * it inserts dead code that happens to have side effects, and it does
>> +    * so based on the actual physical registers in use.
>> +    */
>> +   insert_gen4_send_dependency_workarounds();
>> +
>> +   if (failed)
>> +      return;
>> +
>> +   if (!allocated_without_spills)
>> +      schedule_instructions(SCHEDULE_POST);
>> +
>> +   if (last_scratch > 0)
>> +      prog_data->total_scratch = brw_get_scratch_size(last_scratch);
>> +}
>> +
>>  bool
>>  fs_visitor::run()
>>  {
>>     sanity_param_count = prog->Parameters->NumParameters;
>> -   bool allocated_without_spills;
>>
>>     assign_binding_table_offsets();
>>
>> @@ -3555,7 +3623,6 @@ fs_visitor::run()
>>        emit_dummy_fs();
>>     } else if (brw->use_rep_send && dispatch_width == 16) {
>>        emit_repclear_shader();
>> -      allocated_without_spills = true;
>>     } else {
>>        if (INTEL_DEBUG & DEBUG_SHADER_TIME)
>>           emit_shader_time_begin();
>> @@ -3610,68 +3677,9 @@ fs_visitor::run()
>>        assign_curb_setup();
>>        assign_urb_setup();
>>
>> -      static enum instruction_scheduler_mode pre_modes[] = {
>> -         SCHEDULE_PRE,
>> -         SCHEDULE_PRE_NON_LIFO,
>> -         SCHEDULE_PRE_LIFO,
>> -      };
>> -
>> -      /* Try each scheduling heuristic to see if it can successfully register
>> -       * allocate without spilling.  They should be ordered by decreasing
>> -       * performance but increasing likelihood of allocating.
>> -       */
>> -      for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
>> -         schedule_instructions(pre_modes[i]);
>> -
>> -         if (0) {
>> -            assign_regs_trivial();
>> -            allocated_without_spills = true;
>> -         } else {
>> -            allocated_without_spills = assign_regs(false);
>> -         }
>> -         if (allocated_without_spills)
>> -            break;
>> -      }
>> -
>> -      if (!allocated_without_spills) {
>> -         /* We assume that any spilling is worse than just dropping back to
>> -          * SIMD8.  There's probably actually some intermediate point where
>> -          * SIMD16 with a couple of spills is still better.
>> -          */
>> -         if (dispatch_width == 16) {
>> -            fail("Failure to register allocate.  Reduce number of "
>> -                 "live scalar values to avoid this.");
>> -         } else {
>> -            perf_debug("Fragment shader triggered register spilling.  "
>> -                       "Try reducing the number of live scalar values to "
>> -                       "improve performance.\n");
>> -         }
>> -
>> -         /* Since we're out of heuristics, just go spill registers until we
>> -          * get an allocation.
>> -          */
>> -         while (!assign_regs(true)) {
>> -            if (failed)
>> -               break;
>> -         }
>> -      }
>> -
>> -      assert(force_uncompressed_stack == 0);
>> -
>> -      /* This must come after all optimization and register allocation, since
>> -       * it inserts dead code that happens to have side effects, and it does
>> -       * so based on the actual physical registers in use.
>> -       */
>> -      insert_gen4_send_dependency_workarounds();
>> -
>> +      allocate_registers();
>>        if (failed)
>>           return false;
>> -
>> -      if (!allocated_without_spills)
>> -         schedule_instructions(SCHEDULE_POST);
>> -
>> -      if (last_scratch > 0)
>> -         prog_data->total_scratch = brw_get_scratch_size(last_scratch);
>>     }
>>
>>     if (stage == MESA_SHADER_FRAGMENT) {
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
>> index 31c3001..bb6f767 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.h
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
>> @@ -407,6 +407,7 @@ public:
>>
>>     bool run();
>>     void optimize();
>> +   void allocate_registers();
>>     void assign_binding_table_offsets();
>>     void setup_payload_gen4();
>>     void setup_payload_gen6();
>> --
>> 2.1.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev