[Mesa-dev] [PATCH 1/2] st/mesa: reduce time spent in calculating temp read/writes

Dave Airlie airlied at gmail.com
Thu Aug 27 12:48:21 PDT 2015


On 27 August 2015 at 21:57, Marek Olšák <maraeo at gmail.com> wrote:
> We could just skip this for radeonsi or any driver that does regalloc,
> because it's useless there.

I did expect this comment from Ilia, though. Adding a CAP and
piping it through is an option I'd look into later (or someone who
cares can do it sooner).

It doesn't change what this patch does, however; it looks useful for r600
as is.

Dave.

>
> Marek
>
> On Thu, Aug 27, 2015 at 5:30 AM, Dave Airlie <airlied at gmail.com> wrote:
>> From: Dave Airlie <airlied at redhat.com>
>>
>> The glsl->tgsi converter does some temporary register reduction;
>> however, when profiling shader-db it shows up quite high.
>>
>> So optimise things to reduce the number of loops we do through
>> all the instructions. This drops merge_registers
>> from 4-5% on the profile to 1%. I think this can be reduced
>> further by possibly optimising the renumber pass.
>>
>> Signed-off-by: Dave Airlie <airlied at redhat.com>
>> ---
>>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 153 +++++++++++++++--------------
>>  1 file changed, 79 insertions(+), 74 deletions(-)
>>
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> index 65aae40..e07db11 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> @@ -480,10 +480,9 @@ public:
>>     void simplify_cmp(void);
>>
>>     void rename_temp_register(int index, int new_index);
>> -   int get_first_temp_read(int index);
>> -   int get_first_temp_write(int index);
>> -   int get_last_temp_read(int index);
>> -   int get_last_temp_write(int index);
>> +   void get_first_temp_read(int *first_reads);
>> +   void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
>> +   void get_last_temp_write(int *last_writes);
>>
>>     void copy_propagate(void);
>>     int eliminate_dead_code(void);
>> @@ -3688,8 +3687,8 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
>>     }
>>  }
>>
>> -int
>> -glsl_to_tgsi_visitor::get_first_temp_read(int index)
>> +void
>> +glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
>>  {
>>     int depth = 0; /* loop depth */
>>     int loop_start = -1; /* index of the first active BGNLOOP (if any) */
>> @@ -3697,15 +3696,15 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
>>
>>     foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
>>        for (j = 0; j < num_inst_src_regs(inst); j++) {
>> -         if (inst->src[j].file == PROGRAM_TEMPORARY &&
>> -             inst->src[j].index == index) {
>> -            return (depth == 0) ? i : loop_start;
>> +         if (inst->src[j].file == PROGRAM_TEMPORARY) {
>> +            if (first_reads[inst->src[j].index] == -1)
>> +                first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start;
>>           }
>>        }
>>        for (j = 0; j < inst->tex_offset_num_offset; j++) {
>> -         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
>> -             inst->tex_offsets[j].index == index) {
>> -            return (depth == 0) ? i : loop_start;
>> +         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
>> +            if (first_reads[inst->tex_offsets[j].index] == -1)
>> +               first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start;
>>           }
>>        }
>>        if (inst->op == TGSI_OPCODE_BGNLOOP) {
>> @@ -3718,91 +3717,73 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
>>        assert(depth >= 0);
>>        i++;
>>     }
>> -   return -1;
>>  }
>>
>> -int
>> -glsl_to_tgsi_visitor::get_first_temp_write(int index)
>> +void
>> +glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes)
>>  {
>>     int depth = 0; /* loop depth */
>>     int loop_start = -1; /* index of the first active BGNLOOP (if any) */
>> -   int i = 0;
>> -   unsigned j;
>> -
>> +   unsigned i = 0, j;
>> +   int k;
>>     foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
>> +      for (j = 0; j < num_inst_src_regs(inst); j++) {
>> +         if (inst->src[j].file == PROGRAM_TEMPORARY)
>> +            last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
>> +      }
>>        for (j = 0; j < num_inst_dst_regs(inst); j++) {
>> -         if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) {
>> -            return (depth == 0) ? i : loop_start;
>> -         }
>> +         if (inst->dst[j].file == PROGRAM_TEMPORARY)
>> +            if (first_writes[inst->dst[j].index] == -1)
>> +               first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
>> +      }
>> +      for (j = 0; j < inst->tex_offset_num_offset; j++) {
>> +         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
>> +            last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
>>        }
>>        if (inst->op == TGSI_OPCODE_BGNLOOP) {
>>           if(depth++ == 0)
>>              loop_start = i;
>>        } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
>> -         if (--depth == 0)
>> +         if (--depth == 0) {
>>              loop_start = -1;
>> -      }
>> -      assert(depth >= 0);
>> -      i++;
>> -   }
>> -   return -1;
>> -}
>> -
>> -int
>> -glsl_to_tgsi_visitor::get_last_temp_read(int index)
>> -{
>> -   int depth = 0; /* loop depth */
>> -   int last = -1; /* index of last instruction that reads the temporary */
>> -   unsigned i = 0, j;
>> -
>> -   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
>> -      for (j = 0; j < num_inst_src_regs(inst); j++) {
>> -         if (inst->src[j].file == PROGRAM_TEMPORARY &&
>> -             inst->src[j].index == index) {
>> -            last = (depth == 0) ? i : -2;
>> +            for (k = 0; k < this->next_temp; k++) {
>> +               if (last_reads[k] == -2) {
>> +                  last_reads[k] = i;
>> +               }
>> +            }
>>           }
>>        }
>> -      for (j = 0; j < inst->tex_offset_num_offset; j++) {
>> -          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
>> -              inst->tex_offsets[j].index == index)
>> -              last = (depth == 0) ? i : -2;
>> -      }
>> -      if (inst->op == TGSI_OPCODE_BGNLOOP)
>> -         depth++;
>> -      else if (inst->op == TGSI_OPCODE_ENDLOOP)
>> -         if (--depth == 0 && last == -2)
>> -            last = i;
>>        assert(depth >= 0);
>>        i++;
>>     }
>> -   assert(last >= -1);
>> -   return last;
>>  }
>>
>> -int
>> -glsl_to_tgsi_visitor::get_last_temp_write(int index)
>> +void
>> +glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
>>  {
>>     int depth = 0; /* loop depth */
>> -   int last = -1; /* index of last instruction that writes to the temporary */
>> -   int i = 0;
>> +   int i = 0, k;
>>     unsigned j;
>>
>>     foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
>>        for (j = 0; j < num_inst_dst_regs(inst); j++) {
>> -         if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index)
>> -            last = (depth == 0) ? i : -2;
>> +         if (inst->dst[j].file == PROGRAM_TEMPORARY)
>> +            last_writes[inst->dst[j].index] = (depth == 0) ? i : -2;
>>        }
>>
>>        if (inst->op == TGSI_OPCODE_BGNLOOP)
>>           depth++;
>>        else if (inst->op == TGSI_OPCODE_ENDLOOP)
>> -         if (--depth == 0 && last == -2)
>> -            last = i;
>> +         if (--depth == 0) {
>> +            for (k = 0; k < this->next_temp; k++) {
>> +               if (last_writes[k] == -2) {
>> +                  last_writes[k] = i;
>> +               }
>> +            }
>> +         }
>>        assert(depth >= 0);
>>        i++;
>>     }
>> -   assert(last >= -1);
>> -   return last;
>>  }
>>
>>  /*
>> @@ -4238,9 +4219,10 @@ glsl_to_tgsi_visitor::merge_registers(void)
>>      * into an array so that we don't have to traverse the instruction list as
>>      * much. */
>>     for (i = 0; i < this->next_temp; i++) {
>> -      last_reads[i] = get_last_temp_read(i);
>> -      first_writes[i] = get_first_temp_write(i);
>> +      last_reads[i] = -1;
>> +      first_writes[i] = -1;
>>     }
>> +   get_last_temp_read_first_temp_write(last_reads, first_writes);
>>
>>     /* Start looking for registers with non-overlapping usages that can be
>>      * merged together. */
>> @@ -4281,15 +4263,21 @@ glsl_to_tgsi_visitor::renumber_registers(void)
>>  {
>>     int i = 0;
>>     int new_index = 0;
>> +   int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
>> +
>> +   for (i = 0; i < this->next_temp; i++)
>> +      first_reads[i] = -1;
>> +   get_first_temp_read(first_reads);
>>
>>     for (i = 0; i < this->next_temp; i++) {
>> -      if (get_first_temp_read(i) < 0) continue;
>> +      if (first_reads[i] < 0) continue;
>>        if (i != new_index)
>>           rename_temp_register(i, new_index);
>>        new_index++;
>>     }
>>
>>     this->next_temp = new_index;
>> +   ralloc_free(first_reads);
>>  }
>>
>>  /**
>> @@ -5764,14 +5752,31 @@ get_mesa_program(struct gl_context *ctx,
>>  #if 0
>>     /* Print out some information (for debugging purposes) used by the
>>      * optimization passes. */
>> -   for (i = 0; i < v->next_temp; i++) {
>> -      int fr = v->get_first_temp_read(i);
>> -      int fw = v->get_first_temp_write(i);
>> -      int lr = v->get_last_temp_read(i);
>> -      int lw = v->get_last_temp_write(i);
>> -
>> -      printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
>> -      assert(fw <= fr);
>> +   {
>> +      int i;
>> +      int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
>> +      int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
>> +      int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
>> +      int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
>> +
>> +      for (i = 0; i < v->next_temp; i++) {
>> +         first_writes[i] = -1;
>> +         first_reads[i] = -1;
>> +         last_writes[i] = -1;
>> +         last_reads[i] = -1;
>> +      }
>> +      v->get_first_temp_read(first_reads);
>> +      v->get_last_temp_read_first_temp_write(last_reads, first_writes);
>> +      v->get_last_temp_write(last_writes);
>> +      for (i = 0; i < v->next_temp; i++)
>> +         printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
>> +                first_writes[i],
>> +                last_reads[i],
>> +                last_writes[i]);
>> +      ralloc_free(first_writes);
>> +      ralloc_free(first_reads);
>> +      ralloc_free(last_writes);
>> +      ralloc_free(last_reads);
>>     }
>>  #endif
>>
>> --
>> 2.4.3
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list