[Mesa-dev] [PATCH 1/2] st/mesa: reduce time spent in calculating temp read/writes

Marek Olšák maraeo at gmail.com
Thu Aug 27 04:57:39 PDT 2015


We could just skip this temporary-register merging for radeonsi, or any
other driver that does its own register allocation (regalloc), because
it's useless there.

Marek

On Thu, Aug 27, 2015 at 5:30 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> The glsl->tgsi converter does some temporary register reduction;
> however, when profiling shader-db this shows up quite high in the profile.
>
> So optimise things to reduce the number of loops we do through
> all the instructions. This drops merge_registers
> from 4-5% of the profile to 1%. I think this can be reduced
> further, possibly by optimising the renumber pass.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 153 +++++++++++++++--------------
>  1 file changed, 79 insertions(+), 74 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 65aae40..e07db11 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -480,10 +480,9 @@ public:
>     void simplify_cmp(void);
>
>     void rename_temp_register(int index, int new_index);
> -   int get_first_temp_read(int index);
> -   int get_first_temp_write(int index);
> -   int get_last_temp_read(int index);
> -   int get_last_temp_write(int index);
> +   void get_first_temp_read(int *first_reads);
> +   void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
> +   void get_last_temp_write(int *last_writes);
>
>     void copy_propagate(void);
>     int eliminate_dead_code(void);
> @@ -3688,8 +3687,8 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
>     }
>  }
>
> -int
> -glsl_to_tgsi_visitor::get_first_temp_read(int index)
> +void
> +glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
>  {
>     int depth = 0; /* loop depth */
>     int loop_start = -1; /* index of the first active BGNLOOP (if any) */
> @@ -3697,15 +3696,15 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
>
>     foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
>        for (j = 0; j < num_inst_src_regs(inst); j++) {
> -         if (inst->src[j].file == PROGRAM_TEMPORARY &&
> -             inst->src[j].index == index) {
> -            return (depth == 0) ? i : loop_start;
> +         if (inst->src[j].file == PROGRAM_TEMPORARY) {
> +            if (first_reads[inst->src[j].index] == -1)
> +                first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start;
>           }
>        }
>        for (j = 0; j < inst->tex_offset_num_offset; j++) {
> -         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
> -             inst->tex_offsets[j].index == index) {
> -            return (depth == 0) ? i : loop_start;
> +         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
> +            if (first_reads[inst->tex_offsets[j].index] == -1)
> +               first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start;
>           }
>        }
>        if (inst->op == TGSI_OPCODE_BGNLOOP) {
> @@ -3718,91 +3717,73 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
>        assert(depth >= 0);
>        i++;
>     }
> -   return -1;
>  }
>
> -int
> -glsl_to_tgsi_visitor::get_first_temp_write(int index)
> +void
> +glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes)
>  {
>     int depth = 0; /* loop depth */
>     int loop_start = -1; /* index of the first active BGNLOOP (if any) */
> -   int i = 0;
> -   unsigned j;
> -
> +   unsigned i = 0, j;
> +   int k;
>     foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
> +      for (j = 0; j < num_inst_src_regs(inst); j++) {
> +         if (inst->src[j].file == PROGRAM_TEMPORARY)
> +            last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
> +      }
>        for (j = 0; j < num_inst_dst_regs(inst); j++) {
> -         if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) {
> -            return (depth == 0) ? i : loop_start;
> -         }
> +         if (inst->dst[j].file == PROGRAM_TEMPORARY)
> +            if (first_writes[inst->dst[j].index] == -1)
> +               first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
> +      }
> +      for (j = 0; j < inst->tex_offset_num_offset; j++) {
> +         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
> +            last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
>        }
>        if (inst->op == TGSI_OPCODE_BGNLOOP) {
>           if(depth++ == 0)
>              loop_start = i;
>        } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
> -         if (--depth == 0)
> +         if (--depth == 0) {
>              loop_start = -1;
> -      }
> -      assert(depth >= 0);
> -      i++;
> -   }
> -   return -1;
> -}
> -
> -int
> -glsl_to_tgsi_visitor::get_last_temp_read(int index)
> -{
> -   int depth = 0; /* loop depth */
> -   int last = -1; /* index of last instruction that reads the temporary */
> -   unsigned i = 0, j;
> -
> -   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
> -      for (j = 0; j < num_inst_src_regs(inst); j++) {
> -         if (inst->src[j].file == PROGRAM_TEMPORARY &&
> -             inst->src[j].index == index) {
> -            last = (depth == 0) ? i : -2;
> +            for (k = 0; k < this->next_temp; k++) {
> +               if (last_reads[k] == -2) {
> +                  last_reads[k] = i;
> +               }
> +            }
>           }
>        }
> -      for (j = 0; j < inst->tex_offset_num_offset; j++) {
> -          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
> -              inst->tex_offsets[j].index == index)
> -              last = (depth == 0) ? i : -2;
> -      }
> -      if (inst->op == TGSI_OPCODE_BGNLOOP)
> -         depth++;
> -      else if (inst->op == TGSI_OPCODE_ENDLOOP)
> -         if (--depth == 0 && last == -2)
> -            last = i;
>        assert(depth >= 0);
>        i++;
>     }
> -   assert(last >= -1);
> -   return last;
>  }
>
> -int
> -glsl_to_tgsi_visitor::get_last_temp_write(int index)
> +void
> +glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
>  {
>     int depth = 0; /* loop depth */
> -   int last = -1; /* index of last instruction that writes to the temporary */
> -   int i = 0;
> +   int i = 0, k;
>     unsigned j;
>
>     foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
>        for (j = 0; j < num_inst_dst_regs(inst); j++) {
> -         if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index)
> -            last = (depth == 0) ? i : -2;
> +         if (inst->dst[j].file == PROGRAM_TEMPORARY)
> +            last_writes[inst->dst[j].index] = (depth == 0) ? i : -2;
>        }
>
>        if (inst->op == TGSI_OPCODE_BGNLOOP)
>           depth++;
>        else if (inst->op == TGSI_OPCODE_ENDLOOP)
> -         if (--depth == 0 && last == -2)
> -            last = i;
> +         if (--depth == 0) {
> +            for (k = 0; k < this->next_temp; k++) {
> +               if (last_writes[k] == -2) {
> +                  last_writes[k] = i;
> +               }
> +            }
> +         }
>        assert(depth >= 0);
>        i++;
>     }
> -   assert(last >= -1);
> -   return last;
>  }
>
>  /*
> @@ -4238,9 +4219,10 @@ glsl_to_tgsi_visitor::merge_registers(void)
>      * into an array so that we don't have to traverse the instruction list as
>      * much. */
>     for (i = 0; i < this->next_temp; i++) {
> -      last_reads[i] = get_last_temp_read(i);
> -      first_writes[i] = get_first_temp_write(i);
> +      last_reads[i] = -1;
> +      first_writes[i] = -1;
>     }
> +   get_last_temp_read_first_temp_write(last_reads, first_writes);
>
>     /* Start looking for registers with non-overlapping usages that can be
>      * merged together. */
> @@ -4281,15 +4263,21 @@ glsl_to_tgsi_visitor::renumber_registers(void)
>  {
>     int i = 0;
>     int new_index = 0;
> +   int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
> +
> +   for (i = 0; i < this->next_temp; i++)
> +      first_reads[i] = -1;
> +   get_first_temp_read(first_reads);
>
>     for (i = 0; i < this->next_temp; i++) {
> -      if (get_first_temp_read(i) < 0) continue;
> +      if (first_reads[i] < 0) continue;
>        if (i != new_index)
>           rename_temp_register(i, new_index);
>        new_index++;
>     }
>
>     this->next_temp = new_index;
> +   ralloc_free(first_reads);
>  }
>
>  /**
> @@ -5764,14 +5752,31 @@ get_mesa_program(struct gl_context *ctx,
>  #if 0
>     /* Print out some information (for debugging purposes) used by the
>      * optimization passes. */
> -   for (i = 0; i < v->next_temp; i++) {
> -      int fr = v->get_first_temp_read(i);
> -      int fw = v->get_first_temp_write(i);
> -      int lr = v->get_last_temp_read(i);
> -      int lw = v->get_last_temp_write(i);
> -
> -      printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
> -      assert(fw <= fr);
> +   {
> +      int i;
> +      int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
> +      int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
> +      int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
> +      int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
> +
> +      for (i = 0; i < v->next_temp; i++) {
> +         first_writes[i] = -1;
> +         first_reads[i] = -1;
> +         last_writes[i] = -1;
> +         last_reads[i] = -1;
> +      }
> +      v->get_first_temp_read(first_reads);
> +      v->get_last_temp_read_first_temp_write(last_reads, first_writes);
> +      v->get_last_temp_write(last_writes);
> +      for (i = 0; i < v->next_temp; i++)
> +         printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
> +                first_writes[i],
> +                last_reads[i],
> +                last_writes[i]);
> +      ralloc_free(first_writes);
> +      ralloc_free(first_reads);
> +      ralloc_free(last_writes);
> +      ralloc_free(last_reads);
>     }
>  #endif
>
> --
> 2.4.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list