[Mesa-dev] [PATCH] st_glsl_to_tgsi: track range for writes in a if/else/endif blocks. (v2)

Nicolai Hähnle nhaehnle at gmail.com
Thu Jun 8 08:08:15 UTC 2017


On 08.06.2017 05:23, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
> 
> This overhauls the copy prop and dead code passes to avoid
> major CPU overhead in some corner cases trigged by the fp64 patches
> 
> v2: fix level not being updated correctly
> ---
>   src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 123 +++++++++++++++++++++++++----
>   1 file changed, 109 insertions(+), 14 deletions(-)
> 
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 97f60d3..48d48e8 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -4924,6 +4924,78 @@ glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
>    *
>    * which allows for dead code elimination on TEMP[1]'s writes.
>    */
> +#define DEFAULT_LEVELS 8
> +
> +class per_level_info {
> +
> +   struct per_level_range {
> +      int32_t min_temp_idx;
> +      int32_t max_temp_idx;
> +   } *lvls;
> +
> +   void *mem_ctx;
> +   int num_alloced_levels;
> +   int level;
> +   int max_temps;
> +public:
> +
> +   per_level_info(void *mem_ctx_in, int max_temps_in) {
> +      num_alloced_levels = DEFAULT_LEVELS;
> +      max_temps = max_temps_in;
> +      mem_ctx = mem_ctx_in;
> +      level = 0;
> +      lvls = (struct per_level_range *)reralloc_array_size(mem_ctx,
> +                                                           NULL,
> +                                                           sizeof(struct per_level_range),
> +                                                           num_alloced_levels);
> +      lvls[0].min_temp_idx = max_temps;

I believe you can just use INT_MAX here.


> +      lvls[0].max_temp_idx = 0;
> +   }
> +
> +   ~per_level_info() {
> +      ralloc_free(lvls);
> +   }
> +
> +   int get_level(void) {
> +      return level;
> +   }
> +
> +   void push_level(void) {
> +      level++;
> +      if (level >= num_alloced_levels) {
> +         num_alloced_levels += 4;
> +         lvls = (struct per_level_range *)reralloc_array_size(mem_ctx,
> +                                                              (void *)lvls,
> +                                                              sizeof(struct per_level_range),
> +                                                              num_alloced_levels);
> +      }
> +      lvls[level].min_temp_idx = max_temps;
> +      lvls[level].max_temp_idx = 0;
> +   }
> +
> +   void pop_level(void) {
> +      if (lvls[level - 1].min_temp_idx > lvls[level].min_temp_idx)
> +         lvls[level - 1].min_temp_idx = lvls[level].min_temp_idx;
> +      if (lvls[level - 1].max_temp_idx < lvls[level].max_temp_idx)
> +         lvls[level - 1].max_temp_idx = lvls[level].max_temp_idx;
> +      level--;
> +   }
> +
> +   void get_level_range(int32_t *min, int32_t *max)
> +   {
> +      *min = lvls[level].min_temp_idx;
> +      *max = lvls[level].max_temp_idx;
> +   }
> +
> +   void update_level_range(int32_t idx)
> +   {
> +      if (idx < lvls[level].min_temp_idx)
> +         lvls[level].min_temp_idx = idx;
> +      if ((idx + 1) > lvls[level].max_temp_idx)
> +         lvls[level].max_temp_idx = idx + 1;
> +   }
> +};
> +
>   void
>   glsl_to_tgsi_visitor::copy_propagate(void)
>   {
> @@ -4931,7 +5003,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>                                                     glsl_to_tgsi_instruction *,
>                                                     this->next_temp * 4);
>      int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
> -   int level = 0;
> +   class per_level_info lvl_info(mem_ctx, this->next_temp);

class is unnecessary here.


> +   int min_lvl, max_lvl;
> +   int level;

I'd feel more comfortable about the code if you replaced all occurrences 
of level by lvl_info.get_level(), to completely eliminate the chance of 
getting stale data. Same in eliminate_dead_code.


>   
>      foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
>         assert(inst->dst[0].file != PROGRAM_TEMPORARY
> @@ -4955,13 +5029,12 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>            for (int i = 0; i < 4; i++) {
>               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
>               glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
> -
>               if (!copy_chan) {
>                  good = false;
>                  break;
>               }
>   
> -            assert(acp_level[acp_base + src_chan] <= level);
> +            assert(acp_level[acp_base + src_chan] <= lvl_info.get_level());
>   
>               if (!first) {
>                  first = copy_chan;
> @@ -5006,7 +5079,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>   
>         case TGSI_OPCODE_IF:
>         case TGSI_OPCODE_UIF:
> -         ++level;
> +         lvl_info.push_level();
>            break;
>   
>         case TGSI_OPCODE_ENDIF:
> @@ -5014,7 +5087,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>            /* Clear all channels written inside the block from the ACP, but
>             * leaving those that were not touched.
>             */
> -         for (int r = 0; r < this->next_temp; r++) {
> +         lvl_info.get_level_range(&min_lvl, &max_lvl);
> +         level = lvl_info.get_level();
> +         for (int r = min_lvl; r < max_lvl; r++) {
>               for (int c = 0; c < 4; c++) {
>                  if (!acp[4 * r + c])
>                     continue;
> @@ -5023,8 +5098,11 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>                     acp[4 * r + c] = NULL;
>               }
>            }
> -         if (inst->op == TGSI_OPCODE_ENDIF)
> -            --level;
> +         lvl_info.pop_level();
> +
> +         if (inst->op != TGSI_OPCODE_ENDIF)
> +            lvl_info.push_level();
> +
>            break;
>   
>         default:
> @@ -5042,7 +5120,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>                  /* Any output might be written, so no copy propagation
>                   * from outputs across this instruction.
>                   */
> -               for (int r = 0; r < this->next_temp; r++) {
> +               lvl_info.get_level_range(&min_lvl, &max_lvl);
> +               for (int r = min_lvl; r < max_lvl; r++) {
>                     for (int c = 0; c < 4; c++) {
>                        if (!acp[4 * r + c])
>                           continue;
> @@ -5062,7 +5141,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>                  }
>   
>                  /* Clear where it's used as src. */
> -               for (int r = 0; r < this->next_temp; r++) {
> +               lvl_info.get_level_range(&min_lvl, &max_lvl);
> +               for (int r = min_lvl; r < max_lvl; r++) {
>                     for (int c = 0; c < 4; c++) {
>                        if (!acp[4 * r + c])
>                           continue;
> @@ -5094,12 +5174,15 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>             !inst->src[0].reladdr2 &&
>             !inst->src[0].negate &&
>             !inst->src[0].abs) {
> +         level = lvl_info.get_level();
>            for (int i = 0; i < 4; i++) {
>               if (inst->dst[0].writemask & (1 << i)) {
>                  acp[4 * inst->dst[0].index + i] = inst;
>                  acp_level[4 * inst->dst[0].index + i] = level;
>               }
>            }
> +
> +         lvl_info.update_level_range(inst->dst[0].index);
>         }
>      }
>   
> @@ -5130,8 +5213,10 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
>                                                        glsl_to_tgsi_instruction *,
>                                                        this->next_temp * 4);
>      int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
> -   int level = 0;
> +   int level;
>      int removed = 0;
> +   int min_lvl, max_lvl;
> +   class per_level_info lvl_info(mem_ctx, this->next_temp);

class is unnecessary.

Apart from the comments above, the patch is

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>


>   
>      foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
>         assert(inst->dst[0].file != PROGRAM_TEMPORARY
> @@ -5158,7 +5243,9 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
>            /* Promote the recorded level of all channels written inside the
>             * preceding if or else block to the level above the if/else block.
>             */
> -         for (int r = 0; r < this->next_temp; r++) {
> +         lvl_info.get_level_range(&min_lvl, &max_lvl);
> +         level = lvl_info.get_level();
> +         for (int r = min_lvl; r < max_lvl; r++) {
>               for (int c = 0; c < 4; c++) {
>                  if (!writes[4 * r + c])
>                     continue;
> @@ -5167,13 +5254,18 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
>                     write_level[4 * r + c] = level-1;
>               }
>            }
> -         if(inst->op == TGSI_OPCODE_ENDIF)
> -            --level;
> +
> +         lvl_info.pop_level();
> +
> +         if(inst->op != TGSI_OPCODE_ENDIF) {
> +            lvl_info.push_level();
> +         }
>            break;
>   
>         case TGSI_OPCODE_IF:
>         case TGSI_OPCODE_UIF:
> -         ++level;
> +         lvl_info.push_level();
> +
>            /* fallthrough to default case to mark the condition as read */
>         default:
>            /* Continuing the block, clear any channels from the write array that
> @@ -5227,6 +5319,8 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
>         for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
>            if (inst->dst[i].file == PROGRAM_TEMPORARY &&
>                !inst->dst[i].reladdr) {
> +            level = lvl_info.get_level();
> +
>               for (int c = 0; c < 4; c++) {
>                  if (inst->dst[i].writemask & (1 << c)) {
>                     if (writes[4 * inst->dst[i].index + c]) {
> @@ -5237,6 +5331,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
>                     }
>                     writes[4 * inst->dst[i].index + c] = inst;
>                     write_level[4 * inst->dst[i].index + c] = level;
> +                  lvl_info.update_level_range(inst->dst[i].index);
>                  }
>               }
>            }
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list