[Mesa-dev] [PATCH] st_glsl_to_tgsi: track range for writes in a if/else/endif blocks. (v2)

Timothy Arceri tarceri at itsqueeze.com
Thu Jun 8 10:07:24 UTC 2017



On 08/06/17 18:08, Nicolai Hähnle wrote:
> On 08.06.2017 05:23, Dave Airlie wrote:
>> From: Dave Airlie <airlied at redhat.com>
>>
>> This overhauls the copy prop and dead code passes to avoid
>> major CPU overhead in some corner cases trigged by the fp64 patches
>>
>> v2: fix level not being updated correctly
>> ---
>>   src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 123 
>> +++++++++++++++++++++++++----
>>   1 file changed, 109 insertions(+), 14 deletions(-)
>>
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
>> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> index 97f60d3..48d48e8 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> @@ -4924,6 +4924,78 @@ glsl_to_tgsi_visitor::get_last_temp_write(int 
>> *last_writes)
>>    *
>>    * which allows for dead code elimination on TEMP[1]'s writes.
>>    */
>> +#define DEFAULT_LEVELS 8
>> +
>> +class per_level_info {
>> +
>> +   struct per_level_range {
>> +      int32_t min_temp_idx;
>> +      int32_t max_temp_idx;
>> +   } *lvls;
>> +
>> +   void *mem_ctx;
>> +   int num_alloced_levels;
>> +   int level;
>> +   int max_temps;
>> +public:
>> +
>> +   per_level_info(void *mem_ctx_in, int max_temps_in) {
>> +      num_alloced_levels = DEFAULT_LEVELS;
>> +      max_temps = max_temps_in;
>> +      mem_ctx = mem_ctx_in;
>> +      level = 0;
>> +      lvls = (struct per_level_range *)reralloc_array_size(mem_ctx,
>> +                                                           NULL,
>> +                                                           
>> sizeof(struct per_level_range),
>> +                                                           
>> num_alloced_levels);
>> +      lvls[0].min_temp_idx = max_temps;
> 
> I believe you can just use INT_MAX here.
> 
> 
>> +      lvls[0].max_temp_idx = 0;
>> +   }
>> +
>> +   ~per_level_info() {
>> +      ralloc_free(lvls);
>> +   }
>> +
>> +   int get_level(void) {
>> +      return level;
>> +   }
>> +
>> +   void push_level(void) {
>> +      level++;
>> +      if (level >= num_alloced_levels) {
>> +         num_alloced_levels += 4;
>> +         lvls = (struct per_level_range *)reralloc_array_size(mem_ctx,
>> +                                                              (void 
>> *)lvls,
>> +                                                              
>> sizeof(struct per_level_range),
>> +                                                              
>> num_alloced_levels);
>> +      }
>> +      lvls[level].min_temp_idx = max_temps;
>> +      lvls[level].max_temp_idx = 0;
>> +   }
>> +
>> +   void pop_level(void) {
>> +      if (lvls[level - 1].min_temp_idx > lvls[level].min_temp_idx)
>> +         lvls[level - 1].min_temp_idx = lvls[level].min_temp_idx;
>> +      if (lvls[level - 1].max_temp_idx < lvls[level].max_temp_idx)
>> +         lvls[level - 1].max_temp_idx = lvls[level].max_temp_idx;
>> +      level--;
>> +   }
>> +
>> +   void get_level_range(int32_t *min, int32_t *max)
>> +   {
>> +      *min = lvls[level].min_temp_idx;
>> +      *max = lvls[level].max_temp_idx;
>> +   }
>> +
>> +   void update_level_range(int32_t idx)
>> +   {
>> +      if (idx < lvls[level].min_temp_idx)
>> +         lvls[level].min_temp_idx = idx;
>> +      if ((idx + 1) > lvls[level].max_temp_idx)
>> +         lvls[level].max_temp_idx = idx + 1;
>> +   }
>> +};
>> +
>>   void
>>   glsl_to_tgsi_visitor::copy_propagate(void)
>>   {
>> @@ -4931,7 +5003,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>>                                                     
>> glsl_to_tgsi_instruction *,
>>                                                     this->next_temp * 4);
>>      int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
>> -   int level = 0;
>> +   class per_level_info lvl_info(mem_ctx, this->next_temp);
> 
> class is unnecessary here.
> 
> 
>> +   int min_lvl, max_lvl;
>> +   int level;
> 
> I'd feel more comfortable about the code if you replaced all occurrences 
> of level by lvl_info.get_level(), to completely eliminate the chance of 
> getting stale data. Same in eliminate_dead_code.
> 
> 
>>      foreach_in_list(glsl_to_tgsi_instruction, inst, 
>> &this->instructions) {
>>         assert(inst->dst[0].file != PROGRAM_TEMPORARY
>> @@ -4955,13 +5029,12 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>>            for (int i = 0; i < 4; i++) {
>>               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
>>               glsl_to_tgsi_instruction *copy_chan = acp[acp_base + 
>> src_chan];
>> -
>>               if (!copy_chan) {
>>                  good = false;
>>                  break;
>>               }
>> -            assert(acp_level[acp_base + src_chan] <= level);
>> +            assert(acp_level[acp_base + src_chan] <= 
>> lvl_info.get_level());
>>               if (!first) {
>>                  first = copy_chan;
>> @@ -5006,7 +5079,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>>         case TGSI_OPCODE_IF:
>>         case TGSI_OPCODE_UIF:
>> -         ++level;
>> +         lvl_info.push_level();
>>            break;
>>         case TGSI_OPCODE_ENDIF:
>> @@ -5014,7 +5087,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>>            /* Clear all channels written inside the block from the 
>> ACP, but
>>             * leaving those that were not touched.
>>             */
>> -         for (int r = 0; r < this->next_temp; r++) {
>> +         lvl_info.get_level_range(&min_lvl, &max_lvl);
>> +         level = lvl_info.get_level();
>> +         for (int r = min_lvl; r < max_lvl; r++) {
>>               for (int c = 0; c < 4; c++) {
>>                  if (!acp[4 * r + c])
>>                     continue;
>> @@ -5023,8 +5098,11 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>>                     acp[4 * r + c] = NULL;
>>               }
>>            }
>> -         if (inst->op == TGSI_OPCODE_ENDIF)
>> -            --level;
>> +         lvl_info.pop_level();
>> +
>> +         if (inst->op != TGSI_OPCODE_ENDIF)
>> +            lvl_info.push_level();
>> +
>>            break;
>>         default:
>> @@ -5042,7 +5120,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>>                  /* Any output might be written, so no copy propagation
>>                   * from outputs across this instruction.
>>                   */
>> -               for (int r = 0; r < this->next_temp; r++) {
>> +               lvl_info.get_level_range(&min_lvl, &max_lvl);
>> +               for (int r = min_lvl; r < max_lvl; r++) {
>>                     for (int c = 0; c < 4; c++) {
>>                        if (!acp[4 * r + c])
>>                           continue;
>> @@ -5062,7 +5141,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>>                  }
>>                  /* Clear where it's used as src. */
>> -               for (int r = 0; r < this->next_temp; r++) {
>> +               lvl_info.get_level_range(&min_lvl, &max_lvl);
>> +               for (int r = min_lvl; r < max_lvl; r++) {
>>                     for (int c = 0; c < 4; c++) {
>>                        if (!acp[4 * r + c])
>>                           continue;
>> @@ -5094,12 +5174,15 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>>             !inst->src[0].reladdr2 &&
>>             !inst->src[0].negate &&
>>             !inst->src[0].abs) {
>> +         level = lvl_info.get_level();
>>            for (int i = 0; i < 4; i++) {
>>               if (inst->dst[0].writemask & (1 << i)) {
>>                  acp[4 * inst->dst[0].index + i] = inst;
>>                  acp_level[4 * inst->dst[0].index + i] = level;
>>               }
>>            }
>> +
>> +         lvl_info.update_level_range(inst->dst[0].index);
>>         }
>>      }
>> @@ -5130,8 +5213,10 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
>>                                                        
>> glsl_to_tgsi_instruction *,
>>                                                        this->next_temp 
>> * 4);
>>      int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
>> -   int level = 0;
>> +   int level;
>>      int removed = 0;
>> +   int min_lvl, max_lvl;
>> +   class per_level_info lvl_info(mem_ctx, this->next_temp);
> 
> class is unnecessary.
> 
> Apart from the comments above, the patch is
> 
> Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

Just FYI as mentioned to Dave on IRC there is something odd going on 
with some Source shaders after this commit. Some dead code inside some 
if blocks is not longer getting optimised away. The strange part is this 
is due to the copy prop changes rather than the dead code changes.

> 
> 
>>      foreach_in_list(glsl_to_tgsi_instruction, inst, 
>> &this->instructions) {
>>         assert(inst->dst[0].file != PROGRAM_TEMPORARY
>> @@ -5158,7 +5243,9 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
>>            /* Promote the recorded level of all channels written 
>> inside the
>>             * preceding if or else block to the level above the 
>> if/else block.
>>             */
>> -         for (int r = 0; r < this->next_temp; r++) {
>> +         lvl_info.get_level_range(&min_lvl, &max_lvl);
>> +         level = lvl_info.get_level();
>> +         for (int r = min_lvl; r < max_lvl; r++) {
>>               for (int c = 0; c < 4; c++) {
>>                  if (!writes[4 * r + c])
>>                     continue;
>> @@ -5167,13 +5254,18 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
>>                     write_level[4 * r + c] = level-1;
>>               }
>>            }
>> -         if(inst->op == TGSI_OPCODE_ENDIF)
>> -            --level;
>> +
>> +         lvl_info.pop_level();
>> +
>> +         if(inst->op != TGSI_OPCODE_ENDIF) {
>> +            lvl_info.push_level();
>> +         }
>>            break;
>>         case TGSI_OPCODE_IF:
>>         case TGSI_OPCODE_UIF:
>> -         ++level;
>> +         lvl_info.push_level();
>> +
>>            /* fallthrough to default case to mark the condition as 
>> read */
>>         default:
>>            /* Continuing the block, clear any channels from the write 
>> array that
>> @@ -5227,6 +5319,8 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
>>         for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
>>            if (inst->dst[i].file == PROGRAM_TEMPORARY &&
>>                !inst->dst[i].reladdr) {
>> +            level = lvl_info.get_level();
>> +
>>               for (int c = 0; c < 4; c++) {
>>                  if (inst->dst[i].writemask & (1 << c)) {
>>                     if (writes[4 * inst->dst[i].index + c]) {
>> @@ -5237,6 +5331,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
>>                     }
>>                     writes[4 * inst->dst[i].index + c] = inst;
>>                     write_level[4 * inst->dst[i].index + c] = level;
>> +                  lvl_info.update_level_range(inst->dst[i].index);
>>                  }
>>               }
>>            }
>>
> 
> 


More information about the mesa-dev mailing list