[Mesa-dev] [PATCH] st_glsl_to_tgsi: track range for writes in if/else/endif blocks. (v2)

Dave Airlie airlied at gmail.com
Thu Jun 8 03:23:02 UTC 2017


From: Dave Airlie <airlied at redhat.com>

This overhauls the copy prop and dead code passes to avoid
major CPU overhead in some corner cases triggered by the fp64 patches.

v2: fix level not being updated correctly
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 123 +++++++++++++++++++++++++----
 1 file changed, 109 insertions(+), 14 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 97f60d3..48d48e8 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4924,6 +4924,78 @@ glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
  *
  * which allows for dead code elimination on TEMP[1]'s writes.
  */
+#define DEFAULT_LEVELS 8
+
+class per_level_info {
+   /* Stack of per-nesting-level index ranges; callers scan [min, max) only. */
+   struct per_level_range {
+      int32_t min_temp_idx; /* lowest index recorded at this level; max_temps if none */
+      int32_t max_temp_idx; /* one past the highest index recorded; 0 if none */
+   } *lvls;
+
+   void *mem_ctx; /* context handed to reralloc_array_size() for lvls */
+   int num_alloced_levels; /* number of entries currently allocated in lvls */
+   int level; /* index of the current level in lvls; starts at 0 */
+   int max_temps; /* value used to initialise min_temp_idx for an empty range */
+public:
+
+   per_level_info(void *mem_ctx_in, int max_temps_in) { /* allocates DEFAULT_LEVELS entries */
+      num_alloced_levels = DEFAULT_LEVELS;
+      max_temps = max_temps_in;
+      mem_ctx = mem_ctx_in;
+      level = 0;
+      lvls = (struct per_level_range *)reralloc_array_size(mem_ctx,
+                                                           NULL,
+                                                           sizeof(struct per_level_range),
+                                                           num_alloced_levels); /* NOTE(review): result is used without a NULL check */
+      lvls[0].min_temp_idx = max_temps; /* level 0 starts with an empty range... */
+      lvls[0].max_temp_idx = 0; /* ...so a loop over [min, max) visits nothing */
+   }
+
+   ~per_level_info() {
+      ralloc_free(lvls); /* release the level array when the object goes away */
+   }
+
+   int get_level(void) { /* returns the current level index */
+      return level;
+   }
+
+   void push_level(void) { /* enter a new level whose range starts empty */
+      level++;
+      if (level >= num_alloced_levels) { /* no entry left for the new level */
+         num_alloced_levels += 4; /* grow the array in steps of 4 entries */
+         lvls = (struct per_level_range *)reralloc_array_size(mem_ctx,
+                                                              (void *)lvls,
+                                                              sizeof(struct per_level_range),
+                                                              num_alloced_levels); /* NOTE(review): result is used without a NULL check */
+      }
+      lvls[level].min_temp_idx = max_temps; /* same empty-range initialisation as the constructor */
+      lvls[level].max_temp_idx = 0;
+   }
+
+   void pop_level(void) { /* widen the parent's range to cover the current one, then leave */
+      if (lvls[level - 1].min_temp_idx > lvls[level].min_temp_idx)
+         lvls[level - 1].min_temp_idx = lvls[level].min_temp_idx;
+      if (lvls[level - 1].max_temp_idx < lvls[level].max_temp_idx)
+         lvls[level - 1].max_temp_idx = lvls[level].max_temp_idx;
+      level--; /* NOTE(review): nothing here guards against being called at level 0 */
+   }
+
+   void get_level_range(int32_t *min, int32_t *max) /* outputs the current level's min and (exclusive) max */
+   {
+      *min = lvls[level].min_temp_idx;
+      *max = lvls[level].max_temp_idx;
+   }
+
+   void update_level_range(int32_t idx) /* extend the current level's range to include idx */
+   {
+      if (idx < lvls[level].min_temp_idx)
+         lvls[level].min_temp_idx = idx;
+      if ((idx + 1) > lvls[level].max_temp_idx) /* max is stored exclusive, hence idx + 1 */
+         lvls[level].max_temp_idx = idx + 1;
+   }
+};
+
 void
 glsl_to_tgsi_visitor::copy_propagate(void)
 {
@@ -4931,7 +5003,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
                                                   glsl_to_tgsi_instruction *,
                                                   this->next_temp * 4);
    int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
-   int level = 0;
+   class per_level_info lvl_info(mem_ctx, this->next_temp);
+   int min_lvl, max_lvl;
+   int level;
 
    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
       assert(inst->dst[0].file != PROGRAM_TEMPORARY
@@ -4955,13 +5029,12 @@ glsl_to_tgsi_visitor::copy_propagate(void)
          for (int i = 0; i < 4; i++) {
             int src_chan = GET_SWZ(inst->src[r].swizzle, i);
             glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
-
             if (!copy_chan) {
                good = false;
                break;
             }
 
-            assert(acp_level[acp_base + src_chan] <= level);
+            assert(acp_level[acp_base + src_chan] <= lvl_info.get_level());
 
             if (!first) {
                first = copy_chan;
@@ -5006,7 +5079,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
 
       case TGSI_OPCODE_IF:
       case TGSI_OPCODE_UIF:
-         ++level;
+         lvl_info.push_level();
          break;
 
       case TGSI_OPCODE_ENDIF:
@@ -5014,7 +5087,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
          /* Clear all channels written inside the block from the ACP, but
           * leaving those that were not touched.
           */
-         for (int r = 0; r < this->next_temp; r++) {
+         lvl_info.get_level_range(&min_lvl, &max_lvl);
+         level = lvl_info.get_level();
+         for (int r = min_lvl; r < max_lvl; r++) {
             for (int c = 0; c < 4; c++) {
                if (!acp[4 * r + c])
                   continue;
@@ -5023,8 +5098,11 @@ glsl_to_tgsi_visitor::copy_propagate(void)
                   acp[4 * r + c] = NULL;
             }
          }
-         if (inst->op == TGSI_OPCODE_ENDIF)
-            --level;
+         lvl_info.pop_level();
+
+         if (inst->op != TGSI_OPCODE_ENDIF)
+            lvl_info.push_level();
+
          break;
 
       default:
@@ -5042,7 +5120,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
                /* Any output might be written, so no copy propagation
                 * from outputs across this instruction.
                 */
-               for (int r = 0; r < this->next_temp; r++) {
+               lvl_info.get_level_range(&min_lvl, &max_lvl);
+               for (int r = min_lvl; r < max_lvl; r++) {
                   for (int c = 0; c < 4; c++) {
                      if (!acp[4 * r + c])
                         continue;
@@ -5062,7 +5141,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
                }
 
                /* Clear where it's used as src. */
-               for (int r = 0; r < this->next_temp; r++) {
+               lvl_info.get_level_range(&min_lvl, &max_lvl);
+               for (int r = min_lvl; r < max_lvl; r++) {
                   for (int c = 0; c < 4; c++) {
                      if (!acp[4 * r + c])
                         continue;
@@ -5094,12 +5174,15 @@ glsl_to_tgsi_visitor::copy_propagate(void)
           !inst->src[0].reladdr2 &&
           !inst->src[0].negate &&
           !inst->src[0].abs) {
+         level = lvl_info.get_level();
          for (int i = 0; i < 4; i++) {
             if (inst->dst[0].writemask & (1 << i)) {
                acp[4 * inst->dst[0].index + i] = inst;
                acp_level[4 * inst->dst[0].index + i] = level;
             }
          }
+
+         lvl_info.update_level_range(inst->dst[0].index);
       }
    }
 
@@ -5130,8 +5213,10 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
                                                      glsl_to_tgsi_instruction *,
                                                      this->next_temp * 4);
    int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
-   int level = 0;
+   int level;
    int removed = 0;
+   int min_lvl, max_lvl;
+   class per_level_info lvl_info(mem_ctx, this->next_temp);
 
    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
       assert(inst->dst[0].file != PROGRAM_TEMPORARY
@@ -5158,7 +5243,9 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
          /* Promote the recorded level of all channels written inside the
           * preceding if or else block to the level above the if/else block.
           */
-         for (int r = 0; r < this->next_temp; r++) {
+         lvl_info.get_level_range(&min_lvl, &max_lvl);
+         level = lvl_info.get_level();
+         for (int r = min_lvl; r < max_lvl; r++) {
             for (int c = 0; c < 4; c++) {
                if (!writes[4 * r + c])
                   continue;
@@ -5167,13 +5254,18 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
                   write_level[4 * r + c] = level-1;
             }
          }
-         if(inst->op == TGSI_OPCODE_ENDIF)
-            --level;
+
+         lvl_info.pop_level();
+
+         if(inst->op != TGSI_OPCODE_ENDIF) {
+            lvl_info.push_level();
+         }
          break;
 
       case TGSI_OPCODE_IF:
       case TGSI_OPCODE_UIF:
-         ++level;
+         lvl_info.push_level();
+
          /* fallthrough to default case to mark the condition as read */
       default:
          /* Continuing the block, clear any channels from the write array that
@@ -5227,6 +5319,8 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
       for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
          if (inst->dst[i].file == PROGRAM_TEMPORARY &&
              !inst->dst[i].reladdr) {
+            level = lvl_info.get_level();
+
             for (int c = 0; c < 4; c++) {
                if (inst->dst[i].writemask & (1 << c)) {
                   if (writes[4 * inst->dst[i].index + c]) {
@@ -5237,6 +5331,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
                   }
                   writes[4 * inst->dst[i].index + c] = inst;
                   write_level[4 * inst->dst[i].index + c] = level;
+                  lvl_info.update_level_range(inst->dst[i].index);
                }
             }
          }
-- 
2.9.3



More information about the mesa-dev mailing list