[Mesa-dev] [PATCH] st_glsl_to_tgsi: track range for writes in a if/else/endif blocks. (v2)
Dave Airlie
airlied at gmail.com
Thu Jun 8 03:23:02 UTC 2017
From: Dave Airlie <airlied at redhat.com>
This overhauls the copy prop and dead code passes to avoid
major CPU overhead in some corner cases trigged by the fp64 patches
v2: fix level not being updated correctly
---
src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 123 +++++++++++++++++++++++++----
1 file changed, 109 insertions(+), 14 deletions(-)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 97f60d3..48d48e8 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4924,6 +4924,78 @@ glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
*
* which allows for dead code elimination on TEMP[1]'s writes.
*/
+#define DEFAULT_LEVELS 8
+
+class per_level_info {
+
+ struct per_level_range {
+ int32_t min_temp_idx;
+ int32_t max_temp_idx;
+ } *lvls;
+
+ void *mem_ctx;
+ int num_alloced_levels;
+ int level;
+ int max_temps;
+public:
+
+ per_level_info(void *mem_ctx_in, int max_temps_in) {
+ num_alloced_levels = DEFAULT_LEVELS;
+ max_temps = max_temps_in;
+ mem_ctx = mem_ctx_in;
+ level = 0;
+ lvls = (struct per_level_range *)reralloc_array_size(mem_ctx,
+ NULL,
+ sizeof(struct per_level_range),
+ num_alloced_levels);
+ lvls[0].min_temp_idx = max_temps;
+ lvls[0].max_temp_idx = 0;
+ }
+
+ ~per_level_info() {
+ ralloc_free(lvls);
+ }
+
+ int get_level(void) {
+ return level;
+ }
+
+ void push_level(void) {
+ level++;
+ if (level >= num_alloced_levels) {
+ num_alloced_levels += 4;
+ lvls = (struct per_level_range *)reralloc_array_size(mem_ctx,
+ (void *)lvls,
+ sizeof(struct per_level_range),
+ num_alloced_levels);
+ }
+ lvls[level].min_temp_idx = max_temps;
+ lvls[level].max_temp_idx = 0;
+ }
+
+ void pop_level(void) {
+ if (lvls[level - 1].min_temp_idx > lvls[level].min_temp_idx)
+ lvls[level - 1].min_temp_idx = lvls[level].min_temp_idx;
+ if (lvls[level - 1].max_temp_idx < lvls[level].max_temp_idx)
+ lvls[level - 1].max_temp_idx = lvls[level].max_temp_idx;
+ level--;
+ }
+
+ void get_level_range(int32_t *min, int32_t *max)
+ {
+ *min = lvls[level].min_temp_idx;
+ *max = lvls[level].max_temp_idx;
+ }
+
+ void update_level_range(int32_t idx)
+ {
+ if (idx < lvls[level].min_temp_idx)
+ lvls[level].min_temp_idx = idx;
+ if ((idx + 1) > lvls[level].max_temp_idx)
+ lvls[level].max_temp_idx = idx + 1;
+ }
+};
+
void
glsl_to_tgsi_visitor::copy_propagate(void)
{
@@ -4931,7 +5003,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
glsl_to_tgsi_instruction *,
this->next_temp * 4);
int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
- int level = 0;
+ class per_level_info lvl_info(mem_ctx, this->next_temp);
+ int min_lvl, max_lvl;
+ int level;
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
assert(inst->dst[0].file != PROGRAM_TEMPORARY
@@ -4955,13 +5029,12 @@ glsl_to_tgsi_visitor::copy_propagate(void)
for (int i = 0; i < 4; i++) {
int src_chan = GET_SWZ(inst->src[r].swizzle, i);
glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
-
if (!copy_chan) {
good = false;
break;
}
- assert(acp_level[acp_base + src_chan] <= level);
+ assert(acp_level[acp_base + src_chan] <= lvl_info.get_level());
if (!first) {
first = copy_chan;
@@ -5006,7 +5079,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
case TGSI_OPCODE_IF:
case TGSI_OPCODE_UIF:
- ++level;
+ lvl_info.push_level();
break;
case TGSI_OPCODE_ENDIF:
@@ -5014,7 +5087,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
/* Clear all channels written inside the block from the ACP, but
* leaving those that were not touched.
*/
- for (int r = 0; r < this->next_temp; r++) {
+ lvl_info.get_level_range(&min_lvl, &max_lvl);
+ level = lvl_info.get_level();
+ for (int r = min_lvl; r < max_lvl; r++) {
for (int c = 0; c < 4; c++) {
if (!acp[4 * r + c])
continue;
@@ -5023,8 +5098,11 @@ glsl_to_tgsi_visitor::copy_propagate(void)
acp[4 * r + c] = NULL;
}
}
- if (inst->op == TGSI_OPCODE_ENDIF)
- --level;
+ lvl_info.pop_level();
+
+ if (inst->op != TGSI_OPCODE_ENDIF)
+ lvl_info.push_level();
+
break;
default:
@@ -5042,7 +5120,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
/* Any output might be written, so no copy propagation
* from outputs across this instruction.
*/
- for (int r = 0; r < this->next_temp; r++) {
+ lvl_info.get_level_range(&min_lvl, &max_lvl);
+ for (int r = min_lvl; r < max_lvl; r++) {
for (int c = 0; c < 4; c++) {
if (!acp[4 * r + c])
continue;
@@ -5062,7 +5141,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
}
/* Clear where it's used as src. */
- for (int r = 0; r < this->next_temp; r++) {
+ lvl_info.get_level_range(&min_lvl, &max_lvl);
+ for (int r = min_lvl; r < max_lvl; r++) {
for (int c = 0; c < 4; c++) {
if (!acp[4 * r + c])
continue;
@@ -5094,12 +5174,15 @@ glsl_to_tgsi_visitor::copy_propagate(void)
!inst->src[0].reladdr2 &&
!inst->src[0].negate &&
!inst->src[0].abs) {
+ level = lvl_info.get_level();
for (int i = 0; i < 4; i++) {
if (inst->dst[0].writemask & (1 << i)) {
acp[4 * inst->dst[0].index + i] = inst;
acp_level[4 * inst->dst[0].index + i] = level;
}
}
+
+ lvl_info.update_level_range(inst->dst[0].index);
}
}
@@ -5130,8 +5213,10 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
glsl_to_tgsi_instruction *,
this->next_temp * 4);
int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
- int level = 0;
+ int level;
int removed = 0;
+ int min_lvl, max_lvl;
+ class per_level_info lvl_info(mem_ctx, this->next_temp);
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
assert(inst->dst[0].file != PROGRAM_TEMPORARY
@@ -5158,7 +5243,9 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
/* Promote the recorded level of all channels written inside the
* preceding if or else block to the level above the if/else block.
*/
- for (int r = 0; r < this->next_temp; r++) {
+ lvl_info.get_level_range(&min_lvl, &max_lvl);
+ level = lvl_info.get_level();
+ for (int r = min_lvl; r < max_lvl; r++) {
for (int c = 0; c < 4; c++) {
if (!writes[4 * r + c])
continue;
@@ -5167,13 +5254,18 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
write_level[4 * r + c] = level-1;
}
}
- if(inst->op == TGSI_OPCODE_ENDIF)
- --level;
+
+ lvl_info.pop_level();
+
+ if(inst->op != TGSI_OPCODE_ENDIF) {
+ lvl_info.push_level();
+ }
break;
case TGSI_OPCODE_IF:
case TGSI_OPCODE_UIF:
- ++level;
+ lvl_info.push_level();
+
/* fallthrough to default case to mark the condition as read */
default:
/* Continuing the block, clear any channels from the write array that
@@ -5227,6 +5319,8 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
if (inst->dst[i].file == PROGRAM_TEMPORARY &&
!inst->dst[i].reladdr) {
+ level = lvl_info.get_level();
+
for (int c = 0; c < 4; c++) {
if (inst->dst[i].writemask & (1 << c)) {
if (writes[4 * inst->dst[i].index + c]) {
@@ -5237,6 +5331,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
}
writes[4 * inst->dst[i].index + c] = inst;
write_level[4 * inst->dst[i].index + c] = level;
+ lvl_info.update_level_range(inst->dst[i].index);
}
}
}
--
2.9.3
More information about the mesa-dev
mailing list