[Mesa-dev] [PATCH] glsl_to_tgsi: Use a single pass to get live_intervals of temp regs
Vincent Lejeune
vljn at ovi.com
Thu Jan 12 14:35:58 PST 2012
The get_*_temp_* functions were parsing the code once for each temp, which is not optimal.
This patch rewrites the get_live_intervals function so that it parses the instruction list only once. The time saved is reused to determine live intervals more precisely in the case of for loops, which should lower register pressure when there are nested loops.
---
src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 127 ++++++++++++++++++++++++++--
1 files changed, 119 insertions(+), 8 deletions(-)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index fc7efd5..aa8e9f4 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -519,10 +519,10 @@ void
variable_store::optimise_access(void)
{
reindex_table = rzalloc_array(mem_ctx, unsigned, num_temps);
- num_temps_in_array = 1;
+ num_temps_in_array = 0;
hash_table_call_foreach(variables,variable_store::reindex_reladdress, this);
reindex_rvalue_reladdressed();
- num_temps = num_temps_in_array + 1;
+ num_temps = num_temps_in_array;
hash_table_call_foreach(variables,variable_store::reindex_non_reladdress, this);
reindex_rvalue();
}
@@ -3865,6 +3865,112 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
return removed;
}
+static void
+update_live_interval(struct glsl_to_tgsi_visitor::interval &interval, unsigned potential_start, unsigned potential_end)
+{
+ if(!potential_start)
+ return;
+ assert(potential_start <= potential_end);
+ if (!interval.first_line) {
+ interval.first_line = potential_start;
+ }
+ interval.first_line = MIN2(interval.first_line, potential_start);
+ interval.last_line = MAX2(interval.last_line, potential_end);
+ return;
+}
+
+static void
+get_live_interval_from_loop_block(unsigned temp_amounts,
+ unsigned first_non_array_temp,
+ struct glsl_to_tgsi_visitor::interval *live_interval,
+ unsigned &current_line,
+ glsl_to_tgsi_instruction *&current_instruction)
+{
+ unsigned block_start = current_line +1;
+
+ void *ctx = ralloc_context(NULL);
+
+ unsigned *first_write = rzalloc_array(ctx, unsigned, temp_amounts);
+ unsigned *first_read = rzalloc_array(ctx, unsigned, temp_amounts);
+ unsigned *last_write = rzalloc_array(ctx, unsigned, temp_amounts);
+ unsigned *last_read = rzalloc_array(ctx, unsigned, temp_amounts);
+
+ while(current_instruction && !current_instruction->is_tail_sentinel()) {
+ current_line++;
+ glsl_to_tgsi_instruction *inst = current_instruction;
+
+ if (inst->dst.file == PROGRAM_TEMPORARY) {
+ int index = inst->dst.index - first_non_array_temp;
+ if (index >= 0) {
+ if( !first_write[index] )
+ first_write[index] = current_line;
+ last_write[index] = current_line;
+ }
+ }
+
+
+
+ for (unsigned j=0; j < num_inst_src_regs(inst->op); j++) {
+ if (inst->src[j].file == PROGRAM_TEMPORARY) {
+ int index = inst->src[j].index - first_non_array_temp;
+ if (index >= 0) {
+ if( !first_read[index])
+ first_read[index] = current_line;
+ last_read[index] = current_line;
+ }
+ }
+ }
+
+ current_instruction = (glsl_to_tgsi_instruction *) current_instruction->get_next();
+
+ if (inst->op == TGSI_OPCODE_ENDLOOP) {
+ break;
+ }
+
+ if (inst->op == TGSI_OPCODE_BGNLOOP) {
+ get_live_interval_from_loop_block(temp_amounts, first_non_array_temp, live_interval, current_line, current_instruction);
+ }
+ }
+
+ unsigned block_end = current_line;
+
+ for (unsigned i = 0; i < temp_amounts; i++) {
+ if (first_read[i] && first_write[i]) {
+ if (first_read[i] <= first_write[i]) {
+ /**
+ * This can occur in a for(i;...) { if (i>0) then read else write}
+ * We need to mark whole block as live.
+ * Otherwise we are sure that temp is always written before being read,
+ * we don't have to extend the live interval to whole block.
+ */
+ update_live_interval(live_interval[i], block_start, block_end);
+
+ }
+ else {
+ update_live_interval(live_interval[i], first_write[i], MAX2(last_read[i], last_write[i]));
+ }
+ continue;
+ } else {
+ if (first_read[i]) {
+ update_live_interval(live_interval[i], block_start, block_end);
+ }
+ else {
+ /**
+ * f_w[i] or f_r[i] (possibly both) is 0, so the corresponding l_w[i] or l_r[i] (possibly both) is 0.
+ * Using MAX2 is valid here;
+ */
+ update_live_interval(live_interval[i], MAX2(first_write[i], first_read[i]), MAX2(last_read[i], last_write[i]));
+ }
+ }
+
+ }
+ ralloc_free(ctx);
+
+ return;
+}
+
+
+
/** Read the indices of the last read and first write to each temp register
* into an array so that we don't have to traverse the instruction list as
* much. Only parse temp not directly addressed. */
@@ -3872,20 +3978,25 @@ struct glsl_to_tgsi_visitor::interval*
glsl_to_tgsi_visitor::get_live_intervals()
{
unsigned total_temps = store.temp_amount();
- unsigned first_non_array_temp = store.temp_array_amount() + 1;
+ unsigned first_non_array_temp = store.temp_array_amount();
unsigned allocable_regs = total_temps - first_non_array_temp;
struct interval *live_interval = rzalloc_array(mem_ctx, struct interval, allocable_regs);
- for (unsigned i=0; i < allocable_regs; i++) {
- live_interval[i].first_line = get_first_temp_write(i + first_non_array_temp);
- live_interval[i].last_line = MAX2(get_last_temp_read(i + first_non_array_temp), live_interval[i].first_line);
- }
+ unsigned current_line = 0;
+
+ glsl_to_tgsi_instruction *first_inst = (glsl_to_tgsi_instruction *) this->instructions.get_head();
+ get_live_interval_from_loop_block(allocable_regs, first_non_array_temp, live_interval, current_line, first_inst);
+
return live_interval;
}
static bool
overlap(const glsl_to_tgsi_visitor::interval &a, const glsl_to_tgsi_visitor::interval &b)
{
+ bool a_is_unused = !a.first_line;
+ bool b_is_unused = !b.first_line;
+ if (a_is_unused || b_is_unused)
+ return false;
bool a_before_b = a.last_line <= b.first_line;
bool b_before_a = b.last_line <= a.first_line;
return !(a_before_b || b_before_a);
@@ -3902,7 +4013,7 @@ unsigned *
glsl_to_tgsi_visitor::regalloc(struct interval *live_interval, unsigned suggested_temp_amount)
{
unsigned total_temps = store.temp_amount();
- unsigned first_non_array_temp = store.temp_array_amount() + 1;
+ unsigned first_non_array_temp = store.temp_array_amount();
unsigned allocable_regs = total_temps - first_non_array_temp;
unsigned i, j;
--
1.7.7
More information about the mesa-dev
mailing list