[Mesa-dev] [PATCH 1/2] i965/fs: Make virtual grf live intervals actually cover their used range.
Eric Anholt
eric at anholt.net
Tue Apr 30 17:21:21 PDT 2013
Previously, we would sometimes not consider a write to a register to
extend the end of the interval, nor would we consider a read before a
write to extend the start. This made for a bunch of complicated logic
related to how to treat the results when dead code might be present.
Instead, just extend the interval and fix dead code elimination to know
how to remove it.
Interestingly, this actually results in a tiny bit more optimization:
total instructions in shared programs: 1391220 -> 1390799 (-0.03%)
instructions in affected programs: 14037 -> 13616 (-3.00%)
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 21 +++---
src/mesa/drivers/dri/i965/brw_fs.h | 4 +-
src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 +-
.../drivers/dri/i965/brw_fs_live_variables.cpp | 76 ++++++----------------
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 3 +-
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 +-
6 files changed, 38 insertions(+), 72 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a8610ee..0821c05 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1449,8 +1449,8 @@ fs_visitor::compact_virtual_grfs()
remap_table[i] = new_index;
virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
if (live_intervals_valid) {
- virtual_grf_use[new_index] = virtual_grf_use[i];
- virtual_grf_def[new_index] = virtual_grf_def[i];
+ virtual_grf_start[new_index] = virtual_grf_start[i];
+ virtual_grf_end[new_index] = virtual_grf_end[i];
}
++new_index;
}
@@ -1764,10 +1764,8 @@ fs_visitor::opt_algebraic()
}
/**
- * Must be called after calculate_live_intervales() to remove unused
- * writes to registers -- register allocation will fail otherwise
- * because something deffed but not used won't be considered to
- * interfere with other regs.
+ * Removes any instructions writing a VGRF where that VGRF is not used by any
+ * later instruction.
*/
bool
fs_visitor::dead_code_eliminate()
@@ -1780,9 +1778,12 @@ fs_visitor::dead_code_eliminate()
foreach_list_safe(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
- if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
- inst->remove();
- progress = true;
+ if (inst->dst.file == GRF) {
+ assert(this->virtual_grf_end[inst->dst.reg] >= pc);
+ if (this->virtual_grf_end[inst->dst.reg] == pc) {
+ inst->remove();
+ progress = true;
+ }
}
pc++;
@@ -2194,7 +2195,7 @@ fs_visitor::compute_to_mrf()
/* Can't compute-to-MRF this GRF if someone else was going to
* read it later.
*/
- if (this->virtual_grf_use[inst->src[0].reg] > ip)
+ if (this->virtual_grf_end[inst->src[0].reg] > ip)
continue;
/* Found a move of a GRF to a MRF. Let's see if we can go
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index c9c9856..3df2ce1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -434,8 +434,8 @@ public:
int *virtual_grf_sizes;
int virtual_grf_count;
int virtual_grf_array_size;
- int *virtual_grf_def;
- int *virtual_grf_use;
+ int *virtual_grf_start;
+ int *virtual_grf_end;
bool live_intervals_valid;
/* This is the map from UNIFORM hw_reg + reg_offset as generated by
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index b5c2200..9b60d9b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -194,7 +194,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
/* Kill any AEB entries using registers that don't get reused any
* more -- a sure sign they'll fail operands_match().
*/
- if (src_reg->file == GRF && virtual_grf_use[src_reg->reg] < ip) {
+ if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) {
entry->remove();
ralloc_free(entry);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index fdcfac6..dd8923e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -167,16 +167,16 @@ fs_visitor::calculate_live_intervals()
if (this->live_intervals_valid)
return;
- int *def = ralloc_array(mem_ctx, int, num_vars);
- int *use = ralloc_array(mem_ctx, int, num_vars);
- ralloc_free(this->virtual_grf_def);
- ralloc_free(this->virtual_grf_use);
- this->virtual_grf_def = def;
- this->virtual_grf_use = use;
+ int *start = ralloc_array(mem_ctx, int, num_vars);
+ int *end = ralloc_array(mem_ctx, int, num_vars);
+ ralloc_free(this->virtual_grf_start);
+ ralloc_free(this->virtual_grf_end);
+ this->virtual_grf_start = start;
+ this->virtual_grf_end = end;
for (int i = 0; i < num_vars; i++) {
- def[i] = MAX_INSTRUCTION;
- use[i] = -1;
+ start[i] = MAX_INSTRUCTION;
+ end[i] = -1;
}
/* Start by setting up the intervals with no knowledge of control
@@ -189,8 +189,7 @@ fs_visitor::calculate_live_intervals()
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
int reg = inst->src[i].reg;
-
- use[reg] = ip;
+ int end_ip = ip;
/* In most cases, a register can be written over safely by the
* same instruction that is its last use. For a single
@@ -220,15 +219,19 @@ fs_visitor::calculate_live_intervals()
if (dispatch_width == 16 && (inst->src[i].smear ||
(this->pixel_x.reg == reg ||
this->pixel_y.reg == reg))) {
- use[reg]++;
+ end_ip++;
}
+
+ start[reg] = MIN2(start[reg], ip);
+ end[reg] = end_ip;
}
}
if (inst->dst.file == GRF) {
int reg = inst->dst.reg;
- def[reg] = MIN2(def[reg], ip);
+ start[reg] = MIN2(start[reg], ip);
+ end[reg] = ip;
}
ip++;
@@ -241,60 +244,23 @@ fs_visitor::calculate_live_intervals()
for (int b = 0; b < cfg.num_blocks; b++) {
for (int i = 0; i < num_vars; i++) {
if (BITSET_TEST(livevars.bd[b].livein, i)) {
- def[i] = MIN2(def[i], cfg.blocks[b]->start_ip);
- use[i] = MAX2(use[i], cfg.blocks[b]->start_ip);
+ start[i] = MIN2(start[i], cfg.blocks[b]->start_ip);
+ end[i] = MAX2(end[i], cfg.blocks[b]->start_ip);
}
if (BITSET_TEST(livevars.bd[b].liveout, i)) {
- def[i] = MIN2(def[i], cfg.blocks[b]->end_ip);
- use[i] = MAX2(use[i], cfg.blocks[b]->end_ip);
+ start[i] = MIN2(start[i], cfg.blocks[b]->end_ip);
+ end[i] = MAX2(end[i], cfg.blocks[b]->end_ip);
}
}
}
this->live_intervals_valid = true;
-
- /* Note in the non-control-flow code above, that we only take def[] as the
- * first store, and use[] as the last use. We use this in dead code
- * elimination, to determine when a store never gets used. However, we
- * also use these arrays to answer the virtual_grf_interferes() question
- * (live interval analysis), which is used for register coalescing and
- * register allocation.
- *
- * So, there's a conflict over what the array should mean: if use[]
- * considers a def after the last use, then the dead code elimination pass
- * never does anything (and it's an important pass!). But if we don't
- * include dead code, then virtual_grf_interferes() lies and we'll do
- * horrible things like coalesce the register that is dead-code-written
- * into another register that was live across the dead write (causing the
- * use of the second register to take the dead write's source value instead
- * of the coalesced MOV's source value).
- *
- * To resolve the conflict, immediately after calculating live intervals,
- * detect dead code, nuke it, and if we changed anything, calculate again
- * before returning to the caller. Now we happen to produce def[] and
- * use[] arrays that will work for virtual_grf_interferes().
- */
- if (dead_code_eliminate())
- calculate_live_intervals();
}
bool
fs_visitor::virtual_grf_interferes(int a, int b)
{
- int a_def = this->virtual_grf_def[a], a_use = this->virtual_grf_use[a];
- int b_def = this->virtual_grf_def[b], b_use = this->virtual_grf_use[b];
-
- /* If there's dead code (def but not use), it would break our test
- * unless we consider it used.
- */
- if ((a_use == -1 && a_def != MAX_INSTRUCTION) ||
- (b_use == -1 && b_def != MAX_INSTRUCTION)) {
- return true;
- }
-
- int start = MAX2(a_def, b_def);
- int end = MIN2(a_use, b_use);
-
- return start < end;
+ return !(virtual_grf_end[a] <= virtual_grf_start[b] ||
+ virtual_grf_end[b] <= virtual_grf_start[a]);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index b9b0303..3b2197f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -316,8 +316,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
* in order to not have to worry about the uniform issue described in
* calculate_live_intervals().
*/
- if (this->virtual_grf_def[j] <= payload_last_use_ip[i] ||
- this->virtual_grf_use[j] <= payload_last_use_ip[i]) {
+ if (this->virtual_grf_start[j] <= payload_last_use_ip[i]) {
ra_add_node_interference(g, first_payload_node + i, j);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 55ae689..a61363c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2408,8 +2408,8 @@ fs_visitor::fs_visitor(struct brw_context *brw,
this->virtual_grf_sizes = NULL;
this->virtual_grf_count = 0;
this->virtual_grf_array_size = 0;
- this->virtual_grf_def = NULL;
- this->virtual_grf_use = NULL;
+ this->virtual_grf_start = NULL;
+ this->virtual_grf_end = NULL;
this->live_intervals_valid = false;
this->force_uncompressed_stack = 0;
--
1.8.3.rc0
More information about the mesa-dev
mailing list