<p dir="ltr">I'm not sure whether this is relevant to the i965 fs backend in particular, but to solve this exact problem Briggs, in his thesis [1], recommends that when computing the interference graph you simply ignore the liveness of any register read by a copy instruction at the point immediately after that instruction (see section 2.2.2).</p>
<p dir="ltr">[1] <a href="http://www.cs.utexas.edu/~mckinley/380C/lecs/briggs-thesis-1992.pdf">http://www.cs.utexas.edu/~mckinley/380C/lecs/briggs-thesis-1992.pdf</a></p>
<div class="gmail_quote">On Dec 19, 2013 4:40 PM, "Matt Turner" &lt;<a href="mailto:mattst88@gmail.com">mattst88@gmail.com</a>&gt; wrote:<br type="attribution"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Previously we simply considered two registers whose live ranges<br>
overlapped to interfere. Cases such as<br>
<br>
set A ------<br>
... |<br>
mov B, A -- |<br>
... | B | A<br>
use B -- |<br>
... |<br>
use A ------<br>
<br>
would be considered to interfere, even though B is an unmodified copy of<br>
A whose live range fits wholly inside that of A.<br>
<br>
If no writes to A or B occur between the mov B, A and the use of B then<br>
we can safely coalesce them.<br>
<br>
Instead of removing MOV instructions, we make them NOPs and remove them<br>
at once after the main pass is finished in order to avoid recomputing<br>
live intervals (which are needed to perform the previous step).<br>
<br>
total instructions in shared programs: 1543768 -> 1513077 (-1.99%)<br>
instructions in affected programs: 951563 -> 920872 (-3.23%)<br>
GAINED: 46<br>
LOST: 22<br>
---<br>
src/mesa/drivers/dri/i965/brw_fs.cpp | 69 ++++++++++++++++++++++++++++++++----<br>
1 file changed, 62 insertions(+), 7 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
index e4ac0a5..ad56b87 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
@@ -2273,7 +2273,7 @@ fs_visitor::register_coalesce()<br>
int last_use[MAX_SAMPLER_MESSAGE_SIZE];<br>
int next_ip = 0;<br>
<br>
- foreach_list_safe(node, &this->instructions) {<br>
+ foreach_list(node, &this->instructions) {<br>
fs_inst *inst = (fs_inst *)node;<br>
<br>
int ip = next_ip;<br>
@@ -2299,8 +2299,39 @@ fs_visitor::register_coalesce()<br>
int var_to = live_intervals->var_from_reg(&inst->dst);<br>
<br>
if (live_intervals->vars_interfere(var_from, var_to) &&<br>
- !inst->dst.equals(inst->src[0]))<br>
- continue;<br>
+ !inst->dst.equals(inst->src[0])) {<br>
+<br>
+ if (live_intervals->end[var_to] > live_intervals->end[var_from])<br>
+ continue;<br>
+<br>
+ bool overwritten = false;<br>
+ int scan_ip = -1;<br>
+<br>
+ foreach_list(n, &this->instructions) {<br>
+ fs_inst *scan_inst = (fs_inst *)n;<br>
+ scan_ip++;<br>
+<br>
+ if (scan_inst->is_control_flow()) {<br>
+ overwritten = true;<br>
+ break;<br>
+ }<br>
+<br>
+ if (scan_ip <= live_intervals->start[var_to])<br>
+ continue;<br>
+<br>
+ if (scan_ip > live_intervals->end[var_to])<br>
+ break;<br>
+<br>
+ if (scan_inst->dst.equals(inst->dst) ||<br>
+ scan_inst->dst.equals(inst->src[0])) {<br>
+ overwritten = true;<br>
+ break;<br>
+ }<br>
+ }<br>
+<br>
+ if (overwritten)<br>
+ continue;<br>
+ }<br>
<br>
if (reg_from != inst->src[0].reg) {<br>
reg_from = inst->src[0].reg;<br>
@@ -2342,9 +2373,18 @@ fs_visitor::register_coalesce()<br>
if (live_channels_remaining)<br>
continue;<br>
<br>
+ bool removed = false;<br>
for (int i = 0; i < src_size; i++) {<br>
- if (mov[i])<br>
- mov[i]->remove();<br>
+ if (mov[i]) {<br>
+ removed = true;<br>
+<br>
+ mov[i]->opcode = BRW_OPCODE_NOP;<br>
+ mov[i]->conditional_mod = BRW_CONDITIONAL_NONE;<br>
+ mov[i]->dst = reg_undef;<br>
+ mov[i]->src[0] = reg_undef;<br>
+ mov[i]->src[1] = reg_undef;<br>
+ mov[i]->src[2] = reg_undef;<br>
+ }<br>
}<br>
<br>
foreach_list(node, &this->instructions) {<br>
@@ -2366,11 +2406,26 @@ fs_visitor::register_coalesce()<br>
scan_inst->src[j].reg_offset = reg_to_offset[i];<br>
}<br>
}<br>
-<br>
- progress = true;<br>
}<br>
}<br>
}<br>
+<br>
+ if (removed) {<br>
+ live_intervals->start[var_to] = MIN2(live_intervals->start[var_to],<br>
+ live_intervals->start[var_from]);<br>
+ live_intervals->end[var_to] = MAX2(live_intervals->end[var_to],<br>
+ live_intervals->end[var_from]);<br>
+ reg_from = -1;<br>
+ }<br>
+ }<br>
+<br>
+ foreach_list_safe(node, &this->instructions) {<br>
+ fs_inst *inst = (fs_inst *)node;<br>
+<br>
+ if (inst->opcode == BRW_OPCODE_NOP) {<br>
+ inst->remove();<br>
+ progress = true;<br>
+ }<br>
}<br>
<br>
if (progress)<br>
--<br>
1.8.3.2<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div>