<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Fri, May 27, 2016 at 7:06 PM, Francisco Jerez <span dir="ltr"><<a href="mailto:currojerez@riseup.net" target="_blank">currojerez@riseup.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">This requires using a bitset instead of a boolean flag to keep track<br>
of the GRFs we've seen a generating instruction for already.  The<br>
search loop continues until all instructions initializing the value of<br>
the source VGRF have been found, or it is determined that coalescing<br>
is not possible.<br>
---<br>
 src/mesa/drivers/dri/i965/brw_fs.cpp | 46 ++++++++++++++++++++++++++++--------<br>
 1 file changed, 36 insertions(+), 10 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
index 4062ea2..50552cb 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
@@ -2782,6 +2782,20 @@ fs_visitor::opt_redundant_discard_jumps()<br>
    return progress;<br>
 }<br>
<br>
+/**<br>
+ * Compute a bitmask with GRF granularity with a bit set for each GRF starting<br>
+ * from \p r which overlaps the region starting at \p s and spanning \p n GRF<br>
+ * units.<br>
+ */<br>
+static inline unsigned<br>
+mask_relative_to(const fs_reg &r, const fs_reg &s, unsigned n)<br>
+{<br>
+   const int rel_offset = (reg_offset(s) - reg_offset(r)) / REG_SIZE;<br>
+   assert(reg_space(r) == reg_space(s) &&<br>
+          rel_offset >= 0 && rel_offset < int(8 * sizeof(unsigned)));<br></blockquote><div><br></div><div>Isn't that rel_offset < REG_SIZE?  Or do you mean "unsigned-many" bits?<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+   return ((1 << n) - 1) << rel_offset;<br>
+}<br>
+<br>
 bool<br>
 fs_visitor::compute_to_mrf()<br>
 {<br>
@@ -2813,10 +2827,12 @@ fs_visitor::compute_to_mrf()<br>
       if (this->virtual_grf_end[inst->src[0].nr] > ip)<br>
         continue;<br>
<br>
-      /* Found a move of a GRF to a MRF.  Let's see if we can go<br>
-       * rewrite the thing that made this GRF to write into the MRF.<br>
+      /* Found a move of a GRF to a MRF.  Let's see if we can go rewrite the<br>
+       * things that computed the value of all GRFs of the source region.  The<br>
+       * regs_left bitset keeps track of the registers we haven't yet found a<br>
+       * generating instruction for.<br>
        */<br>
-      bool found = false;<br>
+      unsigned regs_left = (1 << inst->regs_read(0)) - 1;<br>
<br>
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {<br>
          if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,<br>
@@ -2855,10 +2871,11 @@ fs_visitor::compute_to_mrf()<br>
               }<br>
            }<br>
<br>
-           if (scan_inst->dst.reg_offset == inst->src[0].reg_offset)<br>
-               found = true;<br>
-<br>
-           break;<br>
+            /* Clear the bits for any registers this instruction overwrites. */<br>
+            regs_left &= ~mask_relative_to(<br>
+               inst->src[0], scan_inst->dst, scan_inst->regs_written);<br>
+            if (!regs_left)<br>
+               break; <br></blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
         }<br>
<br>
         /* We don't handle control flow here.  Most computation of<br>
@@ -2901,14 +2918,21 @@ fs_visitor::compute_to_mrf()<br>
          }<br>
       }<br>
<br>
-      if (!found)<br>
+      if (regs_left)<br>
          continue;<br>
<br>
-      /* Found all generating instructions of our MRF's source value.<br>
+      /* Found all generating instructions of our MRF's source value, so it<br>
+       * should be safe to rewrite them to point to the MRF directly.<br>
        */<br>
+      regs_left = (1 << inst->regs_read(0)) - 1;<br>
+<br>
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {<br>
          if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,<br>
                              inst->src[0], inst->regs_read(0) * REG_SIZE)) {<br>
+            /* Clear the bits for any registers this instruction overwrites. */<br>
+            regs_left &= ~mask_relative_to(<br>
+               inst->src[0], scan_inst->dst, scan_inst->regs_written);<br>
+<br>
             const unsigned rel_offset = (reg_offset(scan_inst->dst) -<br>
                                          reg_offset(inst->src[0])) / REG_SIZE;<br>
<br>
@@ -2935,10 +2959,12 @@ fs_visitor::compute_to_mrf()<br>
             scan_inst->dst.file = MRF;<br>
             scan_inst->dst.reg_offset = 0;<br>
             scan_inst->saturate |= inst->saturate;<br>
-            break;<br>
+            if (!regs_left)<br>
+               break;<br>
          }<br>
       }<br>
<br>
+      assert(!regs_left);<br>
       inst->remove(block);<br>
       progress = true;<br>
    }<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.7.3<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>