[Mesa-dev] [PATCH 20/25] i965/fs: Extend compute_to_mrf() to coalesce VGRFs initialized by multiple single-GRF writes.
Francisco Jerez
currojerez at riseup.net
Sat May 28 02:06:01 UTC 2016
This requires using a bitset instead of a boolean flag to keep track
of the GRFs we've seen a generating instruction for already. The
search loop continues until all instructions initializing the value of
the source VGRF have been found, or it is determined that coalescing
is not possible.
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 46 ++++++++++++++++++++++++++++--------
1 file changed, 36 insertions(+), 10 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 4062ea2..50552cb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2782,6 +2782,20 @@ fs_visitor::opt_redundant_discard_jumps()
return progress;
}
+/**
+ * Compute a bitmask with GRF granularity with a bit set for each GRF starting
+ * from \p r which overlaps the region starting at \p r and spanning \p n GRF
+ * units.
+ */
+static inline unsigned
+mask_relative_to(const fs_reg &r, const fs_reg &s, unsigned n)
+{
+ const int rel_offset = (reg_offset(s) - reg_offset(r)) / REG_SIZE;
+ assert(reg_space(r) == reg_space(s) &&
+ rel_offset >= 0 && rel_offset < int(8 * sizeof(unsigned)));
+ return ((1 << n) - 1) << rel_offset;
+}
+
bool
fs_visitor::compute_to_mrf()
{
@@ -2813,10 +2827,12 @@ fs_visitor::compute_to_mrf()
if (this->virtual_grf_end[inst->src[0].nr] > ip)
continue;
- /* Found a move of a GRF to a MRF. Let's see if we can go
- * rewrite the thing that made this GRF to write into the MRF.
+ /* Found a move of a GRF to a MRF. Let's see if we can go rewrite the
+ * things that computed the value of all GRFs of the source region. The
+ * regs_left bitset keeps track of the registers we haven't yet found a
+ * generating instruction for.
*/
- bool found = false;
+ unsigned regs_left = (1 << inst->regs_read(0)) - 1;
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
@@ -2855,10 +2871,11 @@ fs_visitor::compute_to_mrf()
}
}
- if (scan_inst->dst.reg_offset == inst->src[0].reg_offset)
- found = true;
-
- break;
+ /* Clear the bits for any registers this instruction overwrites. */
+ regs_left &= ~mask_relative_to(
+ inst->src[0], scan_inst->dst, scan_inst->regs_written);
+ if (!regs_left)
+ break;
}
/* We don't handle control flow here. Most computation of
@@ -2901,14 +2918,21 @@ fs_visitor::compute_to_mrf()
}
}
- if (!found)
+ if (regs_left)
continue;
- /* Found all generating instructions of our MRF's source value.
+ /* Found all generating instructions of our MRF's source value, so it
+ * should be safe to rewrite them to point to the MRF directly.
*/
+ regs_left = (1 << inst->regs_read(0)) - 1;
+
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
inst->src[0], inst->regs_read(0) * REG_SIZE)) {
+ /* Clear the bits for any registers this instruction overwrites. */
+ regs_left &= ~mask_relative_to(
+ inst->src[0], scan_inst->dst, scan_inst->regs_written);
+
const unsigned rel_offset = (reg_offset(scan_inst->dst) -
reg_offset(inst->src[0])) / REG_SIZE;
@@ -2935,10 +2959,12 @@ fs_visitor::compute_to_mrf()
scan_inst->dst.file = MRF;
scan_inst->dst.reg_offset = 0;
scan_inst->saturate |= inst->saturate;
- break;
+ if (!regs_left)
+ break;
}
}
+ assert(!regs_left);
inst->remove(block);
progress = true;
}
--
2.7.3
More information about the mesa-dev
mailing list