[Mesa-dev] [PATCH] i965/fs: Compact the virtual GRF arrays.
Kenneth Graunke
kenneth at whitecape.org
Thu Nov 1 22:04:50 PDT 2012
During code generation, we create tons of temporary variables, many of
which get immediately killed and are never used again. Later optimization and
analysis passes, such as calculate_live_intervals, loop over all the
virtual GRFs. By compacting the virtual GRF arrays, we can save a lot of overhead.
Reduces compilation time in L4D2's largest fragment shader from 10.2
seconds to 5.2 seconds (50%). Drops compute_live_variables() from
10-12% of another game's startup time to 8%.
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 60 ++++++++++++++++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs.h | 1 +
2 files changed, 61 insertions(+)
Alternatively, with Eric's global FS copy propagation patches applied first,
this reduces the shader compilation from 4.3 seconds to 3.1, a paltry 28%. :)
Also, as for why I put the call inside the optimization loop in fs_visitor::run():
the first call killed roughly 300 dead variables, but the second call (after one
pass of optimization) killed another 400+. Worth it.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 56cb447..777879e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1102,6 +1102,64 @@ fs_visitor::split_virtual_grfs()
this->live_intervals_valid = false;
}
+/**
+ * Remove unused virtual GRFs and compact the virtual_grf_* arrays.
+ *
+ * A virtual GRF is kept only if an instruction in this->instructions names
+ * it as a GRF destination or source.  Survivors keep their relative order,
+ * are renumbered densely from 0, and virtual_grf_count shrinks to match, so
+ * passes that loop over every virtual GRF (calculate_live_intervals) do less.
+ * NOTE(review): GRF numbers stored anywhere but instructions are not patched.
+ */
+void
+fs_visitor::compact_virtual_grfs()
+{
+ /* Mark used virtual GRFs: remap_table[r] is -1 if unused, 0 if referenced. */
+ int remap_table[this->virtual_grf_count];
+ memset(remap_table, -1, sizeof(remap_table));
+
+ foreach_list(node, &this->instructions) {
+ const fs_inst *inst = (const fs_inst *) node;
+
+ if (inst->dst.file == GRF)
+ remap_table[inst->dst.reg] = 0;
+
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file == GRF)
+ remap_table[inst->src[i].reg] = 0;
+ }
+ }
+
+ /* Compact in place (new_index <= i); use/def copied only if intervals valid. */
+ int new_index = 0;
+ for (int i = 0; i < this->virtual_grf_count; i++) {
+ if (remap_table[i] != -1) {
+ remap_table[i] = new_index;
+ virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
+ if (live_intervals_valid) {
+ virtual_grf_use[new_index] = virtual_grf_use[i];
+ virtual_grf_def[new_index] = virtual_grf_def[i];
+ }
+ ++new_index;
+ }
+ }
+
+ this->virtual_grf_count = new_index;
+
+ /* Rewrite every GRF dst/src in the instruction list to its compacted index. */
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *) node;
+
+ if (inst->dst.file == GRF)
+ inst->dst.reg = remap_table[inst->dst.reg];
+
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file == GRF)
+ inst->src[i].reg = remap_table[inst->src[i].reg];
+ }
+ }
+}
+
bool
fs_visitor::remove_dead_constants()
{
@@ -1860,6 +1918,8 @@ fs_visitor::run()
do {
progress = false;
+ compact_virtual_grfs();
+
progress = remove_duplicate_mrf_writes() || progress;
progress = opt_algebraic() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 0b2681d..13662bb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -240,6 +240,7 @@ public:
int choose_spill_reg(struct ra_graph *g);
void spill_reg(int spill_reg);
void split_virtual_grfs();
+ void compact_virtual_grfs();
void setup_pull_constants();
void calculate_live_intervals();
bool opt_algebraic();
--
1.8.0
More information about the mesa-dev
mailing list