[Mesa-dev] [PATCH] i965/fs: Compact the virtual GRF arrays.

Kenneth Graunke kenneth at whitecape.org
Thu Nov 1 22:04:50 PDT 2012


During code generation, we create tons of temporary variables, many of
which get immediately killed and are never used.  Later optimization and
analysis passes, such as compute_live_intervals, loop over all the
virtual GRFs.  By compacting them, we can save a lot of overhead.

Reduces compilation time in L4D2's largest fragment shader from 10.2
seconds to 5.2 seconds (50%).  Drops compute_live_variables() from
10-12% of another game's startup time to 8%.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 60 ++++++++++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h   |  1 +
 2 files changed, 61 insertions(+)

Alternatively, with Eric's global FS copy propagation patches applied first,
this reduces the shader compilation from 4.3 seconds to 3.1, a paltry 28%. :)

Also, as for why I put it in the loop: the first call killed roughly 300 dead
variables, but the second call (after one pass of optimization) killed another
400+.  Worth it.

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 56cb447..777879e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1102,6 +1102,64 @@ fs_visitor::split_virtual_grfs()
    this->live_intervals_valid = false;
 }
 
+/**
+ * Remove unused virtual GRFs and compact the virtual_grf_* arrays.
+ *
+ * During code generation, we create tons of temporary variables, many of
+ * which get immediately killed and are never used again.  Yet, in later
+ * optimization and analysis passes, such as compute_live_intervals, we need
+ * to loop over all the virtual GRFs.  Compacting them can save a lot of
+ * overhead.
+ */
+void
+fs_visitor::compact_virtual_grfs()
+{
+   /* Mark which virtual GRFs are used, and count how many. */
+   int remap_table[this->virtual_grf_count];
+   memset(remap_table, -1, sizeof(remap_table));
+
+   foreach_list(node, &this->instructions) {
+      const fs_inst *inst = (const fs_inst *) node;
+
+      if (inst->dst.file == GRF)
+         remap_table[inst->dst.reg] = 0;
+
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file == GRF)
+            remap_table[inst->src[i].reg] = 0;
+      }
+   }
+
+   /* Compact the GRF arrays. */
+   int new_index = 0;
+   for (int i = 0; i < this->virtual_grf_count; i++) {
+      if (remap_table[i] != -1) {
+         remap_table[i] = new_index;
+         virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
+         if (live_intervals_valid) {
+            virtual_grf_use[new_index] = virtual_grf_use[i];
+            virtual_grf_def[new_index] = virtual_grf_def[i];
+         }
+         ++new_index;
+      }
+   }
+
+   this->virtual_grf_count = new_index;
+
+   /* Patch all the instructions to use the newly renumbered registers */
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *) node;
+
+      if (inst->dst.file == GRF)
+         inst->dst.reg = remap_table[inst->dst.reg];
+
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file == GRF)
+            inst->src[i].reg = remap_table[inst->src[i].reg];
+      }
+   }
+}
+
 bool
 fs_visitor::remove_dead_constants()
 {
@@ -1860,6 +1918,8 @@ fs_visitor::run()
       do {
 	 progress = false;
 
+         compact_virtual_grfs();
+
 	 progress = remove_duplicate_mrf_writes() || progress;
 
 	 progress = opt_algebraic() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 0b2681d..13662bb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -240,6 +240,7 @@ public:
    int choose_spill_reg(struct ra_graph *g);
    void spill_reg(int spill_reg);
    void split_virtual_grfs();
+   void compact_virtual_grfs();
    void setup_pull_constants();
    void calculate_live_intervals();
    bool opt_algebraic();
-- 
1.8.0



More information about the mesa-dev mailing list