[Mesa-dev] [PATCH 1/3] i965/fs: Reimplement dead_code_elimination().

Matt Turner mattst88 at gmail.com
Mon Apr 14 15:00:33 PDT 2014


total instructions in shared programs: 1653399 -> 1651790 (-0.10%)
instructions in affected programs:     92157 -> 90548 (-1.75%)
GAINED:                                2
LOST:                                  2

Also significantly reduces the number of optimization loop iterations:

total loop iterations in shared programs: 39724 -> 31651 (-20.32%)
loop iterations in affected programs:     21617 -> 13544 (-37.35%)

Including some great pathological cases, like 29 -> 3 in Strike Suit
Zero and 24 -> 3 in Dota2.
---
Moving the placement of the dead_code_elimination() call in the optimization
loop avoids a butterfly-effect instruction count regression. Its placement
is immaterial, because dead_code_eliminate_local() is going away too.

 src/mesa/drivers/dri/i965/Makefile.sources         |   1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp               |  57 +----------
 .../dri/i965/brw_fs_dead_code_eliminate.cpp        | 109 +++++++++++++++++++++
 3 files changed, 111 insertions(+), 56 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 8205fe9..836c62b 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -58,6 +58,7 @@ i965_FILES = \
 	brw_fs_channel_expressions.cpp \
 	brw_fs_copy_propagation.cpp \
 	brw_fs_cse.cpp \
+	brw_fs_dead_code_eliminate.cpp \
 	brw_fs_fp.cpp \
 	brw_fs_generator.cpp \
 	brw_fs_live_variables.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 85a5463..c723bf0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2085,61 +2085,6 @@ fs_visitor::opt_algebraic()
    return progress;
 }
 
-/**
- * Removes any instructions writing a VGRF where that VGRF is not used by any
- * later instruction.
- */
-bool
-fs_visitor::dead_code_eliminate()
-{
-   bool progress = false;
-   int pc = 0;
-
-   calculate_live_intervals();
-
-   foreach_list_safe(node, &this->instructions) {
-      fs_inst *inst = (fs_inst *)node;
-
-      if (inst->dst.file == GRF && !inst->has_side_effects()) {
-         bool dead = true;
-
-         for (int i = 0; i < inst->regs_written; i++) {
-            int var = live_intervals->var_from_vgrf[inst->dst.reg];
-            assert(live_intervals->end[var + inst->dst.reg_offset + i] >= pc);
-            if (live_intervals->end[var + inst->dst.reg_offset + i] != pc) {
-               dead = false;
-               break;
-            }
-         }
-
-         if (dead) {
-            /* Don't dead code eliminate instructions that write to the
-             * accumulator as a side-effect. Instead just set the destination
-             * to the null register to free it.
-             */
-            switch (inst->opcode) {
-            case BRW_OPCODE_ADDC:
-            case BRW_OPCODE_SUBB:
-            case BRW_OPCODE_MACH:
-               inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
-               break;
-            default:
-               inst->remove();
-               progress = true;
-               break;
-            }
-         }
-      }
-
-      pc++;
-   }
-
-   if (progress)
-      invalidate_live_intervals();
-
-   return progress;
-}
-
 struct dead_code_hash_key
 {
    int vgrf;
@@ -3249,8 +3194,8 @@ fs_visitor::run()
 	 progress = opt_cse() || progress;
 	 progress = opt_copy_propagate() || progress;
          progress = opt_peephole_predicated_break() || progress;
-	 progress = dead_code_eliminate() || progress;
 	 progress = dead_code_eliminate_local() || progress;
+         progress = dead_code_eliminate() || progress;
          progress = opt_peephole_sel() || progress;
          progress = dead_control_flow_eliminate(this) || progress;
          progress = opt_saturate_propagation() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
new file mode 100644
index 0000000..6addbb3
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_fs.h"
+#include "brw_fs_live_variables.h"
+#include "brw_cfg.h"
+
+/** @file brw_fs_dead_code_eliminate.cpp
+ */
+
+/**
+ * Backward walk over each block that removes instructions (no side effects,
+ * no flag write) whose GRF result is not live.  ADDC/SUBB/MACH also write the
+ * accumulator, so they are kept and only get a null destination.
+ * Returns true if any instruction was changed.
+ */
+bool
+fs_visitor::dead_code_eliminate()
+{
+   bool progress = false;
+   cfg_t cfg(&instructions);
+
+   calculate_live_intervals();
+   int num_vars = live_intervals->num_vars;
+   BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
+
+   for (int b = 0; b < cfg.num_blocks; b++) {
+      bblock_t *block = cfg.blocks[b];
+      memcpy(live, live_intervals->bd[b].liveout,
+             sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
+
+      for (fs_inst *inst = (fs_inst *)block->end;
+           inst != block->start->prev;
+           inst = (fs_inst *)inst->prev) {
+         if (inst->dst.file == GRF &&
+             !inst->has_side_effects() &&
+             !inst->writes_flag()) {
+            bool result_live = false;
+
+            if (inst->regs_written == 1) {
+               int var = live_intervals->var_from_reg(&inst->dst);
+               result_live = BITSET_TEST(live, var);
+            } else {
+               int var = live_intervals->var_from_vgrf[inst->dst.reg] +
+                         inst->dst.reg_offset;
+               for (int i = 0; i < inst->regs_written; i++)
+                  result_live = result_live || BITSET_TEST(live, var + i);
+            }
+
+            if (!result_live) {
+               progress = true;
+               switch (inst->opcode) {
+               case BRW_OPCODE_ADDC:
+               case BRW_OPCODE_SUBB:
+               case BRW_OPCODE_MACH:
+                  /* Still executes: its sources must be marked live below. */
+                  inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
+                  break;
+               default:
+                  inst->opcode = BRW_OPCODE_NOP;
+                  continue;
+               }
+            }
+         }
+
+         for (int i = 0; i < 3; i++) {
+            if (inst->src[i].file == GRF) {
+               int var = live_intervals->var_from_vgrf[inst->src[i].reg];
+               for (int j = 0; j < inst->regs_read(this, i); j++)
+                  BITSET_SET(live, var + inst->src[i].reg_offset + j);
+            }
+         }
+      }
+   }
+
+   ralloc_free(live);
+
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      if (inst->opcode == BRW_OPCODE_NOP)
+         inst->remove();
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
-- 
1.8.3.2



More information about the mesa-dev mailing list