[Mesa-dev] [PATCH 4/5] i965/fs: Try to avoid generating extra MOVs to do saturates.

Eric Anholt eric at anholt.net
Mon Mar 26 13:59:08 PDT 2012


This change (measured before the previous two patches were applied)
produced a 0.23% +/- 0.11% performance improvement in Unigine Tropics
at 1024x768 on IVB.

Total instructions: 269270 -> 262649
614/2148 programs affected (28.6%)
179386 -> 172765 instructions in affected programs (3.7% reduction)

v2: Move some of the logic of finding the instruction that produced
    the result of an expression tree to a helper.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp         |   29 ++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h           |    3 ++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |   34 ++++++++++++++++---------
 3 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 5f3d79d..f9c1483 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1717,6 +1717,35 @@ fs_visitor::virtual_grf_interferes(int a, int b)
    return start < end;
 }
 
+/**
+ * Returns end if it is the instruction that set up reg, else NULL.
+ *
+ * Callers use this to rewrite the instruction generating the result of
+ * an expression/variable dereference tree: the instructions generated
+ * for the tree all write temporaries that are dead outside of it, so
+ * such a temporary write may be pointed somewhere else.
+ *
+ * Only end is examined: NULL is returned when end == start, when end
+ * is predicated or has force_uncompressed/force_sechalf set, or when
+ * end->dst is not reg.  A match doesn't guarantee that the instruction
+ * wrote only reg -- it might be the size=4 destination of a texture op.
+ */
+fs_inst *
+fs_visitor::get_instruction_generating_reg(fs_inst *start,
+					   fs_inst *end,
+					   fs_reg reg)
+{
+   if (end == start ||
+       end->predicated ||
+       end->force_uncompressed ||
+       end->force_sechalf ||
+       !reg.equals(&end->dst)) {
+      return NULL;
+   } else {
+      return end;
+   }
+}
+
 bool
 fs_visitor::run()
 {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 7aebffa..d3a1045 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -487,6 +487,9 @@ public:
    }
 
    int type_size(const struct glsl_type *type);
+   fs_inst *get_instruction_generating_reg(fs_inst *start,
+					   fs_inst *end,
+					   fs_reg reg);
 
    bool run();
    void setup_paramvalues_refs();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index b4ef80b..3460d14 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -172,12 +172,25 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
    if (!sat_val)
       return false;
 
+   fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();
+
    sat_val->accept(this);
    fs_reg src = this->result;
 
-   this->result = fs_reg(this, ir->type);
-   fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
-   inst->saturate = true;
+   fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();
+
+   /* If the last instruction from our accept() didn't generate our
+    * src, generate a saturated MOV
+    */
+   fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
+   if (!modify || modify->regs_written() != 1) {
+      fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
+      inst->saturate = true;
+   } else {
+      modify->saturate = true;
+      this->result = src;
+   }
+
 
    return true;
 }
@@ -588,9 +601,6 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
                                    fs_inst *pre_rhs_inst,
                                    fs_inst *last_rhs_inst)
 {
-   if (pre_rhs_inst == last_rhs_inst)
-      return false; /* No instructions generated to work with. */
-
    /* Only attempt if we're doing a direct assignment. */
    if (ir->condition ||
        !(ir->lhs->type->is_scalar() ||
@@ -599,20 +609,20 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
       return false;
 
    /* Make sure the last instruction generated our source reg. */
-   if (last_rhs_inst->predicated ||
-       last_rhs_inst->force_uncompressed ||
-       last_rhs_inst->force_sechalf ||
-       !src.equals(&last_rhs_inst->dst))
+   fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst,
+						    last_rhs_inst,
+						    src);
+   if (!modify)
       return false;
 
    /* If last_rhs_inst wrote a different number of components than our LHS,
     * we can't safely rewrite it.
     */
-   if (ir->lhs->type->vector_elements != last_rhs_inst->regs_written())
+   if (ir->lhs->type->vector_elements != modify->regs_written())
       return false;
 
    /* Success!  Rewrite the instruction. */
-   last_rhs_inst->dst = dst;
+   modify->dst = dst;
 
    return true;
 }
-- 
1.7.9.1



More information about the mesa-dev mailing list