[Mesa-dev] [PATCH 3/3] i965: Avoid generating MOVs for assignments of expressions.

Eric Anholt eric at anholt.net
Tue May 17 20:22:56 PDT 2011


The theory in the codegen was that register coalescing and copy
propagation would eliminate these extra moves.  However, with our
ad-hoc dataflow analysis, we end up missing out on some of those
opportunities inside of control flow.  It's particularly embarassing
when those uneliminated MOVs end up moving from a register to itself
after register allocation.

To eliminate them, we set up this->result with storage before visitor
accept() calls, which the callee can either put its result in if it's
safe, or allocate new storage as usual.

No statistically significant difference measured in 3dbenchmark
egypt/pro.  It does reduce fragment shader instructions across
shader-db by 0.3%.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp |   79 +++++++++++++++++++++++++++++-----
 src/mesa/drivers/dri/i965/brw_fs.h   |    5 ++-
 2 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 76df4dc..a496352 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -912,6 +912,7 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
    if (!sat_val)
       return false;
 
+   this->result = reg_undef;
    sat_val->accept(this);
    fs_reg src = this->result;
 
@@ -958,7 +959,11 @@ fs_visitor::visit(ir_expression *ir)
    if (try_emit_saturate(ir))
       return;
 
+   /* This is where our caller would like us to put the result, if possible. */
+   fs_reg saved_result_storage = this->result;
+
    for (operand = 0; operand < ir->get_num_operands(); operand++) {
+      this->result = reg_undef;
       ir->operands[operand]->accept(this);
       if (this->result.file == BAD_FILE) {
 	 ir_print_visitor v;
@@ -976,10 +981,14 @@ fs_visitor::visit(ir_expression *ir)
       assert(!ir->operands[operand]->type->is_vector());
    }
 
-   /* Storage for our result.  If our result goes into an assignment, it will
-    * just get copy-propagated out, so no worries.
+   /* Inherit storage from our parent if possible, and otherwise we
+    * alloc a temporary.
     */
-   this->result = fs_reg(this, ir->type);
+   if (saved_result_storage.file == BAD_FILE) {
+      this->result = fs_reg(this, ir->type);
+   } else {
+      this->result = saved_result_storage;
+   }
 
    switch (ir->operation) {
    case ir_unop_logic_not:
@@ -1000,6 +1009,9 @@ fs_visitor::visit(ir_expression *ir)
    case ir_unop_sign:
       temp = fs_reg(this, ir->type);
 
+      /* Unalias the destination.  (imagine a = sign(a)) */
+      this->result = fs_reg(this, ir->type);
+
       emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f));
 
       inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
@@ -1148,6 +1160,9 @@ fs_visitor::visit(ir_expression *ir)
       break;
 
    case ir_binop_min:
+      /* Unalias the destination */
+      this->result = fs_reg(this, ir->type);
+
       inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
       inst->conditional_mod = BRW_CONDITIONAL_L;
 
@@ -1155,6 +1170,9 @@ fs_visitor::visit(ir_expression *ir)
       inst->predicated = true;
       break;
    case ir_binop_max:
+      /* Unalias the destination */
+      this->result = fs_reg(this, ir->type);
+
       inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
       inst->conditional_mod = BRW_CONDITIONAL_G;
 
@@ -1200,8 +1218,10 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
 	 l.type = brw_type_for_base_type(type);
 	 r.type = brw_type_for_base_type(type);
 
-	 fs_inst *inst = emit(BRW_OPCODE_MOV, l, r);
-	 inst->predicated = predicated;
+	 if (predicated || !l.equals(&r)) {
+	    fs_inst *inst = emit(BRW_OPCODE_MOV, l, r);
+	    inst->predicated = predicated;
+	 }
 
 	 l.reg_offset++;
 	 r.reg_offset++;
@@ -1236,9 +1256,20 @@ fs_visitor::visit(ir_assignment *ir)
    fs_inst *inst;
 
    /* FINISHME: arrays on the lhs */
+   this->result = reg_undef;
    ir->lhs->accept(this);
    l = this->result;
 
+   /* If we're doing a straight assignment, an RHS expression could
+    * drop its result right into our destination.  Otherwise, tell it
+    * not to.
+    */
+   if (!(ir->lhs->type->is_scalar() ||
+	 (ir->lhs->type->is_vector() &&
+	  ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) {
+      this->result = reg_undef;
+   }
+
    ir->rhs->accept(this);
    r = this->result;
 
@@ -1253,9 +1284,13 @@ fs_visitor::visit(ir_assignment *ir)
        ir->lhs->type->is_vector()) {
       for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
 	 if (ir->write_mask & (1 << i)) {
-	    inst = emit(BRW_OPCODE_MOV, l, r);
-	    if (ir->condition)
+	    if (ir->condition) {
+	       inst = emit(BRW_OPCODE_MOV, l, r);
 	       inst->predicated = true;
+	    } else if (!l.equals(&r)) {
+	       inst = emit(BRW_OPCODE_MOV, l, r);
+	    }
+
 	    r.reg_offset++;
 	 }
 	 l.reg_offset++;
@@ -1297,16 +1332,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
 	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f));
 	 mlen++;
       } else if (ir->op == ir_txb) {
+	 this->result = reg_undef;
 	 ir->lod_info.bias->accept(this);
 	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
 	 mlen++;
       } else {
 	 assert(ir->op == ir_txl);
+	 this->result = reg_undef;
 	 ir->lod_info.lod->accept(this);
 	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
 	 mlen++;
       }
 
+      this->result = reg_undef;
       ir->shadow_comparitor->accept(this);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen++;
@@ -1334,10 +1372,12 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       mlen += 6;
 
       if (ir->op == ir_txb) {
+	 this->result = reg_undef;
 	 ir->lod_info.bias->accept(this);
 	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
 	 mlen++;
       } else {
+	 this->result = reg_undef;
 	 ir->lod_info.lod->accept(this);
 	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
 	 mlen++;
@@ -1426,6 +1466,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    if (ir->shadow_comparitor) {
       mlen = MAX2(mlen, header_present + 4 * reg_width);
 
+      this->result = reg_undef;
       ir->shadow_comparitor->accept(this);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen += reg_width;
@@ -1437,6 +1478,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       inst = emit(FS_OPCODE_TEX, dst);
       break;
    case ir_txb:
+      this->result = reg_undef;
       ir->lod_info.bias->accept(this);
       mlen = MAX2(mlen, header_present + 4 * reg_width);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
@@ -1446,6 +1488,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
 
       break;
    case ir_txl:
+      this->result = reg_undef;
       ir->lod_info.lod->accept(this);
       mlen = MAX2(mlen, header_present + 4 * reg_width);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
@@ -1475,6 +1518,7 @@ fs_visitor::visit(ir_texture *ir)
    int sampler;
    fs_inst *inst = NULL;
 
+   this->result = reg_undef;
    ir->coordinate->accept(this);
    fs_reg coordinate = this->result;
 
@@ -1618,6 +1662,7 @@ fs_visitor::visit(ir_texture *ir)
 void
 fs_visitor::visit(ir_swizzle *ir)
 {
+   this->result = reg_undef;
    ir->val->accept(this);
    fs_reg val = this->result;
 
@@ -1680,6 +1725,7 @@ fs_visitor::visit(ir_constant *ir)
       const unsigned size = type_size(ir->type->fields.array);
 
       for (unsigned i = 0; i < ir->type->length; i++) {
+	 this->result = reg_undef;
 	 ir->array_elements[i]->accept(this);
 	 fs_reg src_reg = this->result;
 
@@ -1695,6 +1741,7 @@ fs_visitor::visit(ir_constant *ir)
 	 ir_instruction *const field = (ir_instruction *) node;
 	 const unsigned size = type_size(field->type);
 
+	 this->result = reg_undef;
 	 field->accept(this);
 	 fs_reg src_reg = this->result;
 
@@ -1745,6 +1792,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 	 assert(expr->operands[i]->type->is_scalar());
 
+	 this->result = reg_undef;
 	 expr->operands[i]->accept(this);
 	 op[i] = this->result;
       }
@@ -1809,6 +1857,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
       return;
    }
 
+   this->result = reg_undef;
    ir->accept(this);
 
    if (intel->gen >= 6) {
@@ -1838,6 +1887,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)
       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 	 assert(expr->operands[i]->type->is_scalar());
 
+	 this->result = reg_undef;
 	 expr->operands[i]->accept(this);
 	 op[i] = this->result;
       }
@@ -1899,6 +1949,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)
       return;
    }
 
+   this->result = reg_undef;
    ir->condition->accept(this);
 
    fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0));
@@ -1931,7 +1982,7 @@ fs_visitor::visit(ir_if *ir)
    foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
       ir_instruction *ir = (ir_instruction *)iter.get();
       this->base_ir = ir;
-
+      this->result = reg_undef;
       ir->accept(this);
    }
 
@@ -1941,7 +1992,7 @@ fs_visitor::visit(ir_if *ir)
       foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
 	 ir_instruction *ir = (ir_instruction *)iter.get();
 	 this->base_ir = ir;
-
+	 this->result = reg_undef;
 	 ir->accept(this);
       }
    }
@@ -1965,9 +2016,11 @@ fs_visitor::visit(ir_loop *ir)
 
       if (ir->from) {
 	 this->base_ir = ir->from;
+	 this->result = counter;
 	 ir->from->accept(this);
 
-	 emit(BRW_OPCODE_MOV, counter, this->result);
+	 if (!this->result.equals(&counter))
+	    emit(BRW_OPCODE_MOV, counter, this->result);
       }
    }
 
@@ -1975,6 +2028,7 @@ fs_visitor::visit(ir_loop *ir)
 
    if (ir->to) {
       this->base_ir = ir->to;
+      this->result = reg_undef;
       ir->to->accept(this);
 
       fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result);
@@ -1988,11 +2042,13 @@ fs_visitor::visit(ir_loop *ir)
       ir_instruction *ir = (ir_instruction *)iter.get();
 
       this->base_ir = ir;
+      this->result = reg_undef;
       ir->accept(this);
    }
 
    if (ir->increment) {
       this->base_ir = ir->increment;
+      this->result = reg_undef;
       ir->increment->accept(this);
       emit(BRW_OPCODE_ADD, counter, counter, this->result);
    }
@@ -2042,7 +2098,7 @@ fs_visitor::visit(ir_function *ir)
       foreach_iter(exec_list_iterator, iter, sig->body) {
 	 ir_instruction *ir = (ir_instruction *)iter.get();
 	 this->base_ir = ir;
-
+	 this->result = reg_undef;
 	 ir->accept(this);
       }
    }
@@ -4141,6 +4197,7 @@ fs_visitor::run()
       foreach_iter(exec_list_iterator, iter, *shader->ir) {
 	 ir_instruction *ir = (ir_instruction *)iter.get();
 	 base_ir = ir;
+	 this->result = reg_undef;
 	 ir->accept(this);
       }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 8bd6a14..75b7ec1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -573,7 +573,10 @@ public:
    bool failed;
    char *fail_msg;
 
-   /* Result of last visit() method. */
+   /* On entry to a visit() method, this is the storage for the
+    * result.  On exit, the visit() called may have changed it, in
+    * which case the parent must use the new storage instead.
+    */
    fs_reg result;
 
    fs_reg pixel_x;
-- 
1.7.5.1



More information about the mesa-dev mailing list