[Mesa-dev] [PATCH] i965/vs: Emit a MAC instead of a MUL+ADD combination when possible.

Eric Anholt eric at anholt.net
Tue Jan 10 11:23:14 PST 2012


Fixes a regression in codegen quality between the old VS backend and
the new one, where matrix multiplies became more expensive in
instruction count.

shader-db results:
Total instructions: 58540 -> 55984
815/870 programs affected (93.7%)
57412 -> 54856 instructions in affected programs (4.5% reduction)
hurt programs:
shaders/gst-gl-mosaic-v.vert: 48.21%
shaders/gst-gl-cube-v.vert: 48.21%

Those two shaders are doing really silly things that the core GLSL
compiler should be optimizing away.  Instead, we now end up with MAC
sequences involving 0.0 or 1.0, which are trickier to optimize away
than MUL+ADD sequences.
---
 src/mesa/drivers/dri/i965/brw_vec4.h               |    4 ++
 .../drivers/dri/i965/brw_vec4_copy_propagation.cpp |    1 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp        |    6 +++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp     |   45 ++++++++++++++++++++
 4 files changed, 56 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 2555fa7..4922422 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -266,6 +266,7 @@ public:
 
    bool saturate;
    bool predicate_inverse;
+   bool update_accumulator;
    uint32_t predicate;
 
    int conditional_mod; /**< BRW_CONDITIONAL_* */
@@ -524,6 +525,9 @@ public:
 				int base_offset);
 
    bool try_emit_sat(ir_expression *ir);
+   bool try_emit_mac(ir_expression *ir, int mul_op);
+
+
    void resolve_ud_negate(src_reg *reg);
 
    bool process_move_condition(ir_rvalue *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 08d8f5b..1d1dd6f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -96,6 +96,7 @@ try_constant_propagation(vec4_instruction *inst, int arg, src_reg *values[4])
       return true;
 
    case BRW_OPCODE_MUL:
+   case BRW_OPCODE_MAC:
    case BRW_OPCODE_ADD:
       if (arg == 1) {
 	 inst->src[arg] = value;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index a618614..af5069e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -831,6 +831,7 @@ vec4_visitor::generate_code()
       brw_set_predicate_control(p, inst->predicate);
       brw_set_predicate_inverse(p, inst->predicate_inverse);
       brw_set_saturate(p, inst->saturate);
+      brw_set_acc_write_control(p, inst->update_accumulator);
 
       switch (inst->opcode) {
       case BRW_OPCODE_MOV:
@@ -842,6 +843,11 @@ vec4_visitor::generate_code()
       case BRW_OPCODE_MUL:
 	 brw_MUL(p, dst, src[0], src[1]);
 	 break;
+
+      case BRW_OPCODE_MAC:
+	 brw_MAC(p, dst, src[0], src[1]);
+	 break;
+
       case BRW_OPCODE_MACH:
 	 brw_set_acc_write_control(p, 1);
 	 brw_MACH(p, dst, src[0], src[1]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 5df2470..6d78ae9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -156,6 +156,7 @@ ALU1(RNDE)
 ALU1(RNDZ)
 ALU2(ADD)
 ALU2(MUL)
+ALU2(MAC)
 ALU2(MACH)
 ALU2(AND)
 ALU2(OR)
@@ -998,6 +999,47 @@ vec4_visitor::visit(ir_function *ir)
 }
 
 bool
+vec4_visitor::try_emit_mac(ir_expression *ir, int mul_op)
+{
+   if (ir->operation != ir_binop_add || ir->type->base_type != GLSL_TYPE_FLOAT)
+      return false;
+
+   ir_expression *mul = ir->operands[mul_op]->as_expression();
+   if (!mul || mul->operation != ir_binop_mul)
+      return false;
+
+   ir_expression *nonmul = ir->operands[1 - mul_op]->as_expression();
+
+   /* The other addend has to come from an instruction we can flag with
+    * update_accumulator for the MAC to accumulate onto, so only accept
+    * the common expression types (add/mul) that generate such a value.
+    */
+   if (!nonmul || (nonmul->operation != ir_binop_add &&
+		   nonmul->operation != ir_binop_mul)) {
+      return false;
+   }
+
+   /* Evaluate the mul operands before the nonmul, so that the nonmul's
+    * accumulator-updating instruction is the last thing before our MAC.
+    */
+   mul->operands[0]->accept(this);
+   src_reg mul_op0 = this->result;
+   mul->operands[1]->accept(this);
+   src_reg mul_op1 = this->result;
+
+   nonmul->accept(this);
+   vec4_instruction *last_nonmul_inst;
+   last_nonmul_inst = (vec4_instruction *)this->instructions.get_tail();
+   last_nonmul_inst->update_accumulator = true;
+   last_nonmul_inst->dst = dst_null_f(); /* MAC below takes no nonmul src */
+
+   this->result = src_reg(this, ir->type);
+   emit(MAC(dst_reg(this->result), mul_op0, mul_op1));
+
+   return true;
+}
+
+bool
 vec4_visitor::try_emit_sat(ir_expression *ir)
 {
    ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
@@ -1038,6 +1080,9 @@ vec4_visitor::visit(ir_expression *ir)
    dst_reg result_dst;
    vec4_instruction *inst;
 
+   if (try_emit_mac(ir, 1) || try_emit_mac(ir, 0))
+      return;
+
    if (try_emit_sat(ir))
       return;
 
-- 
1.7.7.3



More information about the mesa-dev mailing list