[Mesa-dev] [PATCH 3/3] i965/vs: Avoid the MUL/MACH/MOV sequence for small integer multiplies.

Eric Anholt eric at anholt.net
Fri Jun 7 18:55:06 PDT 2013


We do a lot of multiplies by 3 or 4 for skinning shaders, and we can avoid
the sequence if we just move them into the right argument of the MUL.

On SNB, this means reliably putting a constant in a position where it
can't be constant folded, but that's still better than MUL/MACH/MOV.

Improves GLB 2.7 trex performance by 0.788648% +/- 0.23865% (n=29/30)
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 50 +++++++++++++++++++-------
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 451f7d5..3c453eb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1313,6 +1313,20 @@ vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
    }
 }
 
+static bool
+is_16bit_constant(ir_rvalue *rvalue)
+{
+   ir_constant *constant = rvalue->as_constant();
+   if (!constant)
+      return false;
+
+   if (constant->type != glsl_type::int_type &&
+       constant->type != glsl_type::uint_type)
+      return false;
+
+   return constant->value.u[0] < (1 << 16);
+}
+
 void
 vec4_visitor::visit(ir_expression *ir)
 {
@@ -1472,19 +1486,29 @@ vec4_visitor::visit(ir_expression *ir)
 
    case ir_binop_mul:
       if (ir->type->is_integer()) {
-	 /* For integer multiplication, the MUL uses the low 16 bits
-	  * of one of the operands (src0 on gen6, src1 on gen7).  The
-	  * MACH accumulates in the contribution of the upper 16 bits
-	  * of that operand.
-	  *
-	  * FINISHME: Emit just the MUL if we know an operand is small
-	  * enough.
-	  */
-	 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
-
-	 emit(MUL(acc, op[0], op[1]));
-	 emit(MACH(dst_null_d(), op[0], op[1]));
-	 emit(MOV(result_dst, src_reg(acc)));
+	 /* For integer multiplication, the MUL uses the low 16 bits of one of
+	  * the operands (src0 on gen6, src1 on gen7).  The MACH accumulates
+	  * in the contribution of the upper 16 bits of that operand.  If we
+	  * can determine that one of the args is in the low 16 bits, though,
+	  * we can just emit a single MUL.
+          */
+         if (is_16bit_constant(ir->operands[0])) {
+            if (intel->gen == 6)
+               emit(MUL(result_dst, op[0], op[1]));
+            else
+               emit(MUL(result_dst, op[1], op[0]));
+         } else if (is_16bit_constant(ir->operands[1])) {
+            if (intel->gen == 6)
+               emit(MUL(result_dst, op[1], op[0]));
+            else
+               emit(MUL(result_dst, op[0], op[1]));
+         } else {
+            struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+            emit(MUL(acc, op[0], op[1]));
+            emit(MACH(dst_null_d(), op[0], op[1]));
+            emit(MOV(result_dst, src_reg(acc)));
+         }
       } else {
 	 emit(MUL(result_dst, op[0], op[1]));
       }
-- 
1.8.3.rc0



More information about the mesa-dev mailing list