[Mesa-dev] [PATCH 3/3] i965/vs: Avoid the MUL/MACH/MOV sequence for small integer multiplies.
Eric Anholt
eric at anholt.net
Fri Jun 7 18:55:06 PDT 2013
We do a lot of multiplies by 3 or 4 for skinning shaders, and we can avoid
the sequence if we just move them into the right argument of the MUL.
On SNB, this means reliably putting a constant in a position where it
can't be constant folded, but that's still better than MUL/MACH/MOV.
Improves GLB 2.7 trex performance by 0.788648% +/- 0.23865% (n=29/30)
---
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 50 +++++++++++++++++++-------
1 file changed, 37 insertions(+), 13 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 451f7d5..3c453eb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1313,6 +1313,20 @@ vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
}
}
+static bool
+is_16bit_constant(ir_rvalue *rvalue)
+{
+ ir_constant *constant = rvalue->as_constant();
+ if (!constant)
+ return false;
+
+ if (constant->type != glsl_type::int_type &&
+ constant->type != glsl_type::uint_type)
+ return false;
+
+ return constant->value.u[0] < (1 << 16);
+}
+
void
vec4_visitor::visit(ir_expression *ir)
{
@@ -1472,19 +1486,29 @@ vec4_visitor::visit(ir_expression *ir)
case ir_binop_mul:
if (ir->type->is_integer()) {
- /* For integer multiplication, the MUL uses the low 16 bits
- * of one of the operands (src0 on gen6, src1 on gen7). The
- * MACH accumulates in the contribution of the upper 16 bits
- * of that operand.
- *
- * FINISHME: Emit just the MUL if we know an operand is small
- * enough.
- */
- struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
-
- emit(MUL(acc, op[0], op[1]));
- emit(MACH(dst_null_d(), op[0], op[1]));
- emit(MOV(result_dst, src_reg(acc)));
+ /* For integer multiplication, the MUL uses the low 16 bits of one of
+ * the operands (src0 on gen6, src1 on gen7). The MACH accumulates
+ * in the contribution of the upper 16 bits of that operand. If we
+ * can determine that one of the args is in the low 16 bits, though,
+ * we can just emit a single MUL.
+ */
+ if (is_16bit_constant(ir->operands[0])) {
+ if (intel->gen == 6)
+ emit(MUL(result_dst, op[0], op[1]));
+ else
+ emit(MUL(result_dst, op[1], op[0]));
+ } else if (is_16bit_constant(ir->operands[1])) {
+ if (intel->gen == 6)
+ emit(MUL(result_dst, op[1], op[0]));
+ else
+ emit(MUL(result_dst, op[0], op[1]));
+ } else {
+ struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+ emit(MUL(acc, op[0], op[1]));
+ emit(MACH(dst_null_d(), op[0], op[1]));
+ emit(MOV(result_dst, src_reg(acc)));
+ }
} else {
emit(MUL(result_dst, op[0], op[1]));
}
--
1.8.3.rc0
More information about the mesa-dev
mailing list