[Mesa-dev] [PATCH 9/9] glsl: Convert mul/div by power-of-two factors to shift expressions.

Sun Apr 6 23:49:55 PDT 2014

Integer shifts are basically always well supported and efficient; that
isn't always true of integer division, and sometimes even integer
multiplication isn't without issues.

On some Intel hardware, INTDIV can't be used in SIMD16 mode.  It also
doesn't accept immediate operands (on any generation), whereas ASR does.

On Haswell, this cuts the number of instructions in dolphin/efb2ram by
7.94%.  It also removes a single MOV in dolphin/realxfb (due to ASR
supporting immediates), and gains SIMD16 support (due to no INTDIV).

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/glsl/opt_algebraic.cpp | 70 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 68 insertions(+), 2 deletions(-)

diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
index 9d55392..eccc2eb 100644
--- a/src/glsl/opt_algebraic.cpp
+++ b/src/glsl/opt_algebraic.cpp
@@ -34,6 +34,7 @@
 #include "ir_optimization.h"
 #include "ir_builder.h"
 #include "glsl_types.h"
+#include "main/macros.h"
 
 using namespace ir_builder;
 
@@ -68,6 +69,8 @@ public:
 			     int op2);
    ir_rvalue *swizzle_if_required(ir_expression *expr,
 				  ir_rvalue *operand);
+   ir_rvalue *convert_int_math_to_shifts(ir_expression *ir,
+                                         ir_constant *op_const_array[4]);
 
    void *mem_ctx;
 
@@ -185,6 +188,59 @@ ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index,
    return false;
 }
 
+/**
+ * Transform integer multiplication/division by a constant power-of-two
+ * factor into shift instructions.
+ *
+ * \param ir  the ir_binop_mul or ir_binop_div expression to rewrite
+ * \param op_const_array  ir's operands as constants (NULL if not constant)
+ * \return the replacement shift expression, or NULL to leave ir untouched
+ */
+ir_rvalue *
+ir_algebraic_visitor::convert_int_math_to_shifts(ir_expression *ir,
+                                                 ir_constant *op_const_array[4])
+{
+   assert(ir->operation == ir_binop_mul || ir->operation == ir_binop_div);
+   const bool is_div = ir->operation == ir_binop_div;
+   /* Shifts need native integer support and an integer-typed expression. */
+   if (!native_integers || !ir->type->is_integer())
+      return NULL;
+
+   /* x / 2^n truncates toward zero while an arithmetic x >> n rounds toward
+    * negative infinity, so only unsigned division can become a shift.
+    */
+   if (is_div && ir->type->base_type != GLSL_TYPE_UINT)
+      return NULL;
+
+   /* Multiplication commutes, but 2^n / x is not a shift of x: division
+    * needs the constant as the divisor (operand 1).
+    */
+   const int const_idx = (!is_div && op_const_array[0]) ? 0 : 1;
+   ir_constant *const_op = op_const_array[const_idx];
+   if (!const_op)
+      return NULL;
+
+   /* Start from a copy of the constant so every component is defined. */
+   ir_constant_data shift_data = const_op->value;
+   for (unsigned i = 0; i < const_op->type->vector_elements; i++) {
+      /* Zero and negative factors are not powers of two; ffs(0) - 1 wraps. */
+      if (const_op->value.u[i] == 0 ||
+          (const_op->type->base_type == GLSL_TYPE_INT &&
+           const_op->value.i[i] < 0) ||
+          !is_power_of_two(const_op->value.u[i]))
+         return NULL;
+
+      shift_data.u[i] = ffs(const_op->value.u[i]) - 1;
+   }
+
+   /* Type the shift counts like the constant and widen a scalar operand so
+    * that scalar/vector operand mixes still produce a value of ir's type.
+    */
+   ir_rvalue *other_op = swizzle_if_required(ir, ir->operands[1 - const_idx]);
+   ir_constant *shifts = new(mem_ctx) ir_constant(const_op->type, &shift_data);
+   return is_div ? rshift(other_op, shifts) : lshift(other_op, shifts);
+}
+
 /* When eliminating an expression and just returning one of its operands,
  * we may need to swizzle that operand out to a vector if the expression was
  * vector type.
@@ -389,7 +445,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
 	 return ir->operands[0];
       break;
 
-   case ir_binop_mul:
+   case ir_binop_mul: {
       if (is_vec_one(op_const[0]))
 	 return ir->operands[1];
       if (is_vec_one(op_const[1]))
@@ -403,6 +459,9 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       if (is_vec_negative_one(op_const[1]))
          return neg(ir->operands[0]);
 
+      ir_rvalue *shift_expr = convert_int_math_to_shifts(ir, op_const);
+      if (shift_expr)
+         return shift_expr;
 
       /* Reassociate multiplication of constants so that we can do
        * constant folding.
@@ -413,8 +472,9 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
 	 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
 
       break;
+   }
 
-   case ir_binop_div:
+   case ir_binop_div: {
       if (is_vec_one(op_const[0]) && ir->type->base_type == GLSL_TYPE_FLOAT) {
 	 return new(mem_ctx) ir_expression(ir_unop_rcp,
 					   ir->operands[1]->type,
@@ -423,7 +483,13 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       }
       if (is_vec_one(op_const[1]))
 	 return ir->operands[0];
+
+      ir_rvalue *shift_expr = convert_int_math_to_shifts(ir, op_const);
+      if (shift_expr)
+         return shift_expr;
+
       break;
+   }
 
    case ir_binop_dot:
       if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
-- 
1.9.0