[Mesa-dev] [PATCH 9/9] glsl: Convert mul/div by power-of-two factors to shift expressions.

Mon Apr 7 06:52:03 PDT 2014

On 04/06/2014 11:49 PM, Kenneth Graunke wrote:
> Integer shifts are basically always well supported and efficient; that
> isn't always true of integer division, and sometimes even integer
> multiplication isn't without issues.
> 
> On some Intel hardware, INTDIV can't be used in SIMD16 mode.  It also
> doesn't support immediate operands (on any generation), while ASR can.
> 
> On Haswell, this cuts the number of instructions in dolphin/efb2ram by
> 7.94%.  It also removes a single MOV in dolphin/realxfb (due to ASR
> supporting immediates), and gains SIMD16 support (due to no INTDIV).
> 
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/glsl/opt_algebraic.cpp | 70 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 68 insertions(+), 2 deletions(-)
> 
> diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
> index 9d55392..eccc2eb 100644
> --- a/src/glsl/opt_algebraic.cpp
> +++ b/src/glsl/opt_algebraic.cpp
> @@ -34,6 +34,7 @@
>  #include "ir_optimization.h"
>  #include "ir_builder.h"
>  #include "glsl_types.h"
> +#include "main/macros.h"
>  
>  using namespace ir_builder;
>  
> @@ -68,6 +69,8 @@ public:
>  			     int op2);
>     ir_rvalue *swizzle_if_required(ir_expression *expr,
>  				  ir_rvalue *operand);
> +   ir_rvalue *convert_int_math_to_shifts(ir_expression *ir,
> +                                         ir_constant *op_const_array[4]);
>  
>     void *mem_ctx;
>  
> @@ -185,6 +188,59 @@ ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index,
>     return false;
>  }
>  
> +/**
> + * Transform integer multiplication/division by a constant power-of-two
> + * factor into shift instructions.
> + */
> +ir_rvalue *
> +ir_algebraic_visitor::convert_int_math_to_shifts(ir_expression *ir,
> +                                                 ir_constant *op_const_array[4])
> +{
> +   /* This optimization only makes sense for GPUs with native integers. */
> +   if (!native_integers)
> +      return NULL;
> +
> +   assert(ir->operation == ir_binop_mul || ir->operation == ir_binop_div);
> +
> +   /* Shifts only work for integer types. */
> +   if (!ir->type->is_integer())
> +      return NULL;

After the previous conversation about converting division to shifts, I
think the division case, at least, is only generally valid for unsigned
types: -1 / 2 => 0, but -1 >> 1 => -1 (the shift rounds toward negative
infinity rather than toward zero).  I don't know what the rules are for
multiplication overflow... I think the result of int32_t(0x70000000)*2 is
undefined, so that should be fine.

> +
> +   ir_constant *const_op;
> +   ir_rvalue *other_op;
> +   if (op_const_array[0]) {
> +      const_op = op_const_array[0];
> +      other_op = ir->operands[1];
> +   } else if (op_const_array[1]) {
> +      const_op = op_const_array[1];
> +      other_op = ir->operands[0];
> +   } else {
> +      /* If neither is a constant, we can't check for powers of two. */
> +      return NULL;
> +   }
> +
> +   ir_constant_data shift_data;
> +   for (int i = 0; i < const_op->type->vector_elements; i++) {
> +      if (const_op->type->base_type == GLSL_TYPE_INT &&
> +          const_op->value.i[i] <= 0) {
> +         /* Negative values aren't powers of two. */
> +         return NULL;
> +      }

But... we could convert x*-4 into (-x)*4, i.e. (-x) << 2... assuming
negation is free on integer operands.  Seems like that would be good
follow-on work...

> +
> +      if (!is_power_of_two(const_op->value.u[i]))
> +         return NULL;
> +
> +      shift_data.u[i] = ffs(const_op->value.u[i]) - 1;
> +   }
> +
> +   ir_constant *shifts = new(mem_ctx) ir_constant(ir->type, &shift_data);
> +
> +   if (ir->operation == ir_binop_mul)
> +      return lshift(other_op, shifts);
> +   else
> +      return rshift(other_op, shifts);
> +}
> +
>  /* When eliminating an expression and just returning one of its operands,
>   * we may need to swizzle that operand out to a vector if the expression was
>   * vector type.
> @@ -389,7 +445,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
>  	 return ir->operands[0];
>        break;
>  
> -   case ir_binop_mul:
> +   case ir_binop_mul: {
>        if (is_vec_one(op_const[0]))
>  	 return ir->operands[1];
>        if (is_vec_one(op_const[1]))
> @@ -403,6 +459,9 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
>        if (is_vec_negative_one(op_const[1]))
>           return neg(ir->operands[0]);
>  
> +      ir_rvalue *shift_expr = convert_int_math_to_shifts(ir, op_const);
> +      if (shift_expr)
> +         return shift_expr;
>  
>        /* Reassociate multiplication of constants so that we can do
>         * constant folding.
> @@ -413,8 +472,9 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
>  	 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
>  
>        break;
> +   }
>  
> -   case ir_binop_div:
> +   case ir_binop_div: {
>        if (is_vec_one(op_const[0]) && ir->type->base_type == GLSL_TYPE_FLOAT) {
>  	 return new(mem_ctx) ir_expression(ir_unop_rcp,
>  					   ir->operands[1]->type,
> @@ -423,7 +483,13 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
>        }
>        if (is_vec_one(op_const[1]))
>  	 return ir->operands[0];
> +
> +      ir_rvalue *shift_expr = convert_int_math_to_shifts(ir, op_const);
> +      if (shift_expr)
> +         return shift_expr;
> +
>        break;
> +   }
>  
>     case ir_binop_dot:
>        if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))