[Mesa-dev] [PATCH 9/9] glsl: Convert mul/div by power-of-two factors to shift expressions.
Ian Romanick
idr at freedesktop.org
Mon Apr 7 06:52:03 PDT 2014
On 04/06/2014 11:49 PM, Kenneth Graunke wrote:
> Integer shifts are basically always well supported and efficient; that
> isn't always true of integer division, and sometimes even integer
> multiplication isn't without issues.
>
> On some Intel hardware, INTDIV can't be used in SIMD16 mode. It also
> doesn't support immediate operands (on any generation), while ASR can.
>
> On Haswell, this cuts the number of instructions in dolphin/efb2ram by
> 7.94%. It also removes a single MOV in dolphin/realxfb (due to ASR
> supporting immediates), and gains SIMD16 support (due to no INTDIV).
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/glsl/opt_algebraic.cpp | 70 ++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 68 insertions(+), 2 deletions(-)
>
> diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
> index 9d55392..eccc2eb 100644
> --- a/src/glsl/opt_algebraic.cpp
> +++ b/src/glsl/opt_algebraic.cpp
> @@ -34,6 +34,7 @@
> #include "ir_optimization.h"
> #include "ir_builder.h"
> #include "glsl_types.h"
> +#include "main/macros.h"
>
> using namespace ir_builder;
>
> @@ -68,6 +69,8 @@ public:
> int op2);
> ir_rvalue *swizzle_if_required(ir_expression *expr,
> ir_rvalue *operand);
> + ir_rvalue *convert_int_math_to_shifts(ir_expression *ir,
> + ir_constant *op_const_array[4]);
>
> void *mem_ctx;
>
> @@ -185,6 +188,59 @@ ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index,
> return false;
> }
>
> +/**
> + * Transform integer multiplication/division by a constant power-of-two
> + * factor into shift instructions.
> + */
> +ir_rvalue *
> +ir_algebraic_visitor::convert_int_math_to_shifts(ir_expression *ir,
> + ir_constant *op_const_array[4])
> +{
> + /* This optimization only makes sense for GPUs with native integers. */
> + if (!native_integers)
> + return NULL;
> +
> + assert(ir->operation == ir_binop_mul || ir->operation == ir_binop_div);
> +
> + /* Shifts only work for integer types. */
> + if (!ir->type->is_integer())
> + return NULL;
Following the earlier discussion about converting division to shifts, I
think the division case, at least, is only generally valid for unsigned
operands: -1 / 2 => 0, but -1 >> 1 => -1. I don't know what the rules are
for multiplication overflow... I think the result of
int32_t(0x70000000)*2 is undefined, so that case should be fine.
> +
> + ir_constant *const_op;
> + ir_rvalue *other_op;
> + if (op_const_array[0]) {
> + const_op = op_const_array[0];
> + other_op = ir->operands[1];
> + } else if (op_const_array[1]) {
> + const_op = op_const_array[1];
> + other_op = ir->operands[0];
> + } else {
> + /* If neither is a constant, we can't check for powers of two. */
> + return NULL;
> + }
> +
> + ir_constant_data shift_data;
> + for (int i = 0; i < const_op->type->vector_elements; i++) {
> + if (const_op->type->base_type == GLSL_TYPE_INT &&
> + const_op->value.i[i] <= 0) {
> + /* Negative values aren't powers of two. */
> + return NULL;
> + }
But... we could convert x*-4 into (-x)*4, assuming negation is free on
integer operands. That seems like it would be good follow-on work...
> +
> + if (!is_power_of_two(const_op->value.u[i]))
> + return NULL;
> +
> + shift_data.u[i] = ffs(const_op->value.u[i]) - 1;
> + }
> +
> + ir_constant *shifts = new(mem_ctx) ir_constant(ir->type, &shift_data);
> +
> + if (ir->operation == ir_binop_mul)
> + return lshift(other_op, shifts);
> + else
> + return rshift(other_op, shifts);
> +}
> +
> /* When eliminating an expression and just returning one of its operands,
> * we may need to swizzle that operand out to a vector if the expression was
> * vector type.
> @@ -389,7 +445,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
> return ir->operands[0];
> break;
>
> - case ir_binop_mul:
> + case ir_binop_mul: {
> if (is_vec_one(op_const[0]))
> return ir->operands[1];
> if (is_vec_one(op_const[1]))
> @@ -403,6 +459,9 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
> if (is_vec_negative_one(op_const[1]))
> return neg(ir->operands[0]);
>
> + ir_rvalue *shift_expr = convert_int_math_to_shifts(ir, op_const);
> + if (shift_expr)
> + return shift_expr;
>
> /* Reassociate multiplication of constants so that we can do
> * constant folding.
> @@ -413,8 +472,9 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
> reassociate_constant(ir, 1, op_const[1], op_expr[0]);
>
> break;
> + }
>
> - case ir_binop_div:
> + case ir_binop_div: {
> if (is_vec_one(op_const[0]) && ir->type->base_type == GLSL_TYPE_FLOAT) {
> return new(mem_ctx) ir_expression(ir_unop_rcp,
> ir->operands[1]->type,
> @@ -423,7 +483,13 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
> }
> if (is_vec_one(op_const[1]))
> return ir->operands[0];
> +
> + ir_rvalue *shift_expr = convert_int_math_to_shifts(ir, op_const);
> + if (shift_expr)
> + return shift_expr;
> +
> break;
> + }
>
> case ir_binop_dot:
> if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
More information about the mesa-dev
mailing list