[Mesa-dev] [PATCH 1/4] glsl: split DIV_TO_MUL_RCP into single- and double-precision flags

Tue Jan 17 14:38:11 UTC 2017

This patch is:
Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>

On Mon, 2017-01-16 at 17:20 +0100, Nicolai Hähnle wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
> 
> ---
>  src/compiler/glsl/ir_optimization.h      |  4 +++-
>  src/compiler/glsl/lower_instructions.cpp | 19 +++++++++++--------
>  2 files changed, 14 insertions(+), 9 deletions(-)
> 
> diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
> index 0d6c4e6..01e5270 100644
> --- a/src/compiler/glsl/ir_optimization.h
> +++ b/src/compiler/glsl/ir_optimization.h
> @@ -30,7 +30,7 @@
>  
>  /* Operations for lower_instructions() */
>  #define SUB_TO_ADD_NEG     0x01
> -#define DIV_TO_MUL_RCP     0x02
> +#define FDIV_TO_MUL_RCP    0x02
>  #define EXP_TO_EXP2        0x04
>  #define POW_TO_EXP2        0x08
>  #define LOG_TO_LOG2        0x10
> @@ -49,6 +49,8 @@
>  #define FIND_LSB_TO_FLOAT_CAST    0x20000
>  #define FIND_MSB_TO_FLOAT_CAST    0x40000
>  #define IMUL_HIGH_TO_MUL          0x80000
> +#define DDIV_TO_MUL_RCP           0x100000
> +#define DIV_TO_MUL_RCP            (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
>  
>  /**
>   * \see class lower_packing_builtins_visitor
> diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
> index 9fc83d1..729cb13 100644
> --- a/src/compiler/glsl/lower_instructions.cpp
> +++ b/src/compiler/glsl/lower_instructions.cpp
> @@ -54,8 +54,8 @@
>   * want to recognize add(op0, neg(op1)) or the other way around to
>   * produce a subtract anyway.
>   *
> - * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
> - * --------------------------------------
> + * FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP:
> + * ---------------------------------------------------------
>   * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
>   *
>   * Many GPUs don't have a divide instruction (945 and 965 included),
> @@ -63,9 +63,11 @@
>   * reciprocal.  By breaking the operation down, constant reciprocals
>   * can get constant folded.
>   *
> - * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
> - * handles the integer case, converting to and from floating point so that
> - * RCP is possible.
> + * FDIV_TO_MUL_RCP only lowers single-precision floating point division;
> + * DDIV_TO_MUL_RCP only lowers double-precision floating point division.
> + * DIV_TO_MUL_RCP is a convenience macro that sets both flags.
> + * INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
> + * point so that RCP is possible.
>   *
>   * EXP_TO_EXP2 and LOG_TO_LOG2:
>   * ----------------------------
> @@ -326,7 +328,8 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
>     /* Don't generate new IR that would need to be lowered in an additional
>      * pass.
>      */
> -   if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
> +   if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
> +       (lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
>        div_to_mul_rcp(div_expr);
>  
>     ir_expression *const floor_expr =
> @@ -1599,8 +1602,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
>     case ir_binop_div:
>        if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
>  	 int_div_to_mul_rcp(ir);
> -      else if ((ir->operands[1]->type->is_float() ||
> -                ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
> +      else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
> +               (ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
>  	 div_to_mul_rcp(ir);
>        break;
>