[Mesa-dev] [PATCH 1/4] glsl: split DIV_TO_MUL_RCP into single- and double-precision flags
Iago Toral
itoral at igalia.com
Tue Jan 17 14:38:11 UTC 2017
This patch is:
Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>
On Mon, 2017-01-16 at 17:20 +0100, Nicolai Hähnle wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> ---
> src/compiler/glsl/ir_optimization.h | 4 +++-
> src/compiler/glsl/lower_instructions.cpp | 19 +++++++++++--------
> 2 files changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
> index 0d6c4e6..01e5270 100644
> --- a/src/compiler/glsl/ir_optimization.h
> +++ b/src/compiler/glsl/ir_optimization.h
> @@ -30,7 +30,7 @@
>
> /* Operations for lower_instructions() */
> #define SUB_TO_ADD_NEG 0x01
> -#define DIV_TO_MUL_RCP 0x02
> +#define FDIV_TO_MUL_RCP 0x02
> #define EXP_TO_EXP2 0x04
> #define POW_TO_EXP2 0x08
> #define LOG_TO_LOG2 0x10
> @@ -49,6 +49,8 @@
> #define FIND_LSB_TO_FLOAT_CAST 0x20000
> #define FIND_MSB_TO_FLOAT_CAST 0x40000
> #define IMUL_HIGH_TO_MUL 0x80000
> +#define DDIV_TO_MUL_RCP 0x100000
> +#define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
>
> /**
> * \see class lower_packing_builtins_visitor
> diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
> index 9fc83d1..729cb13 100644
> --- a/src/compiler/glsl/lower_instructions.cpp
> +++ b/src/compiler/glsl/lower_instructions.cpp
> @@ -54,8 +54,8 @@
> * want to recognize add(op0, neg(op1)) or the other way around to
> * produce a subtract anyway.
> *
> - * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
> - * --------------------------------------
> + * FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP:
> + * ---------------------------------------------------------
> * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
> *
> * Many GPUs don't have a divide instruction (945 and 965 included),
> @@ -63,9 +63,11 @@
> * reciprocal. By breaking the operation down, constant reciprocals
> * can get constant folded.
> *
> - * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
> - * handles the integer case, converting to and from floating point so that
> - * RCP is possible.
> + * FDIV_TO_MUL_RCP only lowers single-precision floating point division;
> + * DDIV_TO_MUL_RCP only lowers double-precision floating point division.
> + * DIV_TO_MUL_RCP is a convenience macro that sets both flags.
> + * INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
> + * point so that RCP is possible.
> *
> * EXP_TO_EXP2 and LOG_TO_LOG2:
> * ----------------------------
> @@ -326,7 +328,8 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
> /* Don't generate new IR that would need to be lowered in an additional
> * pass.
> */
> - if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
> + if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
> + (lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
> div_to_mul_rcp(div_expr);
>
> ir_expression *const floor_expr =
> @@ -1599,8 +1602,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
> case ir_binop_div:
> if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
> int_div_to_mul_rcp(ir);
> - else if ((ir->operands[1]->type->is_float() ||
> - ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
> + else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
> + (ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
> div_to_mul_rcp(ir);
> break;
>
More information about the mesa-dev mailing list