[Mesa-dev] [PATCH v2 4/5] glsl: split DIV_TO_MUL_RCP into single- and double-precision flags

Thu Jan 19 13:59:39 UTC 2017

From: Nicolai Hähnle <nicolai.haehnle at amd.com>

Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>
---
 src/compiler/glsl/ir_optimization.h      |  4 +++-
 src/compiler/glsl/lower_instructions.cpp | 19 +++++++++++--------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index 0d6c4e6..01e5270 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -23,39 +23,41 @@
 
 
 /**
  * \file ir_optimization.h
  *
  * Prototypes for optimization passes to be called by the compiler and drivers.
  */
 
 /* Operations for lower_instructions() */
 #define SUB_TO_ADD_NEG     0x01
-#define DIV_TO_MUL_RCP     0x02
+#define FDIV_TO_MUL_RCP    0x02
 #define EXP_TO_EXP2        0x04
 #define POW_TO_EXP2        0x08
 #define LOG_TO_LOG2        0x10
 #define MOD_TO_FLOOR       0x20
 #define INT_DIV_TO_MUL_RCP 0x40
 #define LDEXP_TO_ARITH     0x80
 #define CARRY_TO_ARITH     0x100
 #define BORROW_TO_ARITH    0x200
 #define SAT_TO_CLAMP       0x400
 #define DOPS_TO_DFRAC      0x800
 #define DFREXP_DLDEXP_TO_ARITH    0x1000
 #define BIT_COUNT_TO_MATH         0x02000
 #define EXTRACT_TO_SHIFTS         0x04000
 #define INSERT_TO_SHIFTS          0x08000
 #define REVERSE_TO_SHIFTS         0x10000
 #define FIND_LSB_TO_FLOAT_CAST    0x20000
 #define FIND_MSB_TO_FLOAT_CAST    0x40000
 #define IMUL_HIGH_TO_MUL          0x80000
+#define DDIV_TO_MUL_RCP           0x100000
+#define DIV_TO_MUL_RCP            (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
 
 /**
  * \see class lower_packing_builtins_visitor
  */
 enum lower_packing_builtins_op {
    LOWER_PACK_UNPACK_NONE               = 0x0000,
 
    LOWER_PACK_SNORM_2x16                = 0x0001,
    LOWER_UNPACK_SNORM_2x16              = 0x0002,
 
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
index 9fc83d1..729cb13 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -47,32 +47,34 @@
  * SUB_TO_ADD_NEG:
  * ---------------
  * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
  *
  * This simplifies expression reassociation, and for many backends
  * there is no subtract operation separate from adding the negation.
  * For backends with native subtract operations, they will probably
  * want to recognize add(op0, neg(op1)) or the other way around to
  * produce a subtract anyway.
  *
- * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
- * --------------------------------------
+ * FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP:
+ * ---------------------------------------------------------
  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
  *
  * Many GPUs don't have a divide instruction (945 and 965 included),
  * but they do have an RCP instruction to compute an approximate
  * reciprocal.  By breaking the operation down, constant reciprocals
  * can get constant folded.
  *
- * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
- * handles the integer case, converting to and from floating point so that
- * RCP is possible.
+ * FDIV_TO_MUL_RCP only lowers single-precision floating point division;
+ * DDIV_TO_MUL_RCP only lowers double-precision floating point division.
+ * DIV_TO_MUL_RCP is a convenience macro that sets both flags.
+ * INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
+ * point so that RCP is possible.
  *
  * EXP_TO_EXP2 and LOG_TO_LOG2:
  * ----------------------------
  * Many GPUs don't have a base e log or exponent instruction, but they
  * do have base 2 versions, so this pass converts exp and log to exp2
  * and log2 operations.
  *
  * POW_TO_EXP2:
  * -----------
  * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
@@ -319,21 +321,22 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
    this->base_ir->insert_before(assign_y);
 
    ir_expression *const div_expr =
       new(ir) ir_expression(ir_binop_div, x->type,
                             new(ir) ir_dereference_variable(x),
                             new(ir) ir_dereference_variable(y));
 
    /* Don't generate new IR that would need to be lowered in an additional
     * pass.
     */
-   if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
+   if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
+       (lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
       div_to_mul_rcp(div_expr);
 
    ir_expression *const floor_expr =
       new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
 
    if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
       dfloor_to_dfrac(floor_expr);
 
    ir_expression *const mul_expr =
       new(ir) ir_expression(ir_binop_mul,
@@ -1592,22 +1595,22 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
          double_lrp(ir);
       break;
    case ir_binop_sub:
       if (lowering(SUB_TO_ADD_NEG))
 	 sub_to_add_neg(ir);
       break;
 
    case ir_binop_div:
       if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
 	 int_div_to_mul_rcp(ir);
-      else if ((ir->operands[1]->type->is_float() ||
-                ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
+      else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
+               (ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
 	 div_to_mul_rcp(ir);
       break;
 
    case ir_unop_exp:
       if (lowering(EXP_TO_EXP2))
 	 exp_to_exp2(ir);
       break;
 
    case ir_unop_log:
       if (lowering(LOG_TO_LOG2))
-- 
2.7.4