Mesa (fp64_floor): glsl: dfloor_to_arith WIP

Mon Jan 12 10:17:59 UTC 2015

Module: Mesa
Branch: fp64_floor
Commit: f95b9b0c33b80bcd0ce5fec28608ed80605a6edf
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f95b9b0c33b80bcd0ce5fec28608ed80605a6edf

Author: Tapani Pälli <tapani.palli at intel.com>
Date:   Wed Dec 31 11:14:02 2014 +0200

glsl: dfloor_to_arith WIP

Signed-off-by: Tapani Pälli <tapani.palli at intel.com>

---

 src/glsl/ir_optimization.h               |    1 +
 src/glsl/lower_instructions.cpp          |  129 ++++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_shader.cpp |    3 +-
 3 files changed, 132 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 180ae6f..8f0f024 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -44,6 +44,7 @@
 #define DOPS_TO_DFRAC      0x1000
 #define DFREXP_DLDEXP_TO_ARITH    0x2000
 #define DSQRT_TO_FSQRT            0x4000
+#define DFLOOR_TO_ARITH           0x8000
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 7868be5..7b46c1e 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -45,6 +45,7 @@
  * - SAT_TO_CLAMP
  * - DOPS_TO_DFRAC
  * - DSQRT_TO_FSQRT
+ * - DFLOOR_TO_ARITH
  *
  * SUB_TO_ADD_NEG:
  * ---------------
@@ -125,6 +126,10 @@
  * --------------
  * Splits double square root into exponent division and single precision
  * square root.
+ *
+ * DFLOOR_TO_ARITH
+ * ---------------
+ * Lowers double-precision floor to integer arithmetic on the unpacked bits (WIP).
  */
 
 #include "main/core.h" /* for M_LOG2E */
@@ -170,6 +175,7 @@ private:
    void double_lrp(ir_expression *);
    void dceil_to_dfrac(ir_expression *);
    void dfloor_to_dfrac(ir_expression *);
+   void dfloor_to_arith(ir_expression *);
    void dround_even_to_dfrac(ir_expression *);
    void dtrunc_to_dfrac(ir_expression *);
    void dsign_to_csel(ir_expression *);
@@ -1095,6 +1101,122 @@ lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir)
    ir->operands[1] = new(ir) ir_dereference_variable(t2);
 }
 
+
+void
+lower_instructions_visitor::dfloor_to_arith(ir_expression *ir)
+{
+   ir_instruction &i = *base_ir;
+   exec_list instructions;
+   ir_factory factory;
+   factory.instructions = &instructions;
+   factory.mem_ctx = ir;
+
+   const unsigned vec_elem = ir->type->vector_elements;
+   ir_rvalue *results[4] = {NULL};
+
+   ir_constant *double_zero = new(ir) ir_constant(0.0, vec_elem);
+   ir_constant *double_one = new(ir) ir_constant(1.0, vec_elem);
+   ir_constant *int_zero = new(ir) ir_constant(0, vec_elem);
+
+   for (unsigned elem = 0; elem < vec_elem; elem++) {
+
+      // Three cases are emitted:
+      // 1. value == 0.0
+
+      factory.emit(if_tree(equal(ir->operands[0], double_zero),
+                           ret(double_zero)));
+
+      // 2. value < 0.0
+      // NOTE(review): intended -floor(abs(x)) - 1.0; the tree below is -(abs(x) - 1.0)
+      factory.emit(if_tree(less(ir->operands[0]->clone(ir, NULL), double_zero->clone(ir, NULL)),
+                           neg(sub(abs(ir->operands[0]->clone(ir, NULL)), double_one))));
+
+      // 3. value > 0.0: unpack the double and work on its bit pattern
+
+      ir_variable *unpacked =
+         factory.make_temp(glsl_type::uvec2_type, "unpacked");
+
+      factory.emit(assign(unpacked,
+                          expr(ir_unop_unpack_double_2x32,
+                          swizzle(ir->operands[0]->clone(ir, NULL), elem, 1))));
+
+      ir_rvalue *hi  = swizzle_y(unpacked);
+      ir_rvalue *hi2 = swizzle_y(unpacked);
+
+      // extract exponent and mantissa from hi (the sign bit is not read)
+
+      ir_variable *exponent =
+         factory.make_temp(glsl_type::uint_type, "exponent");
+
+      ir_variable *int_exponent =
+         factory.make_temp(glsl_type::int_type, "int_exponent");
+
+      ir_variable *mantissa =
+         factory.make_temp(glsl_type::uint_type, "mantissa");
+
+      // e = (hi >> 20) & 0x7ff;
+      // exp = e - 1023; stored once in "exponent" and once in "int_exponent"
+      factory.emit(assign(exponent,
+                          sub(bit_and(rshift(hi, factory.constant(20u)),
+                                         factory.constant(0x7ffu)), factory.constant(1023u))));
+
+      factory.emit(assign(int_exponent,
+                          sub(bit_and(rshift(hi->clone(ir, NULL), factory.constant(20u)),
+                                         factory.constant(0x7ffu)), factory.constant(1023u))));
+
+      // if exp < 0, floor(x) = 0
+      factory.emit(if_tree(less(int_exponent, int_zero), ret(double_zero)));
+      // if exp == 0, floor(x) = 1
+      factory.emit(if_tree(equal(int_exponent, int_zero->clone(ir, NULL)), ret(double_one)));
+
+      // otherwise rebuild the integer part from the exponent and mantissa
+
+      // m = hi & 0xfffff (low 20 bits of hi); unpacked.x is never read
+      factory.emit(assign(mantissa,
+                          bit_and(hi2, factory.constant(0xfffffu))));
+
+      // nmb = 20 - exp, the right-shift applied to the mantissa below
+      ir_variable *nmb =
+         factory.make_temp(glsl_type::uint_type, "nmb");
+      factory.emit(assign(nmb, sub(factory.constant(20u), exponent)));
+
+      ir_variable *result =
+        factory.make_temp(glsl_type::uint_type, "result");
+
+      // temporaries holding the two terms of the sum
+      ir_variable *a =
+        factory.make_temp(glsl_type::uint_type, "a");
+      ir_variable *b =
+        factory.make_temp(glsl_type::uint_type, "b");
+
+      // result = (1 << exp) + (m >> nmb)
+      factory.emit(assign(a, lshift(factory.constant(1u), exponent)));
+      factory.emit(assign(b, rshift(mantissa, nmb)));
+      factory.emit(assign(result, add(a, b)));
+
+      // unsigned -> signed conversion (ir_unop_u2i)
+      ir_variable *c =
+        factory.make_temp(glsl_type::int_type, "c");
+      factory.emit(assign(c, expr(ir_unop_u2i, result)));
+
+      // signed -> double conversion (ir_unop_i2d)
+      results[elem] = expr(ir_unop_i2d, c);
+   }
+
+   _mesa_print_ir(stderr, &instructions, NULL);
+   i.insert_before(&instructions);
+
+   /* Put the dvec back together; results[] past vec_elem stay NULL */
+   ir->operation = ir_quadop_vector;
+   ir->operands[0] = results[0];
+   ir->operands[1] = results[1];
+   ir->operands[2] = results[2];
+   ir->operands[3] = results[3];
+
+   this->progress = true;
+}
+
+
 void
 lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir)
 {
@@ -1264,6 +1386,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
 	 div_to_mul_rcp(ir);
       break;
 
+   case ir_unop_floor:
+      if (lowering(DFLOOR_TO_ARITH) && ir->operands[0]->type->is_double())
+         dfloor_to_arith(ir);
+      break;
+
    case ir_unop_sqrt:
       if (lowering(DSQRT_TO_FSQRT) && ir->operands[0]->type->is_double())
          dsqrt_to_fsqrt(ir);
@@ -1336,10 +1463,12 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
          dceil_to_dfrac(ir);
       break;
 
+#if 0
    case ir_unop_floor:
       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
          dfloor_to_dfrac(ir);
       break;
+#endif
 
    case ir_unop_round_even:
       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 58d6e77..cce29e0 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -146,7 +146,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
                          bitfield_insert |
                          LDEXP_TO_ARITH |
                          DFREXP_DLDEXP_TO_ARITH |
-                         DSQRT_TO_FSQRT);
+                         DSQRT_TO_FSQRT |
+                         DFLOOR_TO_ARITH);
 
       /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
        * if-statements need to be flattened.