[Mesa-dev] [RFC 07/10] nir/lower_double_ops: lower div()
Elie Tournier
tournier.elie at gmail.com
Wed Apr 12 22:43:16 UTC 2017
Signed-off-by: Elie Tournier <elie.tournier at collabora.com>
---
src/compiler/nir/nir.h | 3 +-
src/compiler/nir/nir_lower_double_ops.c | 138 ++++++++++++++++++++++++++++++++
src/intel/compiler/brw_nir.c | 3 +-
3 files changed, 142 insertions(+), 2 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 58045e3d42..d9925c25c7 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2574,7 +2574,8 @@ typedef enum {
nir_lower_dsign = (1 << 11),
nir_lower_deq = (1 << 12),
nir_lower_dlt = (1 << 13),
- nir_lower_dmul = (1 << 14)
+ nir_lower_dmul = (1 << 14),
+ nir_lower_ddiv = (1 << 15)
} nir_lower_doubles_options;
bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options);
diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
index 807fa18fc1..5d6944e15f 100644
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -1350,6 +1350,132 @@ lower_fmul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
}
+static nir_ssa_def *
+div64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) /* unchecked fp64 divide: x * (1/y) */
+{
+ return lower_fmul64(b, x, lower_rcp(b, y)); /* NOTE(review): reciprocal-multiply is likely not correctly rounded — confirm precision of lower_rcp */
+}
+
+static nir_ssa_def *
+lower_fdiv64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+ nir_ssa_def *x_si = get_sign(b, x);
+ nir_ssa_def *x_exp = get_exponent(b, x);
+ nir_ssa_def *x_frac_lo = get_frac_lo(b, x); /* was get_frac_hi(): hi/lo were swapped */
+ nir_ssa_def *x_frac_hi = get_frac_hi(b, x); /* was get_frac_lo(): hi/lo were swapped */
+ nir_ssa_def *y_si = get_sign(b, y);
+ nir_ssa_def *y_exp = get_exponent(b, y);
+ nir_ssa_def *y_frac_lo = get_frac_lo(b, y);
+ nir_ssa_def *y_frac_hi = get_frac_hi(b, y);
+
+ nir_ssa_def *z_si = nir_ixor(b, x_si, y_si);
+ nir_ssa_def *x_frac = nir_ior(b, x_frac_hi, x_frac_lo);
+ nir_ssa_def *x_exp_frac = nir_ior(b, x_exp, x_frac);
+ nir_ssa_def *y_frac = nir_ior(b, y_frac_hi, y_frac_lo);
+
+ nir_ssa_def *zero = nir_imm_int(b, 0);
+
+ /* Result of NaN, Inf and subnormal division */
+ nir_ssa_def *propagate_nan = propagate_fp64_nan(b, x, y);
+
+ nir_ssa_def *pack_inf_fp64 = pack_fp64(b,
+ z_si,
+ nir_imm_int(b, 0x7FF),
+ zero,
+ zero);
+
+ nir_ssa_def *pack_zero_fp64 = pack_fp64(b,
+ z_si,
+ zero,
+ zero,
+ zero);
+
+ nir_ssa_def *default_nan =
+ nir_pack_64_2x32_split(b,
+ nir_imm_int(b, 0xFFFFFFFF),
+ nir_imm_int(b, 0xFFFFFFFF));
+
+ nir_ssa_def *x_exp_sub = x_exp;
+ nir_ssa_def *x_frac_hi_sub = x_frac_hi;
+ nir_ssa_def *x_frac_lo_sub = x_frac_lo;
+ normalize_fp64_subnormal(b,
+ x_frac_hi, x_frac_lo,
+ &x_exp_sub,
+ &x_frac_hi_sub, &x_frac_lo_sub);
+ nir_ssa_def *x_sub = pack_fp64(b, x_si,
+ x_exp_sub,
+ x_frac_hi_sub, x_frac_lo_sub);
+ nir_ssa_def *normalize_x = div64(b, x_sub, y);
+
+ nir_ssa_def *y_exp_sub = y_exp;
+ nir_ssa_def *y_frac_hi_sub = y_frac_hi;
+ nir_ssa_def *y_frac_lo_sub = y_frac_lo;
+ normalize_fp64_subnormal(b,
+ y_frac_hi, y_frac_lo,
+ &y_exp_sub,
+ &y_frac_hi_sub, &y_frac_lo_sub);
+ nir_ssa_def *y_sub = pack_fp64(b, y_si,
+ y_exp_sub,
+ y_frac_hi_sub, y_frac_lo_sub);
+ nir_ssa_def *normalize_y = div64(b, x, y_sub);
+
+ /*
+ * Handle the different exceptions before computing the division.
+ *
+ * If x / Inf, return 0.
+ * If Inf / Inf, we return a default NaN (0xFFFFFFFFFFFFFFFF).
+ * If Inf / 0, return Inf.
+ *
+ * If x / NaN or NaN / y, we propagate the NaN.
+ * If NaN / NaN, we select the correct NaN to propagate.
+ *
+ * If x and y are equal to 0, we return a default NaN.
+ * If x is equal to 0, we return 0.
+ * If y is equal to 0, we return Inf.
+ *
+ * If x or y is a subnormal (exponent == 0 and significant != 0),
+ * we normalize this entry and realize the division.
+ */
+ return
+ nir_bcsel(b,
+ nir_ieq(b, x_exp, nir_imm_int(b, 0x7FF)),
+ nir_bcsel(b,
+ nir_ine(b, x_frac, zero), /* canonical boolean; was raw x_frac */
+ propagate_nan,
+ nir_bcsel(b,
+ nir_ieq(b, y_exp, nir_imm_int(b, 0x7FF)),
+ nir_bcsel(b,
+ nir_ine(b, y_frac, zero), /* canonical boolean; was raw y_frac */
+ propagate_nan,
+ default_nan),
+ pack_inf_fp64)),
+ nir_bcsel(b,
+ nir_ieq(b, y_exp, nir_imm_int(b, 0x7FF)),
+ nir_bcsel(b,
+ nir_ieq(b, y_frac, zero),
+ pack_zero_fp64, /* x / Inf -> signed 0 (branches were inverted) */
+ propagate_nan), /* x / NaN -> propagate the NaN */
+ nir_bcsel(b,
+ nir_ieq(b, y_exp, zero),
+ nir_bcsel(b,
+ nir_ieq(b, y_frac, zero),
+ nir_bcsel(b,
+ nir_ieq(b,
+ x_exp_frac,
+ zero),
+ default_nan,
+ pack_inf_fp64),
+ normalize_y),
+ nir_bcsel(b,
+ nir_ieq(b, x_exp, zero),
+ nir_bcsel(b,
+ nir_ieq(b, x_frac,
+ zero),
+ pack_zero_fp64,
+ normalize_x),
+ div64(b, x, y)))));
+}
+
static bool
lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
{
@@ -1433,6 +1559,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
return false;
break;
+ case nir_op_fdiv:
+ if (!(options & nir_lower_ddiv))
+ return false;
+ break;
+
default:
return false;
}
@@ -1512,6 +1643,13 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
}
break;
+ case nir_op_fdiv: {
+ nir_ssa_def *src1 = nir_fmov_alu(&bld, instr->src[1],
+ instr->dest.dest.ssa.num_components);
+ result = lower_fdiv64(&bld, src, src1);
+ }
+ break;
+
default:
unreachable("unhandled opcode");
}
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 9dc745d327..1c1867a3ad 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -515,7 +515,8 @@ nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
nir_lower_dsign |
nir_lower_deq |
nir_lower_dlt |
- nir_lower_dmul);
+ nir_lower_dmul |
+ nir_lower_ddiv);
OPT(nir_lower_64bit_pack);
} while (progress);
--
2.11.0
More information about the mesa-dev
mailing list