[Mesa-dev] [RFC 07/10] nir/lower_double_ops: lower div()
Elie Tournier
tournier.elie at gmail.com
Wed Apr 12 22:43:16 UTC 2017
Signed-off-by: Elie Tournier <elie.tournier at collabora.com>
---
src/compiler/nir/nir.h | 3 +-
src/compiler/nir/nir_lower_double_ops.c | 138 ++++++++++++++++++++++++++++++++
src/intel/compiler/brw_nir.c | 3 +-
3 files changed, 142 insertions(+), 2 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 58045e3d42..d9925c25c7 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2574,7 +2574,8 @@ typedef enum {
nir_lower_dsign = (1 << 11),
nir_lower_deq = (1 << 12),
nir_lower_dlt = (1 << 13),
- nir_lower_dmul = (1 << 14)
+ nir_lower_dmul = (1 << 14),
+ nir_lower_ddiv = (1 << 15)
} nir_lower_doubles_options;
bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options);
diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
index 807fa18fc1..5d6944e15f 100644
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -1350,6 +1350,132 @@ lower_fmul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
}
+static nir_ssa_def *
+div64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) /* unchecked fp64 divide: x * (1/y) */
+{
+ return lower_fmul64(b, x, lower_rcp(b, y)); /* NOTE(review): reciprocal-multiply is likely not correctly rounded — confirm precision of lower_rcp */
+}
+
+static nir_ssa_def *
+lower_fdiv64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+ nir_ssa_def *x_si = get_sign(b, x);
+ nir_ssa_def *x_exp = get_exponent(b, x);
+ nir_ssa_def *x_frac_lo = get_frac_lo(b, x); /* was get_frac_hi(): hi/lo were swapped */
+ nir_ssa_def *x_frac_hi = get_frac_hi(b, x); /* was get_frac_lo(): hi/lo were swapped */
+ nir_ssa_def *y_si = get_sign(b, y);
+ nir_ssa_def *y_exp = get_exponent(b, y);
+ nir_ssa_def *y_frac_lo = get_frac_lo(b, y);
+ nir_ssa_def *y_frac_hi = get_frac_hi(b, y);
+
+ nir_ssa_def *z_si = nir_ixor(b, x_si, y_si);
+ nir_ssa_def *x_frac = nir_ior(b, x_frac_hi, x_frac_lo);
+ nir_ssa_def *x_exp_frac = nir_ior(b, x_exp, x_frac);
+ nir_ssa_def *y_frac = nir_ior(b, y_frac_hi, y_frac_lo);
+
+ nir_ssa_def *zero = nir_imm_int(b, 0);
+
+ /* Result of NaN, Inf and subnormal division */
+ nir_ssa_def *propagate_nan = propagate_fp64_nan(b, x, y);
+
+ nir_ssa_def *pack_inf_fp64 = pack_fp64(b,
+ z_si,
+ nir_imm_int(b, 0x7FF),
+ zero,
+ zero);
+
+ nir_ssa_def *pack_zero_fp64 = pack_fp64(b,
+ z_si,
+ zero,
+ zero,
+ zero);
+
+ nir_ssa_def *default_nan =
+ nir_pack_64_2x32_split(b,
+ nir_imm_int(b, 0xFFFFFFFF),
+ nir_imm_int(b, 0xFFFFFFFF));
+
+ nir_ssa_def *x_exp_sub = x_exp;
+ nir_ssa_def *x_frac_hi_sub = x_frac_hi;
+ nir_ssa_def *x_frac_lo_sub = x_frac_lo;
+ normalize_fp64_subnormal(b,
+ x_frac_hi, x_frac_lo,
+ &x_exp_sub,
+ &x_frac_hi_sub, &x_frac_lo_sub);
+ nir_ssa_def *x_sub = pack_fp64(b, x_si,
+ x_exp_sub,
+ x_frac_hi_sub, x_frac_lo_sub);
+ nir_ssa_def *normalize_x = div64(b, x_sub, y);
+
+ nir_ssa_def *y_exp_sub = y_exp;
+ nir_ssa_def *y_frac_hi_sub = y_frac_hi;
+ nir_ssa_def *y_frac_lo_sub = y_frac_lo;
+ normalize_fp64_subnormal(b,
+ y_frac_hi, y_frac_lo,
+ &y_exp_sub,
+ &y_frac_hi_sub, &y_frac_lo_sub);
+ nir_ssa_def *y_sub = pack_fp64(b, y_si,
+ y_exp_sub,
+ y_frac_hi_sub, y_frac_lo_sub);
+ nir_ssa_def *normalize_y = div64(b, x, y_sub);
+
+ /*
+ * Handle the different exceptions before computing the division.
+ *
+ * If x / Inf, return 0.
+ * If Inf / Inf, we return a default NaN (0xFFFFFFFFFFFFFFFF).
+ * If Inf / 0, return Inf.
+ *
+ * If x / NaN or NaN / y, we propagate the NaN.
+ * If NaN / NaN, we select the correct NaN to propagate.
+ *
+ * If x and y are equal to 0, we return a default NaN.
+ * If x is equal to 0, we return 0.
+ * If y is equal to 0, we return Inf.
+ *
+ * If x or y is a subnormal (exponent == 0 and significant != 0),
+ * we normalize this entry and realize the division.
+ */
+ return
+ nir_bcsel(b,
+ nir_ieq(b, x_exp, nir_imm_int(b, 0x7FF)),
+ nir_bcsel(b,
+ nir_ine(b, x_frac, zero), /* canonical boolean; was raw x_frac */
+ propagate_nan,
+ nir_bcsel(b,
+ nir_ieq(b, y_exp, nir_imm_int(b, 0x7FF)),
+ nir_bcsel(b,
+ nir_ine(b, y_frac, zero), /* canonical boolean; was raw y_frac */
+ propagate_nan,
+ default_nan),
+ pack_inf_fp64)),
+ nir_bcsel(b,
+ nir_ieq(b, y_exp, nir_imm_int(b, 0x7FF)),
+ nir_bcsel(b,
+ nir_ieq(b, y_frac, zero),
+ pack_zero_fp64, /* x / Inf -> signed 0 (branches were inverted) */
+ propagate_nan), /* x / NaN -> propagate the NaN */
+ nir_bcsel(b,
+ nir_ieq(b, y_exp, zero),
+ nir_bcsel(b,
+ nir_ieq(b, y_frac, zero),
+ nir_bcsel(b,
+ nir_ieq(b,
+ x_exp_frac,
+ zero),
+ default_nan,
+ pack_inf_fp64),
+ normalize_y),
+ nir_bcsel(b,
+ nir_ieq(b, x_exp, zero),
+ nir_bcsel(b,
+ nir_ieq(b, x_frac,
+ zero),
+ pack_zero_fp64,
+ normalize_x),
+ div64(b, x, y)))));
+}
+
static bool
lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
{
@@ -1433,6 +1559,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
return false;
break;
+ case nir_op_fdiv:
+ if (!(options & nir_lower_ddiv))
+ return false;
+ break;
+
default:
return false;
}
@@ -1512,6 +1643,13 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
}
break;
+ case nir_op_fdiv: {
+ nir_ssa_def *src1 = nir_fmov_alu(&bld, instr->src[1],
+ instr->dest.dest.ssa.num_components);
+ result = lower_fdiv64(&bld, src, src1);
+ }
+ break;
+
default:
unreachable("unhandled opcode");
}
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 9dc745d327..1c1867a3ad 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -515,7 +515,8 @@ nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
nir_lower_dsign |
nir_lower_deq |
nir_lower_dlt |
- nir_lower_dmul);
+ nir_lower_dmul |
+ nir_lower_ddiv);
OPT(nir_lower_64bit_pack);
} while (progress);
--
2.11.0
More information about the mesa-dev
mailing list