[Mesa-dev] [RFC 05/10] nir/lower_double_ops: lower lt()
Elie Tournier
tournier.elie at gmail.com
Wed Apr 12 22:43:14 UTC 2017
Signed-off-by: Elie Tournier <elie.tournier at collabora.com>
---
src/compiler/nir/nir.h | 3 +-
src/compiler/nir/nir_lower_double_ops.c | 68 +++++++++++++++++++++++++++++++++
src/intel/compiler/brw_nir.c | 3 +-
3 files changed, 72 insertions(+), 2 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 7b1a4655ca..89d5dd8e1b 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2572,7 +2572,8 @@ typedef enum {
nir_lower_dabs = (1 << 9),
nir_lower_dneg = (1 << 10),
nir_lower_dsign = (1 << 11),
- nir_lower_deq = (1 << 12)
+ nir_lower_deq = (1 << 12),
+ nir_lower_dlt = (1 << 13)
} nir_lower_doubles_options;
bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options);
diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
index d3e05bf519..38743206a8 100644
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -36,6 +36,16 @@
* - 32-bit integer and floating point arithmetic
*/
+static nir_ssa_def *
+get_sign(nir_builder *b, nir_ssa_def *src)
+{
+ /* get bits 32-63 */
+ nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, src);
+
+ /* extract bit 32 of the high word */
+ return nir_ubitfield_extract(b, hi, nir_imm_int(b, 31), nir_imm_int(b, 1));
+}
+
/* Creates a double with the exponent bits set to a given integer value */
static nir_ssa_def *
set_exponent(nir_builder *b, nir_ssa_def *src, nir_ssa_def *exp)
@@ -126,6 +136,18 @@ fix_inv_result(nir_builder *b, nir_ssa_def *res, nir_ssa_def *src,
}
static nir_ssa_def *
+lt64(nir_builder *b, nir_ssa_def *x_hi, nir_ssa_def *x_lo,
+ nir_ssa_def *y_hi, nir_ssa_def *y_lo)
+{
+ nir_ssa_def *lt_hi = nir_flt(b, x_hi, y_hi);
+ nir_ssa_def *eq_hi = nir_ieq(b, x_hi, y_hi);
+ nir_ssa_def *lt_lo = nir_flt(b, x_lo, y_lo);
+
+ /* return (x_hi < y_hi) || ((x_hi == y_hi) && (x_lo < y_lo)); */
+ return nir_ior(b, lt_hi, nir_iand(b, eq_hi, lt_lo));
+}
+
+static nir_ssa_def *
lower_rcp(nir_builder *b, nir_ssa_def *src)
{
/* normalize the input to avoid range issues */
@@ -557,6 +579,40 @@ lower_feq64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
nir_iand(b, eq_x_lo, eq_xy_hi))));
}
+static nir_ssa_def *
+lower_flt64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+ nir_ssa_def *x_si = get_sign(b, x);
+ nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+ nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+ nir_ssa_def *y_si = get_sign(b, y);
+ nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+ nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+ nir_ssa_def *xy_lo = nir_ior(b, x_lo, y_lo);
+ nir_ssa_def *xy_hi = nir_ior(b, x_hi, y_hi);
+ nir_ssa_def *shl_xy_hi = nir_ishl(b, xy_hi, nir_imm_int(b, 1));
+ nir_ssa_def *xy_hi_wo_si = nir_ior(b, shl_xy_hi, xy_lo);
+ nir_ssa_def *ne_xy = nir_ine(b, xy_hi_wo_si, nir_imm_int(b, 0));
+
+ /* if x or y is a nan
+ * return false;
+ * if (x_si != y_si)
+ * return x_si && (((((x_hi | y_hi)<<1)) | x_lo | y_lo) != 0);
+ * return
+ * x_si ? lt64(y_hi, y_lo, x_hi, x_lo) : lt64(x_hi, x_lo, y_hi, y_lo);
+ */
+ return nir_bcsel(b,
+ nir_ior(b, is_nan(b, x), is_nan(b, y)),
+ nir_imm_int(b, NIR_FALSE),
+ nir_bcsel(b,
+ nir_ine(b, x_si, y_si),
+ nir_iand(b, x_si, ne_xy),
+ nir_bcsel(b, x_si,
+ lt64(b, y_hi, y_lo, x_hi, x_lo),
+ lt64(b, x_hi, x_lo, y_hi, y_lo))));
+}
+
static bool
lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
{
@@ -630,6 +686,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
return false;
break;
+ case nir_op_flt:
+ if (!(options & nir_lower_dlt))
+ return false;
+ break;
+
default:
return false;
}
@@ -695,6 +756,13 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
}
break;
+ case nir_op_flt: {
+ nir_ssa_def *src1 = nir_fmov_alu(&bld, instr->src[1],
+ instr->dest.dest.ssa.num_components);
+ result = lower_flt64(&bld, src, src1);
+ }
+ break;
+
default:
unreachable("unhandled opcode");
}
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 7b8b34b4ba..374230a89b 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -513,7 +513,8 @@ nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
nir_lower_dabs |
nir_lower_dneg |
nir_lower_dsign |
- nir_lower_deq);
+ nir_lower_deq |
+ nir_lower_dlt);
OPT(nir_lower_64bit_pack);
} while (progress);
--
2.11.0
More information about the mesa-dev
mailing list