<div dir="ltr"><div><div>Reviewed-by: Jason Ekstrand &lt;<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>&gt;<br><br></div>Did the v2 fix anything other than newly added tests?<br></div>--Jason<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Apr 21, 2016 at 10:13 PM, Samuel Iglesias Gonsálvez <span dir="ltr">&lt;<a href="mailto:siglesias@igalia.com" target="_blank">siglesias@igalia.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">From: Iago Toral Quiroga &lt;<a href="mailto:itoral@igalia.com">itoral@igalia.com</a>&gt;<br>
<br>
At least i965 hardware does not have native support for truncating doubles.<br>
<br>
</span>v2:<br>
- Simplified the implementation significantly.<br>
- Fixed the else branch, which was not doing what we wanted.<br>
---<br>
src/compiler/nir/nir.h | 1 +<br>
src/compiler/nir/nir_lower_double_ops.c | 60 +++++++++++++++++++++++++++++++++<br>
2 files changed, 61 insertions(+)<br>
<br>
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h<br>
index 9bfb24a..ab8ae99 100644<br>
--- a/src/compiler/nir/nir.h<br>
+++ b/src/compiler/nir/nir.h<br>
@@ -2330,6 +2330,7 @@ typedef enum {<br>
<span class=""> nir_lower_drcp = (1 << 0),<br>
nir_lower_dsqrt = (1 << 1),<br>
nir_lower_drsq = (1 << 2),<br>
+ nir_lower_dtrunc = (1 << 3),<br>
} nir_lower_doubles_options;<br>
<br>
void nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options);<br>
diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c<br>
</span>index e22e822..e8ae884 100644<br>
--- a/src/compiler/nir/nir_lower_double_ops.c<br>
+++ b/src/compiler/nir/nir_lower_double_ops.c<br>
@@ -299,6 +299,58 @@ lower_sqrt_rsq(nir_builder *b, nir_ssa_def *src, bool sqrt)<br>
<span class=""> return res;<br>
}<br>
<br>
+static nir_ssa_def *<br>
+lower_trunc(nir_builder *b, nir_ssa_def *src)<br>
+{<br>
+ nir_ssa_def *unbiased_exp = nir_isub(b, get_exponent(b, src),<br>
+ nir_imm_int(b, 1023));<br>
+<br>
+ nir_ssa_def *frac_bits = nir_isub(b, nir_imm_int(b, 52), unbiased_exp);<br>
+<br>
+ /*<br>
</span>+ * Decide the operation to apply depending on the unbiased exponent:<br>
<span class="">+ *<br>
+ * if (unbiased_exp < 0)<br>
</span>+ * return 0<br>
<span class="">+ * else if (unbiased_exp > 52)<br>
</span>+ * return src<br>
+ * else<br>
+ * return src & (~0 << frac_bits)<br>
<span class="">+ *<br>
+ * Notice that the else branch is a 64-bit integer operation that we need<br>
+ * to implement in terms of 32-bit integer arithmetic (at least until we<br>
+ * support 64-bit integer arithmetic).<br>
+ */<br>
+<br>
</span>+ /* Compute "~0 << frac_bits" in terms of hi/lo 32-bit integer math */<br>
<span class="">+ nir_ssa_def *mask_lo =<br>
+ nir_bcsel(b,<br>
+ nir_ige(b, frac_bits, nir_imm_int(b, 32)),<br>
</span>+ nir_imm_int(b, 0),<br>
+ nir_ishl(b, nir_imm_int(b, ~0), frac_bits));<br>
<span class="">+<br>
+ nir_ssa_def *mask_hi =<br>
+ nir_bcsel(b,<br>
+ nir_ilt(b, frac_bits, nir_imm_int(b, 33)),<br>
</span>+ nir_imm_int(b, ~0),<br>
+ nir_ishl(b,<br>
+ nir_imm_int(b, ~0),<br>
+ nir_isub(b, frac_bits, nir_imm_int(b, 32))));<br>
<span class="">+<br>
+ nir_ssa_def *src_lo = nir_unpack_double_2x32_split_x(b, src);<br>
+ nir_ssa_def *src_hi = nir_unpack_double_2x32_split_y(b, src);<br>
+<br>
</span>+ return<br>
<span class="">+ nir_bcsel(b,<br>
+ nir_ilt(b, unbiased_exp, nir_imm_int(b, 0)),<br>
</span>+ nir_imm_double(b, 0.0),<br>
<span class="">+ nir_bcsel(b, nir_ige(b, unbiased_exp, nir_imm_int(b, 53)),<br>
</span>+ src,<br>
+ nir_pack_double_2x32_split(b,<br>
+ nir_iand(b, mask_lo, src_lo),<br>
+ nir_iand(b, mask_hi, src_hi))));<br>
<span class="">+}<br>
+<br>
static void<br>
lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)<br>
{<br>
</span>@@ -322,6 +374,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)<br>
<span class=""> return;<br>
break;<br>
<br>
+ case nir_op_ftrunc:<br>
+ if (!(options & nir_lower_dtrunc))<br>
+ return;<br>
+ break;<br>
+<br>
default:<br>
return;<br>
}<br>
</span>@@ -345,6 +402,9 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)<br>
<div class="HOEnZb"><div class="h5"> case nir_op_frsq:<br>
result = lower_sqrt_rsq(&bld, src, false);<br>
break;<br>
+ case nir_op_ftrunc:<br>
+ result = lower_trunc(&bld, src);<br>
+ break;<br>
default:<br>
unreachable("unhandled opcode");<br>
}<br>
--<br>
2.5.0<br>
<br>
</div></div></blockquote></div><br></div>