[Mesa-dev] [PATCH 05/13] nir/lower_double_ops: lower trunc()

Jason Ekstrand jason at jlekstrand.net
Tue Apr 19 22:32:20 UTC 2016


On Tue, Apr 12, 2016 at 1:05 AM, Samuel Iglesias Gonsálvez <
siglesias at igalia.com> wrote:

> From: Iago Toral Quiroga <itoral at igalia.com>
>
> At least i965 hardware does not have native support for truncating doubles.
> ---
>  src/compiler/nir/nir.h                  |  1 +
>  src/compiler/nir/nir_lower_double_ops.c | 83
> +++++++++++++++++++++++++++++++++
>  2 files changed, 84 insertions(+)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 434d92b..f83b2e0 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2286,6 +2286,7 @@ typedef enum {
>     nir_lower_drcp = (1 << 0),
>     nir_lower_dsqrt = (1 << 1),
>     nir_lower_drsq = (1 << 2),
> +   nir_lower_dtrunc = (1 << 3),
>  } nir_lower_doubles_options;
>
>  void nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options
> options);
> diff --git a/src/compiler/nir/nir_lower_double_ops.c
> b/src/compiler/nir/nir_lower_double_ops.c
> index 4cd153c..9eec858 100644
> --- a/src/compiler/nir/nir_lower_double_ops.c
> +++ b/src/compiler/nir/nir_lower_double_ops.c
> @@ -302,6 +302,81 @@ lower_sqrt_rsq(nir_builder *b, nir_ssa_def *src, bool
> sqrt)
>      return res;
>  }
>
> +static nir_ssa_def *
> +lower_trunc(nir_builder *b, nir_ssa_def *src)
> +{
> +   nir_ssa_def *unbiased_exp = nir_isub(b, get_exponent(b, src),
> +                                        nir_imm_int(b, 1023));
> +
> +   nir_ssa_def *frac_bits = nir_isub(b, nir_imm_int(b, 52), unbiased_exp);
> +
> +   /*
> +    * Depending on the exponent, we compute a mask with the bits we need
> to
> +    * remove in order to trunc the double. The mask is computed like this:
> +    *
> +    * if (unbiased_exp < 0)
> +    *    mask = 0x0
> +    * else if (unbiased_exp > 52)
> +    *    mask = 0x7fffffffffffffff
> +    * else
> +    *    mask = (1LL << frac_bits) - 1
>

I'm having a bit of trouble convincing myself that this is correct.  Let me
walk through it one case at a time:

unbiased_exp < 0:
In this case, 2^exp <= 1/2 so |src| < 1 and the result should be zero.  In
that case we want to stomp all the bits to zero, not keep them all.

unbiased_exp > 52:
In this case 2^exp is large enough that all of the bits matter.  We want to
keep them all, not zero them out.

else:
In this case, 2^exp >= 1 but not big enough to make all the mantissa bits
matter.  We need to mask off the bottom 52-exp many bits.

If I'm getting this backwards, please let me know.  If it's doing what I
think it's doing, there are several cases this should be getting wrong.
Are we testing all of those cases?

One other aside: I think it's more efficient to generate the masks with
either (~0u >> (32 - bits)) or, if you want the top bits, an arithmetic
(sign-extending) shift such as ((int)0x80000000 >> (bits - 1)).  NIR should
be able to easily get rid of the integer adds and subtracts.  Getting rid of
the -1 on (1 << frac_bits) - 1 is much harder.


> +    *
> +    * Notice that the else branch is a 64-bit integer operation that we
> need
> +    * to implement in terms of 32-bit integer arithmetics (at least until
> we
> +    * support 64-bit integer arithmetics).
> +    */
> +
> +   /* Compute "mask = (1LL << frac_bits) - 1" in terms of hi/lo 32-bit
> chunks
> +    * for the else branch
> +    */
> +   nir_ssa_def *mask_lo =
> +      nir_bcsel(b,
> +                nir_ige(b, frac_bits, nir_imm_int(b, 32)),
> +                nir_imm_int(b, 0xffffffff),
> +                nir_isub(b,
> +                         nir_ishl(b,
> +                                  nir_imm_int(b, 1),
> +                                  frac_bits),
> +                         nir_imm_int(b, 1)));
> +
> +   nir_ssa_def *mask_hi =
> +      nir_bcsel(b,
> +                nir_ilt(b, frac_bits, nir_imm_int(b, 33)),
> +                nir_imm_int(b, 0),
> +                nir_isub(b,
> +                         nir_ishl(b,
> +                                  nir_imm_int(b, 1),
> +                                  nir_isub(b,
> +                                           frac_bits,
> +                                           nir_imm_int(b, 32))),
> +                         nir_imm_int(b, 1)));
> +
> +   /* Compute the correct mask to use based on unbiased_exp */
> +   nir_ssa_def *mask =
> +      nir_bcsel(b,
> +                nir_ilt(b, unbiased_exp, nir_imm_int(b, 0)),
> +                nir_pack_double_2x32_split(b,
> +                                           nir_imm_int(b, 0xffffffff),
> +                                           nir_imm_int(b, 0x7fffffff)),
> +                nir_bcsel(b, nir_ige(b, unbiased_exp, nir_imm_int(b, 53)),
> +                          nir_imm_double(b, 0.0),
> +                          nir_pack_double_2x32_split(b, mask_lo,
> mask_hi)));
> +
> +   /* Mask off relevant mantissa bits (0..31 in the low 32-bits
> +    * and 0..19 in the high 32 bits)
> +    */
> +   mask_lo = nir_unpack_double_2x32_split_x(b, mask);
> +   mask_hi = nir_unpack_double_2x32_split_y(b, mask);
> +
> +   nir_ssa_def *src_lo = nir_unpack_double_2x32_split_x(b, src);
> +   nir_ssa_def *src_hi = nir_unpack_double_2x32_split_y(b, src);
> +
> +   nir_ssa_def *zero = nir_imm_int(b, 0);
> +   nir_ssa_def *new_src_lo = nir_bfi(b, mask_lo, zero, src_lo);
> +   nir_ssa_def *new_src_hi = nir_bfi(b, mask_hi, zero, src_hi);
> +   return nir_pack_double_2x32_split(b, new_src_lo, new_src_hi);
> +}
> +
>  static void
>  lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options
> options)
>  {
> @@ -325,6 +400,11 @@ lower_doubles_instr(nir_alu_instr *instr,
> nir_lower_doubles_options options)
>           return;
>        break;
>
> +   case nir_op_ftrunc:
> +      if (!(options & nir_lower_dtrunc))
> +         return;
> +      break;
> +
>     default:
>        return;
>     }
> @@ -348,6 +428,9 @@ lower_doubles_instr(nir_alu_instr *instr,
> nir_lower_doubles_options options)
>     case nir_op_frsq:
>        result = lower_sqrt_rsq(&bld, src, false);
>        break;
> +   case nir_op_ftrunc:
> +      result = lower_trunc(&bld, src);
> +      break;
>     default:
>        unreachable("unhandled opcode");
>     }
> --
> 2.5.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160419/146daa63/attachment.html>


More information about the mesa-dev mailing list