[Mesa-dev] [PATCH 05/13] nir/lower_double_ops: lower trunc()

Wed Apr 20 06:37:31 UTC 2016

On Tue, 2016-04-19 at 15:32 -0700, Jason Ekstrand wrote:
> 
> 
> On Tue, Apr 12, 2016 at 1:05 AM, Samuel Iglesias Gonsálvez
> <siglesias at igalia.com> wrote:
>         From: Iago Toral Quiroga <itoral at igalia.com>
>         
>         At least i965 hardware does not have native support for
>         truncating doubles.
>         ---
>          src/compiler/nir/nir.h                  |  1 +
>          src/compiler/nir/nir_lower_double_ops.c | 83
>         +++++++++++++++++++++++++++++++++
>          2 files changed, 84 insertions(+)
>         
>         diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>         index 434d92b..f83b2e0 100644
>         --- a/src/compiler/nir/nir.h
>         +++ b/src/compiler/nir/nir.h
>         @@ -2286,6 +2286,7 @@ typedef enum {
>             nir_lower_drcp = (1 << 0),
>             nir_lower_dsqrt = (1 << 1),
>             nir_lower_drsq = (1 << 2),
>         +   nir_lower_dtrunc = (1 << 3),
>          } nir_lower_doubles_options;
>         
>          void nir_lower_doubles(nir_shader *shader,
>         nir_lower_doubles_options options);
>         diff --git a/src/compiler/nir/nir_lower_double_ops.c
>         b/src/compiler/nir/nir_lower_double_ops.c
>         index 4cd153c..9eec858 100644
>         --- a/src/compiler/nir/nir_lower_double_ops.c
>         +++ b/src/compiler/nir/nir_lower_double_ops.c
>         @@ -302,6 +302,81 @@ lower_sqrt_rsq(nir_builder *b,
>         nir_ssa_def *src, bool sqrt)
>              return res;
>          }
>         
>         +static nir_ssa_def *
>         +lower_trunc(nir_builder *b, nir_ssa_def *src)
>         +{
>         +   nir_ssa_def *unbiased_exp = nir_isub(b, get_exponent(b,
>         src),
>         +                                        nir_imm_int(b,
>         1023));
>         +
>         +   nir_ssa_def *frac_bits = nir_isub(b, nir_imm_int(b, 52),
>         unbiased_exp);
>         +
>         +   /*
>         +    * Depending on the exponent, we compute a mask with the
>         bits we need to
>         +    * remove in order to trunc the double. The mask is
>         computed like this:
>         +    *
>         +    * if (unbiased_exp < 0)
>         +    *    mask = 0x0
>         +    * else if (unbiased_exp > 52)
>         +    *    mask = 0x7fffffffffffffff
>         +    * else
>         +    *    mask = (1LL < frac_bits) - 1
> 
> 
> I'm having a bit of trouble convincing myself that this is correct.
> Let me walk through it one case at a time:
> 
> 
> unbiased_exp < 0:
> 
> In this case, 2^exp <= 2 so src < 1 and the result should be zero.  In
> that case we want to stomp all the bits to zero, not keep them all.
> 
> 
> unbiased_exp > 52:
> 
> In this case 2^exp is large enough that all of the bits matter.  We
> want to keep them all not zero them out.
> 
> 
> else:
> 
> In this case, 2^exp >= 1 but not big enough to make all the mantissa
> bits matter.  We need to mask off the bottom 52-exp many bits.
> 
> 
> If I'm getting this backwards, please let me know.  If it's doing what
> I think it's doing, there are several cases this should be getting
> wrong.  Are we testing all of those cases?

Yes, I think you are getting it backwards. The mask is used to select
the bits from the src that we want to keep. So in the case that
unbiased_exp < 0, a mask of 0 means that we effectively discard all the
bits, as you expect. If unbiased_exp > 52 then mask is all 1's, meaning
that we take all the bits.

> One other aside: I think it's more efficient to generate the masks
> with either (~0u >> (32 - bits)) or (0x80000000 >> (bits - 1)) if you
> want the top bits.  NIR should be able to easily get rid of the
> integer adds and subtracts.  Getting rid of the -1 on (1 << frac_bits)
> - 1 is much harder.

Sure, we can do that.

>         +    *
>         +    * Notice that the else branch is a 64-bit integer
>         operation that we need
>         +    * to implement in terms of 32-bit integer arithmetics (at
>         least until we
>         +    * support 64-bit integer arithmetics).
>         +    */
>         +
>         +   /* Compute "mask = (1LL << frac_bits) - 1" in terms of
>         hi/lo 32-bit chunks
>         +    * for the else branch
>         +    */
>         +   nir_ssa_def *mask_lo =
>         +      nir_bcsel(b,
>         +                nir_ige(b, frac_bits, nir_imm_int(b, 32)),
>         +                nir_imm_int(b, 0xffffffff),
>         +                nir_isub(b,
>         +                         nir_ishl(b,
>         +                                  nir_imm_int(b, 1),
>         +                                  frac_bits),
>         +                         nir_imm_int(b, 1)));
>         +
>         +   nir_ssa_def *mask_hi =
>         +      nir_bcsel(b,
>         +                nir_ilt(b, frac_bits, nir_imm_int(b, 33)),
>         +                nir_imm_int(b, 0),
>         +                nir_isub(b,
>         +                         nir_ishl(b,
>         +                                  nir_imm_int(b, 1),
>         +                                  nir_isub(b,
>         +                                           frac_bits,
>         +                                           nir_imm_int(b,
>         32))),
>         +                         nir_imm_int(b, 1)));
>         +
>         +   /* Compute the correct mask to use based on unbiased_exp
>         */
>         +   nir_ssa_def *mask =
>         +      nir_bcsel(b,
>         +                nir_ilt(b, unbiased_exp, nir_imm_int(b, 0)),
>         +                nir_pack_double_2x32_split(b,
>         +                                           nir_imm_int(b,
>         0xffffffff),
>         +                                           nir_imm_int(b,
>         0x7fffffff)),
>         +                nir_bcsel(b, nir_ige(b, unbiased_exp,
>         nir_imm_int(b, 53)),
>         +                          nir_imm_double(b, 0.0),
>         +                          nir_pack_double_2x32_split(b,
>         mask_lo, mask_hi)));
>         +
>         +   /* Mask off relevant mantissa bits (0..31 in the low
>         32-bits
>         +    * and 0..19 in the high 32 bits)
>         +    */
>         +   mask_lo = nir_unpack_double_2x32_split_x(b, mask);
>         +   mask_hi = nir_unpack_double_2x32_split_y(b, mask);
>         +
>         +   nir_ssa_def *src_lo = nir_unpack_double_2x32_split_x(b,
>         src);
>         +   nir_ssa_def *src_hi = nir_unpack_double_2x32_split_y(b,
>         src);
>         +
>         +   nir_ssa_def *zero = nir_imm_int(b, 0);
>         +   nir_ssa_def *new_src_lo = nir_bfi(b, mask_lo, zero,
>         src_lo);
>         +   nir_ssa_def *new_src_hi = nir_bfi(b, mask_hi, zero,
>         src_hi);
>         +   return nir_pack_double_2x32_split(b, new_src_lo,
>         new_src_hi);

Oh, this is silly, we should be doing a nir_iand instead of a nir_bfi
with zero, I'll fix this too.

>         +}
>         +
>          static void
>          lower_doubles_instr(nir_alu_instr *instr,
>         nir_lower_doubles_options options)
>          {
>         @@ -325,6 +400,11 @@ lower_doubles_instr(nir_alu_instr *instr,
>         nir_lower_doubles_options options)
>                   return;
>                break;
>         
>         +   case nir_op_ftrunc:
>         +      if (!(options & nir_lower_dtrunc))
>         +         return;
>         +      break;
>         +
>             default:
>                return;
>             }
>         @@ -348,6 +428,9 @@ lower_doubles_instr(nir_alu_instr *instr,
>         nir_lower_doubles_options options)
>             case nir_op_frsq:
>                result = lower_sqrt_rsq(&bld, src, false);
>                break;
>         +   case nir_op_ftrunc:
>         +      result = lower_trunc(&bld, src);
>         +      break;
>             default:
>                unreachable("unhandled opcode");
>             }
>         --
>         2.5.0
>         
>         _______________________________________________
>         mesa-dev mailing list
>         mesa-dev at lists.freedesktop.org
>         https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev