[Mesa-dev] [PATCH 2/7] nir/int64: Add some more lowering helpers

Jason Ekstrand jason at jlekstrand.net
Sun Oct 14 22:58:16 UTC 2018


On October 14, 2018 17:12:34 Matt Turner <mattst88 at gmail.com> wrote:

> From: Jason Ekstrand <jason.ekstrand at intel.com>
>
> [mattst88]: Found in an old branch of Jason's.
>
> Jason implemented: inot, iand, ior, iadd, isub, ineg, iabs, compare,
>                   imin, imax, umin, umax
> Matt implemented:  ixor, imov, bcsel
> ---
> src/compiler/nir/nir_lower_int64.c | 186 +++++++++++++++++++++++++++++++++++++
> 1 file changed, 186 insertions(+)
>
> diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c
> index 0d7f165b406..6b269830801 100644
> --- a/src/compiler/nir/nir_lower_int64.c
> +++ b/src/compiler/nir/nir_lower_int64.c
> @@ -24,6 +24,192 @@
> #include "nir.h"
> #include "nir_builder.h"
>
> +static nir_ssa_def *
> +lower_imov64(nir_builder *b, nir_ssa_def *x)
> +{
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +
> +   return nir_pack_64_2x32_split(b, nir_imov(b, x_lo), nir_imov(b, x_hi));

You don't really need the movs...
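
i.e., something like

   return nir_pack_64_2x32_split(b, x_lo, x_hi);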

> +}
> +
> +static nir_ssa_def *
> +lower_bcsel64(nir_builder *b, nir_ssa_def *cond, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> +
> +   return nir_pack_64_2x32_split(b, nir_bcsel(b, cond, x_lo, y_lo),
> +                                    nir_bcsel(b, cond, x_hi, y_hi));
> +}
> +
> +static nir_ssa_def *
> +lower_inot64(nir_builder *b, nir_ssa_def *x)
> +{
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +
> +   return nir_pack_64_2x32_split(b, nir_inot(b, x_lo), nir_inot(b, x_hi));
> +}
> +
> +static nir_ssa_def *
> +lower_iand64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> +
> +   return nir_pack_64_2x32_split(b, nir_iand(b, x_lo, y_lo),
> +                                    nir_iand(b, x_hi, y_hi));
> +}
> +
> +static nir_ssa_def *
> +lower_ior64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> +
> +   return nir_pack_64_2x32_split(b, nir_ior(b, x_lo, y_lo),
> +                                    nir_ior(b, x_hi, y_hi));
> +}
> +
> +static nir_ssa_def *
> +lower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> +
> +   return nir_pack_64_2x32_split(b, nir_ixor(b, x_lo, y_lo),
> +                                    nir_ixor(b, x_hi, y_hi));
> +}
> +
> +static nir_ssa_def *
> +lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> +
> +   nir_ssa_def *res_lo = nir_iadd(b, x_lo, y_lo);
> +   nir_ssa_def *carry = nir_b2i(b, nir_ult(b, res_lo, x_lo));
> +   nir_ssa_def *res_hi = nir_iadd(b, carry, nir_iadd(b, x_hi, y_hi));
> +
> +   return nir_pack_64_2x32_split(b, res_lo, res_hi);
> +}
> +
> +static nir_ssa_def *
> +lower_isub64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> +
> +   nir_ssa_def *res_lo = nir_isub(b, x_lo, y_lo);
> +   /* In NIR, true is represented by ~0 which is -1 */

We've had discussions (including some at XDC this year) about changing 
booleans to one-bit, which would break this.  Doing b2i would be safer, 
but this does work for now.

> +   nir_ssa_def *borrow = nir_ult(b, x_lo, y_lo);
> +   nir_ssa_def *res_hi = nir_iadd(b, nir_isub(b, x_hi, y_hi), borrow);
> +
> +   return nir_pack_64_2x32_split(b, res_lo, res_hi);
> +}
> +
> +static nir_ssa_def *
> +lower_ineg64(nir_builder *b, nir_ssa_def *x)
> +{
> +   /* Since isub is the same number of instructions (with better dependencies)
> +    * as iadd, subtraction is actually more efficient for ineg than the usual
> +    * 2's complement "flip the bits and add one".
> +    */
> +   return lower_isub64(b, nir_imm_int64(b, 0), x);
> +}
> +
> +static nir_ssa_def *
> +lower_iabs64(nir_builder *b, nir_ssa_def *x)
> +{
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
> +   return nir_bcsel(b, x_is_neg, lower_ineg64(b, x), x);

lower_bcsel?  Or, since we're depending on this running multiple times, 
just nir_ineg?  I go back and forth on whether a pass like this should run 
in a loop or be smart enough to lower intermediate bits on the fly.  We 
should probably pick one.
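
i.e., something like (untested)

   return nir_bcsel(b, x_is_neg, nir_ineg(b, x), x);

and let a later pass of the loop lower the resulting 64-bit ineg and bcsel.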

> +}
> +
> +static nir_ssa_def *
> +lower_int64_compare(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> +
> +   switch (op) {
> +   case nir_op_ieq:
> +      return nir_iand(b, nir_ieq(b, x_hi, y_hi), nir_ieq(b, x_lo, y_lo));
> +   case nir_op_ine:
> +      return nir_ior(b, nir_ine(b, x_hi, y_hi), nir_ine(b, x_lo, y_lo));
> +   case nir_op_ult:
> +      return nir_ior(b, nir_ult(b, x_hi, y_hi),
> +                        nir_iand(b, nir_ieq(b, x_hi, y_hi),
> +                                    nir_ult(b, x_lo, y_lo)));
> +   case nir_op_ilt:
> +      /* This one is tricky.  Handled below */
> +      break;
> +   case nir_op_uge:
> +      /* Lower as !(x < y) in the hopes of better CSE */
> +      return nir_inot(b, lower_int64_compare(b, nir_op_ult, x, y));
> +   case nir_op_ige:
> +      /* Lower as !(x < y) in the hopes of better CSE */
> +      return nir_inot(b, lower_int64_compare(b, nir_op_ilt, x, y));
> +   default:
> +      unreachable("Invalid comparison");
> +   }
> +
> +   assert(op == nir_op_ilt);
> +
> +   nir_ssa_def *neg_x = lower_ineg64(b, x);
> +   nir_ssa_def *neg_y = lower_ineg64(b, y);
> +   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
> +   nir_ssa_def *y_is_neg = nir_ilt(b, y_hi, nir_imm_int(b, 0));
> +
> +   nir_ssa_def *imm_true = nir_imm_int(b, NIR_TRUE);
> +   nir_ssa_def *imm_false = nir_imm_int(b, NIR_FALSE);
> +   nir_ssa_def *ult = lower_int64_compare(b, nir_op_ult, x, y);
> +   nir_ssa_def *not_ult_neg =
> +      nir_inot(b, lower_int64_compare(b, nir_op_ult, neg_x, neg_y));

If you know both are negative, can't you just do an unsigned comparison 
in the opposite direction?  That would save you all the expensive 
negation.  You may even be able to re-use the result above, though I 
haven't thought it all the way through.
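
Thinking about it a bit more: for two's-complement values with the same 
sign, signed and unsigned ordering agree, so when both are negative you 
can reuse the ult result directly.  Something like (untested):

   return nir_bcsel(b, x_is_neg,
                       nir_bcsel(b, y_is_neg, ult, imm_true),
                       nir_bcsel(b, y_is_neg, imm_false, ult));

which drops the negations entirely.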

> +
> +   return nir_bcsel(b, x_is_neg, nir_bcsel(b, y_is_neg, not_ult_neg, imm_true),
> +                                 nir_bcsel(b, y_is_neg, imm_false, ult));
> +}
> +
> +static nir_ssa_def *
> +lower_umax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), y, x);
> +}
> +
> +static nir_ssa_def *
> +lower_imax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), y, x);
> +}
> +
> +static nir_ssa_def *
> +lower_umin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), x, y);
> +}
> +
> +static nir_ssa_def *
> +lower_imin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), x, y);
> +}
> +
> static nir_ssa_def *
> lower_imul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> {
> --
> 2.16.4
>
