[Mesa-dev] [PATCH 20/20] nir: Narrow unnecessary 64-bit operations to 32-bits

Tue Jul 18 03:14:48 UTC 2017

On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner <mattst88 at gmail.com> wrote:
> If we know the high bits are zero, we can just do a 32-bit comparison on
> the low bytes instead.
> ---
>  src/compiler/nir/nir_opt_algebraic.py | 14 +++++++++-
>  src/compiler/nir/nir_search_helpers.h | 48 +++++++++++++++++++++++++++++++++++
>  2 files changed, 61 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
> index df5854270c..a9c3e80929 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -44,7 +44,7 @@ d = 'd'
>  # however, be used for backend-requested lowering operations as those need to
>  # happen regardless of precision.
>  #
> -# Variable names are specified as "[#]name[@type][(cond)]" where "#" inicates
> +# Variable names are specified as "[#]name[@type][(cond)]" where "#" indicates
>  # that the given variable will only match constants and the type indicates that
>  # the given variable will only match values from ALU instructions with the
>  # given output type, and (cond) specifies an additional condition function
> @@ -144,6 +144,16 @@ optimizations = [
>     (('inot', ('ieq', a, b)), ('ine', a, b)),
>     (('inot', ('ine', a, b)), ('ieq', a, b)),
>
> +   # Unnecessary 64-bit comparisons
> +   (('ieq', 'a at 64(fits_in_32_bits)', 'b at 64(fits_in_32_bits)'), ('ieq', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
> +   (('ine', 'a at 64(fits_in_32_bits)', 'b at 64(fits_in_32_bits)'), ('ine', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
> +   (('ilt', 'a at 64(fits_in_32_bits)', 'b at 64(fits_in_32_bits)'), ('ilt', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
> +   (('ige', 'a at 64(fits_in_32_bits)', 'b at 64(fits_in_32_bits)'), ('ige', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
> +   (('ult', 'a at 64(fits_in_32_bits)', 'b at 64(fits_in_32_bits)'), ('ult', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
> +   (('uge', 'a at 64(fits_in_32_bits)', 'b at 64(fits_in_32_bits)'), ('uge', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
> +
> +   (('iand', 'a at 64(fits_in_32_bits)', 'b at 64'), ('pack_64_2x32_split', ('iand', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)), 0)),
> +
>     # 0.0 >= b2f(a)
>     # b2f(a) <= 0.0
>     # b2f(a) == 0.0 because b2f(a) can only be 0 or 1
> @@ -315,6 +325,8 @@ optimizations = [
>     (('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
>                             ('unpack_64_2x32_split_y', a)), a),
>
> +   (('unpack_64_2x32_split_y', 'a(fits_in_32_bits)'), 0),
> +
>     # Byte extraction
>     (('ushr', a, 24), ('extract_u8', a, 3), '!options->lower_extract_byte'),
>     (('iand', 0xff, ('ushr', a, 16)), ('extract_u8', a, 2), '!options->lower_extract_byte'),
> diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h
> index 200f2471f8..c29ea5b9dd 100644
> --- a/src/compiler/nir/nir_search_helpers.h
> +++ b/src/compiler/nir/nir_search_helpers.h
> @@ -115,6 +115,54 @@ is_zero_to_one(nir_alu_instr *instr, unsigned src, unsigned num_components,
>  }
>
>  static inline bool
> +fits_in_32_bits(nir_alu_instr *instr, unsigned src, unsigned num_components,
> +                const uint8_t *swizzle)
> +{
> +   if (instr->src[src].src.is_ssa &&
> +       instr->src[src].src.ssa->parent_instr->type == nir_instr_type_alu) {
> +      nir_alu_instr *parent_instr =
> +         nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
> +
> +      switch (parent_instr->op) {
> +      case nir_op_pack_64_2x32_split: {
> +         nir_const_value *val =
> +            nir_src_as_const_value(parent_instr->src[1].src);
> +
> +         if (val && val->u32[0] == 0)
> +            return true;

I think you need to check all the components here, or at least all the
components that are used by parent_instr.

> +         break;
> +      }
> +      default:
> +         break;
> +      }
> +
> +      return false;
> +   }
> +
> +   nir_const_value *val = nir_src_as_const_value(instr->src[src].src);
> +
> +   if (!val)
> +      return false;
> +
> +   for (unsigned i = 0; i < num_components; i++) {
> +      switch (nir_op_infos[instr->op].input_types[src]) {
> +      case nir_type_int:
> +         if (val->i64[swizzle[i]] != (int)val->i64[swizzle[i]])
> +            return false;
> +         break;
> +      case nir_type_uint:
> +         if (val->u64[swizzle[i]] != (unsigned)val->u64[swizzle[i]])
> +            return false;
> +         break;
> +      default:
> +         return false;
> +      }
> +   }
> +
> +   return true;
> +}

This function seems to be doing two subtly different things:

1. If the instruction defining the source is nir_op_pack_64_2x32_split
or the source type is nir_type_uint, then it checks if the high 32
bits are 0.
2. Otherwise, it checks that the value can be replaced by a 32-bit
value that's sign extended to 64 bits, that is, whether the high 32
bits are the same as bit 31.

The problem is that if the source type is nir_type_int, then this
function checks two different things depending on whether the source
is a constant or the result of nir_op_pack_64_2x32_split. Based on how
you're using it, it seems like you really want to check whether the
high bits are the same as bit 31 in this case, so need to bail out of
the first 'if' if the source type isn't nir_type_uint.

> +
> +static inline bool
>  is_not_const(nir_alu_instr *instr, unsigned src, unsigned num_components,
>               const uint8_t *swizzle)
>  {
> --
> 2.13.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev