[Mesa-dev] [PATCH v3 6/8] nir: Add a simple int64 lowering pass

Fri Feb 24 23:58:54 UTC 2017

On Fri, Feb 24, 2017 at 3:48 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> The algorithms used by this pass, especially for division, are heavily
> based on the work Ian Romanick did for the similar int64 lowering pass
> in the GLSL compiler.
>
> v2: Properly handle vectors
>
> v3: Get rid of log2_denom stuff.  Since we're using bcsel, we do all the
>     calculations anyway and this is just extra instructions.
> ---
>  src/compiler/Makefile.sources      |   1 +
>  src/compiler/nir/nir.h             |  11 ++
>  src/compiler/nir/nir_lower_int64.c | 261 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 273 insertions(+)
>  create mode 100644 src/compiler/nir/nir_lower_int64.c
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index 643a018..2455d4e 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -221,6 +221,7 @@ NIR_FILES = \
>         nir/nir_lower_locals_to_regs.c \
>         nir/nir_lower_idiv.c \
>         nir/nir_lower_indirect_derefs.c \
> +       nir/nir_lower_int64.c \
>         nir/nir_lower_io.c \
>         nir/nir_lower_io_to_temporaries.c \
>         nir/nir_lower_io_to_scalar.c \
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 5243a9e..1a23e19 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2540,6 +2540,17 @@ void nir_lower_to_source_mods(nir_shader *shader);
>  bool nir_lower_gs_intrinsics(nir_shader *shader);
>
>  typedef enum {
> +   nir_lower_imul64 = (1 << 0),
> +   nir_lower_isign64 = (1 << 1),
> +   nir_lower_udiv64 = (1 << 2),
> +   nir_lower_idiv64 = (1 << 3),
> +   nir_lower_umod64 = (1 << 4),
> +   nir_lower_imod64 = (1 << 5),
> +} nir_lower_int64_options;
> +
> +bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options options);
> +
> +typedef enum {
>     nir_lower_drcp = (1 << 0),
>     nir_lower_dsqrt = (1 << 1),
>     nir_lower_drsq = (1 << 2),
> diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c
> new file mode 100644
> index 0000000..c40305e
> --- /dev/null
> +++ b/src/compiler/nir/nir_lower_int64.c
> @@ -0,0 +1,261 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +#include "nir_builder.h"
> +
> +static nir_ssa_def *
> +lower_umul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> +
> +   nir_ssa_def *res_lo = nir_imul(b, x_lo, y_lo);
> +   nir_ssa_def *res_hi = nir_iadd(b, nir_umul_high(b, x_lo, y_lo),
> +                         nir_iadd(b, nir_imul(b, x_lo, y_hi),
> +                                     nir_imul(b, x_hi, y_lo)));
> +
> +   return nir_pack_64_2x32_split(b, res_lo, res_hi);
> +}
> +
> +static nir_ssa_def *
> +lower_isign64(nir_builder *b, nir_ssa_def *x)
> +{
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +
> +   nir_ssa_def *is_non_zero = nir_i2b(b, nir_ior(b, x_lo, x_hi));
> +   nir_ssa_def *res_hi = nir_ishr(b, x_hi, nir_imm_int(b, 31));
> +   nir_ssa_def *res_lo = nir_ior(b, res_hi, nir_b2i(b, is_non_zero));
> +
> +   return nir_pack_64_2x32_split(b, res_lo, res_hi);
> +}
> +
> +static void
> +lower_udiv64_mod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d,
> +                   nir_ssa_def **q, nir_ssa_def **r)
> +{
> +   /* TODO: We should specially handle the case where the denominator is a
> +    * constant.  In that case, we should be able to reduce it to a multiply by
> +    * a constant, some shifts, and an add.
> +    */
> +   nir_ssa_def *n_lo = nir_unpack_64_2x32_split_x(b, n);
> +   nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
> +   nir_ssa_def *d_lo = nir_unpack_64_2x32_split_x(b, d);
> +   nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);
> +
> +   nir_const_value v = { .u32 = { 0, 0, 0, 0 } };
> +   nir_ssa_def *q_lo = nir_build_imm(b, n->num_components, 32, v);
> +   nir_ssa_def *q_hi = nir_build_imm(b, n->num_components, 32, v);
> +
> +   nir_ssa_def *n_hi_before_if = n_hi;
> +   nir_ssa_def *q_hi_before_if = q_hi;
> +
> +   /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
> +    * greater than 32 bits to occur.  If the upper 32 bits of the numerator
> +    * are zero, it is impossible for (denom << [63, 32]) <= numer unless
> +    * denom == 0.
> +    */
> +   nir_ssa_def *need_high_div =
> +      nir_iand(b, nir_ieq(b, d_hi, nir_imm_int(b, 0)), nir_uge(b, n_hi, d_lo));
> +   nir_push_if(b, nir_bany(b, need_high_div));
> +   {
> +      /* If we only have one component, then the bany above goes away and
> +       * this is always true within the if statement.
> +       */
> +      if (n->num_components == 1)
> +         need_high_div = nir_imm_int(b, NIR_TRUE);
> +
> +      for (int i = 31; i >= 0; i--) {
> +         /* if ((d.x << i) <= n.y) {
> +         *    n.y -= d.x << i;
> +         *    quot.y |= 1U << i;
> +         * }

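Looks like the three comment continuation lines just above (the ones starting with "*") are each missing one space of indentation; the "*" should line up under the "*" of the opening "/*".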