[Mesa-dev] [PATCH v3 6/8] nir: Add a simple int64 lowering pass
Matt Turner
mattst88 at gmail.com
Fri Feb 24 23:58:54 UTC 2017
On Fri, Feb 24, 2017 at 3:48 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> The algorithms used by this pass, especially for division, are heavily
> based on the work Ian Romanick did for the similar int64 lowering pass
> in the GLSL compiler.
>
> v2: Properly handle vectors
>
> v3: Get rid of log2_denom stuff. Since we're using bcsel, we do all the
> calculations anyway and this is just extra instructions.
> ---
> src/compiler/Makefile.sources | 1 +
> src/compiler/nir/nir.h | 11 ++
> src/compiler/nir/nir_lower_int64.c | 261 +++++++++++++++++++++++++++++++++++++
> 3 files changed, 273 insertions(+)
> create mode 100644 src/compiler/nir/nir_lower_int64.c
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index 643a018..2455d4e 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -221,6 +221,7 @@ NIR_FILES = \
> nir/nir_lower_locals_to_regs.c \
> nir/nir_lower_idiv.c \
> nir/nir_lower_indirect_derefs.c \
> + nir/nir_lower_int64.c \
> nir/nir_lower_io.c \
> nir/nir_lower_io_to_temporaries.c \
> nir/nir_lower_io_to_scalar.c \
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 5243a9e..1a23e19 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2540,6 +2540,17 @@ void nir_lower_to_source_mods(nir_shader *shader);
> bool nir_lower_gs_intrinsics(nir_shader *shader);
>
> typedef enum {
> + nir_lower_imul64 = (1 << 0),
> + nir_lower_isign64 = (1 << 1),
> + nir_lower_udiv64 = (1 << 2),
> + nir_lower_idiv64 = (1 << 3),
> + nir_lower_umod64 = (1 << 4),
> + nir_lower_imod64 = (1 << 5),
> +} nir_lower_int64_options;
> +
> +bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options options);
> +
> +typedef enum {
> nir_lower_drcp = (1 << 0),
> nir_lower_dsqrt = (1 << 1),
> nir_lower_drsq = (1 << 2),
> diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c
> new file mode 100644
> index 0000000..c40305e
> --- /dev/null
> +++ b/src/compiler/nir/nir_lower_int64.c
> @@ -0,0 +1,261 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +#include "nir_builder.h"
> +
> +static nir_ssa_def *
> +lower_umul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> +
> + nir_ssa_def *res_lo = nir_imul(b, x_lo, y_lo);
> + nir_ssa_def *res_hi = nir_iadd(b, nir_umul_high(b, x_lo, y_lo),
> + nir_iadd(b, nir_imul(b, x_lo, y_hi),
> + nir_imul(b, x_hi, y_lo)));
> +
> + return nir_pack_64_2x32_split(b, res_lo, res_hi);
> +}
> +
> +static nir_ssa_def *
> +lower_isign64(nir_builder *b, nir_ssa_def *x)
> +{
> + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +
> + nir_ssa_def *is_non_zero = nir_i2b(b, nir_ior(b, x_lo, x_hi));
> + nir_ssa_def *res_hi = nir_ishr(b, x_hi, nir_imm_int(b, 31));
> + nir_ssa_def *res_lo = nir_ior(b, res_hi, nir_b2i(b, is_non_zero));
> +
> + return nir_pack_64_2x32_split(b, res_lo, res_hi);
> +}
> +
> +static void
> +lower_udiv64_mod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d,
> + nir_ssa_def **q, nir_ssa_def **r)
> +{
> + /* TODO: We should specially handle the case where the denominator is a
> + * constant. In that case, we should be able to reduce it to a multiply by
> + * a constant, some shifts, and an add.
> + */
> + nir_ssa_def *n_lo = nir_unpack_64_2x32_split_x(b, n);
> + nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
> + nir_ssa_def *d_lo = nir_unpack_64_2x32_split_x(b, d);
> + nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);
> +
> + nir_const_value v = { .u32 = { 0, 0, 0, 0 } };
> + nir_ssa_def *q_lo = nir_build_imm(b, n->num_components, 32, v);
> + nir_ssa_def *q_hi = nir_build_imm(b, n->num_components, 32, v);
> +
> + nir_ssa_def *n_hi_before_if = n_hi;
> + nir_ssa_def *q_hi_before_if = q_hi;
> +
> + /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
> + * greater than 32 bits to occur. If the upper 32 bits of the numerator
> + * are zero, it is impossible for (denom << [63, 32]) <= numer unless
> + * denom == 0.
> + */
> + nir_ssa_def *need_high_div =
> + nir_iand(b, nir_ieq(b, d_hi, nir_imm_int(b, 0)), nir_uge(b, n_hi, d_lo));
> + nir_push_if(b, nir_bany(b, need_high_div));
> + {
> + /* If we only have one component, then the bany above goes away and
> + * this is always true within the if statement.
> + */
> + if (n->num_components == 1)
> + need_high_div = nir_imm_int(b, NIR_TRUE);
> +
> + for (int i = 31; i >= 0; i--) {
> + /* if ((d.x << i) <= n.y) {
> + * n.y -= d.x << i;
> + * quot.y |= 1U << i;
> + * }
Looks like these three lines are missing one space of indentation.
More information about the mesa-dev mailing list