[Mesa-dev] [PATCH v3 6/8] nir: Add a simple int64 lowering pass
Jason Ekstrand
jason at jlekstrand.net
Sat Feb 25 00:07:00 UTC 2017
On Fri, Feb 24, 2017 at 3:58 PM, Matt Turner <mattst88 at gmail.com> wrote:
> On Fri, Feb 24, 2017 at 3:48 PM, Jason Ekstrand <jason at jlekstrand.net>
> wrote:
> > The algorithms used by this pass, especially for division, are heavily
> > based on the work Ian Romanick did for the similar int64 lowering pass
> > in the GLSL compiler.
> >
> > v2: Properly handle vectors
> >
> > v3: Get rid of log2_denom stuff. Since we're using bcsel, we do all the
> > calculations anyway and this is just extra instructions.
> > ---
> > src/compiler/Makefile.sources | 1 +
> > src/compiler/nir/nir.h | 11 ++
> > src/compiler/nir/nir_lower_int64.c | 261 +++++++++++++++++++++++++++++++++++++
> > 3 files changed, 273 insertions(+)
> > create mode 100644 src/compiler/nir/nir_lower_int64.c
> >
> > diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.
> sources
> > index 643a018..2455d4e 100644
> > --- a/src/compiler/Makefile.sources
> > +++ b/src/compiler/Makefile.sources
> > @@ -221,6 +221,7 @@ NIR_FILES = \
> > nir/nir_lower_locals_to_regs.c \
> > nir/nir_lower_idiv.c \
> > nir/nir_lower_indirect_derefs.c \
> > + nir/nir_lower_int64.c \
> > nir/nir_lower_io.c \
> > nir/nir_lower_io_to_temporaries.c \
> > nir/nir_lower_io_to_scalar.c \
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 5243a9e..1a23e19 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -2540,6 +2540,17 @@ void nir_lower_to_source_mods(nir_shader
> *shader);
> > bool nir_lower_gs_intrinsics(nir_shader *shader);
> >
> > typedef enum {
> > + nir_lower_imul64 = (1 << 0),
> > + nir_lower_isign64 = (1 << 1),
> > + nir_lower_udiv64 = (1 << 2),
> > + nir_lower_idiv64 = (1 << 3),
> > + nir_lower_umod64 = (1 << 4),
> > + nir_lower_imod64 = (1 << 5),
> > +} nir_lower_int64_options;
> > +
> > +bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options
> options);
> > +
> > +typedef enum {
> > nir_lower_drcp = (1 << 0),
> > nir_lower_dsqrt = (1 << 1),
> > nir_lower_drsq = (1 << 2),
> > diff --git a/src/compiler/nir/nir_lower_int64.c
> b/src/compiler/nir/nir_lower_int64.c
> > new file mode 100644
> > index 0000000..c40305e
> > --- /dev/null
> > +++ b/src/compiler/nir/nir_lower_int64.c
> > @@ -0,0 +1,261 @@
> > +/*
> > + * Copyright © 2016 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> obtaining a
> > + * copy of this software and associated documentation files (the
> "Software"),
> > + * to deal in the Software without restriction, including without
> limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> next
> > + * paragraph) shall be included in all copies or substantial portions
> of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include "nir.h"
> > +#include "nir_builder.h"
> > +
> > +static nir_ssa_def *
> > +lower_umul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> > +{
> > + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > +
> > + nir_ssa_def *res_lo = nir_imul(b, x_lo, y_lo);
> > + nir_ssa_def *res_hi = nir_iadd(b, nir_umul_high(b, x_lo, y_lo),
> > + nir_iadd(b, nir_imul(b, x_lo, y_hi),
> > + nir_imul(b, x_hi, y_lo)));
> > +
> > + return nir_pack_64_2x32_split(b, res_lo, res_hi);
> > +}
> > +
> > +static nir_ssa_def *
> > +lower_isign64(nir_builder *b, nir_ssa_def *x)
> > +{
> > + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > +
> > + nir_ssa_def *is_non_zero = nir_i2b(b, nir_ior(b, x_lo, x_hi));
> > + nir_ssa_def *res_hi = nir_ishr(b, x_hi, nir_imm_int(b, 31));
> > + nir_ssa_def *res_lo = nir_ior(b, res_hi, nir_b2i(b, is_non_zero));
> > +
> > + return nir_pack_64_2x32_split(b, res_lo, res_hi);
> > +}
> > +
> > +static void
> > +lower_udiv64_mod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d,
> > + nir_ssa_def **q, nir_ssa_def **r)
> > +{
> > + /* TODO: We should specially handle the case where the denominator is a
> > + * constant. In that case, we should be able to reduce it to a multiply by
> > + * a constant, some shifts, and an add.
> > + */
> > + nir_ssa_def *n_lo = nir_unpack_64_2x32_split_x(b, n);
> > + nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
> > + nir_ssa_def *d_lo = nir_unpack_64_2x32_split_x(b, d);
> > + nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);
> > +
> > + nir_const_value v = { .u32 = { 0, 0, 0, 0 } };
> > + nir_ssa_def *q_lo = nir_build_imm(b, n->num_components, 32, v);
> > + nir_ssa_def *q_hi = nir_build_imm(b, n->num_components, 32, v);
> > +
> > + nir_ssa_def *n_hi_before_if = n_hi;
> > + nir_ssa_def *q_hi_before_if = q_hi;
> > +
> > + /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
> > + * greater than 32 bits to occur. If the upper 32 bits of the numerator
> > + * are zero, it is impossible for (denom << [63, 32]) <= numer unless
> > + * denom == 0.
> > + */
> > + nir_ssa_def *need_high_div =
> > + nir_iand(b, nir_ieq(b, d_hi, nir_imm_int(b, 0)), nir_uge(b, n_hi,
> d_lo));
> > + nir_push_if(b, nir_bany(b, need_high_div));
> > + {
> > + /* If we only have one component, then the bany above goes away and
> > + * this is always true within the if statement.
> > + */
> > + if (n->num_components == 1)
> > + need_high_div = nir_imm_int(b, NIR_TRUE);
> > +
> > + for (int i = 31; i >= 0; i--) {
> > + /* if ((d.x << i) <= n.y) {
> > + * n.y -= d.x << i;
> > + * quot.y |= 1U << i;
> > + * }
>
> Looks like these three lines are missing one space of indentation.
>
Gah! How did tabs get in there? Good catch. Fixed locally.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170224/bd9833d1/attachment.html>
More information about the mesa-dev mailing list