<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Fri, Feb 24, 2017 at 3:58 PM, Matt Turner <<a href="mailto:mattst88@gmail.com" target="_blank">mattst88@gmail.com</a>> wrote: <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">On Fri, Feb 24, 2017 at 3:48 PM, Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>> wrote: > The algorithms used by this pass, especially for division, are heavily > based on the work Ian Romanick did for the similar int64 lowering pass > in the GLSL compiler. > > v2: Properly handle vectors > > v3: Get rid of log2_denom stuff. Since we're using bcsel, we do all the > calculations anyway and this is just extra instructions. > --- > src/compiler/Makefile.sources | 1 + > src/compiler/nir/nir.h | 11 ++ > src/compiler/nir/nir_lower_int64.c | 261 +++++++++++++++++++++++++++++++++++++ > 3 files changed, 273 insertions(+) > create mode 100644 src/compiler/nir/nir_lower_int64.c > > diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources > index 643a018..2455d4e 100644 > --- a/src/compiler/Makefile.sources > +++ b/src/compiler/Makefile.sources > @@ -221,6 +221,7 @@ NIR_FILES = \ > nir/nir_lower_locals_to_regs.c \ > nir/nir_lower_idiv.c \ > nir/nir_lower_indirect_derefs.c \ > + nir/nir_lower_int64.c \ > nir/nir_lower_io.c \ > nir/nir_lower_io_to_temporaries.c \ > nir/nir_lower_io_to_scalar.c \ > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index 5243a9e..1a23e19 100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -2540,6 +2540,17 @@ void nir_lower_to_source_mods(nir_shader *shader); > bool nir_lower_gs_intrinsics(nir_shader *shader); > > typedef enum { > + nir_lower_imul64 = (1 << 0), > + nir_lower_isign64 = (1 << 1), > + nir_lower_udiv64 = (1 << 2), > + nir_lower_idiv64 = (1 << 3), > + nir_lower_umod64 = (1 << 4), > + nir_lower_imod64 = (1 << 5), > +} nir_lower_int64_options; > + > +bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options options); > + > +typedef enum { > nir_lower_drcp = (1 << 0), > nir_lower_dsqrt = (1 << 1), > nir_lower_drsq = (1 << 2), > diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c > new file mode 100644 > index 0000000..c40305e > --- /dev/null > +++ b/src/compiler/nir/nir_lower_int64.c > @@ -0,0 +1,261 @@ > +/* > + * Copyright © 2016 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include "nir.h" > +#include "nir_builder.h" > + > +static nir_ssa_def * > +lower_umul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) > +{ > + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); > + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); > + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); > + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); > + > + nir_ssa_def *res_lo = nir_imul(b, x_lo, y_lo); > + nir_ssa_def *res_hi = nir_iadd(b, nir_umul_high(b, x_lo, y_lo), > + nir_iadd(b, nir_imul(b, x_lo, y_hi), > + nir_imul(b, x_hi, y_lo))); > + > + return nir_pack_64_2x32_split(b, res_lo, res_hi); > +} > + > +static nir_ssa_def * > +lower_isign64(nir_builder *b, nir_ssa_def *x) > +{ > + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); > + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); > + > + nir_ssa_def *is_non_zero = nir_i2b(b, nir_ior(b, x_lo, x_hi)); > + nir_ssa_def *res_hi = nir_ishr(b, x_hi, nir_imm_int(b, 31)); > + nir_ssa_def *res_lo = nir_ior(b, res_hi, nir_b2i(b, is_non_zero)); > + > + return nir_pack_64_2x32_split(b, res_lo, res_hi); > +} > + > +static void > +lower_udiv64_mod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d, > + nir_ssa_def **q, nir_ssa_def **r) > +{ > + /* TODO: We should specially handle the case where the denominator is a > + * constant. In that case, we should be able to reduce it to a multiply by > + * a constant, some shifts, and an add. > + */ > + nir_ssa_def *n_lo = nir_unpack_64_2x32_split_x(b, n); > + nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n); > + nir_ssa_def *d_lo = nir_unpack_64_2x32_split_x(b, d); > + nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d); > + > + nir_const_value v = { .u32 = { 0, 0, 0, 0 } }; > + nir_ssa_def *q_lo = nir_build_imm(b, n->num_components, 32, v); > + nir_ssa_def *q_hi = nir_build_imm(b, n->num_components, 32, v); > + > + nir_ssa_def *n_hi_before_if = n_hi; > + nir_ssa_def *q_hi_before_if = q_hi; > + > + /* If the upper 32 bits of denom are non-zero, it is impossible for shifts > + * greater than 32 bits to occur. If the upper 32 bits of the numerator > + * are zero, it is impossible for (denom << [63, 32]) <= numer unless > + * denom == 0. > + */ > + nir_ssa_def *need_high_div = > + nir_iand(b, nir_ieq(b, d_hi, nir_imm_int(b, 0)), nir_uge(b, n_hi, d_lo)); > + nir_push_if(b, nir_bany(b, need_high_div)); > + { > + /* If we only have one component, then the bany above goes away and > + * this is always true within the if statement. > + */ > + if (n->num_components == 1) > + need_high_div = nir_imm_int(b, NIR_TRUE); > + > + for (int i = 31; i >= 0; i--) { > + /* if ((d.x << i) <= n.y) { > + * n.y -= d.x << i; > + * quot.y |= 1U << i; > + * } </div></div>Looks like these three lines are missing one space of indentation. </blockquote></div> </div><div class="gmail_extra">Gah! How did tabs get in there! Good catch. Fixed locally. </div></div>