[Mesa-dev] [PATCH v3 4/7] nir: Add a pass for lowering integer division by constants
Daniel Schürmann
daniel.schuermann at campus.tu-berlin.de
Mon Oct 29 16:59:45 UTC 2018
Hi Jason,
thx for doing this pass, I was about to do the same (then we'd have 3 :P).
I'm not completely sure, but it looks like you're implementation is
based on "Division by Invariant Integers using Multiplication" from T.
Granlund and P. L. Montgomery? I think, it should be mentioned then.
Although some things are done a bit different (like idiv by pow2)...
Do you also plan to do runtime-invariants or even general lowering of
udiv and friends? What do you think about some kind of ircp_scaled
opcode to get the scaled 1/d multiplicator for some specified bit-size?
The idea would be that for architectures without udiv support, they'd
only have to implement the first part of the lowering while this pass
does the second.
>
> It's a reasonably well-known fact in the world of compilers that integer
> divisions by constants can be replaced by a multiply, an add, and some
> shifts. This commit adds such an optimization to NIR for easiest case
> of udiv. Other division operations will be added in following commits.
> In order to provide some additional driver control, the pass takes a
> minimum bit size to optimize.
> ---
> src/compiler/Makefile.sources | 1 +
> src/compiler/nir/meson.build | 1 +
> src/compiler/nir/nir.h | 2 +
> src/compiler/nir/nir_opt_idiv_const.c | 215 ++++++++++++++++++++++++++
> 4 files changed, 219 insertions(+)
> create mode 100644 src/compiler/nir/nir_opt_idiv_const.c
>
> diff --git a/src/compiler/Makefile.sources
b/src/compiler/Makefile.sources
> index b65bb9b80b9..e44bce85ad9 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -278,6 +278,7 @@ NIR_FILES = \
> nir/nir_opt_find_array_copies.c \
> nir/nir_opt_gcm.c \
> nir/nir_opt_global_to_local.c \
> + nir/nir_opt_idiv_const.c \
> nir/nir_opt_if.c \
> nir/nir_opt_intrinsics.c \
> nir/nir_opt_loop_unroll.c \
> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> index d8f65640004..63f31e09870 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -162,6 +162,7 @@ files_libnir = files(
> 'nir_opt_find_array_copies.c',
> 'nir_opt_gcm.c',
> 'nir_opt_global_to_local.c',
> + 'nir_opt_idiv_const.c',
> 'nir_opt_if.c',
> 'nir_opt_intrinsics.c',
> 'nir_opt_large_constants.c',
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 96b437e7c82..4991c8c604c 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -3081,6 +3081,8 @@ bool nir_opt_find_array_copies(nir_shader *shader);
>
> bool nir_opt_gcm(nir_shader *shader, bool value_number);
>
> +bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);
> +
> bool nir_opt_if(nir_shader *shader);
>
> bool nir_opt_intrinsics(nir_shader *shader);
> diff --git a/src/compiler/nir/nir_opt_idiv_const.c
> b/src/compiler/nir/nir_opt_idiv_const.c
> new file mode 100644
> index 00000000000..7fa739161ba
> --- /dev/null
> +++ b/src/compiler/nir/nir_opt_idiv_const.c
> @@ -0,0 +1,215 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person
obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> next
> + * paragraph) shall be included in all copies or substantial portions
> of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +#include "nir_builder.h"
> +#include "util/fast_idiv_by_const.h"
> +#include "util/u_math.h"
> +
> +static nir_ssa_def *
> +build_udiv(nir_builder *b, nir_ssa_def *n, uint64_t d)
> +{
> + if (d == 0) {
> + return nir_imm_intN_t(b, 0, n->bit_size);
> + } else if (util_is_power_of_two_or_zero64(d)) {
> + return nir_ushr(b, n, nir_imm_int(b, util_logbase2_64(d)));
> + } else {
> + struct util_fast_udiv_info m =
> + util_compute_fast_udiv_info(d, n->bit_size, n->bit_size);
> +
> + if (m.pre_shift)
> + n = nir_ushr(b, n, nir_imm_int(b, m.pre_shift));
> + if (m.increment)
> + n = nir_uadd_sat(b, n, nir_imm_intN_t(b, m.increment,
> n->bit_size));
Does Intel have an actual instruction for nir_uadd_sat?
Here, your algorithm is a bit different from the paper, and if there is
no actual hardware for this instruction, I'd suppose to leave it out.
Anyway, codegen for hardware without support for uadd_sat would look
different, so it might be worth to offer a second path here.
I think, lowering uadd_sat afterwards could would give worse code.
> + n = nir_umul_high(b, n, nir_imm_intN_t(b, m.multiplier,
> n->bit_size));
> + if (m.post_shift)
> + n = nir_ushr(b, n, nir_imm_int(b, m.post_shift));
> +
> + return n;
> + }
> +}
> +
> +static nir_ssa_def *
> +build_umod(nir_builder *b, nir_ssa_def *n, uint64_t d)
> +{
> + if (d == 0) {
> + return nir_imm_intN_t(b, 0, n->bit_size);
> + } else if (util_is_power_of_two_or_zero64(d)) {
> + return nir_iand(b, n, nir_imm_intN_t(b, d - 1, n->bit_size));
> + } else {
> + return nir_isub(b, n, nir_imul(b, build_udiv(b, n, d),
> + nir_imm_intN_t(b, d,
> n->bit_size)));
> + }
> +}
> +
What about imod? :)
> +static nir_ssa_def *
> +build_idiv(nir_builder *b, nir_ssa_def *n, int64_t d)
> +{
> + if (d == 0) {
> + return nir_imm_intN_t(b, 0, n->bit_size);
> + } else if (d == 1) {
> + return n;
> + } else if (d == -1) {
> + return nir_ineg(b, n);
> + } else if (util_is_power_of_two_or_zero64(d)) {
> + uint64_t abs_d = d < 0 ? -d : d;
> + nir_ssa_def *uq = nir_ishr(b, n, nir_imm_int(b,
> util_logbase2_64(abs_d)));
> + nir_ssa_def *n_neg = nir_ilt(b, n, nir_imm_intN_t(b, 0,
> n->bit_size));
> + nir_ssa_def *neg = d < 0 ? nir_inot(b, n_neg) : n_neg;
> + return nir_bcsel(b, neg, nir_ineg(b, uq), uq);
> + } else {
> + struct util_fast_sdiv_info m =
> + util_compute_fast_sdiv_info(d, n->bit_size);
> +
> + nir_ssa_def *res =
> + nir_imul_high(b, n, nir_imm_intN_t(b, m.multiplier,
> n->bit_size));
> + if (d > 0 && m.multiplier < 0)
> + res = nir_iadd(b, res, n);
> + if (d < 0 && m.multiplier > 0)
> + res = nir_isub(b, res, n);
> + if (m.shift)
> + res = nir_ishr(b, res, nir_imm_int(b, m.shift));
> + res = nir_iadd(b, res, nir_ushr(b, res, nir_imm_int(b,
> n->bit_size - 1)));
> +
> + return res;
> + }
> +}
> +
> +static bool
> +nir_opt_idiv_const_instr(nir_builder *b, nir_alu_instr *alu)
> +{
> + assert(alu->dest.dest.is_ssa);
> + assert(alu->src[0].src.is_ssa && alu->src[1].src.is_ssa);
> +
> + nir_const_value *const_denom =
nir_src_as_const_value(alu->src[1].src);
> + if (!const_denom)
> + return false;
> +
> + unsigned bit_size = alu->src[1].src.ssa->bit_size;
> +
> + b->cursor = nir_before_instr(&alu->instr);
> +
> + nir_ssa_def *q[4];
> + for (unsigned comp = 0; comp < alu->dest.dest.ssa.num_components;
> comp++) {
> + /* Get the numerator for the channel */
> + nir_ssa_def *n = nir_channel(b, alu->src[0].src.ssa,
> + alu->src[0].swizzle[comp]);
> +
> + /* Get the denominator for the channel */
> + int64_t d;
> + switch (bit_size) {
> + case 8:
> + d = const_denom->i8[alu->src[1].swizzle[comp]];
> + break;
> + case 16:
> + d = const_denom->i16[alu->src[1].swizzle[comp]];
> + break;
> + case 32:
> + d = const_denom->i32[alu->src[1].swizzle[comp]];
> + break;
> + case 64:
> + d = const_denom->i64[alu->src[1].swizzle[comp]];
> + break;
> + default:
> + unreachable("Invalid bit size");
> + }
> +
> + nir_alu_type d_type = nir_op_infos[alu->op].input_types[1];
> + if (nir_alu_type_get_base_type(d_type) == nir_type_uint) {
> + /* The code above sign-extended. If we're lowering an
> unsigned op,
> + * we need to mask it off to the correct number of bits so
that a
> + * cast to uint64_t will do the right thing.
> + */
> + if (bit_size < 64)
> + d &= (1ull << bit_size) - 1;
> + }
> +
> + switch (alu->op) {
> + case nir_op_udiv:
> + q[comp] = build_udiv(b, n, d);
> + break;
> + case nir_op_idiv:
> + q[comp] = build_idiv(b, n, d);
> + break;
> + case nir_op_umod:
> + q[comp] = build_umod(b, n, d);
> + break;
> + default:
> + unreachable("Unknown integer division op");
> + }
> + }
> +
> + nir_ssa_def *qvec = nir_vec(b, q, alu->dest.dest.ssa.num_components);
> + nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(qvec));
> + nir_instr_remove(&alu->instr);
> +
> + return true;
> +}
> +
> +static bool
> +nir_opt_idiv_const_impl(nir_function_impl *impl, unsigned min_bit_size)
> +{
> + bool progress = false;
> +
> + nir_builder b;
> + nir_builder_init(&b, impl);
> +
> + nir_foreach_block(block, impl) {
> + nir_foreach_instr_safe(instr, block) {
> + if (instr->type != nir_instr_type_alu)
> + continue;
> +
> + nir_alu_instr *alu = nir_instr_as_alu(instr);
> + if (alu->op != nir_op_udiv &&
> + alu->op != nir_op_idiv &&
> + alu->op != nir_op_umod)
> + continue;
> +
> + assert(alu->dest.dest.is_ssa);
> + if (alu->dest.dest.ssa.bit_size < min_bit_size)
> + continue;
> +
> + progress |= nir_opt_idiv_const_instr(&b, alu);
> + }
> + }
> +
> + if (progress) {
> + nir_metadata_preserve(impl, nir_metadata_block_index |
> + nir_metadata_dominance);
> + }
> +
> + return progress;
> +}
> +
> +bool
> +nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size)
> +{
> + bool progress = false;
> +
> + nir_foreach_function(function, shader) {
> + if (function->impl)
> + progress |= nir_opt_idiv_const_impl(function->impl,
> min_bit_size);
> + }
> +
> + return progress;
> +}
Kind regards,
Daniel
More information about the mesa-dev
mailing list