[Mesa-dev] [PATCH 11/12] nir: Add partial redundancy elimination for compares
Ian Romanick
idr at freedesktop.org
Thu Jun 28 04:46:24 UTC 2018
From: Ian Romanick <ian.d.romanick at intel.com>
This pass attempts to detect code sequences like
if (x < y) {
z = y - z;
...
}
and replace them with sequences like
t = x - y;
if (t < 0) {
z = t;
...
}
On architectures where the subtract can generate the flags used by the
if-statement, this saves an instruction. It's also possible that moving
an instruction out of the if-statement will allow
nir_opt_peephole_select to convert the whole thing to a bcsel.
Currently only floating point compares and adds are supported. Adding
support for integer will be a challenge due to integer overflow. There
are a couple of possible solutions, but they may not apply to all
architectures.
Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
src/compiler/Makefile.sources | 1 +
src/compiler/nir/meson.build | 1 +
src/compiler/nir/nir.h | 2 +
src/compiler/nir/nir_instr_set.c | 3 +-
src/compiler/nir/nir_opt_comparison_pre.c | 354 ++++++++++++++++++++++++++++++
src/compiler/nir/nir_search_helpers.h | 29 +++
6 files changed, 389 insertions(+), 1 deletion(-)
create mode 100644 src/compiler/nir/nir_opt_comparison_pre.c
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 231c5cbfae1..8dc2acb698a 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -263,6 +263,7 @@ NIR_FILES = \
nir/nir_move_load_const.c \
nir/nir_move_vec_src_uses_to_dest.c \
nir/nir_normalize_cubemap_coords.c \
+ nir/nir_opt_comparison_pre.c \
nir/nir_opt_conditional_discard.c \
nir/nir_opt_constant_folding.c \
nir/nir_opt_copy_prop_vars.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 30e425aa840..3426deb065d 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -148,6 +148,7 @@ files_libnir = files(
'nir_move_load_const.c',
'nir_move_vec_src_uses_to_dest.c',
'nir_normalize_cubemap_coords.c',
+ 'nir_opt_comparison_pre.c',
'nir_opt_conditional_discard.c',
'nir_opt_constant_folding.c',
'nir_opt_copy_prop_vars.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f1b9a468e9a..201eb032552 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2899,6 +2899,8 @@ bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);
bool nir_lower_phis_to_regs_block(nir_block *block);
bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
+bool nir_opt_comparison_pre(nir_shader *shader);
+
bool nir_opt_algebraic(nir_shader *shader);
bool nir_opt_algebraic_before_ffma(nir_shader *shader);
bool nir_opt_algebraic_late(nir_shader *shader);
diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index 1a491f46ff4..e9371af230a 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -239,7 +239,8 @@ get_neg_instr(const nir_src *s)
{
const struct nir_alu_instr *const alu = nir_src_as_alu_instr_const(s);
- return alu->op == nir_op_fneg || alu->op == nir_op_ineg ? alu : NULL;
+ return alu != NULL && (alu->op == nir_op_fneg || alu->op == nir_op_ineg)
+ ? alu : NULL;
}
bool
diff --git a/src/compiler/nir/nir_opt_comparison_pre.c b/src/compiler/nir/nir_opt_comparison_pre.c
new file mode 100644
index 00000000000..86a35f78e02
--- /dev/null
+++ b/src/compiler/nir/nir_opt_comparison_pre.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir_instr_set.h"
+#include "nir_search_helpers.h"
+#include "nir_builder.h"
+#include "util/u_vector.h"
+
+/* Partial redundancy elimination of compares
+ *
+ * Searches for comparisons of the form 'a cmp b' that dominate arithmetic
+ * instructions like 'b - a'. The comparison is replaced by the arithmetic
+ * instruction, and the result is compared with zero. For example,
+ *
+ * vec1 32 ssa_111 = flt 0.37, ssa_110.w
+ * if ssa_111 {
+ * block block_1:
+ * vec1 32 ssa_112 = fadd ssa_110.w, -0.37
+ * ...
+ *
+ * becomes
+ *
+ * vec1 32 ssa_111 = fadd ssa_110.w, -0.37
+ * vec1 32 ssa_112 = flt 0.0, ssa_111
+ * if ssa_112 {
+ * block block_1:
+ * ...
+ */
+
+struct block_queue {
+ struct exec_list blocks;
+ struct exec_list reusable_blocks;
+};
+
+struct block_instructions {
+ struct exec_node node;
+ struct u_vector instructions;
+};
+
+static void
+block_queue_init(struct block_queue *bq)
+{
+ exec_list_make_empty(&bq->blocks);
+ exec_list_make_empty(&bq->reusable_blocks);
+}
+
+static void
+block_queue_finish(struct block_queue *bq)
+{
+ struct block_instructions *n;
+
+ while ((n = (struct block_instructions *) exec_list_pop_head(&bq->blocks)) != NULL) {
+ u_vector_finish(&n->instructions);
+ free(n);
+ }
+
+ while ((n = (struct block_instructions *) exec_list_pop_head(&bq->reusable_blocks)) != NULL) {
+ free(n);
+ }
+}
+
+static struct block_instructions *
+push_block(struct block_queue *bq)
+{
+ struct block_instructions *bi =
+ (struct block_instructions *) exec_list_pop_head(&bq->reusable_blocks);
+
+ if (bi == NULL) {
+ bi = calloc(1, sizeof(struct block_instructions));
+
+ if (bi == NULL)
+ return NULL;
+ }
+
+ if (!u_vector_init(&bi->instructions,
+ sizeof(struct nir_alu_instr *),
+ 8 * sizeof(struct nir_alu_instr *)))
+ return NULL;
+
+ exec_list_push_tail(&bq->blocks, &bi->node);
+
+ return bi;
+}
+
+static void
+pop_block(struct block_queue *bq, struct block_instructions *bi)
+{
+ u_vector_finish(&bi->instructions);
+ exec_node_remove(&bi->node);
+ exec_list_push_head(&bq->reusable_blocks, &bi->node);
+}
+
+static void
+add_instruction_for_block(struct block_instructions *bi,
+ struct nir_alu_instr *alu)
+{
+ struct nir_alu_instr **data =
+ u_vector_add(&bi->instructions);
+
+ *data = alu;
+}
+
+static void
+rewrite_compare_instruction(nir_builder *bld, nir_alu_instr *orig_cmp,
+ nir_alu_instr *orig_add, bool zero_on_left)
+{
+ void *const mem_ctx = ralloc_parent(orig_cmp);
+
+ bld->cursor = nir_before_instr(&orig_cmp->instr);
+
+ /* This is somewhat tricky. The compare instruction may be something like
+ * (fcmp, a, b) while the add instruction is something like (fadd, fneg(a),
+ * b). This is problematic because the SSA value for the fneg(a) may not
+ * exist yet at the compare instruction.
+ *
+ * We fabricate the operands of the new add. This is done using
+ * information provided by zero_on_left. If zero_on_left is true, we know
+ * the resulting compare instruction is (fcmp, 0.0, (fadd, x, y)). If the
+ * original compare instruction was (fcmp, a, b), x = b and y = -a. If
+ * zero_on_left is false, the resulting compare instruction is (fcmp,
+ * (fadd, x, y), 0.0) and x = a and y = -b.
+ */
+ nir_ssa_def *const a = nir_ssa_for_alu_src(bld, orig_cmp, 0);
+ nir_ssa_def *const b = nir_ssa_for_alu_src(bld, orig_cmp, 1);
+
+ nir_ssa_def *const fadd = zero_on_left
+ ? nir_fadd(bld, b, nir_fneg(bld, a))
+ : nir_fadd(bld, a, nir_fneg(bld, b));
+
+ nir_ssa_def *const zero =
+ nir_imm_floatN_t(bld, 0.0, orig_add->dest.dest.ssa.bit_size);
+
+ nir_ssa_def *const cmp = zero_on_left
+ ? nir_build_alu(bld, orig_cmp->op, zero, fadd, NULL, NULL)
+ : nir_build_alu(bld, orig_cmp->op, fadd, zero, NULL, NULL);
+
+ /* Generating extra moves of the results is the easy way to make sure the
+ * writemasks match the original instructions. Later optimization passes
+ * will clean these up.
+ */
+ nir_alu_instr *mov_add = nir_alu_instr_create(mem_ctx, nir_op_imov);
+ mov_add->dest.write_mask = orig_add->dest.write_mask;
+ nir_ssa_dest_init(&mov_add->instr, &mov_add->dest.dest,
+ orig_add->dest.dest.ssa.num_components,
+ orig_add->dest.dest.ssa.bit_size, NULL);
+ mov_add->src[0].src = nir_src_for_ssa(fadd);
+
+ nir_builder_instr_insert(bld, &mov_add->instr);
+
+ nir_alu_instr *mov_cmp = nir_alu_instr_create(mem_ctx, nir_op_imov);
+ mov_cmp->dest.write_mask = orig_cmp->dest.write_mask;
+ nir_ssa_dest_init(&mov_cmp->instr, &mov_cmp->dest.dest,
+ orig_cmp->dest.dest.ssa.num_components,
+ orig_cmp->dest.dest.ssa.bit_size, NULL);
+ mov_cmp->src[0].src = nir_src_for_ssa(cmp);
+
+ nir_builder_instr_insert(bld, &mov_cmp->instr);
+
+ nir_ssa_def_rewrite_uses(&orig_cmp->dest.dest.ssa,
+ nir_src_for_ssa(&mov_cmp->dest.dest.ssa));
+ nir_ssa_def_rewrite_uses(&orig_add->dest.dest.ssa,
+ nir_src_for_ssa(&mov_add->dest.dest.ssa));
+
+ /* We know these have no more uses because we just rewrote them all, so we
+ * can remove them.
+ */
+ nir_instr_remove(&orig_cmp->instr);
+ nir_instr_remove(&orig_add->instr);
+}
+
+static bool
+comparison_pre_block(nir_block *block, struct block_queue *bq, nir_builder *bld)
+{
+ bool progress = false;
+
+ struct block_instructions *bi = push_block(bq);
+
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_alu)
+ continue;
+
+ struct nir_alu_instr *const alu = nir_instr_as_alu(instr);
+
+ if (alu->dest.dest.ssa.num_components != 1)
+ continue;
+
+ if (alu->dest.saturate)
+ continue;
+
+ static const uint8_t swizzle[4] = { 0, 0, 0, 0 };
+
+ switch (alu->op) {
+ case nir_op_fadd: {
+ /* If the instruction is fadd, check it against comparison
+ * instructions that dominate it.
+ */
+ struct block_instructions *b =
+ (struct block_instructions *) exec_list_get_head_raw(&bq->blocks);
+
+ while (b->node.next != NULL) {
+ nir_alu_instr **a;
+ bool rewrote_compare = false;
+
+ u_vector_foreach(a, &b->instructions) {
+ nir_alu_instr *const cmp = *a;
+
+ if (cmp == NULL)
+ continue;
+
+ /* The operands of both instructions are, with some liberty,
+ * commutative. Check all three permutations. The third
+ * permutation is a negation of the original operation, so it
+ * is handled separately.
+ */
+ if ((nir_alu_srcs_equal(cmp, alu, 0, 0) &&
+ nir_alu_srcs_negative_equal(cmp, alu, 1, 1)) ||
+ (nir_alu_srcs_equal(cmp, alu, 0, 1) &&
+ nir_alu_srcs_negative_equal(cmp, alu, 1, 0))) {
+ /* These are the cases where (A cmp B) matches either (A +
+ * -B) or (-B + A)
+ *
+ * A cmp B <=> A + -B cmp 0
+ */
+ rewrite_compare_instruction(bld, cmp, alu, false);
+
+ *a = NULL;
+ rewrote_compare = true;
+ break;
+ } else if (nir_alu_srcs_equal(cmp, alu, 1, 0) &&
+ nir_alu_srcs_negative_equal(cmp, alu, 0, 1)) {
+ /* This is the case where (A cmp B) matches (B + -A).
+ *
+ * A cmp B <=> 0 cmp B + -A
+ */
+ rewrite_compare_instruction(bld, cmp, alu, true);
+
+ *a = NULL;
+ rewrote_compare = true;
+ break;
+ }
+ }
+
+ /* Bail after a compare in the most dominating block is found.
+ * This is necessary because 'alu' has been remove from the
+ * instruction stream. Should there be a matching compare in
+ * another block, calling rewrite_compare_instruction again will
+ * try to operate on a node that is not in the list as if it were
+ * in the list.
+ *
+ * FINISHME: There may be opportunity for additional optimization
+ * here. I discovered this problem due to a shader in Guacamelee.
+ * It may be possible to rewrite the matching compares that are
+ * encountered later to reuse the result from the compare that was
+ * first rewritten. It's also possible that this is just taken
+ * care of by calling the optimization pass repeatedly.
+ */
+ if (rewrote_compare) {
+ progress = true;
+ break;
+ }
+
+ b = (struct block_instructions *) b->node.next;
+ }
+
+ break;
+ }
+
+ case nir_op_flt:
+ case nir_op_fge:
+ case nir_op_fne:
+ case nir_op_feq:
+ /* If the instruction is a comparison that is used by an if-statement
+ * or a bcsel and neither operand is immediate value 0, add it to the
+ * set.
+ */
+ if (is_used_by_if(alu) &&
+ is_not_const_zero(alu, 0, 1, swizzle) &&
+ is_not_const_zero(alu, 1, 1, swizzle))
+ add_instruction_for_block(bi, alu);
+
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ for (unsigned i = 0; i < block->num_dom_children; i++) {
+ nir_block *child = block->dom_children[i];
+
+ if (comparison_pre_block(child, bq, bld))
+ progress = true;
+ }
+
+ pop_block(bq, bi);
+
+ return progress;
+}
+
+static bool
+nir_opt_comparison_pre_impl(nir_function_impl *impl)
+{
+ struct block_queue bq;
+ nir_builder bld;
+
+ block_queue_init(&bq);
+ nir_builder_init(&bld, impl);
+
+ nir_metadata_require(impl, nir_metadata_dominance);
+
+ const bool progress =
+ comparison_pre_block(nir_start_block(impl), &bq, &bld);
+
+ block_queue_finish(&bq);
+
+ if (progress)
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return progress;
+}
+
+bool
+nir_opt_comparison_pre(nir_shader *shader)
+{
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl)
+ progress |= nir_opt_comparison_pre_impl(function->impl);
+ }
+
+ return progress;
+}
diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h
index 8bc6d723c34..85af2f12f9e 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -109,6 +109,29 @@ is_zero_to_one(nir_alu_instr *instr, unsigned src, unsigned num_components,
return true;
}
+static inline bool
+is_not_const_zero(nir_alu_instr *instr, unsigned src, unsigned num_components,
+ const uint8_t *swizzle)
+{
+ nir_const_value *val = nir_src_as_const_value(instr->src[src].src);
+
+ if (!val)
+ return true;
+
+ for (unsigned i = 0; i < num_components; i++) {
+ switch (nir_op_infos[instr->op].input_types[src]) {
+ case nir_type_float:
+ if (val->f32[swizzle[i]] == 0.0f)
+ return false;
+ break;
+ default:
+ return false;
+ }
+ }
+
+ return true;
+}
+
static inline bool
is_not_const(nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components,
UNUSED const uint8_t *swizzle)
@@ -159,6 +182,12 @@ is_used_once(nir_alu_instr *instr)
return true;
}
+static inline bool
+is_used_by_if(nir_alu_instr *instr)
+{
+ return !list_empty(&instr->dest.dest.ssa.if_uses);
+}
+
static inline bool
is_not_used_by_if(nir_alu_instr *instr)
{
--
2.14.4
More information about the mesa-dev
mailing list