Mesa (main): nir: Add pass to lower phi precision
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jun 30 00:25:48 UTC 2021
Module: Mesa
Branch: main
Commit: c7b935962b27667fd4730b53c5fd3ec45d2773c3
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c7b935962b27667fd4730b53c5fd3ec45d2773c3
Author: Rob Clark <robdclark at chromium.org>
Date: Mon Jun 21 14:19:51 2021 -0700
nir: Add pass to lower phi precision
In addition to register pressure benefits from getting more fp16/int16,
this keeps i2imp conversions from standing in the way of loop unrolling.
Signed-off-by: Rob Clark <robdclark at chromium.org>
Reviewed-by: Connor Abbott <cwabbott0 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11545>
---
src/compiler/nir/meson.build | 1 +
src/compiler/nir/nir.h | 2 +
src/compiler/nir/nir_opt_phi_precision.c | 498 ++++++++++++++++++++++++++++++
src/mesa/state_tracker/st_glsl_to_nir.cpp | 1 +
4 files changed, 502 insertions(+)
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 229dacdb59d..9102307f388 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -234,6 +234,7 @@ files_libnir = files(
'nir_opt_move_discards_to_top.c',
'nir_opt_offsets.c',
'nir_opt_peephole_select.c',
+ 'nir_opt_phi_precision.c',
'nir_opt_rematerialize_compares.c',
'nir_opt_remove_phis.c',
'nir_opt_shrink_vectors.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 99d0f8039c3..4623492dd9c 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5256,6 +5256,8 @@ bool nir_opt_rematerialize_compares(nir_shader *shader);
bool nir_opt_remove_phis(nir_shader *shader);
bool nir_opt_remove_phis_block(nir_block *block);
+bool nir_opt_phi_precision(nir_shader *shader);
+
bool nir_opt_shrink_vectors(nir_shader *shader, bool shrink_image_store);
bool nir_opt_trivial_continues(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_phi_precision.c b/src/compiler/nir/nir_opt_phi_precision.c
new file mode 100644
index 00000000000..449e8913d16
--- /dev/null
+++ b/src/compiler/nir/nir_opt_phi_precision.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright © 2021 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/*
+ * This pass tries to reduce the bitsize of phi instructions by moving
+ * narrowing conversions from the phi's consumers to the phi's sources,
+ * if all the uses of the phi are equivalent narrowing instructions. In
+ * other words, convert:
+ *
+ * vec1 32 ssa_124 = load_const (0x00000000)
+ * ...
+ * loop {
+ * ...
+ * vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53
+ * vec1 16 ssa_8 = i2imp ssa_155
+ * ...
+ * vec1 32 ssa_53 = i2i32 ssa_52
+ * }
+ *
+ * into:
+ *
+ * vec1 32 ssa_124 = load_const (0x00000000)
+ * vec1 16 ssa_156 = i2imp ssa_124
+ * ...
+ * loop {
+ * ...
+ * vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157
+ * ...
+ * vec1 32 ssa_53 = i2i32 ssa_52
+ * vec1 16 ssa_157 = i2imp ssa_53
+ * }
+ *
+ * Or failing that, tries to push widening conversion of phi srcs to
+ * the phi def. In this case, since load_const is frequently one
+ * of the phi sources, this pass checks if it can be narrowed without a
+ * loss of precision:
+ *
+ * vec1 32 ssa_0 = load_const (0x00000000)
+ * ...
+ * loop {
+ * ...
+ * vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19
+ * ...
+ * vec1 16 ssa_18 = iadd ssa_21, ssa_3
+ * vec1 32 ssa_19 = i2i32 ssa_18
+ * }
+ *
+ * into:
+ *
+ * vec1 32 ssa_0 = load_const (0x00000000)
+ * vec1 16 ssa_22 = i2i16 ssa_0
+ * ...
+ * loop {
+ * ...
+ * vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18
+ * vec1 32 ssa_23 = i2i32 ssa_8
+ * ...
+ * vec1 16 ssa_18 = iadd ssa_21, ssa_3
+ * }
+ *
+ * Note that either transformation can convert x2ymp into x2y16, which
+ * is normally done later in nir_opt_algebraic_late(), losing the option
+ * to fold away sequences like (i2i32 (i2imp (x))), but algebraic opts
+ * cannot see through phis.
+ */
+
+/* Sentinel nir_op value used in this file to mean "no usable conversion op". */
+#define INVALID_OP nir_num_opcodes
+
+/**
+ * Map an x2ymp conversion opcode to the matching explicit 16-bit
+ * conversion opcode.  Any other opcode is handed back unchanged.
+ */
+static nir_op
+concrete_conversion(nir_op op)
+{
+   if (op == nir_op_i2imp)
+      return nir_op_i2i16;
+   if (op == nir_op_i2fmp)
+      return nir_op_i2f16;
+   if (op == nir_op_u2fmp)
+      return nir_op_u2f16;
+   if (op == nir_op_f2fmp)
+      return nir_op_f2f16;
+   if (op == nir_op_f2imp)
+      return nir_op_f2i16;
+   if (op == nir_op_f2ump)
+      return nir_op_f2u16;
+
+   /* Not one of the x2ymp opcodes: nothing to translate. */
+   return op;
+}
+
+/* Classify one user of a phi while searching for a narrowing conversion
+ * that is common to all of the phi's users.
+ *
+ * instr is the using instruction, current_op is the conversion agreed on
+ * by the users examined so far (INVALID_OP if none has been seen yet).
+ *
+ * Returns the conversion opcode to carry forward, or INVALID_OP if instr
+ * is not one of the recognised narrowing conversions or is incompatible
+ * with current_op.
+ */
+static nir_op
+narrowing_conversion_op(nir_instr *instr, nir_op current_op)
+{
+   if (instr->type != nir_instr_type_alu)
+      return INVALID_OP;
+
+   const nir_op use_op = nir_instr_as_alu(instr)->op;
+
+   switch (use_op) {
+   /* integer -> integer */
+   case nir_op_i2imp:
+   case nir_op_i2i16:
+   /* integer -> float */
+   case nir_op_i2fmp:
+   case nir_op_i2f16:
+   case nir_op_u2fmp:
+   case nir_op_u2f16:
+   /* float -> float */
+   case nir_op_f2fmp:
+   case nir_op_f2f16:
+   case nir_op_f2f16_rtne:
+   case nir_op_f2f16_rtz:
+   /* float -> integer */
+   case nir_op_f2imp:
+   case nir_op_f2i16:
+   case nir_op_f2ump:
+   case nir_op_f2u16:
+      break;
+   default:
+      return INVALID_OP;
+   }
+
+   /* First user seen, or exactly the same conversion as before: */
+   if (current_op == INVALID_OP || current_op == use_op)
+      return use_op;
+
+   /* The conversions differ.  They are still compatible if both map to
+    * the same concrete conversion, in which case that concrete op is used:
+    */
+   const nir_op concrete = concrete_conversion(use_op);
+   if (concrete_conversion(current_op) != concrete)
+      return INVALID_OP;
+
+   return concrete;
+}
+
+/* Check whether instr is one of the recognised conversions to 32 bits
+ * whose source is narrower than its destination.
+ *
+ * Returns the ALU opcode on success, INVALID_OP otherwise.  Once instr is
+ * known to be one of the recognised opcodes, *bit_size receives the bit
+ * size of its first source (even if the width check then fails); for any
+ * other instruction *bit_size is left untouched.
+ */
+static nir_op
+widening_conversion_op(nir_instr *instr, unsigned *bit_size)
+{
+   if (instr->type != nir_instr_type_alu)
+      return INVALID_OP;
+
+   nir_alu_instr *conv = nir_instr_as_alu(instr);
+
+   const bool to_32bit = conv->op == nir_op_i2i32 ||
+                         conv->op == nir_op_i2f32 ||
+                         conv->op == nir_op_u2f32 ||
+                         conv->op == nir_op_f2f32 ||
+                         conv->op == nir_op_f2i32 ||
+                         conv->op == nir_op_f2u32;
+   if (!to_32bit)
+      return INVALID_OP;
+
+   const unsigned src_bits = nir_src_bit_size(conv->src[0].src);
+   *bit_size = src_bits;
+
+   /* Only a real widening counts, ie. the dest must be wider than the src: */
+   if (nir_dest_bit_size(conv->dest.dest) > src_bits)
+      return conv->op;
+
+   return INVALID_OP;
+}
+
+/* Base type (as given by nir_alu_type_get_base_type()) of op's output. */
+static nir_alu_type
+op_to_type(nir_op op)
+{
+   const nir_alu_type out_type = nir_op_infos[op].output_type;
+
+   return nir_alu_type_get_base_type(out_type);
+}
+
+/* Try to move narrowing instructions consuming the phi into the phi's
+ * sources to reduce the phi's precision.
+ *
+ * This only applies to 32-bit phis whose every use is a (compatible)
+ * narrowing conversion and which are not used directly as an if condition.
+ * On success a new, narrower phi is inserted right after the original one,
+ * a narrowing conversion is emitted for each phi source, and the original
+ * consumers are turned into movs of the new phi.  The original phi is left
+ * in place without uses.
+ *
+ * Returns true if the transformation was applied.
+ */
+static bool
+try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi)
+{
+   nir_op op = INVALID_OP;
+
+   assert(phi->dest.is_ssa);
+
+   /* If the phi has already been narrowed, nothing more to do: */
+   if (phi->dest.ssa.bit_size != 32)
+      return false;
+
+   /* Are the only uses of the phi conversion instructions, and
+    * are they all the same conversion?
+    */
+   nir_foreach_use (use, &phi->dest.ssa) {
+      op = narrowing_conversion_op(use->parent_instr, op);
+
+      /* Not a (compatible) narrowing conversion: */
+      if (op == INVALID_OP)
+         return false;
+   }
+
+   /* an if_uses means the phi is used directly in a conditional, ie.
+    * without a conversion
+    */
+   if (!list_is_empty(&phi->dest.ssa.if_uses))
+      return false;
+
+   /* If the phi has no uses, then nothing to do: */
+   if (op == INVALID_OP)
+      return false;
+
+   /* construct replacement phi instruction: */
+   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
+   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
+                     phi->dest.ssa.num_components,
+                     nir_alu_type_get_type_size(nir_op_infos[op].output_type),
+                     NULL);
+
+   /* Push the conversion into the new phi sources: */
+   nir_foreach_phi_src (src, phi) {
+      assert(src->src.is_ssa);
+
+      /* insert new conversion instr in block of original phi src: */
+      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
+      nir_ssa_def *old_src = src->src.ssa;
+      nir_ssa_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL);
+
+      /* and add corresponding phi_src to the new_phi: */
+      nir_phi_src *phi_src = ralloc(new_phi, nir_phi_src);
+      phi_src->pred = src->pred;
+      phi_src->src = nir_src_for_ssa(new_src);
+      exec_list_push_tail(&new_phi->srcs, &phi_src->node);
+   }
+
+   /* The conversions consuming the original phi may swizzle their source
+    * (e.g. "i2imp phi.y" on a multi-component phi), so their results can
+    * not simply be replaced by the new phi: that would drop the swizzle
+    * and could even change the number of components.  Instead keep each
+    * consumer, with its swizzle, but turn it into a plain mov; its source
+    * is re-pointed to the already-narrowed phi just below.
+    */
+   nir_foreach_use (use, &phi->dest.ssa) {
+      /* We've previously established that all the uses were alu
+       * conversion ops:
+       */
+      nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
+
+      assert(alu->dest.dest.is_ssa);
+
+      alu->op = nir_op_mov;
+   }
+
+   /* Now make all those movs read the new phi instead of the original: */
+   nir_ssa_def_rewrite_uses(&phi->dest.ssa, &new_phi->dest.ssa);
+
+   /* And finally insert the new phi after all sources are in place: */
+   b->cursor = nir_after_instr(&phi->instr);
+   nir_builder_instr_insert(b, &new_phi->instr);
+
+   return true;
+}
+
+/* Check whether every component of the 32-bit constant lc survives a round
+ * trip through 16 bits unchanged, interpreting the value according to the
+ * base type of op's output (signed int, unsigned int or float).
+ *
+ * Returns true only if no component would change.
+ */
+static bool
+can_convert_load_const(nir_load_const_instr *lc, nir_op op)
+{
+   const nir_alu_type base = op_to_type(op);
+
+   /* Note that we only handle phi's with bit_size == 32: */
+   assert(lc->def.bit_size == 32);
+
+   for (unsigned c = 0; c < lc->def.num_components; c++) {
+      bool exact;
+
+      if (base == nir_type_int) {
+         /* sign-extending the low 16 bits must give back the value */
+         exact = lc->value[c].i32 == (int32_t)(int16_t)lc->value[c].i32;
+      } else if (base == nir_type_uint) {
+         /* zero-extending the low 16 bits must give back the value */
+         exact = lc->value[c].u32 == (uint32_t)(uint16_t)lc->value[c].u32;
+      } else if (base == nir_type_float) {
+         /* float -> half -> float must compare equal */
+         exact = lc->value[c].f32 ==
+                 _mesa_half_to_float(_mesa_float_to_half(lc->value[c].f32));
+      } else {
+         unreachable("bad type");
+         return false;
+      }
+
+      if (!exact)
+         return false;
+   }
+
+   return true;
+}
+
+/* Check all the phi sources to see if they are the same widening op, in
+ * which case we can push the widening op to the other side of the phi.
+ *
+ * load_const sources are tolerated as long as they can be narrowed without
+ * loss of precision, see below.
+ *
+ * Returns the common widening opcode, or INVALID_OP if the phi's sources
+ * do not allow the transformation.  When a valid opcode is returned,
+ * *bit_size holds the (common) bit size of the conversions' sources.
+ */
+static nir_op
+find_widening_op(nir_phi_instr *phi, unsigned *bit_size)
+{
+   nir_op op = INVALID_OP;
+
+   bool has_load_const = false;
+   *bit_size = 0;
+
+   nir_foreach_phi_src (src, phi) {
+      assert(src->src.is_ssa);
+
+      nir_instr *instr = src->src.ssa->parent_instr;
+      if (instr->type == nir_instr_type_load_const) {
+         has_load_const = true;
+         continue;
+      }
+
+      unsigned src_bit_size;
+      nir_op src_op = widening_conversion_op(instr, &src_bit_size);
+
+      /* Not a widening conversion: */
+      if (src_op == INVALID_OP)
+         return INVALID_OP;
+
+      /* If it is a widening conversion, it needs to be the same op as
+       * other phi sources:
+       */
+      if ((op != INVALID_OP) && (op != src_op))
+         return INVALID_OP;
+
+      if (*bit_size && (*bit_size != src_bit_size))
+         return INVALID_OP;
+
+      op = src_op;
+      *bit_size = src_bit_size;
+   }
+
+   if ((op == INVALID_OP) || !has_load_const)
+      return op;
+
+   /* try_move_widening_src() narrows a load_const source with f2f16 or
+    * i2i16, picked from the *output* type of the widening op, and always
+    * to 16 bits.  That is only the inverse of the widening op when the op
+    * converts between values of the same base type (i2i32 / f2f32) and the
+    * conversion sources are 16 bits wide.  For mixed-type conversions the
+    * narrowed constant would be reinterpreted as the wrong type (e.g.
+    * i2f32(f2f16(1.0)) is not 1.0), and for other source sizes the
+    * narrowed constant would not match the new phi's bit size.  So be
+    * conservative and bail in those cases:
+    */
+   if (*bit_size != 16)
+      return INVALID_OP;
+
+   if ((op != nir_op_i2i32) && (op != nir_op_f2f32))
+      return INVALID_OP;
+
+   /* If we could otherwise move widening sources, but load_const is
+    * one of the phi sources (and does not have a widening conversion,
+    * but could have a narrowing->widening sequence inserted without
+    * loss of precision), then we could insert a narrowing->widening
+    * sequence to make the rest of the transformation possible:
+    */
+   nir_foreach_phi_src (src, phi) {
+      assert(src->src.is_ssa);
+
+      nir_instr *instr = src->src.ssa->parent_instr;
+      if (instr->type != nir_instr_type_load_const)
+         continue;
+
+      if (!can_convert_load_const(nir_instr_as_load_const(instr), op))
+         return INVALID_OP;
+   }
+
+   return op;
+}
+
+/* Try to hoist a widening conversion that is common to the phi's sources
+ * to the other side of the phi, so that the phi itself operates on the
+ * narrower type.
+ *
+ * On success a new narrow phi is inserted after the original one, followed
+ * (after the block's phis) by a single widening conversion which replaces
+ * all uses of the original phi.
+ *
+ * Returns true if the transformation was applied.
+ */
+static bool
+try_move_widening_src(nir_builder *b, nir_phi_instr *phi)
+{
+   assert(phi->dest.is_ssa);
+
+   /* Only 32-bit phis are candidates; anything else was already narrowed: */
+   if (phi->dest.ssa.bit_size != 32)
+      return false;
+
+   unsigned narrow_bits;
+   const nir_op widen_op = find_widening_op(phi, &narrow_bits);
+   if (widen_op == INVALID_OP)
+      return false;
+
+   /* Selects which narrowing conversion is emitted for load_const srcs: */
+   const bool is_float = op_to_type(widen_op) == nir_type_float;
+
+   /* Build the narrow phi that will stand in for the original: */
+   nir_phi_instr *narrow_phi = nir_phi_instr_create(b->shader);
+   nir_ssa_dest_init(&narrow_phi->instr, &narrow_phi->dest,
+                     phi->dest.ssa.num_components,
+                     narrow_bits, NULL);
+
+   /* Give the narrow phi one source per source of the original phi: */
+   nir_foreach_phi_src (src, phi) {
+      assert(src->src.is_ssa);
+
+      nir_instr *producer = src->src.ssa->parent_instr;
+      nir_ssa_def *narrow_def;
+
+      b->cursor = nir_after_instr(producer);
+
+      if (producer->type != nir_instr_type_load_const) {
+         /* find_widening_op() established that this is a conversion.
+          * Strip it off; since it may carry a swizzle, go through
+          * nir_mov_alu() rather than using its source directly:
+          */
+         nir_alu_instr *conv = nir_instr_as_alu(producer);
+
+         narrow_def = nir_mov_alu(b, conv->src[0],
+                                  nir_dest_num_components(conv->dest.dest));
+      } else {
+         /* Constants were verified to be safely narrowable, so insert
+          * an explicit narrowing conversion for them:
+          */
+         nir_ssa_def *konst = &nir_instr_as_load_const(producer)->def;
+
+         if (is_float)
+            narrow_def = nir_f2f16(b, konst);
+         else
+            narrow_def = nir_i2i16(b, konst);
+      }
+
+      /* Hook the narrowed value up to the same predecessor block: */
+      nir_phi_src *narrow_src = ralloc(narrow_phi, nir_phi_src);
+      narrow_src->pred = src->pred;
+      narrow_src->src = nir_src_for_ssa(narrow_def);
+      exec_list_push_tail(&narrow_phi->srcs, &narrow_src->node);
+   }
+
+   /* With all its sources in place, the narrow phi can be inserted: */
+   b->cursor = nir_after_instr(&phi->instr);
+   nir_builder_instr_insert(b, &narrow_phi->instr);
+
+   /* Re-apply the widening conversion once, after the phis, and make it
+    * take over every use of the original phi:
+    */
+   b->cursor = nir_after_instr_and_phis(&narrow_phi->instr);
+   nir_ssa_def *widened =
+      nir_build_alu(b, widen_op, &narrow_phi->dest.ssa, NULL, NULL, NULL);
+
+   nir_ssa_def_rewrite_uses(&phi->dest.ssa, widened);
+
+   return true;
+}
+
+/* Apply the first transformation that succeeds on phi: moving narrowing
+ * conversions from its uses is tried first, and hoisting widening
+ * conversions from its sources only if that did nothing.
+ *
+ * Returns true if either transformation was applied.
+ */
+static bool
+lower_phi(nir_builder *b, nir_phi_instr *phi)
+{
+   return try_move_narrowing_dst(b, phi) ||
+          try_move_widening_src(b, phi);
+}
+
+/* Pass entry point: run lower_phi() on the leading phi instructions of
+ * every block of every function that has an implementation.
+ *
+ * The pass bails out early when the shader info reports no use of 8 or
+ * 16 bit sizes; if the info reports no bit sizes at all it is gathered
+ * first.
+ *
+ * Returns true if any phi was changed.
+ */
+bool
+nir_opt_phi_precision(nir_shader *shader)
+{
+   unsigned bit_sizes_used = shader->info.bit_sizes_float |
+                             shader->info.bit_sizes_int;
+
+   /* Nothing recorded: (re)gather the info before deciding. */
+   if (bit_sizes_used == 0) {
+      nir_shader_gather_info(shader, nir_shader_get_entrypoint(shader));
+      bit_sizes_used = shader->info.bit_sizes_float |
+                       shader->info.bit_sizes_int;
+   }
+
+   /* If 8b or 16b bit_sizes are not used, no point to run this pass: */
+   if ((bit_sizes_used & (8 | 16)) == 0)
+      return false;
+
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (function->impl == NULL)
+         continue;
+
+      nir_builder b;
+      nir_builder_init(&b, function->impl);
+
+      nir_foreach_block (block, function->impl) {
+         nir_foreach_instr_safe (instr, block) {
+            /* Stop at the first non-phi instruction of the block: */
+            if (instr->type != nir_instr_type_phi)
+               break;
+
+            if (lower_phi(&b, nir_instr_as_phi(instr)))
+               progress = true;
+         }
+      }
+
+      /* Note that progress accumulates over all functions handled so far. */
+      if (!progress) {
+         nir_metadata_preserve(function->impl, nir_metadata_all);
+      } else {
+         nir_metadata_preserve(function->impl,
+                               nir_metadata_block_index |
+                               nir_metadata_dominance);
+      }
+   }
+
+   return progress;
+}
+
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index e9ea019a498..303dc084639 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -292,6 +292,7 @@ st_nir_opts(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
+ NIR_PASS(progress, nir, nir_opt_phi_precision);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);
More information about the mesa-commit
mailing list