Mesa (main): pan/bi: Propagate fabs/neg/sat
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Jun 15 20:41:30 UTC 2021
Module: Mesa
Branch: main
Commit: 41070fedca5ae1dbfb63017c6d9edfe65ff43c4a
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=41070fedca5ae1dbfb63017c6d9edfe65ff43c4a
Author: Alyssa Rosenzweig <alyssa at collabora.com>
Date: Thu Jun 10 19:05:29 2021 -0400
pan/bi: Propagate fabs/neg/sat
Initial support for modifier propagation. Bifrost makes this
unreasonably hard.
total instructions in shared programs: 151604 -> 150761 (-0.56%)
instructions in affected programs: 48773 -> 47930 (-1.73%)
helped: 212
HURT: 0
helped stats (abs) min: 1 max: 28 x̄: 3.98 x̃: 1
helped stats (rel) min: 0.29% max: 12.70% x̄: 1.75% x̃: 1.26%
95% mean confidence interval for instructions value: -4.71 -3.25
95% mean confidence interval for instructions %-change: -1.97% -1.53%
Instructions are helped.
total tuples in shared programs: 131876 -> 131560 (-0.24%)
tuples in affected programs: 25393 -> 25077 (-1.24%)
helped: 104
HURT: 3
helped stats (abs) min: 1 max: 28 x̄: 3.08 x̃: 2
helped stats (rel) min: 0.34% max: 8.57% x̄: 1.55% x̃: 1.04%
HURT stats (abs) min: 1 max: 2 x̄: 1.33 x̃: 1
HURT stats (rel) min: 0.51% max: 2.86% x̄: 1.30% x̃: 0.53%
95% mean confidence interval for tuples value: -3.63 -2.28
95% mean confidence interval for tuples %-change: -1.73% -1.21%
Tuples are helped.
total clauses in shared programs: 28122 -> 28032 (-0.32%)
clauses in affected programs: 2720 -> 2630 (-3.31%)
helped: 58
HURT: 1
helped stats (abs) min: 1 max: 6 x̄: 1.57 x̃: 1
helped stats (rel) min: 0.88% max: 14.29% x̄: 4.06% x̃: 3.67%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 7.69% max: 7.69% x̄: 7.69% x̃: 7.69%
95% mean confidence interval for clauses value: -1.85 -1.20
95% mean confidence interval for clauses %-change: -4.60% -3.13%
Clauses are helped.
total quadwords in shared programs: 119778 -> 119509 (-0.22%)
quadwords in affected programs: 20698 -> 20429 (-1.30%)
helped: 95
HURT: 1
helped stats (abs) min: 1 max: 28 x̄: 2.85 x̃: 2
helped stats (rel) min: 0.38% max: 7.14% x̄: 1.50% x̃: 1.13%
HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
HURT stats (rel) min: 3.23% max: 3.23% x̄: 3.23% x̃: 3.23%
95% mean confidence interval for quadwords value: -3.49 -2.11
95% mean confidence interval for quadwords %-change: -1.71% -1.20%
Quadwords are helped.
Signed-off-by: Alyssa Rosenzweig <alyssa at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11327>
---
src/panfrost/bifrost/bi_opt_mod_props.c | 233 ++++++++++++++++++++++++++++++++
src/panfrost/bifrost/bifrost_compile.c | 2 +
src/panfrost/bifrost/compiler.h | 6 +
src/panfrost/bifrost/meson.build | 1 +
4 files changed, 242 insertions(+)
diff --git a/src/panfrost/bifrost/bi_opt_mod_props.c b/src/panfrost/bifrost/bi_opt_mod_props.c
new file mode 100644
index 00000000000..03888184288
--- /dev/null
+++ b/src/panfrost/bifrost/bi_opt_mod_props.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2021 Collabora, Ltd.
+ * Copyright (C) 2021 Alyssa Rosenzweig <alyssa at rosenzweig.io>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler.h"
+/* Reports whether source index s of instruction I can absorb an absolute-value
+ * (fabs) modifier. A few opcodes are rejected outright until the extra
+ * conditions named in their TODOs are implemented; every other opcode is
+ * answered from the bitmask bi_opcode_props[I->op].abs, in which bit s stands
+ * for source s. */
+static bool
+bi_takes_fabs(bi_instr *I, unsigned s)
+{
+ switch (I->op) {
+ case BI_OPCODE_FCMP_V2F16:
+ case BI_OPCODE_FMAX_V2F16:
+ case BI_OPCODE_FMIN_V2F16:
+ /* TODO: Check count or lower */
+ return false;
+ case BI_OPCODE_V2F32_TO_V2F16:
+ /* TODO: Needs both match or lower */
+ return false;
+ case BI_OPCODE_FLOG_TABLE_F32:
+ /* TODO: Need to check mode */
+ return false;
+ default:
+ return bi_opcode_props[I->op].abs & BITFIELD_BIT(s);
+ }
+}
+/* Reports whether source index s of instruction I can absorb a negate (fneg)
+ * modifier. The opcodes listed explicitly are rejected until the conditions
+ * named in their TODOs are handled; every other opcode is answered from the
+ * bitmask bi_opcode_props[I->op].neg, in which bit s stands for source s. */
+static bool
+bi_takes_fneg(bi_instr *I, unsigned s)
+{
+ switch (I->op) {
+ case BI_OPCODE_CUBE_SSEL:
+ case BI_OPCODE_CUBE_TSEL:
+ case BI_OPCODE_CUBEFACE:
+ /* TODO: Needs match or lower */
+ return false;
+ case BI_OPCODE_FREXPE_F32:
+ case BI_OPCODE_FREXPE_V2F16:
+ case BI_OPCODE_FLOG_TABLE_F32:
+ /* TODO: Need to check mode */
+ return false;
+ default:
+ return bi_opcode_props[I->op].neg & BITFIELD_BIT(s);
+ }
+}
+/* Recognizes the instruction shape this pass treats as a pure fabs/fneg: an
+ * FADD (32-bit or v2f16) whose second source is the constant 0 and which
+ * applies no clamp. The modifiers on src[0] are not checked here; the caller
+ * (bi_opt_mod_prop_forward) reads them itself.
+ * NOTE(review): under IEEE-754, (-0.0) + (+0.0) is +0.0, so adding zero is not
+ * a bit-exact identity for negative zero -- confirm that is acceptable. */
+static bool
+bi_is_fabsneg(bi_instr *I)
+{
+ return (I->op == BI_OPCODE_FADD_F32 || I->op == BI_OPCODE_FADD_V2F16) &&
+ (I->src[1].type == BI_INDEX_CONSTANT && I->src[1].value == 0) &&
+ (I->clamp == BI_CLAMP_NONE);
+}
+/* Composes two 16-bit swizzles, using a to select among the halves that b
+ * already selects (see the call in bi_compose_float_index). Both inputs must
+ * be at most BI_SWIZZLE_H11, which the asserts enforce.
+ *
+ * BI_SWIZZLE_H10 and BI_SWIZZLE_H01 are used as bit masks: for each of those
+ * two bits, a set bit in a picks b's H01 bit and a clear bit picks b's H10
+ * bit, and the pick is stored at the same bit position in the result. This
+ * presumably relies on the numeric encoding of enum bi_swizzle -- confirm
+ * against its definition. */
+static enum bi_swizzle
+bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b)
+{
+ assert(a <= BI_SWIZZLE_H11);
+ assert(b <= BI_SWIZZLE_H11);
+
+ bool al = (a & BI_SWIZZLE_H10);
+ bool ar = (a & BI_SWIZZLE_H01);
+ bool bl = (b & BI_SWIZZLE_H10);
+ bool br = (b & BI_SWIZZLE_H01);
+
+ return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) |
+ ((ar ? br : bl) ? BI_SWIZZLE_H01 : 0);
+}
+
+/* Like bi_replace_index, but composes instead of overwrites.
+ *
+ * old is the operand as it currently appears in the reading instruction and
+ * repl is the operand being substituted for it; old's modifiers are applied
+ * on top of repl's. Returns repl with its neg, abs and swizzle fields updated
+ * to the composed values; the other fields of repl are returned as passed. */
+
+static inline bi_index
+bi_compose_float_index(bi_index old, bi_index repl)
+{
+ /* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise
+ * -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */
+ repl.neg = old.neg ^ (repl.neg && !old.abs);
+
+ /* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */
+ repl.abs |= old.abs;
+
+ /* Use the old swizzle to select from the replacement swizzle */
+ repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle);
+
+ return repl;
+}
+/* Forward pass: folds fabs/fneg instructions into the sources that read them.
+ *
+ * lut is a zero-initialised table of (ctx->ssa_alloc + 1) << 2 entries that
+ * maps the bi_word_node() of each SSA dest[0] seen so far to the instruction
+ * that wrote it. For every SSA source of every instruction the writer is
+ * looked up; when the writer matches bi_is_fabsneg, has the same
+ * bi_opcode_props size as the reader, and the reader's source slot accepts
+ * the abs/neg modifiers the writer applies, the source is rewritten to read
+ * the writer's src[0] with the modifiers composed in. The writer itself is
+ * not removed here; bifrost_compile_shader_nir runs
+ * bi_opt_dead_code_eliminate after this pass, which presumably cleans up
+ * writers left without readers.
+ *
+ * NOTE(review): the calloc arguments are in (size, count) order rather than
+ * the conventional (count, size), and the result is not checked for NULL. */
+void
+bi_opt_mod_prop_forward(bi_context *ctx)
+{
+ bi_instr **lut = calloc(sizeof(bi_instr *), ((ctx->ssa_alloc + 1) << 2));
+
+ bi_foreach_instr_global_safe(ctx, I) {
+ if (bi_is_ssa(I->dest[0]))
+ lut[bi_word_node(I->dest[0])] = I;
+
+ bi_foreach_src(I, s) {
+ if (!bi_is_ssa(I->src[s]))
+ continue;
+
+ bi_instr *mod = lut[bi_word_node(I->src[s])];
+
+ if (!mod)
+ continue;
+
+ if (bi_opcode_props[mod->op].size != bi_opcode_props[I->op].size)
+ continue;
+
+ if (bi_is_fabsneg(mod)) {
+ if (mod->src[0].abs && !bi_takes_fabs(I, s))
+ continue;
+
+ if (mod->src[0].neg && !bi_takes_fneg(I, s))
+ continue;
+
+ I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]);
+ }
+ }
+ }
+
+ free(lut);
+}
+
+/* Reports whether a clamp may be folded into instruction I: the answer is the
+ * clamp property in bi_opcode_props, except that the RSCALE opcodes listed
+ * below always answer false.
+ *
+ * RSCALE has restrictions on how the clamp may be used, only used for
+ * specialized transcendental sequences that set the clamp explicitly anyway */
+
+static bool
+bi_takes_clamp(bi_instr *I)
+{
+ switch (I->op) {
+ case BI_OPCODE_FMA_RSCALE_F32:
+ case BI_OPCODE_FMA_RSCALE_V2F16:
+ case BI_OPCODE_FADD_RSCALE_F32:
+ return false;
+ default:
+ return bi_opcode_props[I->op].clamp;
+ }
+}
+
+/* Treating clamps as functions, compute the composition of f and g. For {NONE,
+ * SAT, SAT_SIGNED, CLAMP_POS}, anything left- or right-composed with NONE is
+ * unchanged, anything composed with itself is unchanged, and any two
+ * nontrivial distinct clamps compose to SAT (left as an exercise).
+ *
+ * In the code below SAT is spelled BI_CLAMP_CLAMP_0_1. The result does not
+ * depend on the order of the two arguments. */
+
+static enum bi_clamp
+bi_compose_clamp(enum bi_clamp f, enum bi_clamp g)
+{
+ return (f == BI_CLAMP_NONE) ? g :
+ (g == BI_CLAMP_NONE) ? f :
+ (f == g) ? f :
+ BI_CLAMP_CLAMP_0_1;
+}
+/* Recognizes a clamp-only instruction: an FADD (32-bit or v2f16) whose first
+ * source carries neither abs nor neg, whose second source is the constant 0,
+ * and whose clamp is something other than BI_CLAMP_NONE. */
+static bool
+bi_is_fclamp(bi_instr *I)
+{
+ return (I->op == BI_OPCODE_FADD_F32 || I->op == BI_OPCODE_FADD_V2F16) &&
+ (!I->src[0].abs && !I->src[0].neg) &&
+ (I->src[1].type == BI_INDEX_CONSTANT && I->src[1].value == 0) &&
+ (I->clamp != BI_CLAMP_NONE);
+}
+/* Tries to fold the clamp instruction use into I. The caller,
+ * bi_opt_mod_prop_backward, passes the writer of a value as I and that
+ * value's single reader as use. On success I's clamp becomes the composition
+ * of both clamps, I takes over use's destination, and true is returned; the
+ * caller then removes use. Returns false, leaving I untouched, when use is
+ * not a clamp-only FADD or I cannot carry a clamp.
+ *
+ * bi_compose_clamp gives the same result for either argument order, so the
+ * order used below does not matter.
+ * NOTE(review): the neg/abs test below is already implied by
+ * bi_is_fclamp(use).
+ * NOTE(review): use->src[0].swizzle is not inspected -- confirm that is safe
+ * for the v2f16 case. */
+static bool
+bi_optimizer_clamp(bi_instr *I, bi_instr *use)
+{
+ if (!bi_is_fclamp(use)) return false;
+ if (!bi_takes_clamp(I)) return false;
+ if (use->src[0].neg || use->src[0].abs) return false;
+
+ I->clamp = bi_compose_clamp(I->clamp, use->clamp);
+ I->dest[0] = use->dest[0];
+ return true;
+}
+/* Backward pass: folds a standalone clamp into the instruction producing its
+ * input.
+ *
+ * Instructions are walked with bi_foreach_instr_global_rev. uses[v] remembers
+ * the first instruction encountered that reads SSA value v, and the bit for v
+ * in multiple is set when a further read of v is encountered (including a
+ * second source of the same instruction). When an instruction's dest[0] is
+ * SSA, has exactly one recorded read, and reader and writer share the same
+ * bi_opcode_props size, bi_optimizer_clamp is tried; on success the reader is
+ * removed because the writer now produces the reader's destination directly.
+ *
+ * NOTE(review): neither calloc result is checked for NULL. */
+void
+bi_opt_mod_prop_backward(bi_context *ctx)
+{
+ unsigned count = ((ctx->ssa_alloc + 1) << 2);
+ bi_instr **uses = calloc(count, sizeof(*uses));
+ BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple));
+
+ bi_foreach_instr_global_rev(ctx, I) {
+ bi_foreach_src(I, s) {
+ if (bi_is_ssa(I->src[s])) {
+ unsigned v = bi_word_node(I->src[s]);
+
+ if (uses[v])
+ BITSET_SET(multiple, v);
+ else
+ uses[v] = I;
+ }
+ }
+
+ if (!bi_is_ssa(I->dest[0]))
+ continue;
+
+ bi_instr *use = uses[bi_word_node(I->dest[0])];
+
+ if (!use || BITSET_TEST(multiple, bi_word_node(I->dest[0])))
+ continue;
+
+ if (bi_opcode_props[use->op].size != bi_opcode_props[I->op].size)
+ continue;
+
+ /* Destination has a single use, try to propagate */
+ if (bi_optimizer_clamp(I, use)) {
+ bi_remove_instruction(use);
+ continue;
+ }
+ }
+
+ free(uses);
+ free(multiple);
+}
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 0b4cbcd037e..376cf7772a7 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -3431,6 +3431,8 @@ bifrost_compile_shader_nir(nir_shader *nir,
bi_opt_push_ubo(ctx);
bi_opt_constant_fold(ctx);
bi_opt_copy_prop(ctx);
+ bi_opt_mod_prop_forward(ctx);
+ bi_opt_mod_prop_backward(ctx);
bi_opt_dead_code_eliminate(ctx);
bi_foreach_block(ctx, _block) {
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index e41d62ea8ab..09d3f10f722 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -707,6 +707,10 @@ bi_node_to_index(unsigned node, unsigned node_count)
bi_foreach_block(ctx, v_block) \
bi_foreach_instr_in_block((bi_block *) v_block, v)
+#define bi_foreach_instr_global_rev(ctx, v) \
+ bi_foreach_block_rev(ctx, v_block) \
+ bi_foreach_instr_in_block_rev((bi_block *) v_block, v)
+
#define bi_foreach_instr_global_safe(ctx, v) \
bi_foreach_block(ctx, v_block) \
bi_foreach_instr_in_block_safe((bi_block *) v_block, v)
@@ -774,6 +778,8 @@ void bi_print_shader(bi_context *ctx, FILE *fp);
/* BIR passes */
void bi_opt_copy_prop(bi_context *ctx);
+void bi_opt_mod_prop_forward(bi_context *ctx);
+void bi_opt_mod_prop_backward(bi_context *ctx);
void bi_opt_dead_code_eliminate(bi_context *ctx);
void bi_opt_dce_post_ra(bi_context *ctx);
void bi_opt_push_ubo(bi_context *ctx);
diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build
index 21f672e4198..4faf6a05ff4 100644
--- a/src/panfrost/bifrost/meson.build
+++ b/src/panfrost/bifrost/meson.build
@@ -29,6 +29,7 @@ libpanfrost_bifrost_files = files(
'bi_opt_copy_prop.c',
'bi_opt_dce.c',
'bi_opt_push_ubo.c',
+ 'bi_opt_mod_props.c',
'bi_pack.c',
'bi_ra.c',
'bi_schedule.c',
More information about the mesa-commit mailing list