Mesa (main): pan/bi: Optimize replication

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sat Feb 19 03:33:22 UTC 2022


Module: Mesa
Branch: main
Commit: f7d44a46cd424e797a38ef732360e546f093f0ae
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f7d44a46cd424e797a38ef732360e546f093f0ae

Author: Alyssa Rosenzweig <alyssa at collabora.com>
Date:   Sat Jan 15 12:26:42 2022 -0500

pan/bi: Optimize replication

Bifrost's 16-bit support comes in the form of vectorized instructions,
so when we manipulate scalars, we usually replicate to both bottom and
top halves of 32-bit registers. Add an analysis pass that detects
replication. Then, use that replication pass to optimize out useless
swizzle instructions (by changing them to plain moves, which can be
copypropped).

This optimization is a slight shader-db win on its own, and allows us to
transition to lower_bool_to_bitsize without regressing shader-db.

total instructions in shared programs: 90323 -> 90257 (-0.07%)
instructions in affected programs: 2513 -> 2447 (-2.63%)
helped: 20
HURT: 0
helped stats (abs) min: 1.0 max: 16.0 x̄: 3.30 x̃: 2
helped stats (rel) min: 1.25% max: 11.11% x̄: 4.80% x̃: 4.29%
95% mean confidence interval for instructions value: -5.05 -1.55
95% mean confidence interval for instructions %-change: -6.06% -3.54%
Instructions are helped.

total tuples in shared programs: 73769 -> 73740 (-0.04%)
tuples in affected programs: 1611 -> 1582 (-1.80%)
helped: 17
HURT: 0
helped stats (abs) min: 1.0 max: 9.0 x̄: 1.71 x̃: 1
helped stats (rel) min: 0.58% max: 16.67% x̄: 4.80% x̃: 3.33%
95% mean confidence interval for tuples value: -2.70 -0.71
95% mean confidence interval for tuples %-change: -7.06% -2.54%
Tuples are helped.

total clauses in shared programs: 15997 -> 15993 (-0.03%)
clauses in affected programs: 27 -> 23 (-14.81%)
helped: 4
HURT: 0
helped stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1
helped stats (rel) min: 7.69% max: 25.00% x̄: 18.17% x̃: 20.00%
95% mean confidence interval for clauses value: -1.00 -1.00
95% mean confidence interval for clauses %-change: -29.91% -6.44%
Clauses are helped.

total cycles in shared programs: 7623.13 -> 7622.13 (-0.01%)
cycles in affected programs: 64.83 -> 63.83 (-1.54%)
helped: 13
HURT: 0
helped stats (abs) min: 0.0416660000000002 max: 0.375 x̄: 0.08 x̃: 0
helped stats (rel) min: 1.02% max: 5.56% x̄: 2.82% x̃: 2.50%
95% mean confidence interval for cycles value: -0.13 -0.02
95% mean confidence interval for cycles %-change: -3.79% -1.85%
Cycles are helped.

total arith in shared programs: 2763.75 -> 2762.46 (-0.05%)
arith in affected programs: 67.17 -> 65.88 (-1.92%)
helped: 18
HURT: 0
helped stats (abs) min: 0.0416660000000002 max: 0.375 x̄: 0.07 x̃: 0
helped stats (rel) min: 1.02% max: 22.22% x̄: 5.68% x̃: 3.16%
95% mean confidence interval for arith value: -0.11 -0.03
95% mean confidence interval for arith %-change: -8.56% -2.80%
Arith are helped.

total quadwords in shared programs: 68173 -> 68155 (-0.03%)
quadwords in affected programs: 1258 -> 1240 (-1.43%)
helped: 14
HURT: 0
helped stats (abs) min: 1.0 max: 3.0 x̄: 1.29 x̃: 1
helped stats (rel) min: 0.42% max: 8.70% x̄: 3.88% x̃: 3.67%
95% mean confidence interval for quadwords value: -1.64 -0.93
95% mean confidence interval for quadwords %-change: -5.27% -2.49%
Quadwords are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14576>

---

 src/panfrost/bifrost/bi_lower_swizzle.c | 116 ++++++++++++++++++++++++++++++++
 src/panfrost/bifrost/compiler.h         |  22 ++++++
 2 files changed, 138 insertions(+)

diff --git a/src/panfrost/bifrost/bi_lower_swizzle.c b/src/panfrost/bifrost/bi_lower_swizzle.c
index 32517ad93de..2d79fcf83ca 100644
--- a/src/panfrost/bifrost/bi_lower_swizzle.c
+++ b/src/panfrost/bifrost/bi_lower_swizzle.c
@@ -137,6 +137,106 @@ bi_lower_swizzle_16(bi_context *ctx, bi_instr *ins, unsigned src)
         ins->src[src].swizzle = BI_SWIZZLE_H01;
 }
 
+/* Returns true if swz is one of the 8-bit replicating swizzles (B0000, B1111,
+ * B2222 or B3333) and false for any other swizzle.
+ */
+static bool
+bi_swizzle_replicates_8(enum bi_swizzle swz)
+{
+        const bool byte_replicated =
+                (swz == BI_SWIZZLE_B0000) ||
+                (swz == BI_SWIZZLE_B1111) ||
+                (swz == BI_SWIZZLE_B2222) ||
+                (swz == BI_SWIZZLE_B3333);
+        return byte_replicated;
+}
+
+/* Returns true if swz replicates at 16-bit granularity: either H00 or H11
+ * directly, or any swizzle that already replicates every 8 bits (which
+ * therefore also replicates every 16 bits).
+ */
+static bool
+bi_swizzle_replicates_16(enum bi_swizzle swz)
+{
+        /* The two replicating halfword swizzles */
+        if (swz == BI_SWIZZLE_H00 || swz == BI_SWIZZLE_H11)
+                return true;
+
+        /* Anything else replicates only if it does so per byte */
+        return bi_swizzle_replicates_8(swz);
+}
+
+/* Replication analysis for a single instruction.
+ *
+ * Returns true if the analysis concludes that the result of I is replicated
+ * across both 16-bit halves, and false if that cannot be shown. replicates_16
+ * is the bitset, indexed by bi_word_node(), of SSA values already known to be
+ * replicated; it is only read here.
+ */
+static bool
+bi_instr_replicates(bi_instr *I, BITSET_WORD *replicates_16)
+{
+        switch (I->op) {
+
+        /* Instructions that build a vector are reported as replicated when
+         * their two sources are value equivalent; answer for them directly.
+         */
+        case BI_OPCODE_MKVEC_V2I16:
+        case BI_OPCODE_V2F16_TO_V2S16:
+        case BI_OPCODE_V2F16_TO_V2U16:
+        case BI_OPCODE_V2F32_TO_V2F16:
+        case BI_OPCODE_V2S16_TO_V2F16:
+        case BI_OPCODE_V2S8_TO_V2F16:
+        case BI_OPCODE_V2S8_TO_V2S16:
+        case BI_OPCODE_V2U16_TO_V2F16:
+        case BI_OPCODE_V2U8_TO_V2F16:
+        case BI_OPCODE_V2U8_TO_V2U16:
+                return bi_is_value_equiv(I->src[0], I->src[1]);
+
+        /* 16-bit transcendentals are defined to output zero in their upper
+         * half, so they never replicate. The other opcodes listed are unused
+         * by the compiler and of uncertain behaviour, so be conservative.
+         */
+        case BI_OPCODE_FRCP_F16:
+        case BI_OPCODE_FRSQ_F16:
+        case BI_OPCODE_VN_ASST1_F16:
+        case BI_OPCODE_FPCLASS_F16:
+        case BI_OPCODE_FPOW_SC_DET_F16:
+                return false;
+
+        default:
+                break;
+        }
+
+        /* Only 16-bit ALU instructions (those without a message type) are
+         * analyzed; everything else is reported as not replicating.
+         */
+        if (bi_opcode_props[I->op].message != BIFROST_MESSAGE_NONE ||
+            bi_opcode_props[I->op].size != BI_SIZE_16)
+                return false;
+
+        /* The result replicates only if every non-null source does */
+        bi_foreach_src(I, idx) {
+                bi_index src = I->src[idx];
+
+                /* In order: absent source, replicating swizzle, SSA value
+                 * already marked as replicated, constant with equal halves.
+                 */
+                bool replicated =
+                        bi_is_null(src) ||
+                        bi_swizzle_replicates_16(src.swizzle) ||
+                        (bi_is_ssa(src) &&
+                         BITSET_TEST(replicates_16, bi_word_node(src))) ||
+                        (src.type == BI_INDEX_CONSTANT &&
+                         (src.value & 0xFFFF) == (src.value >> 16));
+
+                if (!replicated)
+                        return false;
+        }
+
+        return true;
+}
+
 void
 bi_lower_swizzle(bi_context *ctx)
 {
@@ -146,4 +246,20 @@ bi_lower_swizzle(bi_context *ctx)
                                 bi_lower_swizzle_16(ctx, ins, s);
                 }
         }
+
+        /* Clean up after lowering. The bitset holds one bit per word node. */
+        BITSET_WORD *replicates_16 =
+                calloc(BITSET_WORDS((ctx->ssa_alloc + 1) << 2), sizeof(BITSET_WORD));
+
+        bi_foreach_instr_global(ctx, ins) {
+                if (bi_is_ssa(ins->dest[0]) && bi_instr_replicates(ins, replicates_16))
+                        BITSET_SET(replicates_16, bi_word_node(ins->dest[0]));
+
+                if (ins->op == BI_OPCODE_SWZ_V2I16 && bi_is_ssa(ins->src[0]) &&
+                    BITSET_TEST(replicates_16, bi_word_node(ins->src[0]))) {
+                        ins->op = BI_OPCODE_MOV_I32;
+                        ins->src[0].swizzle = BI_SWIZZLE_H01;
+                }
+        }
+        free(replicates_16);
 }
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 5e767f3d2a7..72d21da8ee4 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -355,6 +355,28 @@ bi_is_word_equiv(bi_index left, bi_index right)
         return bi_is_equiv(left, right) && left.offset == right.offset;
 }
 
+/* An even stronger equivalence: checks whether two indices correspond to the
+ * same value when evaluated, including their abs/neg modifiers.
+ */
+static inline bool
+bi_is_value_equiv(bi_index left, bi_index right)
+{
+        /* Modifiers must agree whatever the index type is */
+        const bool same_mods = (left.abs == right.abs) &&
+                               (left.neg == right.neg);
+
+        /* Two constants are compared after applying their swizzles */
+        if (left.type == BI_INDEX_CONSTANT && right.type == BI_INDEX_CONSTANT) {
+                return (bi_apply_swizzle(left.value, left.swizzle) ==
+                        bi_apply_swizzle(right.value, right.swizzle)) && same_mods;
+        }
+
+        /* Anything else must match field for field */
+        return (left.value == right.value) && same_mods &&
+               (left.swizzle == right.swizzle) && (left.offset == right.offset) &&
+               (left.reg == right.reg) && (left.type == right.type);
+}
+
 #define BI_MAX_DESTS 2
 #define BI_MAX_SRCS 5
 



More information about the mesa-commit mailing list