Mesa (main): pan/bi: Extract MUX to CSEL optimization

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Jun 6 16:24:56 UTC 2022


Module: Mesa
Branch: main
Commit: 1768afa5b969dac8e0eedc45f76e02e302329d85
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1768afa5b969dac8e0eedc45f76e02e302329d85

Author: Alyssa Rosenzweig <alyssa at collabora.com>
Date:   Fri May  6 17:09:56 2022 -0400

pan/bi: Extract MUX to CSEL optimization

It's portable, and useful to both Bifrost and Valhall: in the clause scheduler
and in instruction selection, respectively. Move it from the Bifrost clause
scheduler to common code so we can share the benefits.

Signed-off-by: Alyssa Rosenzweig <alyssa at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16857>

---

 src/panfrost/bifrost/bi_schedule.c | 58 ++------------------------------------
 src/panfrost/bifrost/bir.c         | 57 +++++++++++++++++++++++++++++++++++++
 src/panfrost/bifrost/compiler.h    |  3 ++
 3 files changed, 63 insertions(+), 55 deletions(-)

diff --git a/src/panfrost/bifrost/bi_schedule.c b/src/panfrost/bifrost/bi_schedule.c
index 1f511d8eee5..348cc3a3f75 100644
--- a/src/panfrost/bifrost/bi_schedule.c
+++ b/src/panfrost/bifrost/bi_schedule.c
@@ -504,58 +504,6 @@ bi_can_iaddc(bi_instr *ins)
                 ins->src[1].swizzle == BI_SWIZZLE_H01);
 }
 
-/*
- * When MUX.i32 or MUX.v2i16 is used to multiplex entire sources, they can be
- * replaced by CSEL as follows:
- *
- *      MUX.neg(x, y, b) -> CSEL.s.lt(b, 0, x, y)
- *      MUX.int_zero(x, y, b) -> CSEL.i.eq(b, 0, x, y)
- *      MUX.fp_zero(x, y, b) -> CSEL.f.eq(b, 0, x, y)
- *
- * MUX.bit cannot be transformed like this.
- *
- * Note that MUX.v2i16 has partial support for swizzles, which CSEL.v2i16 lacks.
- * So we must check the swizzles too.
- */
-static bool
-bi_can_csel(bi_instr *I)
-{
-        return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) &&
-                (I->mux != BI_MUX_BIT) &&
-                (I->src[0].swizzle == BI_SWIZZLE_H01) &&
-                (I->src[1].swizzle == BI_SWIZZLE_H01) &&
-                (I->src[2].swizzle == BI_SWIZZLE_H01);
-}
-
-static enum bi_opcode
-bi_csel_for_mux(bool b32, enum bi_mux mux)
-{
-        switch (mux) {
-        case BI_MUX_INT_ZERO:
-                return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
-        case BI_MUX_NEG:
-                return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
-        case BI_MUX_FP_ZERO:
-                return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
-        default:
-             unreachable("No CSEL for MUX.bit");
-        }
-}
-
-static void
-bi_replace_mux_with_csel(bi_instr *I)
-{
-        assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);
-        I->op = bi_csel_for_mux(I->op == BI_OPCODE_MUX_I32, I->mux);
-        I->cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ;
-
-        bi_index vTrue = I->src[0], vFalse = I->src[1], cond = I->src[2];
-
-        I->src[0] = cond;
-        I->src[1] = bi_zero();
-        I->src[2] = vTrue;
-        I->src[3] = vFalse;
-}
 /*
  * The encoding of *FADD.v2f16 only specifies a single abs flag. All abs
  * encodings are permitted by swapping operands; however, this scheme fails if
@@ -576,7 +524,7 @@ bi_can_fma(bi_instr *ins)
                 return true;
 
         /* +MUX -> *CSEL */
-        if (bi_can_csel(ins))
+        if (bi_can_replace_with_csel(ins))
                 return true;
 
         /* *FADD.v2f16 has restricted abs modifiers, use +FADD.v2f16 instead */
@@ -1332,8 +1280,8 @@ bi_take_instr(bi_context *ctx, struct bi_worklist st,
                 assert(bi_can_iaddc(instr));
                 instr->op = BI_OPCODE_IADDC_I32;
                 instr->src[2] = bi_zero();
-        } else if (fma && bi_can_csel(instr)) {
-                bi_replace_mux_with_csel(instr);
+        } else if (fma && bi_can_replace_with_csel(instr)) {
+                bi_replace_mux_with_csel(instr, false);
         }
 
         return instr;
diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c
index f8323f93cfb..a4dff6dc2e6 100644
--- a/src/panfrost/bifrost/bir.c
+++ b/src/panfrost/bifrost/bir.c
@@ -244,3 +244,60 @@ bi_reconverge_branches(bi_block *block)
         /* Reconverge if the successor has multiple predecessors */
         return bi_num_predecessors(succ) > 1;
 }
+
+/*
+ * When MUX.i32 or MUX.v2i16 is used to multiplex entire sources, they can be
+ * replaced by CSEL as follows:
+ *
+ *      MUX.neg(x, y, b) -> CSEL.s.lt(b, 0, x, y)
+ *      MUX.int_zero(x, y, b) -> CSEL.i.eq(b, 0, x, y)
+ *      MUX.fp_zero(x, y, b) -> CSEL.f.eq(b, 0, x, y)
+ *
+ * MUX.bit cannot be transformed like this.
+ *
+ * Note that MUX.v2i16 has partial support for swizzles, which CSEL.v2i16 lacks.
+ * So we must check the swizzles too.
+ */
+bool
+bi_can_replace_with_csel(bi_instr *I)
+{
+        return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) &&
+                (I->mux != BI_MUX_BIT) &&
+                (I->src[0].swizzle == BI_SWIZZLE_H01) &&
+                (I->src[1].swizzle == BI_SWIZZLE_H01) &&
+                (I->src[2].swizzle == BI_SWIZZLE_H01);
+}
+
+static enum bi_opcode
+bi_csel_for_mux(bool must_sign, bool b32, enum bi_mux mux)
+{
+        switch (mux) {
+        case BI_MUX_INT_ZERO:
+                if (must_sign)
+                        return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16;
+                else
+                        return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
+        case BI_MUX_NEG:
+                return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
+        case BI_MUX_FP_ZERO:
+                return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
+        default:
+             unreachable("No CSEL for MUX.bit");
+        }
+}
+
+void
+bi_replace_mux_with_csel(bi_instr *I, bool must_sign)
+{
+        assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);
+        I->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux);
+        I->cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ;
+        I->mux = 0;
+
+        bi_index vTrue = I->src[0], vFalse = I->src[1], cond = I->src[2];
+
+        I->src[0] = cond;
+        I->src[1] = bi_zero();
+        I->src[2] = vTrue;
+        I->src[3] = vFalse;
+}
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index ee7c8c99cb7..902239236fc 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -1070,6 +1070,9 @@ bi_clause * bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause);
 bool bi_side_effects(const bi_instr *I);
 bool bi_reconverge_branches(bi_block *block);
 
+bool bi_can_replace_with_csel(bi_instr *I);
+void bi_replace_mux_with_csel(bi_instr *I, bool must_sign);
+
 void bi_print_instr(const bi_instr *I, FILE *fp);
 void bi_print_slots(bi_registers *regs, FILE *fp);
 void bi_print_tuple(bi_tuple *tuple, FILE *fp);



More information about the mesa-commit mailing list