Mesa (master): pan/bi: Switch to 1-bit bools

Wed May 5 02:45:05 UTC 2021

Module: Mesa
Branch: master
Commit: 2db8048aaae7287061d940bd27e19ca282ff4375
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2db8048aaae7287061d940bd27e19ca282ff4375

Author: Alyssa Rosenzweig <alyssa at collabora.com>
Date:   Mon May  3 19:00:35 2021 -0400

pan/bi: Switch to 1-bit bools

In prep for FP16.

Signed-off-by: Alyssa Rosenzweig <alyssa at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10627>

---

 src/panfrost/bifrost/bifrost_compile.c        | 173 ++++++++++++--------------
 src/panfrost/bifrost/bifrost_nir_algebraic.py |   3 -
 2 files changed, 81 insertions(+), 95 deletions(-)

diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 399cbf534fe..dbd4295303e 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -1138,14 +1138,8 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
 
         case nir_intrinsic_discard_if: {
                 bi_index src = bi_src_index(&instr->src[0]);
-
-                unsigned sz = nir_src_bit_size(instr->src[0]);
-                assert(sz == 16 || sz == 32);
-
-                if (sz == 16)
-                        src = bi_half(src, false);
-
-                bi_discard_f32(b, src, bi_zero(), BI_CMPF_NE);
+                assert(nir_src_bit_size(instr->src[0]) == 1);
+                bi_discard_f32(b, bi_half(src, false), bi_imm_u16(0), BI_CMPF_NE);
                 break;
         }
 
@@ -1223,7 +1217,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
 	case nir_intrinsic_load_front_face:
                 /* r58 == 0 means primitive is front facing */
                 bi_icmp_i32_to(b, dst, bi_register(58), bi_zero(), BI_CMPF_EQ,
-                                BI_RESULT_TYPE_M1);
+                                BI_RESULT_TYPE_I1);
                 break;
 
         case nir_intrinsic_load_point_coord:
@@ -1293,6 +1287,10 @@ bi_alu_src_index(nir_alu_src src, unsigned comps)
 
         unsigned bitsize = nir_src_bit_size(src.src);
 
+        /* TODO: Do we need to do something more clever with 1-bit bools? */
+        if (bitsize == 1)
+                bitsize = 16;
+
         /* the bi_index carries the 32-bit (word) offset separate from the
          * subword swizzle, first handle the offset */
 
@@ -1344,33 +1342,6 @@ bi_nir_round(nir_op op)
         }
 }
 
-static enum bi_cmpf
-bi_cmpf_nir(nir_op op)
-{
-        switch (op) {
-        case nir_op_flt32:
-        case nir_op_ilt32:
-        case nir_op_ult32:
-                return BI_CMPF_LT;
-
-        case nir_op_fge32:
-        case nir_op_ige32:
-        case nir_op_uge32:
-                return BI_CMPF_GE;
-
-        case nir_op_feq32:
-        case nir_op_ieq32:
-                return BI_CMPF_EQ;
-
-        case nir_op_fneu32:
-        case nir_op_ine32:
-                return BI_CMPF_NE;
-
-        default:
-                unreachable("Invalid compare");
-        }
-}
-
 /* Convenience for lowered transcendentals */
 
 static bi_index
@@ -1523,6 +1494,64 @@ bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos)
         bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx, BI_ROUND_NONE);
 }
 
+static bi_instr *
+bi_emit_alu_bool(bi_builder *b, unsigned sz, nir_op op,
+      bi_index dst, bi_index s0, bi_index s1, bi_index s2)
+{
+        /* Handle 1-bit bools as zero/nonzero rather than specifically 0/1 or 0/~0.
+         * This will give the optimizer flexibility. */
+        if (sz == 1) sz = 16;
+        bi_index f = bi_zero();
+        bi_index t = bi_imm_uintN(0x1, sz);
+
+        switch (op) {
+        case nir_op_feq:
+                return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_EQ, BI_RESULT_TYPE_I1);
+        case nir_op_flt:
+                return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1);
+        case nir_op_fge:
+                return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1);
+        case nir_op_fneu:
+                return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_NE, BI_RESULT_TYPE_I1);
+
+        case nir_op_ieq:
+                return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_EQ, BI_RESULT_TYPE_I1);
+        case nir_op_ine:
+                return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_NE, BI_RESULT_TYPE_I1);
+        case nir_op_ilt:
+                return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1);
+        case nir_op_ige:
+                return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1);
+        case nir_op_ult:
+                return bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1);
+        case nir_op_uge:
+                return bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1);
+
+        case nir_op_iand:
+                return bi_lshift_and_to(b, sz, dst, s0, s1, bi_imm_u8(0));
+        case nir_op_ior:
+                return bi_lshift_or_to(b, sz, dst, s0, s1, bi_imm_u8(0));
+        case nir_op_ixor:
+                return bi_lshift_xor_to(b, sz, dst, s0, s1, bi_imm_u8(0));
+        case nir_op_inot:
+                return bi_lshift_xor_to(b, sz, dst, s0, t, bi_imm_u8(0));
+
+        case nir_op_f2b1:
+                return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ);
+        case nir_op_i2b1:
+                return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ);
+        case nir_op_b2b1:
+                return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ);
+
+        case nir_op_bcsel:
+                return bi_csel_to(b, nir_type_int, sz, dst, s0, f, s1, s2, BI_CMPF_NE);
+
+        default:
+                fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[op].name);
+                unreachable("Unhandled boolean ALU instruction");
+        }
+}
+
 static void
 bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
 {
@@ -1531,6 +1560,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
         unsigned sz = nir_dest_bit_size(instr->dest.dest);
         unsigned comps = nir_dest_num_components(instr->dest.dest);
         unsigned src_sz = srcs > 0 ? nir_src_bit_size(instr->src[0].src) : 0;
+        unsigned src1_sz = srcs > 1 ? nir_src_bit_size(instr->src[1].src) : 0;
 
         /* Indicate scalarness */
         if ((sz == 1 || sz == 16) && comps == 1)
@@ -1604,6 +1634,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                         comps > 3 ? instr->src[0].swizzle[3] : 0,
                 };
 
+                if (sz == 1) sz = 16;
                 bi_make_vec_to(b, dst, unoffset_srcs, channels, comps, sz);
                 return;
         }
@@ -1656,6 +1687,11 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
         bi_index s1 = srcs > 1 ? bi_alu_src_index(instr->src[1], comps) : bi_null();
         bi_index s2 = srcs > 2 ? bi_alu_src_index(instr->src[2], comps) : bi_null();
 
+        if (sz == 1) {
+                bi_emit_alu_bool(b, src_sz, instr->op, dst, s0, s1, s2);
+                return;
+        }
+
         switch (instr->op) {
         case nir_op_ffma:
                 bi_fma_to(b, sz, dst, s0, s1, s2, BI_ROUND_NONE);
@@ -1727,13 +1763,12 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                 break;
         }
 
-        case nir_op_b8csel:
-        case nir_op_b16csel:
-        case nir_op_b32csel:
-                if (sz == 8)
+        case nir_op_bcsel:
+                if (src1_sz == 8)
                         bi_mux_v4i8_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO);
                 else
-                        bi_csel_to(b, nir_type_int, sz, dst, s0, bi_zero(), s1, s2, BI_CMPF_NE);
+                        bi_csel_to(b, nir_type_int, src1_sz,
+                                        dst, s0, bi_zero(), s1, s2, BI_CMPF_NE);
                 break;
 
         case nir_op_ishl:
@@ -1747,42 +1782,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                 bi_arshift_to(b, sz, dst, s0, bi_null(), bi_byte(s1, 0));
                 break;
 
-        case nir_op_flt32:
-        case nir_op_fge32:
-        case nir_op_feq32:
-        case nir_op_fneu32:
-                bi_fcmp_to(b, sz, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                BI_RESULT_TYPE_M1);
-                break;
-
-        case nir_op_ieq32:
-        case nir_op_ine32:
-                if (sz == 32) {
-                        bi_icmp_i32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                } else if (sz == 16) {
-                        bi_icmp_v2i16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                } else {
-                        bi_icmp_v4i8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                }
-                break;
-
-        case nir_op_ilt32:
-        case nir_op_ige32:
-                if (sz == 32) {
-                        bi_icmp_s32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                } else if (sz == 16) {
-                        bi_icmp_v2s16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                } else {
-                        bi_icmp_v4s8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                }
-                break;
-
         case nir_op_imin:
         case nir_op_umin:
                 bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst,
@@ -1795,20 +1794,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                                 s0, s1, s0, s1, BI_CMPF_GT);
                 break;
 
-        case nir_op_ult32:
-        case nir_op_uge32:
-                if (sz == 32) {
-                        bi_icmp_u32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                } else if (sz == 16) {
-                        bi_icmp_v2u16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                } else {
-                        bi_icmp_v4u8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                }
-                break;
-
         case nir_op_fddx:
         case nir_op_fddy: {
                 bi_index lane1 = bi_lshift_and_i32(b,
@@ -1946,6 +1931,11 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                                 BI_CMPF_NE);
                 break;
 
+        case nir_op_b2b32:
+                bi_csel_to(b, nir_type_int, sz, dst, s0, bi_zero(),
+                                bi_imm_u32(~0), bi_zero(), BI_CMPF_NE);
+                break;
+
         case nir_op_b2i8:
         case nir_op_b2i16:
         case nir_op_b2i32:
@@ -2965,7 +2955,6 @@ bi_optimize_nir(nir_shader *nir, bool is_blend)
                 NIR_PASS(progress, nir, nir_opt_cse);
         }
 
-        NIR_PASS(progress, nir, nir_lower_bool_to_int32);
         NIR_PASS(progress, nir, bifrost_nir_lower_algebraic_late);
         NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
 
diff --git a/src/panfrost/bifrost/bifrost_nir_algebraic.py b/src/panfrost/bifrost/bifrost_nir_algebraic.py
index 955d914c82b..cad2ffb5021 100644
--- a/src/panfrost/bifrost/bifrost_nir_algebraic.py
+++ b/src/panfrost/bifrost/bifrost_nir_algebraic.py
@@ -38,9 +38,6 @@ SPECIAL = ['fexp2', 'flog2', 'fsin', 'fcos']
 for op in SPECIAL:
         algebraic_late += [((op + '@16', a), ('f2f16', (op, ('f2f32', a))))]
 
-algebraic_late += [(('f2b32', a), ('fneu32', a, 0.0)),
-             (('i2b32', a), ('ine32', a, 0))]
-
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('-p', '--import-path', required=True)