Mesa (master): pan/bi: Switch to 1-bit bools
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed May 5 02:45:05 UTC 2021
Module: Mesa
Branch: master
Commit: 2db8048aaae7287061d940bd27e19ca282ff4375
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2db8048aaae7287061d940bd27e19ca282ff4375
Author: Alyssa Rosenzweig <alyssa at collabora.com>
Date: Mon May 3 19:00:35 2021 -0400
pan/bi: Switch to 1-bit bools
In prep for FP16.
Signed-off-by: Alyssa Rosenzweig <alyssa at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10627>
---
src/panfrost/bifrost/bifrost_compile.c | 173 ++++++++++++--------------
src/panfrost/bifrost/bifrost_nir_algebraic.py | 3 -
2 files changed, 81 insertions(+), 95 deletions(-)
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 399cbf534fe..dbd4295303e 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -1138,14 +1138,8 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
case nir_intrinsic_discard_if: {
bi_index src = bi_src_index(&instr->src[0]);
-
- unsigned sz = nir_src_bit_size(instr->src[0]);
- assert(sz == 16 || sz == 32);
-
- if (sz == 16)
- src = bi_half(src, false);
-
- bi_discard_f32(b, src, bi_zero(), BI_CMPF_NE);
+ assert(nir_src_bit_size(instr->src[0]) == 1);
+ bi_discard_f32(b, bi_half(src, false), bi_imm_u16(0), BI_CMPF_NE);
break;
}
@@ -1223,7 +1217,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
case nir_intrinsic_load_front_face:
/* r58 == 0 means primitive is front facing */
bi_icmp_i32_to(b, dst, bi_register(58), bi_zero(), BI_CMPF_EQ,
- BI_RESULT_TYPE_M1);
+ BI_RESULT_TYPE_I1);
break;
case nir_intrinsic_load_point_coord:
@@ -1293,6 +1287,10 @@ bi_alu_src_index(nir_alu_src src, unsigned comps)
unsigned bitsize = nir_src_bit_size(src.src);
+ /* TODO: Do we need to do something more clever with 1-bit bools? */
+ if (bitsize == 1)
+ bitsize = 16;
+
/* the bi_index carries the 32-bit (word) offset separate from the
* subword swizzle, first handle the offset */
@@ -1344,33 +1342,6 @@ bi_nir_round(nir_op op)
}
}
-static enum bi_cmpf
-bi_cmpf_nir(nir_op op)
-{
- switch (op) {
- case nir_op_flt32:
- case nir_op_ilt32:
- case nir_op_ult32:
- return BI_CMPF_LT;
-
- case nir_op_fge32:
- case nir_op_ige32:
- case nir_op_uge32:
- return BI_CMPF_GE;
-
- case nir_op_feq32:
- case nir_op_ieq32:
- return BI_CMPF_EQ;
-
- case nir_op_fneu32:
- case nir_op_ine32:
- return BI_CMPF_NE;
-
- default:
- unreachable("Invalid compare");
- }
-}
-
/* Convenience for lowered transcendentals */
static bi_index
@@ -1523,6 +1494,64 @@ bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos)
bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx, BI_ROUND_NONE);
}
+static bi_instr *
+bi_emit_alu_bool(bi_builder *b, unsigned sz, nir_op op,
+ bi_index dst, bi_index s0, bi_index s1, bi_index s2)
+{
+ /* Emit an ALU op whose destination is a 1-bit bool; sz is the bit size of source 0.
+ * 1-bit bools are handled as zero/nonzero, not specifically 0/1 or 0/~0, for optimizer flexibility. */
+ if (sz == 1) sz = 16; /* 1-bit sources are operated on at 16 bits */
+ bi_index f = bi_zero(); /* constant written for "false" */
+ bi_index t = bi_imm_uintN(0x1, sz); /* constant written for "true" */
+
+ switch (op) { /* every handled case returns the instruction built for it */
+ case nir_op_feq:
+ return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_EQ, BI_RESULT_TYPE_I1);
+ case nir_op_flt:
+ return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1);
+ case nir_op_fge:
+ return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1);
+ case nir_op_fneu:
+ return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_NE, BI_RESULT_TYPE_I1);
+
+ case nir_op_ieq: /* signed and unsigned compares differ only in the nir_type passed */
+ return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_EQ, BI_RESULT_TYPE_I1);
+ case nir_op_ine:
+ return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_NE, BI_RESULT_TYPE_I1);
+ case nir_op_ilt:
+ return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1);
+ case nir_op_ige:
+ return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1);
+ case nir_op_ult:
+ return bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1);
+ case nir_op_uge:
+ return bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1);
+
+ case nir_op_iand: /* logic ops use the lshift_* builders with a shift operand of 0 */
+ return bi_lshift_and_to(b, sz, dst, s0, s1, bi_imm_u8(0));
+ case nir_op_ior:
+ return bi_lshift_or_to(b, sz, dst, s0, s1, bi_imm_u8(0));
+ case nir_op_ixor:
+ return bi_lshift_xor_to(b, sz, dst, s0, s1, bi_imm_u8(0));
+ case nir_op_inot: /* NOTE(review): XOR with 1 only inverts canonical 0/1 inputs - confirm bools are always 0/1 here */
+ return bi_lshift_xor_to(b, sz, dst, s0, t, bi_imm_u8(0));
+
+ case nir_op_f2b1: /* NOTE(review): integer compare against 0, so -0.0 (nonzero bits) is not "equal" - confirm intended */
+ return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ);
+ case nir_op_i2b1: /* all three conversions emit the same csel: f when s0 == 0, else t */
+ return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ);
+ case nir_op_b2b1:
+ return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ);
+
+ case nir_op_bcsel: /* bool-typed select: s1 when s0 != 0, else s2 */
+ return bi_csel_to(b, nir_type_int, sz, dst, s0, f, s1, s2, BI_CMPF_NE);
+
+ default: /* name the unexpected opcode on stderr before hitting unreachable() */
+ fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[op].name);
+ unreachable("Unhandled boolean ALU instruction");
+ }
+}
+
static void
bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
{
@@ -1531,6 +1560,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
unsigned sz = nir_dest_bit_size(instr->dest.dest);
unsigned comps = nir_dest_num_components(instr->dest.dest);
unsigned src_sz = srcs > 0 ? nir_src_bit_size(instr->src[0].src) : 0;
+ unsigned src1_sz = srcs > 1 ? nir_src_bit_size(instr->src[1].src) : 0;
/* Indicate scalarness */
if ((sz == 1 || sz == 16) && comps == 1)
@@ -1604,6 +1634,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
comps > 3 ? instr->src[0].swizzle[3] : 0,
};
+ if (sz == 1) sz = 16;
bi_make_vec_to(b, dst, unoffset_srcs, channels, comps, sz);
return;
}
@@ -1656,6 +1687,11 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
bi_index s1 = srcs > 1 ? bi_alu_src_index(instr->src[1], comps) : bi_null();
bi_index s2 = srcs > 2 ? bi_alu_src_index(instr->src[2], comps) : bi_null();
+ if (sz == 1) {
+ bi_emit_alu_bool(b, src_sz, instr->op, dst, s0, s1, s2);
+ return;
+ }
+
switch (instr->op) {
case nir_op_ffma:
bi_fma_to(b, sz, dst, s0, s1, s2, BI_ROUND_NONE);
@@ -1727,13 +1763,12 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
break;
}
- case nir_op_b8csel:
- case nir_op_b16csel:
- case nir_op_b32csel:
- if (sz == 8)
+ case nir_op_bcsel:
+ if (src1_sz == 8)
bi_mux_v4i8_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO);
else
- bi_csel_to(b, nir_type_int, sz, dst, s0, bi_zero(), s1, s2, BI_CMPF_NE);
+ bi_csel_to(b, nir_type_int, src1_sz,
+ dst, s0, bi_zero(), s1, s2, BI_CMPF_NE);
break;
case nir_op_ishl:
@@ -1747,42 +1782,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
bi_arshift_to(b, sz, dst, s0, bi_null(), bi_byte(s1, 0));
break;
- case nir_op_flt32:
- case nir_op_fge32:
- case nir_op_feq32:
- case nir_op_fneu32:
- bi_fcmp_to(b, sz, dst, s0, s1, bi_cmpf_nir(instr->op),
- BI_RESULT_TYPE_M1);
- break;
-
- case nir_op_ieq32:
- case nir_op_ine32:
- if (sz == 32) {
- bi_icmp_i32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
- BI_RESULT_TYPE_M1);
- } else if (sz == 16) {
- bi_icmp_v2i16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
- BI_RESULT_TYPE_M1);
- } else {
- bi_icmp_v4i8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
- BI_RESULT_TYPE_M1);
- }
- break;
-
- case nir_op_ilt32:
- case nir_op_ige32:
- if (sz == 32) {
- bi_icmp_s32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
- BI_RESULT_TYPE_M1);
- } else if (sz == 16) {
- bi_icmp_v2s16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
- BI_RESULT_TYPE_M1);
- } else {
- bi_icmp_v4s8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
- BI_RESULT_TYPE_M1);
- }
- break;
-
case nir_op_imin:
case nir_op_umin:
bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst,
@@ -1795,20 +1794,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
s0, s1, s0, s1, BI_CMPF_GT);
break;
- case nir_op_ult32:
- case nir_op_uge32:
- if (sz == 32) {
- bi_icmp_u32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
- BI_RESULT_TYPE_M1);
- } else if (sz == 16) {
- bi_icmp_v2u16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
- BI_RESULT_TYPE_M1);
- } else {
- bi_icmp_v4u8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
- BI_RESULT_TYPE_M1);
- }
- break;
-
case nir_op_fddx:
case nir_op_fddy: {
bi_index lane1 = bi_lshift_and_i32(b,
@@ -1946,6 +1931,11 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
BI_CMPF_NE);
break;
+ case nir_op_b2b32:
+ bi_csel_to(b, nir_type_int, sz, dst, s0, bi_zero(),
+ bi_imm_u32(~0), bi_zero(), BI_CMPF_NE);
+ break;
+
case nir_op_b2i8:
case nir_op_b2i16:
case nir_op_b2i32:
@@ -2965,7 +2955,6 @@ bi_optimize_nir(nir_shader *nir, bool is_blend)
NIR_PASS(progress, nir, nir_opt_cse);
}
- NIR_PASS(progress, nir, nir_lower_bool_to_int32);
NIR_PASS(progress, nir, bifrost_nir_lower_algebraic_late);
NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
diff --git a/src/panfrost/bifrost/bifrost_nir_algebraic.py b/src/panfrost/bifrost/bifrost_nir_algebraic.py
index 955d914c82b..cad2ffb5021 100644
--- a/src/panfrost/bifrost/bifrost_nir_algebraic.py
+++ b/src/panfrost/bifrost/bifrost_nir_algebraic.py
@@ -38,9 +38,6 @@ SPECIAL = ['fexp2', 'flog2', 'fsin', 'fcos']
for op in SPECIAL:
algebraic_late += [((op + '@16', a), ('f2f16', (op, ('f2f32', a))))]
-algebraic_late += [(('f2b32', a), ('fneu32', a, 0.0)),
- (('i2b32', a), ('ine32', a, 0))]
-
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--import-path', required=True)
More information about the mesa-commit
mailing list