Mesa (master): nir/algebraic: expand existing 32-bit patterns to all bit sizes using loops

Thu Sep 10 23:50:12 UTC 2020

Module: Mesa
Branch: master
Commit: 26fc5e1f4a844692f2ba2568d6d232881be01e0e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=26fc5e1f4a844692f2ba2568d6d232881be01e0e

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Tue Sep  1 02:20:34 2020 -0400

nir/algebraic: expand existing 32-bit patterns to all bit sizes using loops

Reviewed-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6283>

---

 src/compiler/nir/nir_opt_algebraic.py | 187 ++++++++++++++++++++--------------
 1 file changed, 110 insertions(+), 77 deletions(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 8cc8630d19b..11b0d00de44 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -520,22 +520,29 @@ optimizations.extend([
    (('fmin', ('fmin', a, b), b), ('fmin', a, b)),
    (('umin', ('umin', a, b), b), ('umin', a, b)),
    (('imin', ('imin', a, b), b), ('imin', a, b)),
-   (('iand@32', a, ('inot', ('ishr', a, 31))), ('imax', a, 0)),
-
-   # Simplify logic to detect sign of an integer.
-   (('ieq', ('iand', 'a@32', 0x80000000), 0x00000000), ('ige', a, 0)),
-   (('ine', ('iand', 'a@32', 0x80000000), 0x80000000), ('ige', a, 0)),
-   (('ine', ('iand', 'a@32', 0x80000000), 0x00000000), ('ilt', a, 0)),
-   (('ieq', ('iand', 'a@32', 0x80000000), 0x80000000), ('ilt', a, 0)),
-   (('ine', ('ushr', 'a@32', 31), 0), ('ilt', a, 0)),
-   (('ieq', ('ushr', 'a@32', 31), 0), ('ige', a, 0)),
-   (('ieq', ('ushr', 'a@32', 31), 1), ('ilt', a, 0)),
-   (('ine', ('ushr', 'a@32', 31), 1), ('ige', a, 0)),
-   (('ine', ('ishr', 'a@32', 31), 0), ('ilt', a, 0)),
-   (('ieq', ('ishr', 'a@32', 31), 0), ('ige', a, 0)),
-   (('ieq', ('ishr', 'a@32', 31), -1), ('ilt', a, 0)),
-   (('ine', ('ishr', 'a@32', 31), -1), ('ige', a, 0)),
+])
+
+# Integer sizes
+for s in [8, 16, 32, 64]:
+    optimizations.extend([
+       (('iand@{}'.format(s), a, ('inot', ('ishr', a, s - 1))), ('imax', a, 0)),
+
+       # Simplify logic to detect sign of an integer.
+       (('ieq', ('iand', 'a@{}'.format(s), 1 << (s - 1)), 0),            ('ige', a, 0)),
+       (('ine', ('iand', 'a@{}'.format(s), 1 << (s - 1)), 1 << (s - 1)), ('ige', a, 0)),
+       (('ine', ('iand', 'a@{}'.format(s), 1 << (s - 1)), 0),            ('ilt', a, 0)),
+       (('ieq', ('iand', 'a@{}'.format(s), 1 << (s - 1)), 1 << (s - 1)), ('ilt', a, 0)),
+       (('ine', ('ushr', 'a@{}'.format(s), s - 1), 0), ('ilt', a, 0)),
+       (('ieq', ('ushr', 'a@{}'.format(s), s - 1), 0), ('ige', a, 0)),
+       (('ieq', ('ushr', 'a@{}'.format(s), s - 1), 1), ('ilt', a, 0)),
+       (('ine', ('ushr', 'a@{}'.format(s), s - 1), 1), ('ige', a, 0)),
+       (('ine', ('ishr', 'a@{}'.format(s), s - 1), 0), ('ilt', a, 0)),
+       (('ieq', ('ishr', 'a@{}'.format(s), s - 1), 0), ('ige', a, 0)),
+       (('ieq', ('ishr', 'a@{}'.format(s), s - 1), -1), ('ilt', a, 0)),
+       (('ine', ('ishr', 'a@{}'.format(s), s - 1), -1), ('ige', a, 0)),
+    ])
 
+optimizations.extend([
    (('fmin', a, ('fneg', a)), ('fneg', ('fabs', a))),
    (('imin', a, ('ineg', a)), ('ineg', ('iabs', a))),
    (('fmin', a, ('fneg', ('fabs', a))), ('fneg', ('fabs', a))),
@@ -620,43 +627,93 @@ optimizations.extend([
    (('iand', ('ult(is_used_once)', a, c), ('ult', b, c)), ('ult', ('umax', a, b), c)),
    (('iand', ('uge(is_used_once)', a, b), ('uge', a, c)), ('uge', a, ('umax', b, c))),
    (('iand', ('uge(is_used_once)', a, c), ('uge', b, c)), ('uge', ('umin', a, b), c)),
+])
+
+# Float sizes
+for s in [16, 32, 64]:
+    fp_one = {16: 0x3c00, 32: 0x3f800000, 64: 0x3ff0000000000000}[s]
+
+    optimizations.extend([
+       # These derive from the previous patterns with the application of b < 0 <=>
+       # 0 < -b.  The transformation should be applied if either comparison is
+       # used once as this ensures that the number of comparisons will not
+       # increase.  The sources to the ior and iand are not symmetric, so the
+       # rules have to be duplicated to get this behavior.
+       (('~ior', ('flt(is_used_once)', 0.0, 'a@{}'.format(s)), ('flt', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmax', a, ('fneg', b)))),
+       (('~ior', ('flt', 0.0, 'a@{}'.format(s)), ('flt(is_used_once)', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmax', a, ('fneg', b)))),
+       (('~ior', ('fge(is_used_once)', 0.0, 'a@{}'.format(s)), ('fge', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmin', a, ('fneg', b)))),
+       (('~ior', ('fge', 0.0, 'a@{}'.format(s)), ('fge(is_used_once)', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmin', a, ('fneg', b)))),
+       (('~iand', ('flt(is_used_once)', 0.0, 'a@{}'.format(s)), ('flt', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmin', a, ('fneg', b)))),
+       (('~iand', ('flt', 0.0, 'a@{}'.format(s)), ('flt(is_used_once)', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmin', a, ('fneg', b)))),
+       (('~iand', ('fge(is_used_once)', 0.0, 'a@{}'.format(s)), ('fge', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))),
+       (('~iand', ('fge', 0.0, 'a@{}'.format(s)), ('fge(is_used_once)', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))),
+
+       # The (i2f32, ...) part is an open-coded fsign.  When that is combined with
+       # the bcsel, it's basically copysign(1.0, a).  There is no copysign in NIR,
+       # so emit an open-coded version of that.
+       (('bcsel@{}'.format(s), ('feq', a, 0.0), 1.0, ('i2f{}'.format(s), ('iadd', ('b2i{}'.format(s), ('flt', 0.0, 'a@{}'.format(s))), ('ineg', ('b2i{}'.format(s), ('flt', 'a@{}'.format(s), 0.0)))))),
+        ('ior', fp_one, ('iand', a, 1 << (s - 1)))),
+
+       (('bcsel', a, ('b2f(is_used_once)', 'b@{}'.format(s)), ('b2f', 'c@{}'.format(s))), ('b2f', ('bcsel', a, b, c))),
+
+       # The C spec says, "If the value of the integral part cannot be represented
+       # by the integer type, the behavior is undefined."  "Undefined" can mean
+       # "the conversion doesn't happen at all."
+       (('~i2f{}'.format(s), ('f2i', 'a@{}'.format(s))), ('ftrunc', a)),
+
+       # Ironically, mark these as imprecise because removing the conversions may
+       # preserve more precision than doing the conversions (e.g.,
+       # uint(float(0x81818181u)) == 0x81818200).
+       (('~f2i{}'.format(s), ('i2f', 'a@{}'.format(s))), a),
+       (('~f2i{}'.format(s), ('u2f', 'a@{}'.format(s))), a),
+       (('~f2u{}'.format(s), ('i2f', 'a@{}'.format(s))), a),
+       (('~f2u{}'.format(s), ('u2f', 'a@{}'.format(s))), a),
+
+       (('fadd', ('b2f{}'.format(s), ('flt', 0.0, 'a@{}'.format(s))), ('fneg', ('b2f{}'.format(s), ('flt', 'a@{}'.format(s), 0.0)))), ('fsign', a), '!options->lower_fsign'),
+       (('iadd', ('b2i{}'.format(s), ('flt', 0, 'a@{}'.format(s))), ('ineg', ('b2i{}'.format(s), ('flt', 'a@{}'.format(s), 0)))), ('f2i{}'.format(s), ('fsign', a)), '!options->lower_fsign'),
+    ])
 
-   # These derive from the previous patterns with the application of b < 0 <=>
-   # 0 < -b.  The transformation should be applied if either comparison is
-   # used once as this ensures that the number of comparisons will not
-   # increase.  The sources to the ior and iand are not symmetric, so the
-   # rules have to be duplicated to get this behavior.
-   (('~ior', ('flt(is_used_once)', 0.0, 'a@32'), ('flt', 'b@32', 0.0)), ('flt', 0.0, ('fmax', a, ('fneg', b)))),
-   (('~ior', ('flt', 0.0, 'a@32'), ('flt(is_used_once)', 'b@32', 0.0)), ('flt', 0.0, ('fmax', a, ('fneg', b)))),
-   (('~ior', ('fge(is_used_once)', 0.0, 'a@32'), ('fge', 'b@32', 0.0)), ('fge', 0.0, ('fmin', a, ('fneg', b)))),
-   (('~ior', ('fge', 0.0, 'a@32'), ('fge(is_used_once)', 'b@32', 0.0)), ('fge', 0.0, ('fmin', a, ('fneg', b)))),
-   (('~iand', ('flt(is_used_once)', 0.0, 'a@32'), ('flt', 'b@32', 0.0)), ('flt', 0.0, ('fmin', a, ('fneg', b)))),
-   (('~iand', ('flt', 0.0, 'a@32'), ('flt(is_used_once)', 'b@32', 0.0)), ('flt', 0.0, ('fmin', a, ('fneg', b)))),
-   (('~iand', ('fge(is_used_once)', 0.0, 'a@32'), ('fge', 'b@32', 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))),
-   (('~iand', ('fge', 0.0, 'a@32'), ('fge(is_used_once)', 'b@32', 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))),
+    # Conversions from a lower bit size to a higher bit size and back can always be removed
+    for h in [16, 32, 64]:
+        if s < h:
+            optimizations.extend([
+               (('f2f{}'.format(s), ('f2f{}'.format(h), 'a@{}'.format(s))), a),
+               (('i2i{}'.format(s), ('i2i{}'.format(h), 'a@{}'.format(s))), a),
+               (('u2u{}'.format(s), ('u2u{}'.format(h), 'a@{}'.format(s))), a),
+               (('f2f{}'.format(s), ('b2f{}'.format(h), 'a@1')), ('b2f{}'.format(s), a)),
+               (('i2i{}'.format(s), ('b2i{}'.format(h), 'a@1')), ('b2i{}'.format(s), a)),
+               (('u2u{}'.format(s), ('b2i{}'.format(h), 'a@1')), ('b2i{}'.format(s), a)),
+            ])
+
+# Integer sizes
+for s in [8, 16, 32, 64]:
+    optimizations.extend([
+       (('iand', ('ieq', 'a@{}'.format(s), 0), ('ieq', 'b@{}'.format(s), 0)), ('ieq', ('umax', a, b), 0)),
+       (('ior',  ('ieq', 'a@{}'.format(s), 0), ('ieq', 'b@{}'.format(s), 0)), ('ieq', ('umin', a, b), 0)),
+       (('iand', ('ine', 'a@{}'.format(s), 0), ('ine', 'b@{}'.format(s), 0)), ('ine', ('umin', a, b), 0)),
+       (('ior',  ('ine', 'a@{}'.format(s), 0), ('ine', 'b@{}'.format(s), 0)), ('ine', ('umax', a, b), 0)),
+
+       # True/False are ~0 and 0 in NIR.  b2i of True is 1, and -1 is ~0 (True).
+       (('ineg', ('b2i{}'.format(s), 'a@{}'.format(s))), a),
+
+       # SM5 32-bit shifts are defined to use the 5 least significant bits (or 4 bits for 16 bits)
+       (('ishl', 'a@{}'.format(s), ('iand', s - 1, b)), ('ishl', a, b)),
+       (('ishr', 'a@{}'.format(s), ('iand', s - 1, b)), ('ishr', a, b)),
+       (('ushr', 'a@{}'.format(s), ('iand', s - 1, b)), ('ushr', a, b)),
+    ])
 
+optimizations.extend([
    # Common pattern like 'if (i == 0 || i == 1 || ...)'
    (('ior', ('ieq', a, 0), ('ieq', a, 1)), ('uge', 1, a)),
    (('ior', ('uge', 1, a), ('ieq', a, 2)), ('uge', 2, a)),
    (('ior', ('uge', 2, a), ('ieq', a, 3)), ('uge', 3, a)),
 
-   # The (i2f32, ...) part is an open-coded fsign.  When that is combined with
-   # the bcsel, it's basically copysign(1.0, a).  There is no copysign in NIR,
-   # so emit an open-coded version of that.
-   (('bcsel@32', ('feq', a, 0.0), 1.0, ('i2f32', ('iadd', ('b2i32', ('flt', 0.0, 'a@32')), ('ineg', ('b2i32', ('flt', 'a@32', 0.0)))))),
-    ('ior', 0x3f800000, ('iand', a, 0x80000000))),
-
    (('ior', a, ('ieq', a, False)), True),
    (('ior', a, ('inot', a)), -1),
 
    (('ine', ('ineg', ('b2i', 'a@1')), ('ineg', ('b2i', 'b@1'))), ('ine', a, b)),
    (('b2i', ('ine', 'a@1', 'b@1')), ('b2i', ('ixor', a, b))),
 
-   (('iand', ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('umax', a, b), 0)),
-   (('ior',  ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('umin', a, b), 0)),
-   (('iand', ('ine', 'a@32', 0), ('ine', 'b@32', 0)), ('ine', ('umin', a, b), 0)),
-   (('ior',  ('ine', 'a@32', 0), ('ine', 'b@32', 0)), ('ine', ('umax', a, b), 0)),
-
    # This pattern occurs coutresy of __flt64_nonnan in the soft-fp64 code.
    # The first part of the iand comes from the !__feq64_nonnan.
    #
@@ -780,8 +837,6 @@ optimizations.extend([
    (('fsat', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('b2f', ('ior', a, b))),
    (('iand', 'a@bool16', 1.0), ('b2f', a)),
    (('iand', 'a@bool32', 1.0), ('b2f', a)),
-   # True/False are ~0 and 0 in NIR.  b2i of True is 1, and -1 is ~0 (True).
-   (('ineg', ('b2i32', 'a@32')), a),
    (('flt', ('fneg', ('b2f', 'a@1')), 0), a), # Generated by TGSI KILL_IF.
    # Comparison with the same args.  Note that these are not done for
    # the float versions because NaN always returns false on float
@@ -880,7 +935,6 @@ optimizations.extend([
    (('bcsel', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a)))),
    (('bcsel', True, b, c), b),
    (('bcsel', False, b, c), c),
-   (('bcsel', a, ('b2f(is_used_once)', 'b@32'), ('b2f', 'c@32')), ('b2f', ('bcsel', a, b, c))),
 
    (('bcsel', a, b, b), b),
    (('~fcsel', a, b, b), b),
@@ -901,11 +955,6 @@ optimizations.extend([
    (('iand', ('ineg', ('b2i', a)), 1.0), ('b2f', a)),
    (('iand', ('ineg', ('b2i', a)), 1),   ('b2i', a)),
 
-   # SM5 32-bit shifts are defined to use the 5 least significant bits
-   (('ishl', 'a@32', ('iand', 31, b)), ('ishl', a, b)),
-   (('ishr', 'a@32', ('iand', 31, b)), ('ishr', a, b)),
-   (('ushr', 'a@32', ('iand', 31, b)), ('ushr', a, b)),
-
    # Conversions
    (('i2b16', ('b2i', 'a@16')), a),
    (('i2b32', ('b2i', 'a@32')), a),
@@ -915,31 +964,12 @@ optimizations.extend([
    (('i2b', ('iabs', a)), ('i2b', a)),
    (('inot', ('f2b1', a)), ('feq', a, 0.0)),
 
-   # The C spec says, "If the value of the integral part cannot be represented
-   # by the integer type, the behavior is undefined."  "Undefined" can mean
-   # "the conversion doesn't happen at all."
-   (('~i2f32', ('f2i32', 'a@32')), ('ftrunc', a)),
-
-   # Ironically, mark these as imprecise because removing the conversions may
-   # preserve more precision than doing the conversions (e.g.,
-   # uint(float(0x81818181u)) == 0x81818200).
-   (('~f2i32', ('i2f', 'a@32')), a),
-   (('~f2i32', ('u2f', 'a@32')), a),
-   (('~f2u32', ('i2f', 'a@32')), a),
-   (('~f2u32', ('u2f', 'a@32')), a),
-
    # Conversions from 16 bits to 32 bits and back can always be removed
-   (('f2f16', ('f2f32', 'a@16')), a),
    (('f2fmp', ('f2f32', 'a@16')), a),
-   (('i2i16', ('i2i32', 'a@16')), a),
    (('i2imp', ('i2i32', 'a@16')), a),
-   (('u2u16', ('u2u32', 'a@16')), a),
    (('u2ump', ('u2u32', 'a@16')), a),
-   (('f2f16', ('b2f32', 'a@1')), ('b2f16', a)),
    (('f2fmp', ('b2f32', 'a@1')), ('b2f16', a)),
-   (('i2i16', ('b2i32', 'a@1')), ('b2i16', a)),
    (('i2imp', ('b2i32', 'a@1')), ('b2i16', a)),
-   (('u2u16', ('b2i32', 'a@1')), ('b2i16', a)),
    (('u2ump', ('b2i32', 'a@1')), ('b2i16', a)),
    # Conversions to 16 bits would be lossy so they should only be removed if
    # the instruction was generated by the precision lowering pass.
@@ -1436,8 +1466,6 @@ optimizations.extend([
    (('imin', ('imax', a, -1), 1), ('isign', a), '!options->lower_isign'),
    (('imax', ('imin', a, 1), -1), ('isign', a), '!options->lower_isign'),
    (('fsign', a), ('fsub', ('b2f', ('flt', 0.0, a)), ('b2f', ('flt', a, 0.0))), 'options->lower_fsign'),
-   (('fadd', ('b2f32', ('flt', 0.0, 'a@32')), ('fneg', ('b2f32', ('flt', 'a@32', 0.0)))), ('fsign', a), '!options->lower_fsign'),
-   (('iadd', ('b2i32', ('flt', 0, 'a@32')), ('ineg', ('b2i32', ('flt', 'a@32', 0)))), ('f2i32', ('fsign', a)), '!options->lower_fsign'),
 
    # Address/offset calculations:
    # Drivers supporting imul24 should use the nir_lower_amul() pass, this
@@ -1957,9 +1985,6 @@ late_optimizations = [
    (('ior', a, a), a),
    (('iand', a, a), a),
 
-   (('iand', ('ine(is_used_once)', 'a@32', 0), ('ine', 'b@32', 0)), ('ine', ('umin', a, b), 0)),
-   (('ior',  ('ieq(is_used_once)', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('umin', a, b), 0)),
-
    (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),
 
    (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
@@ -1969,10 +1994,6 @@ late_optimizations = [
 
    (('~flrp', ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a)),
 
-   (('~fadd@16', 1.0, ('fmul(is_used_once)', c , ('fadd', b, -1.0 ))), ('fadd', ('fadd', 1.0, ('fneg', c)), ('fmul', b, c)), 'options->lower_flrp16'),
-   (('~fadd@32', 1.0, ('fmul(is_used_once)', c , ('fadd', b, -1.0 ))), ('fadd', ('fadd', 1.0, ('fneg', c)), ('fmul', b, c)), 'options->lower_flrp32'),
-   (('~fadd@64', 1.0, ('fmul(is_used_once)', c , ('fadd', b, -1.0 ))), ('fadd', ('fadd', 1.0, ('fneg', c)), ('fmul', b, c)), 'options->lower_flrp64'),
-
    # A similar operation could apply to any ffma(#a, b, #(-a/2)), but this
    # particular operation is common for expanding values stored in a texture
    # from [0,1] to [-1,1].
@@ -2013,8 +2034,6 @@ late_optimizations = [
    (('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))),
    (('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),
 
-   (('bcsel', a, 0, ('b2f32', ('inot', 'b@bool'))), ('b2f32', ('inot', ('ior', a, b)))),
-
    # Putting this in 'optimizations' interferes with the bcsel(a, op(b, c),
    # op(b, d)) => op(b, bcsel(a, c, d)) transformations.  I do not know why.
    (('bcsel', ('feq', ('fsqrt', 'a(is_not_negative)'), 0.0), intBitsToFloat(0x7f7fffff), ('frsq', a)),
@@ -2081,6 +2100,20 @@ late_optimizations = [
    (('ushr', a, 0), a),
 ]
 
+# Integer sizes
+for s in [8, 16, 32, 64]:
+    late_optimizations.extend([
+        (('iand', ('ine(is_used_once)', 'a@{}'.format(s), 0), ('ine', 'b@{}'.format(s), 0)), ('ine', ('umin', a, b), 0)),
+        (('ior',  ('ieq(is_used_once)', 'a@{}'.format(s), 0), ('ieq', 'b@{}'.format(s), 0)), ('ieq', ('umin', a, b), 0)),
+    ])
+
+# Float sizes
+for s in [16, 32, 64]:
+    late_optimizations.extend([
+       (('~fadd@{}'.format(s), 1.0, ('fmul(is_used_once)', c , ('fadd', b, -1.0 ))), ('fadd', ('fadd', 1.0, ('fneg', c)), ('fmul', b, c)), 'options->lower_flrp{}'.format(s)),
+       (('bcsel', a, 0, ('b2f{}'.format(s), ('inot', 'b@bool'))), ('b2f{}'.format(s), ('inot', ('ior', a, b)))),
+    ])
+
 for op in ['fadd']:
     late_optimizations += [
         (('bcsel', a, (op + '(is_used_once)', b, c), (op, b, d)), (op, b, ('bcsel', a, c, d))),