[Mesa-dev] [PATCH 1/2] nir: Add inverted bitwise ops

Thu Apr 25 22:37:43 UTC 2019

In addition to the familiar iand/ior/ixor, some architectures feature
destination-inverted versions inand/inor/inxor. Certain
architectures also have source-inverted forms, dubbed iandnot/iornot
here. Midgard has all of these opcodes natively. Many arches have
comparable features to implement some/all of the above. Paired with De
Morgan's Laws, these opcodes allow anything of the form
"~? (~?a [&|] ~?b)" to complete in one instruction.

This can be used to simplify some backend-specific code on affected
architectures, e.g. 8eb36c91 ("intel/fs: Emit logical-not of operands on
Gen8+").

Signed-off-by: Alyssa Rosenzweig <alyssa at rosenzweig.io>
Cc: Ian Romanick <ian.d.romanick at intel.com>
Cc: Kenneth Graunke <kenneth at whitecape.org>
---
 src/compiler/nir/nir.h                |  4 ++++
 src/compiler/nir/nir_opcodes.py       | 18 ++++++++++++++++++
 src/compiler/nir/nir_opt_algebraic.py | 12 ++++++++++++
 3 files changed, 34 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index e878a63409d..3e01ec2cc06 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2318,6 +2318,10 @@ typedef struct nir_shader_compiler_options {
    bool lower_hadd;
    bool lower_add_sat;
 
+   /* Set if inand/inor/inxor and iandnot/iornot supported respectively */
+   bool bitwise_dest_invertable;
+   bool bitwise_src_invertable;
+
    /**
     * Should nir_lower_io() create load_interpolated_input intrinsics?
     *
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index d35d820aa5b..f9d92afb53e 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -690,6 +690,24 @@ binop("iand", tuint, commutative + associative, "src0 & src1")
 binop("ior", tuint, commutative + associative, "src0 | src1")
 binop("ixor", tuint, commutative + associative, "src0 ^ src1")
 
+# inverted bitwise logic operators
+#
+# These variants of the above include bitwise NOTs either on the result of the
+# whole expression or on the latter operand. On some hardware (e.g. Midgard),
+# these are native ops. On other hardware (e.g. Intel Gen8+), these can be
+# implemented as modifiers of the standard three. Along with appropriate
+# algebraic passes, these should permit any permutation of inverses on AND/OR
+# to execute in a single cycle. For example, ~(a & ~b) = ~(~(~a | ~(~b))) = ~a
+# | b = b | ~a = iornot(b, a).
+
+binop("inand", tuint, commutative, "~(src0 & src1)")
+binop("inor", tuint, commutative, "~(src0 | src1)")
+binop("inxor", tuint, commutative, "~(src0 ^ src1)")
+binop("iandnot", tuint, "", "src0 & (~src1)")
+binop("iornot", tuint, "", "src0 | (~src1)")
+
+
+
 
 # floating point logic operators
 #
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index dad0545594f..6cb3e8cb950 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1052,6 +1052,18 @@ late_optimizations = [
    (('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),
 
    (('bcsel', a, 0, ('b2f32', ('inot', 'b at bool'))), ('b2f32', ('inot', ('ior', a, b)))),
+
+   # We don't want to deal with inverted forms, so run this late. Any
+   # combination of inverts on flags or output should result in a single
+   # instruction if these are supported; cases not explicitly handled would
+   # have been simplified via De Morgan's Law
+   (('inot', ('iand', a, b)), ('inand', a, b), 'options->bitwise_dest_invertable'),
+   (('inot', ('ior', a, b)), ('inor', a, b), 'options->bitwise_dest_invertable'),
+   (('inot', ('ixor', a, b)), ('inxor', a, b), 'options->bitwise_dest_invertable'),
+   (('iand', ('inot', a), b), ('iandnot', b, a), 'options->bitwise_src_invertable'),
+   (('iand', a, ('inot', b)), ('iandnot', a, b), 'options->bitwise_src_invertable'),
+   (('ior', a, ('inot', b)), ('iornot', a, b), 'options->bitwise_src_invertable'),
+   (('ior', ('inot', a), b), ('iornot', b, a), 'options->bitwise_src_invertable'),
 ]
 
 print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render())
-- 
2.20.1