[Mesa-dev] [PATCH 1/4] nir: introduce new convergent and cross-thread attributes

Mon Jun 5 19:22:54 UTC 2017

These are properties of the instruction that must be respected when
moving it around, in addition to the usual SSA dominance guarantee.
Previously, we only had special handling for fddx and fddy, in a very
ad-hoc way. But with arb_shader_ballot and arb_shader_group_vote, we'll
have to start handling a lot more instructions with similar constraints,
so we want to add a more formal model of what the optimizer can and
cannot do.

Signed-off-by: Connor Abbott <cwabbott0 at gmail.com>
---
 src/compiler/nir/nir.h            | 88 ++++++++++++++++++++++++++++++++++++++-
 src/compiler/nir/nir_opcodes.py   | 32 +++++++++-----
 src/compiler/nir/nir_opcodes_c.py |  2 +
 3 files changed, 110 insertions(+), 12 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 3b827bf..63b1879 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -774,6 +774,25 @@ typedef struct {
     */
    nir_alu_type input_types[4];
 
+   /**
+    * Indicates whether this ALU operation is "cross-thread". An operation is
+    * cross-thread if results in one thread depend on inputs in another thread,
+    * and therefore optimizations cannot change the execution mask when the
+    * operation is called. Examples of cross-thread operations include
+    * screen-space derivatives, the "any" reduction which returns "true" in
+    * all threads if any thread inputs "true", etc.
+    */
+   bool cross_thread;
+
+   /**
+    * Indicates that this ALU operation is "convergent". An operation is
+    * convergent when it must always be called in convergent control flow,
+    * that is, control flow with the same execution mask as when the program
+    * started. If an operation is convergent, it must be cross-thread as well,
+    * since the optimizer must maintain the guarantee.
+    */
+   bool convergent;
+
    nir_op_algebraic_property algebraic_properties;
 } nir_op_info;
 
@@ -985,6 +1004,17 @@ typedef enum {
     * intrinsic are due to the register reads/writes.
     */
    NIR_INTRINSIC_CAN_REORDER = (1 << 1),
+
+   /**
+    * Whether the intrinsic is cross-thread. See the definition in
+    * nir_op_info.
+    */
+   NIR_INTRINSIC_CROSS_THREAD = (1 << 2),
+
+   /**
+    * Whether the intrinsic is convergent. See the definition in nir_op_info.
+    */
+   NIR_INTRINSIC_CONVERGENT = (1 << 3),
 } nir_intrinsic_semantic_flag;
 
 /**
@@ -1076,7 +1106,7 @@ typedef struct {
    unsigned num_indices;
 
    /** indicates the usage of intr->const_index[n] */
-   unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
+   unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; 
 
    /** semantic flags for calls to this intrinsic */
    nir_intrinsic_semantic_flag flags;
@@ -1459,6 +1489,62 @@ NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
                 type, nir_instr_type_parallel_copy)
 
 /*
+ * Helpers to determine if an instruction is cross-thread or convergent. See
+ * the definitions in nir_op_info.
+ */
+static inline bool
+nir_instr_is_convergent(const nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu: {
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      return nir_op_infos[alu->op].convergent;
+   }
+
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      return nir_intrinsic_infos[intrin->intrinsic].flags &
+         NIR_INTRINSIC_CONVERGENT;
+   }
+
+   case nir_instr_type_tex:
+      switch (nir_instr_as_tex(instr)->op) {
+      case nir_texop_tex:
+      case nir_texop_txb:
+      case nir_texop_lod:
+         /* These three take implicit derivatives */
+         return true;
+
+      default:
+         return false;
+      }
+
+   default:
+      return false;
+   }
+}
+
+static inline bool
+nir_instr_is_cross_thread(const nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu: {
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      return nir_op_infos[alu->op].cross_thread;
+   }
+
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      return nir_intrinsic_infos[intrin->intrinsic].flags &
+         (NIR_INTRINSIC_CROSS_THREAD | NIR_INTRINSIC_CONVERGENT);
+   }
+
+   default:
+      return nir_instr_is_convergent(instr);
+   }
+}
+
+/*
  * Control flow
  *
  * Control flow consists of a tree of control flow nodes, which include
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 31b4615..be3ab6d 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -32,7 +32,8 @@ class Opcode(object):
    NOTE: this must be kept in sync with nir_op_info
    """
    def __init__(self, name, output_size, output_type, input_sizes,
-                input_types, algebraic_properties, const_expr):
+                input_types, convergent, cross_thread, algebraic_properties,
+                const_expr):
       """Parameters:
 
       - name is the name of the opcode (prepend nir_op_ for the enum name)
@@ -40,6 +41,7 @@ class Opcode(object):
       - input_types is a list of types
       - algebraic_properties is a space-seperated string, where nir_op_is_ is
         prepended before each entry
+      - convergent and cross_thread are booleans
       - const_expr is an expression or series of statements that computes the
         constant value of the opcode given the constant values of its inputs.
 
@@ -70,6 +72,10 @@ class Opcode(object):
       assert isinstance(input_types, list)
       assert isinstance(input_types[0], str)
       assert isinstance(algebraic_properties, str)
+      assert isinstance(convergent, bool)
+      assert isinstance(cross_thread, bool)
+      if convergent:
+          cross_thread = True
       assert isinstance(const_expr, str)
       assert len(input_sizes) == len(input_types)
       assert 0 <= output_size <= 4
@@ -83,6 +89,8 @@ class Opcode(object):
       self.output_type = output_type
       self.input_sizes = input_sizes
       self.input_types = input_types
+      self.convergent = convergent
+      self.cross_thread = cross_thread
       self.algebraic_properties = algebraic_properties
       self.const_expr = const_expr
 
@@ -105,16 +113,18 @@ associative = "associative "
 opcodes = {}
 
 def opcode(name, output_size, output_type, input_sizes, input_types,
-           algebraic_properties, const_expr):
+           algebraic_properties, const_expr, convergent=False,
+           cross_thread=False):
    assert name not in opcodes
    opcodes[name] = Opcode(name, output_size, output_type, input_sizes,
-                          input_types, algebraic_properties, const_expr)
+                          input_types, convergent, cross_thread,
+                          algebraic_properties, const_expr)
 
 def unop_convert(name, out_type, in_type, const_expr):
    opcode(name, 0, out_type, [0], [in_type], "", const_expr)
 
-def unop(name, ty, const_expr):
-   opcode(name, 0, ty, [0], [ty], "", const_expr)
+def unop(name, ty, const_expr, convergent=False, cross_thread=False):
+   opcode(name, 0, ty, [0], [ty], "", const_expr, convergent, cross_thread)
 
 def unop_horiz(name, output_size, output_type, input_size, input_type,
                const_expr):
@@ -211,12 +221,12 @@ unop("fcos", tfloat, "bit_size == 64 ? cos(src0) : cosf(src0)")
 # Partial derivatives.
 
 
-unop("fddx", tfloat, "0.0") # the derivative of a constant is 0.
-unop("fddy", tfloat, "0.0")
-unop("fddx_fine", tfloat, "0.0")
-unop("fddy_fine", tfloat, "0.0")
-unop("fddx_coarse", tfloat, "0.0")
-unop("fddy_coarse", tfloat, "0.0")
+unop("fddx", tfloat, "0.0", convergent=True) # the derivative of a constant is 0.
+unop("fddy", tfloat, "0.0", convergent=True)
+unop("fddx_fine", tfloat, "0.0", convergent=True)
+unop("fddy_fine", tfloat, "0.0", convergent=True)
+unop("fddx_coarse", tfloat, "0.0", convergent=True)
+unop("fddy_coarse", tfloat, "0.0", convergent=True)
 
 
 # Floating point pack and unpack operations.
diff --git a/src/compiler/nir/nir_opcodes_c.py b/src/compiler/nir/nir_opcodes_c.py
index a1db54f..e470310 100644
--- a/src/compiler/nir/nir_opcodes_c.py
+++ b/src/compiler/nir/nir_opcodes_c.py
@@ -108,6 +108,8 @@ const nir_op_info nir_op_infos[nir_num_opcodes] = {
    .input_types = {
       ${ ", ".join("nir_type_" + type for type in opcode.input_types) }
    },
+   .convergent = ${str(opcode.convergent).lower()},
+   .cross_thread = ${str(opcode.cross_thread).lower()},
    .algebraic_properties =
       ${ "0" if opcode.algebraic_properties == "" else " | ".join(
             "NIR_OP_IS_" + prop.upper() for prop in
-- 
2.9.3