[Mesa-dev] [RFC] [PATCH] nir: Add a loop analysis pass

Thomas Helland thomashelland90 at gmail.com
Sat Aug 1 06:03:59 PDT 2015


I was meant to send out this yesterday, but my git send-email
got corrupted again. Don't know what's up with that. But enough
with the chit-chat, here it is.

The pass is now working! It detects the induction variables
and calculates the unroll count correctly on the one shader
I've tried it on as of now. There'll be a lot more testing
to ensure that it works for other types of induction
variables than your average i++, and with other types than int.

There's still some stuff that's not up to par. There is a lot
of copying of results between the pass's structures and the
structures I've set up for permanent storage. I'm not really
happy about that, so I'll do a job at reducing the duplication.

There is also something fishy with the var-to-basic-ind
hash table. As of now I don't set up a new table and copy
over the information, so it will plain fail when used.

I haven't looked over the coding style at all, so there is
probably a lot of style issues, so don't spend to much time
looking into that.

And I see now that the makefile-changes is wrong. I'll fix that.

I also need to do something wrt swizzles. That will be my next
big thing on the list, as I also need to do a job witht that for
the value range propagation pass I'm writing.

With that said, those are mainly mechanical changes. I don't
expect the aproach I've taken to change a lot, so I think
it is now ready to be put on the list for a round of comments.

Signed-off-by: Thomas Helland <thomashelland90 at gmail.com>
---
 src/glsl/Makefile.sources       |   2 +
 src/glsl/nir/nir.h              |  75 +++-
 src/glsl/nir/nir_loop_analyze.c | 929 ++++++++++++++++++++++++++++++++++++++++
 src/glsl/nir/nir_metadata.c     |   2 +
 4 files changed, 1007 insertions(+), 1 deletion(-)
 create mode 100644 src/glsl/nir/nir_loop_analyze.c

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index d784a81..9e286b6 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -30,6 +30,8 @@ NIR_FILES = \
 	nir/nir_intrinsics.c \
 	nir/nir_intrinsics.h \
 	nir/nir_live_variables.c \
+	nir/nir_loop_analyze.c \
+	nir/nir_loop_analyze.h \
 	nir/nir_lower_alu_to_scalar.c \
 	nir/nir_lower_atomics.c \
 	nir/nir_lower_global_vars_to_local.c \
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index a0b4e5c..3818aba 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -70,7 +70,6 @@ struct nir_function;
 struct nir_shader;
 struct nir_instr;
 
-
 /**
  * Description of built-in state associated with a uniform
  *
@@ -1272,10 +1271,78 @@ nir_if_last_else_node(nir_if *if_stmt)
    return exec_node_data(nir_cf_node, tail, node);
 }
 
+typedef enum {
+   undefined,
+   invariant,
+   basic_induction
+} nir_loop_variable_type;
+
+typedef struct {
+   /* The ssa_def associated with this info */
+   nir_ssa_def *def;
+
+   /* The type of this ssa_def */
+   nir_loop_variable_type type;
+
+   /* Link to the loop_variable list for the loop */
+   struct list_head loop_vars_link;
+
+   /* A link for a list of invariant variables */
+   struct list_head invariant_link;
+
+   /* A link for a list of induction variables */
+   struct list_head induction_link;
+
+   /* If the ssa-def is constant */
+   bool is_constant;
+
+   bool in_conditional_block;
+
+   bool in_nested_loop;
+} nir_loop_variable;
+
+typedef struct {
+   nir_op alu_op;                                /* The type of alu-operation    */
+   nir_loop_variable *alu_def;                   /* The def of the alu-operation */
+   nir_loop_variable *invariant;                 /* The invariant alu-operand    */
+   nir_loop_variable *phi;                       /* The other alu-operand        */
+   nir_loop_variable *def_outside_loop;          /* The phi-src outside the loop */
+} nir_basic_induction_var;
+
+typedef struct {
+   nir_if *nif;
+
+   /* Some more suitable fields like maybe indicated trip-count? */
+   nir_instr *conditional_instr;
+
+   struct list_head loop_terminator_link;
+} nir_loop_terminator;
+
+typedef struct {
+   /* Loop_variable for all ssa_defs in loop */
+   struct list_head loop_vars_list;
+
+   /* How many times the loop is run (if known) */
+   uint32_t trip_count;
+   bool is_trip_count_known;
+
+   nir_loop_terminator *limiting_terminator;
+
+   /* A list of loop_terminators terminating this loop XXX: These (apart from the limiting terminator) can be dead-code-eliminated */
+   struct list_head loop_terminator_list;
+
+   /* The ssa_defs that are invariant */
+   struct list_head invariant_list;
+
+   struct hash_table *var_to_basic_ind;
+} nir_loop_info;
+
 typedef struct {
    nir_cf_node cf_node;
 
    struct exec_list body; /** < list of nir_cf_node */
+
+   nir_loop_info *info;
 } nir_loop;
 
 static inline nir_cf_node *
@@ -1299,6 +1366,7 @@ typedef enum {
    nir_metadata_block_index = 0x1,
    nir_metadata_dominance = 0x2,
    nir_metadata_live_variables = 0x4,
+   nir_metadata_loop_analysis = 0x5,
 } nir_metadata;
 
 typedef struct {
@@ -1428,6 +1496,8 @@ typedef struct nir_shader_compiler_options {
     * are simulated by floats.)
     */
    bool native_integers;
+
+   uint32_t max_loop_instructions;
 } nir_shader_compiler_options;
 
 typedef struct nir_shader {
@@ -1674,6 +1744,9 @@ void nir_lower_to_source_mods(nir_shader *shader);
 void nir_normalize_cubemap_coords(nir_shader *shader);
 
 void nir_live_variables_impl(nir_function_impl *impl);
+void nir_loop_analyze_impl(nir_function_impl *impl);
+void nir_loop_analyze(nir_shader *shader);
+
 bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
 
 void nir_convert_to_ssa_impl(nir_function_impl *impl);
diff --git a/src/glsl/nir/nir_loop_analyze.c b/src/glsl/nir/nir_loop_analyze.c
new file mode 100644
index 0000000..712bf11
--- /dev/null
+++ b/src/glsl/nir/nir_loop_analyze.c
@@ -0,0 +1,929 @@
+/*
+ * Copyright © 2015 Thomas Helland (for Google's Summer of Code)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * Basic induction variable:
+ *    i = i + c
+ *    i = i - c
+ *    i = i * c        XXX: These are probably completely legit cases, but
+ *    i = i / c             I have no idea how to calculate the trip-count
+ *    where c is loop invariant or constant
+ *    defined only once in a loop
+ *
+ * This implementation is kinda slow as it retries all variables in the loop
+ * each time it has progressed (for invariant and induction detection).
+ * Could make it demand driven and use a worklist. However, the process
+ * of getting the users of each ssa-def might prove to be just as expensive as
+ * the few extra variables checked, as there are usually few instructions in
+ * the loops we encounter. Therefore the KISS method has been chosen as for now.
+ *
+ * If one of the operands is an induction variable
+ *   --> Can not be loop invariant.
+ * Can be used as a validation after the pass has run.
+ *
+ * We can check the conditional of any diverging control flow in the loop.
+ * If the conditional is invariant then we can add the defs in the
+ * then and else branch to the list of defs to analyze.
+ * This can, in practice, be done by checking each def we set to invariant
+ * if it is a condition for an if.
+ * This may be unnecessary if we implement loop unswitching.
+ * Then we will split the loop in two and remove the conditional in the loop.
+ *
+ * There are possibilities for doing some kind of unrolling even if one
+ * does not have a set unroll-count. This involves a switch-statement that
+ * has the same amount of cases as the unroll-factor,
+ * a so called "Duff's Device"
+ */
+
+#include "nir.h"
+
+typedef struct {
+
+   /* The ssa_def associated with this info */
+   nir_ssa_def *def;
+
+   /* The type of this ssa_def */
+   nir_loop_variable_type type;
+
+   /* A link for the work list */
+   struct list_head process_link;
+
+   /* A link for a list of invariant variables */
+   struct list_head invariant_link;
+
+   /* A link for a list of induction variables */
+   struct list_head induction_link;
+
+   bool is_constant;
+   bool in_loop;
+   bool in_conditional_block;
+   bool in_nested_loop;
+} loop_variable;
+
+typedef struct {
+   nir_op alu_op;                                /* The type of alu-operation    */
+   loop_variable *alu_def;                       /* The def of the alu-operation */
+   loop_variable *invariant;                     /* The invariant alu-operand    */
+   loop_variable *phi;                           /* The other alu-operand        */
+   loop_variable *def_outside_loop;              /* The phi-src outside the loop */
+} basic_induction_var;
+
+typedef struct {
+
+   /* The loop we store information for */
+   nir_loop *loop;
+
+   /* Loop_variable for all ssa_defs in function */
+   loop_variable *loop_vars;
+
+   /* How many times the loop is run (if known) */
+   uint32_t trip_count;
+   bool is_trip_count_known;
+
+   nir_loop_terminator *limiting_terminator;
+
+   /* A list of loop_terminators terminating this loop */
+   struct list_head loop_terminator_list;
+
+   /* A list of the loop_vars to analyze */
+   struct list_head process_list;
+
+   struct hash_table *var_to_basic_ind;
+
+} loop_info_state;
+
+static loop_variable *
+get_loop_var(nir_ssa_def *value, loop_info_state *state)
+{
+   loop_variable *var = &(state->loop_vars[value->index]);
+   return var;
+}
+
+static inline bool
+add_ssa_def_to_process_list(nir_ssa_def *def, void *void_state)
+{
+   loop_info_state *state = void_state;
+   loop_variable *var = get_loop_var(def, state);
+   /* Add to the tail of the list so that we start at the beginning of the
+    * defs in the loop instead of at the end. That way we will have less
+    * recursive calls.
+    */
+   LIST_ADDTAIL(&(var->process_link), &(state->process_list));
+   return true;
+}
+
+static inline bool
+mark_ssa_def_as_in_conditional_block(nir_ssa_def *def, void *void_state)
+{
+   loop_info_state *state = void_state;
+   loop_variable *var = get_loop_var(def, state);
+   var->in_conditional_block = true;
+   var->in_loop = true;
+   return true;
+}
+
+static inline bool
+mark_block_as_in_conditional_block(nir_block *block, void *void_state)
+{
+   nir_foreach_instr(block, instr)
+      nir_foreach_ssa_def(instr, mark_ssa_def_as_in_conditional_block,
+                          void_state);
+   return true;
+}
+
+static inline bool
+mark_ssa_def_as_nested(nir_ssa_def *def, void *void_state)
+{
+   loop_info_state *state = void_state;
+   loop_variable *var = get_loop_var(def, state);
+   var->in_nested_loop = true;
+   var->in_loop = true;
+   return true;
+}
+
+static inline bool
+mark_block_as_nested(nir_block *block, void *void_state)
+{
+   nir_foreach_instr(block, instr)
+      nir_foreach_ssa_def(instr, mark_ssa_def_as_nested, void_state);
+   return true;
+}
+
+static inline bool
+mark_ssa_def_as_in_loop(nir_ssa_def *def, void *void_state)
+{
+   loop_info_state *state = void_state;
+   loop_variable *var = get_loop_var(def, state);
+   var->in_loop = true;
+   return true;
+}
+
+static void
+initialize_loop(loop_info_state *state, bool first_run)
+{
+   foreach_list_typed_safe(nir_cf_node, node, node, &state->loop->body) {
+      switch (node->type) {
+      case nir_cf_node_block:
+         nir_foreach_instr(nir_cf_node_as_block(node), instr) {
+            nir_foreach_ssa_def(instr, add_ssa_def_to_process_list, state);
+            nir_foreach_ssa_def(instr, mark_ssa_def_as_in_loop, state);
+         }
+         break;
+      case nir_cf_node_if:
+         if (first_run) {
+            nir_foreach_block_in_cf_node(node,
+                                         mark_block_as_in_conditional_block,
+                                         state);
+         }
+         break;
+      case nir_cf_node_loop:
+         if (first_run)
+            nir_foreach_block_in_cf_node(node, mark_block_as_nested, state);
+         break;
+      case nir_cf_node_function:
+         break;
+      }
+   }
+}
+
+static inline bool
+is_var_alu(loop_variable *var)
+{
+   return (var->def->parent_instr->type == nir_instr_type_alu);
+}
+
+static inline bool
+is_var_phi(loop_variable *var)
+{
+   return (var->def->parent_instr->type == nir_instr_type_phi);
+}
+
+static inline bool
+is_ssa_def_invariant(nir_ssa_def *def, loop_info_state *state)
+{
+   loop_variable *var = get_loop_var(def, state);
+   if (var->type == invariant)
+      return true;
+
+   if (!var->in_loop) {
+      var->type = invariant;
+      return true;
+   }
+
+   assert(var->type != basic_induction);
+
+   /* An operation is invariant if all operands are invariant, constant or
+    * are defined outside the loop.
+    */
+   if (is_var_alu(var)) {
+      nir_alu_instr *alu = nir_instr_as_alu(def->parent_instr);
+
+      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+         if (!is_ssa_def_invariant(alu->src[i].src.ssa, state))
+            return false;
+      }
+      var->type = invariant;
+      return true;
+   }
+
+   /* Phis shouldn't be invariant except if one operand is invariant,
+    * and the other is the phi itself. I think these types of phi's
+    * are removed already. load_consts are already set to invariant
+    * and constant in the initialization. So skip both of these
+    */
+   var->type = undefined;
+   return false;
+}
+
+static void
+compute_invariance_information(loop_info_state *state)
+{
+   /* An expression is invariant in a loop L if:
+    *  (base cases)
+    *    – it’s a constant
+    *    – it’s a variable use, all of whose single defs are outside of L
+    *  (inductive cases)
+    *    – it’s a pure computation all of whose args are loop invariant
+    *    – it’s a variable use whose single reaching def, and the
+    *          rhs of that def is loop-invariant
+    */
+
+   bool changes;
+
+   do {
+      changes = false;
+      list_for_each_entry_safe(loop_variable, var,
+                               &state->process_list, process_link) {
+
+         if (var->in_conditional_block || var->in_nested_loop) {
+            LIST_DEL(&var->process_link);
+            continue;
+         }
+
+         if (is_ssa_def_invariant(var->def, state)) {
+
+            LIST_DEL(&var->process_link);
+            nir_foreach_if_use_safe(var->def, use_src) {
+               /* We can possibly add the then and else branch to the
+                * process-list. It should be noted that we should not add
+                * conditionals inside these blocks for the same reason we
+                * did not initially add this block. This will recurse,
+                * so if there are nested conditionals with invariant
+                * conditions they will hit this path, and so those blocks
+                * will also be analyzed
+                */
+            }
+
+            changes = true;
+         }
+      }
+   } while (changes);
+}
+
+static inline bool
+is_var_basic_induction_var(loop_variable *var, loop_info_state *state)
+{
+   if (var->type == basic_induction)
+      return true;
+
+   /* Should not occur as this is already checked in the caller */
+   if (var->type == invariant)
+      return false;
+
+   /* We are only interested in checking phi's for the basic induction
+    * variable case as its simple to detect. All basic induction variables
+    * we care about has a phi node
+    */
+   if (!is_var_phi(var))
+      return false;
+
+   nir_phi_instr *phi = nir_instr_as_phi(var->def->parent_instr);
+
+   basic_induction_var *biv = rzalloc(NULL, basic_induction_var);
+   biv->phi = var;
+
+   nir_foreach_phi_src(phi, src) {
+      loop_variable *src_var = get_loop_var(src->src.ssa, state);
+
+      /* If one of the sources is in a conditional or nested then panic */
+      if (src_var->in_conditional_block || src_var->in_nested_loop) {
+         ralloc_free(biv);
+         return false;
+      }
+
+      if (!src_var->in_loop)
+         biv->def_outside_loop = src_var;
+
+      if (src_var->in_loop && !src_var->in_conditional_block &&
+          !src_var->in_nested_loop && is_var_alu(src_var)) {
+         nir_alu_instr *alu = nir_instr_as_alu(src_var->def->parent_instr);
+         biv->alu_def = src_var;
+
+         switch (alu->op) {
+         case nir_op_fadd:
+         case nir_op_iadd:
+         case nir_op_uadd_carry:
+         case nir_op_fsub:
+         case nir_op_isub:
+         case nir_op_usub_borrow:
+         case nir_op_fmul:
+         case nir_op_imul:
+         case nir_op_umul_high:
+         case nir_op_fdiv:
+         case nir_op_idiv:
+         case nir_op_udiv:
+            for (unsigned i = 0; i < 2; i++) {
+               /* Is one of the operands invariant, and the other the phi? */
+               if (is_ssa_def_invariant(alu->src[i].src.ssa, state) &&
+                   alu->src[1-i].src.ssa->index == phi->dest.ssa.index)
+                  biv->invariant = get_loop_var(alu->src[i].src.ssa, state);
+            }
+            biv->alu_op = alu->op;
+            break;
+         default:
+            ralloc_free(biv);
+            return false;
+         }
+      }
+   }
+
+   if (biv->alu_def && biv->def_outside_loop && biv->invariant && biv->phi) {
+      biv->alu_def->type = basic_induction;
+      biv->phi->type = basic_induction;
+      _mesa_hash_table_insert(state->var_to_basic_ind, biv->alu_def, biv);
+      _mesa_hash_table_insert(state->var_to_basic_ind, biv->phi, biv);
+      ralloc_adopt(state, NULL);
+      return true;
+   }
+
+   /* It was found not to fulfill the requirements for being a
+    * basic induction variable. Clean up before returning
+    */
+   ralloc_free(biv);
+   return false;
+}
+
+static bool
+compute_induction_information(loop_info_state *state)
+{
+   /* XXX: Here we should probably run through all instructions in the list
+    * once to check if any of them are invariant conditionals for an if.
+    * Then we can add the branches to the analysis, as they are invariant.
+    * Might not be useful if we decide to add loop unswitching.
+    */
+   bool changes;
+   bool found_induction_var = false;
+
+   do {
+      changes = false;
+      list_for_each_entry_safe(loop_variable, var,
+                               &state->process_list, process_link) {
+
+         /* It can't be an induction variable if it is invariant and we
+          * don't want to deal with things in nested loops or conditionals
+          */
+         if (var->type == invariant ||
+             var->in_conditional_block ||
+             var->in_nested_loop) {
+            LIST_DEL(&(var->process_link));
+            continue;
+         }
+
+         if (is_var_basic_induction_var(var, state)) {
+            found_induction_var = true;
+            /* If a phi is marked basic_ind we also mark the alu_def basic_ind
+             * at the same time. It will then be detected as basic_ind here,
+             * on the second passing and removed from the list
+             */
+            LIST_DEL(&(var->process_link));
+            changes = true;
+         }
+
+      }
+   } while (changes);
+   return found_induction_var;
+}
+
+static bool
+initialize_ssa_def(nir_ssa_def *def, void *void_state)
+{
+   loop_info_state *state = void_state;
+   loop_variable *var = get_loop_var(def, state);
+
+   var->in_loop = false;
+   var->def = def;
+
+   if (def->parent_instr->type == nir_instr_type_load_const) {
+      var->type = invariant;
+      var->is_constant = true;
+   } else {
+      var->type = undefined;
+   }
+   return true;
+}
+
+static bool
+initialize_block(nir_block *block, void *void_state)
+{
+   nir_foreach_instr(block, instr)
+      nir_foreach_ssa_def(instr, initialize_ssa_def, void_state);
+   return true;
+}
+
+static bool
+is_loop_terminator(nir_if *nif)
+{
+   /* If there is stuff in the else-block that means that this is not
+    * a simple break on true if-statement and so we bail. XXX: This is
+    * probably not correct. Should track all breaks from loop instead.
+    */
+   foreach_list_typed_safe(nir_cf_node, node, node, &nif->else_list) {
+      if (node->type == nir_cf_node_block) {
+         nir_block *block = nir_cf_node_as_block(node);
+         nir_foreach_instr(block, instr)
+            return false;
+      }
+   }
+
+   nir_cf_node *first_then = nir_if_first_then_node(nif);
+
+   nir_block *first_then_block = nir_cf_node_as_block(first_then);
+
+   nir_instr *first_instr = nir_block_first_instr(first_then_block);
+
+   if (first_instr->type == nir_instr_type_jump) {
+      nir_jump_instr *jump = nir_instr_as_jump(first_instr);
+      if (jump->type == nir_jump_break)
+         return true;
+   }
+
+   return false;
+}
+
+static void
+get_loop_terminators(loop_info_state *state)
+{
+   foreach_list_typed_safe(nir_cf_node, node, node, &state->loop->body) {
+      if (node->type == nir_cf_node_if &&
+          is_loop_terminator(nir_cf_node_as_if(node))) {
+
+         nir_loop_terminator *terminator = rzalloc(state, nir_loop_terminator);
+
+         list_add(&terminator->loop_terminator_link,
+                  &state->loop_terminator_list);
+
+         terminator->nif = nir_cf_node_as_if(node);
+
+         terminator->conditional_instr =
+               terminator->nif->condition.ssa->parent_instr;
+
+      }
+   }
+}
+
+static basic_induction_var *
+get_basic_ind_var_for_loop_var(loop_variable *var, loop_info_state *state)
+{
+   assert(var->type == basic_induction);
+
+   return _mesa_hash_table_search(state->var_to_basic_ind, var)->data;
+}
+
+/* This function is mostly a direct port of the same functionality
+ * in the GLSL compiler loop analsis pass.
+ */
+static int
+calculate_iterations(nir_const_value *initial, nir_const_value *step,
+                     nir_const_value *limit, nir_op cond_op, nir_op alu_op)
+{
+   /* Superfluous assert */
+   assert(initial != NULL && step != NULL && limit != NULL);
+
+   switch (alu_op) {
+   case nir_op_fadd:
+   case nir_op_iadd:
+   case nir_op_uadd_carry:
+   case nir_op_fsub:
+   case nir_op_isub:
+   case nir_op_usub_borrow:
+      break;
+   default:
+      return -1;
+   }
+
+   int iter_int = 0;
+
+   if (alu_op == nir_op_fadd || alu_op == nir_op_fsub) {
+      float span = limit->f[0] - initial->f[0];
+      float iterations = span / step->f[0];
+      iter_int = (int) iterations;
+   }
+
+   if (alu_op == nir_op_iadd || alu_op == nir_op_fsub) {
+      int span = limit->i[0] - initial->i[0];
+      iter_int = span / step->i[0];
+   }
+
+   if (alu_op == nir_op_uadd_carry || alu_op == nir_op_usub_borrow) {
+      unsigned span = limit->u[0] - initial->u[0];
+      unsigned iterations = span / step->u[0];
+      iter_int = (int) iterations;
+   }
+
+   /* An explanation from the GLSL unrolling pass:
+    *
+    * Make sure that the calculated number of iterations satisfies the exit
+    * condition.  This is needed to catch off-by-one errors and some types of
+    * ill-formed loops.  For example, we need to detect that the following
+    * loop does not have a maximum iteration count.
+    *
+    *    for (float x = 0.0; x != 0.9; x += 0.2);
+    */
+   const int bias[] = { -1, 0, 1 };
+   bool valid_loop = false;
+
+   for (unsigned i = 0; i < ARRAY_SIZE(bias); i++) {
+      if (valid_loop)
+         continue;
+
+      switch (cond_op) {
+      case nir_op_uge:
+      case nir_op_ult: {
+         unsigned temp_iterations = (unsigned) (iter_int + bias[i]);
+         unsigned mul = temp_iterations * step->u[0];
+         unsigned add = mul + initial->u[0];
+         bool cmp = false;
+
+         if (cond_op == nir_op_uge)
+            cmp = (add >= limit->u[0]);
+
+         if (cond_op == nir_op_ult)
+            cmp = (add < limit->u[0]);
+
+         if (cmp) {
+            iter_int = temp_iterations < 0 ? (int) -temp_iterations :
+                                             (int) temp_iterations;
+            valid_loop = true;
+         }
+         break;
+      }
+      case nir_op_feq:
+      case nir_op_fge:
+      case nir_op_flt:
+      case nir_op_fne: {
+         float temp_iterations = (float) (iter_int + bias[i]);
+         float mul = temp_iterations * step->f[0];
+         float add = mul + initial->f[0];
+         bool cmp = false;
+
+         if (cond_op == nir_op_feq)
+            cmp = (add == limit->f[0]);
+
+         if (cond_op == nir_op_fge)
+            cmp = (add >= limit->f[0]);
+
+         if (cond_op == nir_op_flt)
+            cmp = (add < limit->f[0]);
+
+         if (cond_op == nir_op_fne)
+            cmp = (add != limit->f[0]);
+
+         if (cmp) {
+            iter_int = temp_iterations < 0 ? (int) -temp_iterations :
+                                             (int) temp_iterations;
+            valid_loop = true;
+         }
+         break;
+      }
+      case nir_op_ieq:
+      case nir_op_ige:
+      case nir_op_ilt:
+      case nir_op_ine: {
+         int temp_iterations = iter_int + bias[i];
+         int mul = temp_iterations * step->i[0];
+         int add = mul + initial->i[0];
+         bool cmp = false;
+
+         if (cond_op == nir_op_ieq)
+            cmp = (add == limit->i[0]);
+
+         if (cond_op == nir_op_ige)
+            cmp = (add >= limit->i[0]);
+
+         if (cond_op == nir_op_ilt)
+            cmp = (add < limit->i[0]);
+
+         if (cond_op == nir_op_ine)
+            cmp = (add != limit->i[0]);
+
+         if (cmp) {
+            iter_int = temp_iterations;
+
+            valid_loop = true;
+         }
+         break;
+      }
+      default:
+         return -1;
+      }
+   }
+
+   return (valid_loop) ? iter_int : -1;
+}
+
+/* This can probably be redesigned to return the terminator with the lowest
+ * tripcount, or NULL if it can not be determined.
+ */
+static void
+get_trip_count(loop_info_state *state)
+{
+   /* We can now run through each of the terminators of the loop to try
+    * to infer a possible trip-count. We need to check them all,
+    * and set the lowest trip-count as the trip-count of our loop.
+    * If one of the terminators has an undecidable trip-count we can not
+    * safely assume anything about the duration of the loop.
+    */
+
+   state->is_trip_count_known = false;
+   nir_loop_terminator *limiting_terminator = NULL;
+   int min_trip_count = -2;
+
+   list_for_each_entry(nir_loop_terminator, terminator,
+                       &state->loop_terminator_list, loop_terminator_link) {
+
+      assert(terminator->conditional_instr->type == nir_instr_type_alu);
+
+      nir_alu_instr *alu = nir_instr_as_alu(terminator->conditional_instr);
+
+      loop_variable *basic_ind = NULL;
+      loop_variable *limit = NULL;
+
+      nir_op cond_op;
+
+      switch (alu->op) {
+      case nir_op_uge:
+      case nir_op_ult:
+      case nir_op_feq:
+      case nir_op_fge:
+      case nir_op_flt:
+      case nir_op_fne:
+      case nir_op_ieq:
+      case nir_op_ige:
+      case nir_op_ilt:
+      case nir_op_ine:
+         /* We assume that the limit is the "right" operand */
+         basic_ind = get_loop_var(alu->src[0].src.ssa, state);
+         limit = get_loop_var(alu->src[1].src.ssa, state);
+         cond_op = alu->op;
+
+         if (basic_ind->type != basic_induction) {
+
+            /* We had it the wrong way, flip things around */
+            basic_ind = get_loop_var(alu->src[1].src.ssa, state);
+            limit = get_loop_var(alu->src[0].src.ssa, state);
+
+            switch (cond_op) {
+               case nir_op_uge: cond_op = nir_op_ult; break;
+               case nir_op_ult: cond_op = nir_op_uge; break;
+               case nir_op_fge: cond_op = nir_op_flt; break;
+               case nir_op_flt: cond_op = nir_op_fge; break;
+               case nir_op_ige: cond_op = nir_op_ilt; break;
+               case nir_op_ilt: cond_op = nir_op_ige; break;
+               default: unreachable("Should not get here");
+            }
+         }
+
+         /* The comparison has to have a basic induction variable
+          * and a constant for us to be able to find trip counts
+          */
+         if (basic_ind->type != basic_induction || !limit->is_constant)
+            return;
+
+         basic_induction_var *ind =
+               get_basic_ind_var_for_loop_var(basic_ind, state);
+
+         if (!ind->def_outside_loop->is_constant ||
+             !ind->invariant->is_constant)
+            return;
+
+         /* We have determined that we have the following constants:
+          * (With the typical int i = 0; i < x; i++; as an example)
+          *    - Upper limit.
+          *    - Starting value
+          *    - Step / iteration size
+          * Thats all thats needed to calculate the trip-count
+          */
+
+         nir_load_const_instr *initial_instr =
+               nir_instr_as_load_const(
+                     ind->def_outside_loop->def->parent_instr);
+         nir_const_value initial_val = initial_instr->value;
+
+         nir_load_const_instr *step_instr =
+               nir_instr_as_load_const(ind->invariant->def->parent_instr);
+         nir_const_value step_val = step_instr->value;
+
+         nir_load_const_instr *limit_instr =
+               nir_instr_as_load_const(limit->def->parent_instr);
+         nir_const_value limit_val = limit_instr->value;
+
+         int iterations = calculate_iterations(&initial_val, &step_val,
+                                               &limit_val, cond_op,
+                                               ind->alu_op);
+
+         /* If this is the first run then overwrite the value */
+         if (min_trip_count == -2)
+            min_trip_count = iterations;
+
+         /* Where we not able to calculate the iterations? */
+         if (iterations == -1)
+            return;
+
+         /* If we have found a smaller amount of iterations than previously
+          * that means we have identified a more limiting terminator
+          */
+         if (iterations < min_trip_count) {
+            min_trip_count = iterations;
+            limiting_terminator = terminator;
+         }
+         break;
+
+      default:
+         return;
+      }
+   }
+
+   state->is_trip_count_known = true;
+   state->trip_count = min_trip_count;
+   state->limiting_terminator = limiting_terminator;
+}
+
+static void
+get_loop_info(loop_info_state *state, nir_function_impl *impl)
+{
+   /* Initialize all variables to "outside_loop". This also marks defs
+    * invariant and constant if they are nir_instr_type_load_const's
+    */
+   nir_foreach_block(impl, initialize_block, state);
+
+   /* Add all entries in the outermost part of the loop to the processing list
+    * Mark the entries in conditionals or in nested loops accordingly
+    */
+   initialize_loop(state, true);
+
+   /* Induction analysis needs invariance information so get that first */
+   compute_invariance_information(state);
+
+   /* We may now have filled the process_list with instructions from inside
+    * the nested blocks in the loop. Remove all instructions from the list
+    * before we start computing induction information.
+    */
+   list_inithead(&state->process_list);
+
+   /* Add all entries in the outermost part of the loop to the processing list
+    * Mark the entries in conditionals or in nested loops accordingly
+    */
+   initialize_loop(state, false);
+
+   bool found_induction_var;
+
+   /* We have invariance information so try to find induction variables */
+   found_induction_var = compute_induction_information(state);
+
+   if (!found_induction_var)
+      return;
+
+   /* We should now gather a list of the loop's terminators. This just
+    * writes to the state (eventually). XXX: Fixup
+    * Might want to rename the function for clarity
+    */
+   get_loop_terminators(state);
+
+   if (LIST_IS_EMPTY(&state->loop_terminator_list))
+      return;
+
+   /* We can now run through each of the terminators of the loop
+    * and try to compute a trip-count. // XXX: Return value?
+    */
+   get_trip_count(state);
+}
+
+static loop_info_state *
+initialize_loop_info_state(nir_loop *loop, void *mem_ctx, nir_function_impl *impl)
+{
+   loop_info_state *state = rzalloc(mem_ctx, loop_info_state);
+   state->loop_vars = rzalloc_array(mem_ctx, loop_variable, impl->ssa_alloc);
+   state->loop = loop;
+
+   list_inithead(&state->process_list);
+   list_inithead(&state->loop_terminator_list);
+
+   state->var_to_basic_ind = _mesa_hash_table_create(mem_ctx,
+                                                     _mesa_hash_pointer,
+                                                     _mesa_key_pointer_equal);
+
+   return state;
+}
+
+static void
+process_loops(nir_cf_node *cf_node)
+{
+   switch (cf_node->type) {
+   case nir_cf_node_block:
+      return;
+   case nir_cf_node_if: {
+      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
+      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
+         process_loops(nested_node);
+      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
+         process_loops(nested_node);
+      return;
+   }
+   case nir_cf_node_loop: {
+      nir_loop *loop = nir_cf_node_as_loop(cf_node);
+      foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
+         process_loops(nested_node);
+      break;
+   }
+   default:
+      unreachable("unknown cf node type");
+   }
+
+   nir_loop *loop = nir_cf_node_as_loop(cf_node);
+
+   nir_function_impl *impl = nir_cf_node_get_function(cf_node);
+
+   loop_info_state *state = initialize_loop_info_state(loop, NULL, impl);
+
+   get_loop_info(state, impl);
+
+   loop->info = ralloc(loop, nir_loop_info);
+   loop->info->is_trip_count_known = state->is_trip_count_known;
+   if (state->is_trip_count_known) {
+      loop->info->trip_count = state->trip_count;
+      loop->info->limiting_terminator = state->limiting_terminator;
+   }
+
+   LIST_INITHEAD(&loop->info->loop_terminator_list);
+   LIST_INITHEAD(&loop->info->loop_vars_list);
+
+   if (!LIST_IS_EMPTY(&state->loop_terminator_list))
+      list_replace(&state->loop_terminator_list,
+                   &loop->info->loop_terminator_list);
+
+   for (int i = 0; i < impl->ssa_alloc; i++) {
+      loop_variable *var = &state->loop_vars[i];
+
+      if (!var->in_loop)
+         continue;
+
+      nir_loop_variable *new_var = rzalloc(loop->info, nir_loop_variable);
+      new_var->def = var->def;
+      new_var->in_conditional_block = var->in_conditional_block;
+      new_var->in_nested_loop = var->in_nested_loop;
+      new_var->is_constant = var->is_constant;
+      new_var->type = var->type;
+      LIST_ADD(&new_var->loop_vars_link, &loop->info->loop_vars_list);
+   }
+
+   loop->info->var_to_basic_ind = state->var_to_basic_ind; // XXX: This should be copied over to a new struct
+
+   ralloc_free(state);
+}
+
+void
+nir_loop_analyze_impl(nir_function_impl *impl)
+{
+   nir_index_ssa_defs(impl);
+   foreach_list_typed(nir_cf_node, node, node, &impl->body)
+      process_loops(node);
+}
+
+void
+nir_loop_analyze(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_loop_analyze_impl(overload->impl);
+   }
+}
diff --git a/src/glsl/nir/nir_metadata.c b/src/glsl/nir/nir_metadata.c
index a03e124..4c3ff04 100644
--- a/src/glsl/nir/nir_metadata.c
+++ b/src/glsl/nir/nir_metadata.c
@@ -41,6 +41,8 @@ nir_metadata_require(nir_function_impl *impl, nir_metadata required)
       nir_calc_dominance_impl(impl);
    if (NEEDS_UPDATE(nir_metadata_live_variables))
       nir_live_variables_impl(impl);
+   if (NEEDS_UPDATE(nir_metadata_loop_analysis))
+      nir_loop_analyze_impl(impl);
 
 #undef NEEDS_UPDATE
 
-- 
2.4.4



More information about the mesa-dev mailing list