Mesa (master): lima/ppir: use a ready list in node_to_instr

Wed May 27 21:35:54 UTC 2020

Module: Mesa
Branch: master
Commit: b3023055e075386e96fe2fbf093f0db261c0d9fa
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3023055e075386e96fe2fbf093f0db261c0d9fa

Author: Erico Nunes <nunes.erico at gmail.com>
Date:   Sun May 17 15:56:42 2020 +0200

lima/ppir: use a ready list in node_to_instr

After the recent optimizations in ppir lowering that increase options
for combining, node_to_instr now may have multiple options of nodes to
insert and needs to decide which is better.
For example, if an instruction uses both a varying and a texture, there
are two nodes nodes that can be inserted to the load varying slot in the
same instruction (ld_var and ld_coords). It is much more advantageous to
pipeline the load texture coords since that enables the higher precision
path for texture coordinates. However, with the current recursive
expansion, this cannot be influenced.

This simple ready list implementation in node_to_instr allows it to
choose the next node to expand based on a priority score, rather than
relying on the random order coming from the recursive expansion.

Other than preferring nodes with pipeline output (which covers ld_coords
vs ld_var), nodes using later slots in the pipeline are now expanded
first, allowing node_to_instr to make all of the earlier (pipelineable)
nodes available in the ready list so the best one can be chosen when
picking nodes for the earlier slots.

Fixes: 632a921bd0d lima/ppir: optimize tex loads with single successor

Signed-off-by: Erico Nunes <nunes.erico at gmail.com>
Reviewed-by: Vasily Khoruzhick <anarsoul at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5092>

---

 src/gallium/drivers/lima/ir/pp/node_to_instr.c | 104 ++++++++++++++++++-------
 src/gallium/drivers/lima/ir/pp/ppir.h          |   1 +
 2 files changed, 77 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
index 9f21fa31e26..52632c88ce6 100644
--- a/src/gallium/drivers/lima/ir/pp/node_to_instr.c
+++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
@@ -214,40 +214,88 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node)
    return true;
 }
 
-static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node)
+static unsigned int ppir_node_score(ppir_node *node)
 {
-   /* first try pipeline sched, if that didn't succeed try normal scheduling */
-   if (!ppir_do_node_to_instr_try_insert(block, node))
-      if (!ppir_do_one_node_to_instr(block, node))
-         return false;
+   /* preferentially expand nodes in later instruction slots first, so
+    * nodes for earlier slots (which are more likely pipelineable) get
+    * added to the ready list. */
+   unsigned int late_slot = 0;
+   int *slots = ppir_op_infos[node->op].slots;
+   if (slots)
+      for (int i = 0; slots[i] != PPIR_INSTR_SLOT_END; i++)
+         late_slot = MAX2(late_slot, slots[i]);
+
+   /* to untie, favour nodes with pipelines for earlier expansion.
+    * increase that for nodes with chained pipelines */
+   unsigned int pipeline = 0;
+   ppir_node *n = node;
+   ppir_dest *dest = ppir_node_get_dest(n);
+   while (dest && dest->type == ppir_target_pipeline) {
+      pipeline++;
+      assert(ppir_node_has_single_src_succ(n));
+      n = ppir_node_first_succ(n);
+      dest = ppir_node_get_dest(n);
+   }
+   assert(pipeline < 4);
 
-   if (node->is_end)
-      node->instr->is_end = true;
+   return (late_slot << 2 | pipeline);
+}
 
-   /* we have to make sure the dep not be destroyed (due to
-    * succ change) in ppir_do_node_to_instr, otherwise we can't
-    * do recursion like this */
-   ppir_node_foreach_pred(node, dep) {
-      ppir_node *pred = dep->pred;
-      bool ready = true;
-
-      /* pred may already be processed by the previous pred
-       * (this pred may be both node and previous pred's child) */
-      if (pred->instr)
-         continue;
-
-      /* insert pred only when all its successors have been inserted */
-      ppir_node_foreach_succ(pred, dep) {
-         ppir_node *succ = dep->succ;
-         if (!succ->instr) {
-            ready = false;
-            break;
-         }
+static ppir_node *ppir_ready_list_pick_best(ppir_block *block,
+                                            struct list_head *ready_list)
+{
+   unsigned int best_score = 0;
+   ppir_node *best = NULL;
+
+   list_for_each_entry(ppir_node, node, ready_list, sched_list) {
+      unsigned int score = ppir_node_score(node);
+      if (!best || score > best_score) {
+         best = node;
+         best_score = score;
       }
+   }
 
-      if (ready) {
-         if (!ppir_do_node_to_instr(block, pred))
+   assert(best);
+   return best;
+}
+
+static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *root)
+{
+   struct list_head ready_list;
+   list_inithead(&ready_list);
+   list_addtail(&root->sched_list, &ready_list);
+
+   while (!list_is_empty(&ready_list)) {
+      ppir_node *node = ppir_ready_list_pick_best(block, &ready_list);
+      list_del(&node->sched_list);
+
+      /* first try pipeline sched, if that didn't succeed try normal sched */
+      if (!ppir_do_node_to_instr_try_insert(block, node))
+         if (!ppir_do_one_node_to_instr(block, node))
             return false;
+
+      if (node->is_end)
+         node->instr->is_end = true;
+
+      ppir_node_foreach_pred(node, dep) {
+         ppir_node *pred = dep->pred;
+         bool ready = true;
+
+         /* pred may already have been processed by a previous node */
+         if (pred->instr)
+            continue;
+
+         /* insert pred only when all its successors have been inserted */
+         ppir_node_foreach_succ(pred, dep) {
+            ppir_node *succ = dep->succ;
+            if (!succ->instr) {
+               ready = false;
+               break;
+            }
+         }
+
+         if (ready)
+            list_addtail(&pred->sched_list, &ready_list);
       }
    }
 
diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h
index 631dcaaad51..8344a0038d6 100644
--- a/src/gallium/drivers/lima/ir/pp/ppir.h
+++ b/src/gallium/drivers/lima/ir/pp/ppir.h
@@ -152,6 +152,7 @@ typedef struct {
 
 typedef struct ppir_node {
    struct list_head list;
+   struct list_head sched_list;
    ppir_op op;
    ppir_node_type type;
    int index;