Mesa (master): broadcom/compiler: disallow ldunif during ldvary sequences if possible

Wed Mar 10 08:06:19 UTC 2021

Module: Mesa
Branch: master
Commit: c057a1211bb265e322782f2d827621dd9f17821d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c057a1211bb265e322782f2d827621dd9f17821d

Author: Iago Toral Quiroga <itoral at igalia.com>
Date:   Tue Mar  9 09:20:50 2021 +0100

broadcom/compiler: disallow ldunif during ldvary sequences if possible

This recovers many of the shaders that were hurt by the previous patch, at
the expense of re-adding ldvary tracking to the scheduler.

total instructions in shared programs: 13760415 -> 13755738 (-0.03%)
instructions in affected programs: 1207560 -> 1202883 (-0.39%)
helped: 5080
HURT: 1731
Instructions are helped.

total max-temps in shared programs: 2322991 -> 2322828 (<.01%)
max-temps in affected programs: 5063 -> 4900 (-3.22%)
helped: 229
HURT: 108
Max-temps are helped.

total sfu-stalls in shared programs: 31827 -> 31545 (-0.89%)
sfu-stalls in affected programs: 478 -> 196 (-59.00%)
helped: 304
HURT: 21
Sfu-stalls are helped.

total inst-and-stalls in shared programs: 13792242 -> 13787283 (-0.04%)
inst-and-stalls in affected programs: 1220856 -> 1215897 (-0.41%)
helped: 5162
HURT: 1697
Inst-and-stalls are helped.

Reviewed-by: Alejandro Piñeiro <apinheiro at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9471>

---

 src/broadcom/compiler/qpu_schedule.c | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index 092b9252f83..ee1b728468c 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -460,6 +460,7 @@ struct choose_scoreboard {
         int last_thrsw_tick;
         bool tlb_locked;
         bool fixup_ldvary;
+        int ldvary_count;
 };
 
 static bool
@@ -873,6 +874,12 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
         return ok;
 }
 
+static inline bool
+try_skip_for_ldvary_pipelining(const struct v3d_qpu_instr *inst)
+{
+        return inst->sig.ldunif || inst->sig.ldunifrf;
+}
+
 static struct schedule_node *
 choose_instruction_to_schedule(struct v3d_compile *c,
                                struct choose_scoreboard *scoreboard,
@@ -889,10 +896,18 @@ choose_instruction_to_schedule(struct v3d_compile *c,
                         return NULL;
         }
 
+        bool ldvary_pipelining = c->s->info.stage == MESA_SHADER_FRAGMENT &&
+                                 scoreboard->ldvary_count < c->num_inputs;
+        bool skipped_insts_for_ldvary_pipelining = false;
+retry:
         list_for_each_entry(struct schedule_node, n, &scoreboard->dag->heads,
                             dag.link) {
                 const struct v3d_qpu_instr *inst = &n->inst->qpu;
 
+                if (ldvary_pipelining && try_skip_for_ldvary_pipelining(inst)) {
+                        skipped_insts_for_ldvary_pipelining = true;
+                        continue;
+                }
 
                 /* Don't choose the branch instruction until it's the last one
                  * left.  We'll move it up to fit its delay slots after we
@@ -1021,11 +1036,23 @@ choose_instruction_to_schedule(struct v3d_compile *c,
                 }
         }
 
-        /* If we are pairing an ldvary, flag it so we can fix it up for optimal
-         * pipelining of ldvary sequences.
+        /* If we did not find any instruction to schedule but we discarded
+         * some of them to prioritize ldvary pipelining, try again.
          */
-        if (prev_inst && chosen && chosen->inst->qpu.sig.ldvary)
-                scoreboard->fixup_ldvary = true;
+        if (!chosen && !prev_inst && skipped_insts_for_ldvary_pipelining) {
+                skipped_insts_for_ldvary_pipelining = false;
+                ldvary_pipelining = false;
+                goto retry;
+        }
+
+        if (chosen && chosen->inst->qpu.sig.ldvary) {
+                scoreboard->ldvary_count++;
+                /* If we are pairing an ldvary, flag it so we can fix it up for
+                 * optimal pipelining of ldvary sequences.
+                 */
+                if (prev_inst)
+                        scoreboard->fixup_ldvary = true;
+        }
 
         return chosen;
 }