Mesa (main): pan/bi: Do helper termination analysis on clauses

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Jul 12 23:44:38 UTC 2021


Module: Mesa
Branch: main
Commit: 6bf8e960faae8a4109389a0d3f428ba2db376296
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6bf8e960faae8a4109389a0d3f428ba2db376296

Author: Alyssa Rosenzweig <alyssa at collabora.com>
Date:   Mon Jul 12 11:11:27 2021 -0400

pan/bi: Do helper termination analysis on clauses

Unlike the dependency analysis for the skip bits which is a function of
the data flow graph, the thread termination analysis is dependent on the
actual sequence of instructions. As such, it must be done after
scheduling to be correct in the presence of out-of-order scheduling.

Furthermore it's specified in terms of clauses, not instructions.
Reflecting this in our code gets a nice simplification. As a side effect
this puts extra td flags on subsequent clauses, which matches the DDK's
behaviour. (Maybe td is just a hint?)

Signed-off-by: Alyssa Rosenzweig <alyssa at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10961>

---

 src/panfrost/bifrost/bi_helper_invocations.c | 20 ++++++++------------
 src/panfrost/bifrost/bi_pack.c               | 21 +++++----------------
 src/panfrost/bifrost/bi_print.c              |  3 +++
 src/panfrost/bifrost/bi_printer.c.py         |  3 ---
 src/panfrost/bifrost/bifrost_compile.c       |  9 +++++++--
 src/panfrost/bifrost/compiler.h              |  6 +++---
 6 files changed, 26 insertions(+), 36 deletions(-)

diff --git a/src/panfrost/bifrost/bi_helper_invocations.c b/src/panfrost/bifrost/bi_helper_invocations.c
index 564027187ea..a8d2c61c1e2 100644
--- a/src/panfrost/bifrost/bi_helper_invocations.c
+++ b/src/panfrost/bifrost/bi_helper_invocations.c
@@ -119,10 +119,6 @@ bi_block_uses_helpers(bi_block *block)
 static bool
 bi_block_terminates_helpers(bi_block *block)
 {
-        /* Can't terminate if there are no helpers */
-        if (!(block->pass_flags & 1))
-                return false;
-
         /* Can't terminate if a successor needs helpers */
         pan_foreach_successor((&block->base), succ) {
                 if (((bi_block *) succ)->pass_flags & 1)
@@ -187,19 +183,19 @@ bi_analyze_helper_terminate(bi_context *ctx)
         _mesa_set_destroy(visited, NULL);
         _mesa_set_destroy(worklist, NULL);
 
-        /* Finally, set helper_terminate on the last derivative-calculating
-         * instruction in a block that terminates helpers */
+        /* Finally, mark clauses requiring helpers */
         bi_foreach_block(ctx, _block) {
                 bi_block *block = (bi_block *) _block;
 
-                if (!bi_block_terminates_helpers(block))
-                        continue;
+                /* At the end, there are helpers iff we don't terminate */
+                bool helpers = !bi_block_terminates_helpers(block);
 
-                bi_foreach_instr_in_block_rev(block, I) {
-                        if (bi_instr_uses_helpers(I)) {
-                                I->tdd = true;
-                                break;
+                bi_foreach_clause_in_block_rev(block, clause) {
+                        bi_foreach_instr_in_clause_rev(block, clause, I) {
+                                helpers |= bi_instr_uses_helpers(I);
                         }
+
+                        clause->td = !helpers;
                 }
         }
 }
diff --git a/src/panfrost/bifrost/bi_pack.c b/src/panfrost/bifrost/bi_pack.c
index 491d71bebec..a9a1a415fd4 100644
--- a/src/panfrost/bifrost/bi_pack.c
+++ b/src/panfrost/bifrost/bi_pack.c
@@ -28,7 +28,7 @@
  * bits on the wire (as well as fixup branches) */
 
 static uint64_t
-bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2, bool tdd)
+bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2)
 {
         /* next_dependencies are the union of the dependencies of successors'
          * dependencies */
@@ -43,7 +43,7 @@ bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2, bool tdd
                 .flow_control =
                         (next_1 == NULL && next_2 == NULL) ?
                         BIFROST_FLOW_END :  clause->flow_control,
-                .terminate_discarded_threads = tdd,
+                .terminate_discarded_threads = clause->td,
                 .next_clause_prefetch = clause->next_clause_prefetch && next_1,
                 .staging_barrier = staging_barrier,
                 .staging_register = clause->staging_register,
@@ -625,8 +625,7 @@ bi_pack_format(struct util_dynarray *emission,
 static void
 bi_pack_clause(bi_context *ctx, bi_clause *clause,
                 bi_clause *next_1, bi_clause *next_2,
-                struct util_dynarray *emission, gl_shader_stage stage,
-                bool tdd)
+                struct util_dynarray *emission, gl_shader_stage stage)
 {
         struct bi_packed_tuple ins[8] = { 0 };
 
@@ -644,7 +643,7 @@ bi_pack_clause(bi_context *ctx, bi_clause *clause,
         unsigned constant_quads =
                 DIV_ROUND_UP(clause->constant_count - (ec0_packed ? 1 : 0), 2);
 
-        uint64_t header = bi_pack_header(clause, next_1, next_2, tdd);
+        uint64_t header = bi_pack_header(clause, next_1, next_2);
         uint64_t ec0 = (clause->constants[0] >> 4);
         unsigned m0 = (clause->pcrel_idx == 0) ? 4 : 0;
 
@@ -739,17 +738,7 @@ bi_pack(bi_context *ctx, struct util_dynarray *emission)
 
                         previous_size = emission->size;
 
-                        /* Terminate discarded threads after the clause if any
-                         * instruction needs threads terminated. Note that this
-                         * may be set for CLPER.i32 which is not
-                         * message-passing, so we need to check all
-                         * instructions */
-                        bool tdd = false;
-
-                        bi_foreach_instr_in_clause(block, clause, I)
-                                tdd |= I->tdd;
-
-                        bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage, tdd);
+                        bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage);
 
                         if (!is_last)
                                 bi_collect_blend_ret_addr(ctx, emission, clause);
diff --git a/src/panfrost/bifrost/bi_print.c b/src/panfrost/bifrost/bi_print.c
index 295ff8efcab..aea95cc071d 100644
--- a/src/panfrost/bifrost/bi_print.c
+++ b/src/panfrost/bifrost/bi_print.c
@@ -103,6 +103,9 @@ bi_print_clause(bi_clause *clause, FILE *fp)
         if (clause->staging_barrier)
                 fprintf(fp, " osrb");
 
+        if (clause->td)
+                fprintf(fp, " td");
+
         if (clause->pcrel_idx != ~0)
                 fprintf(fp, " pcrel(%u)", clause->pcrel_idx);
 
diff --git a/src/panfrost/bifrost/bi_printer.c.py b/src/panfrost/bifrost/bi_printer.c.py
index b9450dbed0c..306124b5811 100644
--- a/src/panfrost/bifrost/bi_printer.c.py
+++ b/src/panfrost/bifrost/bi_printer.c.py
@@ -168,9 +168,6 @@ bi_print_instr(bi_instr *I, FILE *fp)
     if (I->table)
         fprintf(fp, ".%s", bi_table_as_str(I->table));
 
-    if (I->tdd)
-        fprintf(fp, ".tdd");
-
     switch (I->op) {
 % for opcode in ops:
 <%
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 5d5ddfae855..3b577944a77 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -3585,8 +3585,9 @@ bifrost_compile_shader_nir(nir_shader *nir,
                 bi_print_shader(ctx, stdout);
         bi_lower_fau(ctx);
 
-        /* Analyze as late as possible before RA/scheduling */
-        bi_analyze_helper_terminate(ctx);
+        /* Analyze before register allocation to avoid false dependencies. The
+         * skip bit is a function of only the data flow graph and is invariant
+         * under valid scheduling. */
         bi_analyze_helper_requirements(ctx);
 
         bi_register_allocate(ctx);
@@ -3595,6 +3596,10 @@ bifrost_compile_shader_nir(nir_shader *nir,
                 bi_print_shader(ctx, stdout);
         bi_schedule(ctx);
         bi_assign_scoreboard(ctx);
+
+        /* Analyze after scheduling since we depend on instruction order. */
+        bi_analyze_helper_terminate(ctx);
+
         if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
                 bi_print_shader(ctx, stdout);
 
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index fa20643a9d5..6966102e8aa 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -316,9 +316,6 @@ typedef struct {
          * useless double fills */
         bool no_spill;
 
-        /* Should we terminate discarded threads after executing this instruction? */
-        bool tdd;
-
         /* Override table, inducing a DTSEL_IMM pair if nonzero */
         enum bi_table table;
 
@@ -520,6 +517,9 @@ typedef struct {
         /* Unique in a clause */
         enum bifrost_message_type message_type;
         bi_instr *message;
+
+        /* Discard helper threads */
+        bool td;
 } bi_clause;
 
 typedef struct bi_block {



More information about the mesa-commit mailing list