Mesa (master): vc4: Flag the last thread switch in the program as the last.

Eric Anholt anholt at kemper.freedesktop.org
Sun Nov 13 03:24:18 UTC 2016


Module: Mesa
Branch: master
Commit: ace0d810e56a1e2978fc3ac237158918ebe2a23c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ace0d810e56a1e2978fc3ac237158918ebe2a23c

Author: Eric Anholt <eric at anholt.net>
Date:   Thu Nov 10 17:28:20 2016 -0800

vc4: Flag the last thread switch in the program as the last.

We don't allow the last thread switch to be inside control flow, to be
sure that we hit the last state exactly once.  If the last texturing was
in control flow, fall back to single threaded.

---

 src/gallium/drivers/vc4/vc4_program.c  | 11 +++++++++++
 src/gallium/drivers/vc4/vc4_qir.h      |  5 +++++
 src/gallium/drivers/vc4/vc4_qpu_emit.c | 18 ++++++++++++++++++
 3 files changed, 34 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index ad06d85..d2281ce 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2285,6 +2285,17 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
 
         switch (stage) {
         case QSTAGE_FRAG:
+                /* FS threading requires that the thread execute
+                 * QPU_SIG_LAST_THREAD_SWITCH exactly once before terminating
+                 * (with no other THRSW afterwards, obviously).  If we didn't
+                 * fetch a texture at a top level block, this wouldn't be
+                 * true.
+                 */
+                if (c->fs_threaded && !c->last_thrsw_at_top_level) {
+                        c->failed = true;
+                        return c;
+                }
+
                 emit_frag_end(c);
                 break;
         case QSTAGE_VERT:
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 03ac1f5..eebfdf0 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -514,6 +514,9 @@ struct vc4_compile {
 
         struct list_head qpu_inst_list;
 
+        /* Pre-QPU-scheduled instruction containing the last THRSW */
+        uint64_t *last_thrsw;
+
         uint64_t *qpu_insts;
         uint32_t qpu_inst_count;
         uint32_t qpu_inst_size;
@@ -540,6 +543,8 @@ struct vc4_compile {
          */
         bool fs_threaded;
 
+        bool last_thrsw_at_top_level;
+
         bool failed;
 };
 
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index cb93624..e2f0425 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -504,6 +504,7 @@ vc4_generate_code_block(struct vc4_compile *c,
                         queue(block, qpu_NOP());
                         *last_inst(block) = qpu_set_sig(*last_inst(block),
                                                         QPU_SIG_THREAD_SWITCH);
+                        c->last_thrsw = last_inst(block);
                         break;
 
                 case QOP_BRANCH:
@@ -591,6 +592,23 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
         qir_for_each_block(block, c)
                 vc4_generate_code_block(c, block, temp_registers);
 
+        /* Switch the last SIG_THRSW instruction to SIG_LAST_THRSW.
+         *
+         * LAST_THRSW is a new signal in BCM2708B0 (including Raspberry Pi)
+         * that ensures that a later thread doesn't try to lock the scoreboard
+         * and terminate before an earlier-spawned thread on the same QPU, by
+         * delaying switching back to the later shader until earlier has
+         * finished.  Otherwise, if the earlier thread was hitting the same
+         * quad, the scoreboard would deadlock.
+         */
+        if (c->last_thrsw) {
+                assert(QPU_GET_FIELD(*c->last_thrsw, QPU_SIG) ==
+                       QPU_SIG_THREAD_SWITCH);
+                *c->last_thrsw = ((*c->last_thrsw & ~QPU_SIG_MASK) |
+                                  QPU_SET_FIELD(QPU_SIG_LAST_THREAD_SWITCH,
+                                                QPU_SIG));
+        }
+
         uint32_t cycles = qpu_schedule_instructions(c);
         uint32_t inst_count_at_schedule_time = c->qpu_inst_count;
 




More information about the mesa-commit mailing list