Mesa (master): broadcom/compiler: Emit uniform loops using uniform control flow

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Feb 1 08:28:55 UTC 2021


Module: Mesa
Branch: master
Commit: 79bde75131c1f64bc34092a081c531b025a5a588
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=79bde75131c1f64bc34092a081c531b025a5a588

Author: Arcady Goldmints-Orlov <agoldmints at igalia.com>
Date:   Thu Dec 24 10:24:56 2020 -0600

broadcom/compiler: Emit uniform loops using uniform control flow

Similarly to if statements, uniform loops are now emitted without
predication, using simple branches for breaks and continues. The
uniformity of the loop is determined by running the
nir_divergence_analysis pass.

Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7726>

---

 src/broadcom/compiler/nir_to_vir.c   | 92 ++++++++++++++++++++++++++++++------
 src/broadcom/compiler/v3d_compiler.h |  6 +++
 src/broadcom/compiler/vir.c          |  2 +
 3 files changed, 85 insertions(+), 15 deletions(-)

diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 20f3d316a98..ac41a136b38 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -2774,8 +2774,6 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
  * XXX perf: Could we be using flpush/flpop somehow for our execution channel
  * enabling?
  *
- * XXX perf: For uniform control flow, we should be able to skip c->execute
- * handling entirely.
  */
 static void
 ntq_activate_execute_for_block(struct v3d_compile *c)
@@ -2823,9 +2821,13 @@ ntq_emit_uniform_if(struct v3d_compile *c, nir_if *if_stmt)
         ntq_emit_cf_list(c, &if_stmt->then_list);
 
         if (!empty_else_block) {
-                /* At the end of the THEN block, jump to ENDIF */
-                vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
-                vir_link_blocks(c->cur_block, after_block);
+                /* At the end of the THEN block, jump to ENDIF, unless
+                 * the block ended in a break or continue.
+                 */
+                if (!c->cur_block->branch_emitted) {
+                        vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
+                        vir_link_blocks(c->cur_block, after_block);
+                }
 
                 /* Emit the else block. */
                 vir_set_emit_block(c, else_block);
@@ -2932,7 +2934,7 @@ ntq_emit_if(struct v3d_compile *c, nir_if *nif)
         bool was_in_control_flow = c->in_control_flow;
         c->in_control_flow = true;
         if (!vir_in_nonuniform_control_flow(c) &&
-            nir_src_is_dynamically_uniform(nif->condition)) {
+            !nir_src_is_divergent(nif->condition)) {
                 ntq_emit_uniform_if(c, nif);
         } else {
                 ntq_emit_nonuniform_if(c, nif);
@@ -2959,7 +2961,34 @@ ntq_emit_jump(struct v3d_compile *c, nir_jump_instr *jump)
                 break;
 
         case nir_jump_return:
-                unreachable("All returns shouold be lowered\n");
+                unreachable("All returns should be lowered\n");
+                break;
+
+        case nir_jump_halt:
+        case nir_jump_goto:
+        case nir_jump_goto_if:
+                unreachable("not supported\n");
+                break;
+        }
+}
+
+static void
+ntq_emit_uniform_jump(struct v3d_compile *c, nir_jump_instr *jump)
+{
+        switch (jump->type) {
+        case nir_jump_break:
+                vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
+                vir_link_blocks(c->cur_block, c->loop_break_block);
+                c->cur_block->branch_emitted = true;
+                break;
+        case nir_jump_continue:
+                vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
+                vir_link_blocks(c->cur_block, c->loop_cont_block);
+                c->cur_block->branch_emitted = true;
+                break;
+
+        case nir_jump_return:
+                unreachable("All returns should be lowered\n");
                 break;
 
         case nir_jump_halt:
@@ -2995,7 +3024,10 @@ ntq_emit_instr(struct v3d_compile *c, nir_instr *instr)
                 break;
 
         case nir_instr_type_jump:
-                ntq_emit_jump(c, nir_instr_as_jump(instr));
+                if (vir_in_nonuniform_control_flow(c))
+                        ntq_emit_jump(c, nir_instr_as_jump(instr));
+                else
+                        ntq_emit_uniform_jump(c, nir_instr_as_jump(instr));
                 break;
 
         default:
@@ -3017,20 +3049,14 @@ ntq_emit_block(struct v3d_compile *c, nir_block *block)
 static void ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list);
 
 static void
-ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
+ntq_emit_nonuniform_loop(struct v3d_compile *c, nir_loop *loop)
 {
-        bool was_in_control_flow = c->in_control_flow;
-        c->in_control_flow = true;
-
         bool was_uniform_control_flow = false;
         if (!vir_in_nonuniform_control_flow(c)) {
                 c->execute = vir_MOV(c, vir_uniform_ui(c, 0));
                 was_uniform_control_flow = true;
         }
 
-        struct qblock *save_loop_cont_block = c->loop_cont_block;
-        struct qblock *save_loop_break_block = c->loop_break_block;
-
         c->loop_cont_block = vir_new_block(c);
         c->loop_break_block = vir_new_block(c);
 
@@ -3067,6 +3093,42 @@ ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
                 c->execute = c->undef;
         else
                 ntq_activate_execute_for_block(c);
+}
+
+static void
+ntq_emit_uniform_loop(struct v3d_compile *c, nir_loop *loop)
+{
+
+        c->loop_cont_block = vir_new_block(c);
+        c->loop_break_block = vir_new_block(c);
+
+        vir_link_blocks(c->cur_block, c->loop_cont_block);
+        vir_set_emit_block(c, c->loop_cont_block);
+
+        ntq_emit_cf_list(c, &loop->body);
+
+        if (!c->cur_block->branch_emitted) {
+                vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
+                vir_link_blocks(c->cur_block, c->loop_cont_block);
+        }
+
+        vir_set_emit_block(c, c->loop_break_block);
+}
+
+static void
+ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
+{
+        bool was_in_control_flow = c->in_control_flow;
+        c->in_control_flow = true;
+
+        struct qblock *save_loop_cont_block = c->loop_cont_block;
+        struct qblock *save_loop_break_block = c->loop_break_block;
+
+        if (vir_in_nonuniform_control_flow(c) || loop->divergent) {
+                ntq_emit_nonuniform_loop(c, loop);
+        } else {
+                ntq_emit_uniform_loop(c, loop);
+        }
 
         c->loop_break_block = save_loop_break_block;
         c->loop_cont_block = save_loop_cont_block;
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index d0bfa1f6470..ec6087b1f50 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -456,6 +456,12 @@ struct qblock {
         /** Offset within the uniform stream of the branch instruction */
         uint32_t branch_uniform;
 
+        /**
+         * Has the terminating branch of this block already been emitted
+         * by a break or continue?
+         */
+        bool branch_emitted;
+
         /** @{ used by v3d_vir_live_variables.c */
         BITSET_WORD *def;
         BITSET_WORD *defin;
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 583b5215350..a36be86a1f3 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -1156,6 +1156,8 @@ v3d_attempt_compile(struct v3d_compile *c)
         }
 
         NIR_PASS_V(c->s, nir_lower_bool_to_int32);
+        nir_convert_to_lcssa(c->s, true, true);
+        NIR_PASS_V(c->s, nir_divergence_analysis);
         NIR_PASS_V(c->s, nir_convert_from_ssa, true);
 
         struct nir_schedule_options schedule_options = {



More information about the mesa-commit mailing list