Mesa (master): ac/llvm: fix demote inside conditional branches

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Nov 12 21:21:53 UTC 2020


Module: Mesa
Branch: master
Commit: aa757f4f8c367f408bc02b0daf4787b61dd899d6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=aa757f4f8c367f408bc02b0daf4787b61dd899d6

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Sun Sep 20 22:50:52 2020 -0400

ac/llvm: fix demote inside conditional branches

The big comment explains it.

v2: don't kill if subgroup ops are used

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7586>

---

 src/amd/llvm/ac_llvm_build.c  | 18 ++++++++++++++++++
 src/amd/llvm/ac_llvm_build.h  |  1 +
 src/amd/llvm/ac_nir_to_llvm.c | 32 ++++++++++++++++++++++++++++----
 3 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 00a3346928f..d336f3a506b 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -3138,6 +3138,22 @@ void ac_build_else(struct ac_llvm_context *ctx, int label_id)
    current_branch->next_block = endif_block;
 }
 
+/* Invoked after a branch is exited. */
+static void ac_branch_exited(struct ac_llvm_context *ctx)
+{
+   if (ctx->flow->depth == 0 && ctx->conditional_demote_seen) {
+      /* The previous conditional branch contained demote. Kill threads
+       * after all conditional blocks because amdgcn.wqm.vote doesn't
+       * return usable values inside the blocks.
+       *
+       * This is an optional optimization that only kills whole inactive quads.
+       */
+      LLVMValueRef cond = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, "");
+      ac_build_kill_if_false(ctx, ac_build_wqm_vote(ctx, cond));
+      ctx->conditional_demote_seen = false;
+   }
+}
+
 void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
 {
    struct ac_llvm_flow *current_branch = get_current_flow(ctx);
@@ -3149,6 +3165,7 @@ void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
    set_basicblock_name(current_branch->next_block, "endif", label_id);
 
    ctx->flow->depth--;
+   ac_branch_exited(ctx);
 }
 
 void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
@@ -3162,6 +3179,7 @@ void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
    LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block);
    set_basicblock_name(current_loop->next_block, "endloop", label_id);
    ctx->flow->depth--;
+   ac_branch_exited(ctx);
 }
 
 void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id)
diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h
index 72b349f7717..32da9ec9733 100644
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@@ -118,6 +118,7 @@ struct ac_llvm_context {
     * False = demoted lanes
     */
    LLVMValueRef postponed_kill;
+   bool conditional_demote_seen;
 
    /* Since ac_nir_translate makes a local copy of ac_llvm_context, there
     * are two ac_llvm_contexts. Declare a pointer here, so that the control
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index fcab2570414..d9329cde2b8 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -2813,13 +2813,37 @@ static void emit_demote(struct ac_nir_context *ctx, const nir_intrinsic_instr *i
       cond = ctx->ac.i1false;
    }
 
-   /* Kill immediately while maintaining WQM. */
-   ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
-
    LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
    mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, "");
    LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill);
-   return;
+
+   if (!ctx->info->fs.needs_all_helper_invocations) {
+      /* This is an optional optimization that only kills whole inactive quads.
+       * It's not used when subgroup operations can possibly use all helper
+       * invocations.
+       */
+      if (ctx->ac.flow->depth == 0) {
+         ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
+      } else {
+         /* amdgcn.wqm.vote doesn't work inside conditional blocks. Here's why.
+          *
+          * The problem is that kill(wqm.vote(0)) kills all active threads within
+          * the block, which breaks the whole quad mode outside the block if
+          * the conditional block has partially active quads (2x2 pixel blocks).
+          * E.g. threads 0-3 are active outside the block, but only thread 0 is
+          * active inside the block. Thread 0 shouldn't be killed by demote,
+          * because threads 1-3 are still active outside the block.
+          *
+          * The fix for amdgcn.wqm.vote would be to return S_WQM((live & ~exec) | cond)
+          * instead of S_WQM(cond).
+          *
+          * The less efficient workaround we do here is to save the kill condition
+          * to a temporary (postponed_kill) and do kill(wqm.vote(cond)) after we
+          * exit the conditional block.
+          */
+         ctx->ac.conditional_demote_seen = true;
+      }
+   }
 }
 
 static LLVMValueRef visit_load_local_invocation_index(struct ac_nir_context *ctx)



More information about the mesa-commit mailing list