Mesa (master): ac/llvm: fix demote inside conditional branches
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Nov 12 21:21:53 UTC 2020
Module: Mesa
Branch: master
Commit: aa757f4f8c367f408bc02b0daf4787b61dd899d6
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=aa757f4f8c367f408bc02b0daf4787b61dd899d6
Author: Marek Olšák <marek.olsak at amd.com>
Date: Sun Sep 20 22:50:52 2020 -0400
ac/llvm: fix demote inside conditional branches
The big comment added in emit_demote explains it.
v2: don't kill if subgroup ops are used
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7586>
---
src/amd/llvm/ac_llvm_build.c | 18 ++++++++++++++++++
src/amd/llvm/ac_llvm_build.h | 1 +
src/amd/llvm/ac_nir_to_llvm.c | 32 ++++++++++++++++++++++++++++----
3 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 00a3346928f..d336f3a506b 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -3138,6 +3138,22 @@ void ac_build_else(struct ac_llvm_context *ctx, int label_id)
current_branch->next_block = endif_block;
}
+/* Invoked after a branch is exited: emits the kill postponed by demote once flow depth is 0. */
+static void ac_branch_exited(struct ac_llvm_context *ctx)
+{
+ if (ctx->flow->depth == 0 && ctx->conditional_demote_seen) {
+ /* A demote was seen inside a conditional block or loop. Kill threads only
+ * now, outside all control flow, because amdgcn.wqm.vote doesn't return
+ * usable values inside the blocks. postponed_kill is false for demoted lanes.
+ *
+ * This is an optional optimization that only kills whole inactive quads.
+ */
+ LLVMValueRef cond = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, "");
+ ac_build_kill_if_false(ctx, ac_build_wqm_vote(ctx, cond));
+ ctx->conditional_demote_seen = false;
+ }
+}
+
void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
{
struct ac_llvm_flow *current_branch = get_current_flow(ctx);
@@ -3149,6 +3165,7 @@ void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
set_basicblock_name(current_branch->next_block, "endif", label_id);
ctx->flow->depth--;
+ ac_branch_exited(ctx);
}
void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
@@ -3162,6 +3179,7 @@ void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block);
set_basicblock_name(current_loop->next_block, "endloop", label_id);
ctx->flow->depth--;
+ ac_branch_exited(ctx);
}
void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id)
diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h
index 72b349f7717..32da9ec9733 100644
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@@ -118,6 +118,7 @@ struct ac_llvm_context {
* False = demoted lanes
*/
LLVMValueRef postponed_kill;
+ bool conditional_demote_seen;
/* Since ac_nir_translate makes a local copy of ac_llvm_context, there
* are two ac_llvm_contexts. Declare a pointer here, so that the control
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index fcab2570414..d9329cde2b8 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -2813,13 +2813,37 @@ static void emit_demote(struct ac_nir_context *ctx, const nir_intrinsic_instr *i
cond = ctx->ac.i1false;
}
- /* Kill immediately while maintaining WQM. */
- ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
-
LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, "");
LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill);
- return;
+
+ if (!ctx->info->fs.needs_all_helper_invocations) {
+ /* This is an optional optimization that only kills whole inactive quads.
+ * It's not used when subgroup operations can possibly use all helper
+ * invocations.
+ */
+ if (ctx->ac.flow->depth == 0) {
+ ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
+ } else {
+ /* amdgcn.wqm.vote doesn't work inside conditional blocks. Here's why.
+ *
+ * The problem is that kill(wqm.vote(0)) kills all active threads within
+ * the block, which breaks the whole quad mode outside the block if
+ * the conditional block has partially active quads (2x2 pixel blocks).
+ * E.g. threads 0-3 are active outside the block, but only thread 0 is
+ * active inside the block. Thread 0 shouldn't be killed by demote,
+ * because threads 1-3 are still active outside the block.
+ *
+ * The fix for amdgcn.wqm.vote would be to return S_WQM((live & ~exec) | cond)
+ * instead of S_WQM(cond).
+ *
+ * The less efficient workaround we do here is to save the kill condition
+ * to a temporary (postponed_kill) and do kill(wqm.vote(cond)) after we
+ * exit the conditional block.
+ */
+ ctx->ac.conditional_demote_seen = true;
+ }
+ }
}
static LLVMValueRef visit_load_local_invocation_index(struct ac_nir_context *ctx)
More information about the mesa-commit
mailing list