Mesa (master): ir3: Fix bug with shaders that only exit via discard

Wed Apr 22 10:04:39 UTC 2020

Module: Mesa
Branch: master
Commit: 4daa3917a38a6d18ba7cc66071342b9f7fa92f53
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=4daa3917a38a6d18ba7cc66071342b9f7fa92f53

Author: Connor Abbott <cwabbott0 at gmail.com>
Date:   Tue Apr 21 11:36:40 2020 +0200

ir3: Fix bug with shaders that only exit via discard

discard is supposed to be a terminator, killing the thread, so that it's
possible to exit main solely by a discard e.g. inside of an infinite
loop. However, it currently isn't treated as a terminator in NIR due to
workarounds turning it into demote (d3d-style kill) and even if that
were fixed, we probably wouldn't want to treat discard_if as a jump
since otherwise the scheduler wouldn't be able to schedule things around
it. So, add this workaround which inserts jump instructions as
necessary to guarantee that the program always terminates.

This fixes a hang in dEQP-VK.graphicsfuzz.while-inside-switch, which
conditionally does a discard inside an infinite loop.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4658>

---

 src/freedreno/ir3/ir3.h          |  2 ++
 src/freedreno/ir3/ir3_legalize.c | 69 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 6cb882002f0..44175888200 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1168,6 +1168,8 @@ static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n)
 	list_for_each_entry(struct ir3_block, __block, __list, node)
 #define foreach_block_safe(__block, __list) \
 	list_for_each_entry_safe(struct ir3_block, __block, __list, node)
+#define foreach_block_rev(__block, __list) \
+	list_for_each_entry_rev(struct ir3_block, __block, __list, node)
 
 /* iterators for arrays: */
 #define foreach_array(__array, __list) \
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index bed8cd1c03a..d0b0dd83b46 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -592,6 +592,73 @@ block_sched(struct ir3 *ir)
 	}
 }
 
+/* Here we workaround the fact that kill doesn't actually kill the thread as
+ * GL expects. The last instruction always needs to be an end instruction,
+ * which means that if we're stuck in a loop where kill is the only way out,
+ * then we may have to jump out to the end. kill may also have the d3d
+ * semantics of converting the thread to a helper thread, rather than setting
+ * the exec mask to 0, in which case the helper thread could get stuck in an
+ * infinite loop.
+ *
+ * We do this late, both to give the scheduler the opportunity to reschedule
+ * kill instructions earlier and to avoid having to create a separate basic
+ * block.
+ *
+ * TODO: Assuming that the wavefront doesn't stop as soon as all threads are
+ * killed, we might benefit by doing this more aggressively when the remaining
+ * part of the program after the kill is large, since that would let us
+ * skip over the instructions when there are no non-killed threads left.
+ */
+static void
+kill_sched(struct ir3 *ir, struct ir3_shader_variant *so)
+{
+	/* True if we know that this block will always eventually lead to the end
+	 * block:
+	 */
+	bool always_ends = true;
+	bool added = false;
+	struct ir3_block *last_block =
+		list_last_entry(&ir->block_list, struct ir3_block, node);
+
+	foreach_block_rev (block, &ir->block_list) {
+		for (unsigned i = 0; i < 2 && block->successors[i]; i++) {
+			if (block->successors[i]->start_ip <= block->end_ip)
+				always_ends = false;
+		}
+
+		if (always_ends)
+			continue;
+
+		foreach_instr_safe (instr, &block->instr_list) {
+			if (instr->opc != OPC_KILL)
+				continue;
+
+			struct ir3_instruction *br = ir3_instr_create(block, OPC_BR);
+			br->regs[1] = instr->regs[1];
+			br->cat0.target =
+				list_last_entry(&ir->block_list, struct ir3_block, node);
+
+			list_del(&br->node);
+			list_add(&br->node, &instr->node);
+
+			added = true;
+		}
+	}
+
+	if (added) {
+		/* I'm not entirely sure how the branchstack works, but we probably
+		 * need to add at least one entry for the divergence which is resolved
+		 * at the end:
+		 */
+		so->branchstack++;
+
+		/* We don't update predecessors/successors, so we have to do this
+		 * manually:
+		 */
+		mark_jp(last_block);
+	}
+}
+
 /* Insert nop's required to make this a legal/valid shader program: */
 static void
 nop_sched(struct ir3 *ir)
@@ -669,6 +736,8 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
 	*max_bary = ctx->max_bary;
 
 	block_sched(ir);
+	if (so->type == MESA_SHADER_FRAGMENT)
+		kill_sched(ir, so);
 	nop_sched(ir);
 
 	do {