Mesa (staging/19.3): freedreno/ir3: fix gpu hang with pre-fs-tex-fetch

Wed Nov 13 18:33:33 UTC 2019

Module: Mesa
Branch: staging/19.3
Commit: d311c745c6d25e17d9ab48f96d16ce0a26a6ad31
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d311c745c6d25e17d9ab48f96d16ce0a26a6ad31

Author: Rob Clark <robdclark at chromium.org>
Date:   Fri Nov  8 12:55:27 2019 -0800

freedreno/ir3: fix gpu hang with pre-fs-tex-fetch

For pre-fs-dispatch texture fetch, we need to assign bary_ij to r0.x,
even if it is not used in the shader (i.e. the only varying use is for
tex coords).  But if, for example, gl_FragCoord is used, it could get
assigned on top of bary_ij, resulting in a GPU hang.

The solution to this is two-fold: (1) the inputs/outputs rework has the
benefit of making RA realize bary_ij is a vec2, even if there are no
split/collect instructions (due to no varying fetches in the shader
itself).  And (2) extend the live ranges of meta:input instructions to
the first non-input, to prevent RA from assigning the same register to
multiple inputs.

Backport note: because of (1) above, a better solution for 19.3 would be
to revert f30c256ec05.

Fixes: f30c256ec05 ("freedreno/ir3: enable pre-fs texture fetch for a6xx")
Signed-off-by: Rob Clark <robdclark at chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg at google.com>
Reviewed-by: Eric Anholt <eric at anholt.net>
(cherry picked from commit b22617fb57be54a859a8d62a5e545afcb38266e9)

---

 src/freedreno/ir3/ir3_ra.c    | 12 ++++++++++++
 src/freedreno/ir3/ir3_sched.c | 30 ++++++++++++++++++++----------
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index 39d34bb51d9..ff273de1b08 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -702,6 +702,15 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
 	block->data = bd;
 
+	struct ir3_instruction *first_non_input = NULL;
+	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+		if (instr->opc != OPC_META_INPUT) {
+			first_non_input = instr;
+			break;
+		}
+	}
+
+
 	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
 		struct ir3_instruction *src;
 		struct ir3_register *reg;
@@ -771,6 +780,9 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
 				def(name, id->defn);
 
+				if (instr->opc == OPC_META_INPUT)
+					use(name, first_non_input);
+
 				if (is_high(id->defn)) {
 					ra_set_node_class(ctx->g, name,
 							ctx->set->high_classes[id->cls - HIGH_OFFSET]);
diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c
index 34c648a6748..c828b5da6c1 100644
--- a/src/freedreno/ir3/ir3_sched.c
+++ b/src/freedreno/ir3/ir3_sched.c
@@ -783,18 +783,28 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
 	list_inithead(&block->instr_list);
 	list_inithead(&ctx->depth_list);
 
-	/* first a pre-pass to schedule all meta:input instructions
-	 * (which need to appear first so that RA knows the register is
-	 * occupied), and move remaining to depth sorted list:
+	/* First schedule all meta:input instructions, followed by
+	 * tex-prefetch.  We want all of the instructions that load
+	 * values into registers before the shader starts to go
+	 * before any other instructions.  But in particular we
+	 * want inputs to come before prefetches.  This is because
+	 * a FS's bary_ij input may not actually be live in the
+	 * shader, but it should not be scheduled on top of any
+	 * other input (but can be overwritten by a tex prefetch)
+	 *
+	 * Finally, move all the remaining instructions to the depth-
+	 * list
 	 */
-	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
-		if ((instr->opc == OPC_META_INPUT) ||
-				(instr->opc == OPC_META_TEX_PREFETCH)) {
+	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+		if (instr->opc == OPC_META_INPUT)
 			schedule(ctx, instr);
-		} else {
-			ir3_insert_by_depth(instr, &ctx->depth_list);
-		}
-	}
+
+	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+		if (instr->opc == OPC_META_TEX_PREFETCH)
+			schedule(ctx, instr);
+
+	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+		ir3_insert_by_depth(instr, &ctx->depth_list);
 
 	while (!list_is_empty(&ctx->depth_list)) {
 		struct ir3_sched_notes notes = {0};