Mesa (master): freedreno/ir3: fix potential gpu lockup with kill

Tue Oct 21 13:39:04 UTC 2014

Module: Mesa
Branch: master
Commit: 8a0ffedd8de51eaf980855283c4525dba6dc5847
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=8a0ffedd8de51eaf980855283c4525dba6dc5847

Author: Rob Clark <robclark at freedesktop.org>
Date:   Sat Oct 18 15:28:16 2014 -0400

freedreno/ir3: fix potential gpu lockup with kill

It seems like the hardware is unhappy if we execute a kill instruction
prior to the last input instruction, i.e. the one carrying the end-input
(ei) flag.  Probably the shader thread stops executing and the end-input
flag is never set.

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/ir3/ir3.c       |   11 +++++++++++
 src/gallium/drivers/freedreno/ir3/ir3.h       |   10 ++++++++++
 src/gallium/drivers/freedreno/ir3/ir3_depth.c |   16 +++++++++++++--
 src/gallium/drivers/freedreno/ir3/ir3_sched.c |   26 +++++++++++++++++++++++++
 4 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index 70d37ff..60d4e4a 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -81,6 +81,8 @@ void ir3_destroy(struct ir3 *shader)
 		shader->chunk = chunk->next;
 		free(chunk);
 	}
+	free(shader->instrs);
+	free(shader->baryfs);
 	free(shader);
 }
 
@@ -596,6 +598,15 @@ static void insert_instr(struct ir3 *shader,
 				shader->instrs_sz * sizeof(shader->instrs[0]));
 	}
 	shader->instrs[shader->instrs_count++] = instr;
+
+	if (is_input(instr)) {
+		if (shader->baryfs_count == shader->baryfs_sz) {
+			shader->baryfs_sz = MAX2(2 * shader->baryfs_sz, 16);
+			shader->baryfs = realloc(shader->baryfs,
+					shader->baryfs_sz * sizeof(shader->baryfs[0]));
+		}
+		shader->baryfs[shader->baryfs_count++] = instr;
+	}
 }
 
 struct ir3_block * ir3_block_create(struct ir3 *shader,
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index d2d3dca..21992f6 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -210,7 +210,11 @@ struct ir3_instruction {
 		 * result of moving a const to a reg would have a low cost,  so to
 		 * it could make sense to duplicate the instruction at various
 		 * points where the result is needed to reduce register footprint.
+		 *
+		 * DEPTH_UNUSED used to mark unused instructions after depth
+		 * calculation pass.
 		 */
+#define DEPTH_UNUSED  ~0
 		unsigned depth;
 	};
 	struct ir3_instruction *next;
@@ -224,6 +228,8 @@ struct ir3_heap_chunk;
 struct ir3 {
 	unsigned instrs_count, instrs_sz;
 	struct ir3_instruction **instrs;
+	unsigned baryfs_count, baryfs_sz;
+	struct ir3_instruction **baryfs;
 	unsigned heap_idx;
 	struct ir3_heap_chunk *chunk;
 };
@@ -272,6 +278,10 @@ static inline void ir3_clear_mark(struct ir3 *shader)
 	/* TODO would be nice to drop the instruction array.. for
 	 * new compiler, _clear_mark() is all we use it for, and
 	 * we could probably manage a linked list instead..
+	 *
+	 * Also, we'll probably want to mark instructions within
+	 * a block, so tracking the list of instrs globally is
+	 * unlikely to be what we want.
 	 */
 	unsigned i;
 	for (i = 0; i < shader->instrs_count; i++) {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
index dcc0362..76413d4 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -150,10 +150,22 @@ void ir3_block_depth(struct ir3_block *block)
 		if (block->outputs[i])
 			ir3_instr_depth(block->outputs[i]);
 
-	/* at this point, any unvisited input is unused: */
+	/* mark un-used instructions: */
+	for (i = 0; i < block->shader->instrs_count; i++) {
+		struct ir3_instruction *instr = block->shader->instrs[i];
+
+		/* just consider instructions within this block: */
+		if (instr->block != block)
+			continue;
+
+		if (!ir3_instr_check_mark(instr))
+			instr->depth = DEPTH_UNUSED;
+	}
+
+	/* cleanup unused inputs: */
 	for (i = 0; i < block->ninputs; i++) {
 		struct ir3_instruction *in = block->inputs[i];
-		if (in && !ir3_instr_check_mark(in))
+		if (in && (in->depth == DEPTH_UNUSED))
 			block->inputs[i] = NULL;
 	}
 }
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
index 24d7c63..b2ef811 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
@@ -242,6 +242,32 @@ static int trysched(struct ir3_sched_ctx *ctx,
 	if (delay)
 		return delay;
 
+	/* if the instruction is a kill, we need to ensure *every*
+	 * bary.f is scheduled.  The hw seems unhappy if the thread
+	 * gets killed before the end-input (ei) flag is hit.
+	 *
+	 * We could do this by adding each bary.f instruction as
+	 * virtual ssa src for the kill instruction.  But we have
+	 * fixed length instr->regs[].
+	 *
+	 * TODO this wouldn't be quite right if we had multiple
+	 * basic blocks, if any block was conditional.  We'd need
+	 * to schedule the bary.f's outside of any block which
+	 * was conditional that contained a kill.. I think..
+	 */
+	if (is_kill(instr)) {
+		struct ir3 *ir = instr->block->shader;
+		unsigned i;
+
+		for (i = 0; i < ir->baryfs_count; i++) {
+			if (ir->baryfs[i]->depth == DEPTH_UNUSED)
+				continue;
+			delay = trysched(ctx, ir->baryfs[i]);
+			if (delay)
+				return delay;
+		}
+	}
+
 	/* if this is a write to address/predicate register, and that
 	 * register is currently in use, we need to defer until it is
 	 * free: