Mesa (master): freedreno/ir3: track max flow control depth for a5xx/a6xx

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Dec 7 18:50:40 UTC 2018


Module: Mesa
Branch: master
Commit: 5c2c1f0a2d5cec771b6cbfadf43f44a632ff57fc
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5c2c1f0a2d5cec771b6cbfadf43f44a632ff57fc

Author: Rob Clark <robdclark at gmail.com>
Date:   Wed Dec  5 10:51:16 2018 -0500

freedreno/ir3: track max flow control depth for a5xx/a6xx

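Track the maximum nesting depth of flow control (if/loop) while compiling
a shader, and program that value into the BRANCHSTACK field rather than
just hard-coding the BRANCHSTACK size.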

Signed-off-by: Rob Clark <robdclark at gmail.com>

---

 src/freedreno/ir3/ir3_compiler_nir.c             | 24 ++++++++++++++++++++++++
 src/freedreno/ir3/ir3_context.h                  |  5 +++++
 src/freedreno/ir3/ir3_shader.h                   |  4 ++++
 src/gallium/drivers/freedreno/a5xx/fd5_program.c |  4 ++--
 src/gallium/drivers/freedreno/a6xx/fd6_program.c |  4 ++--
 5 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 6b33c1f898..f8155747c5 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2341,6 +2341,20 @@ emit_loop(struct ir3_context *ctx, nir_loop *nloop)
 }
 
 static void
+stack_push(struct ir3_context *ctx)
+{
+	ctx->stack++;
+	ctx->max_stack = MAX2(ctx->max_stack, ctx->stack);
+}
+
+static void
+stack_pop(struct ir3_context *ctx)
+{
+	compile_assert(ctx, ctx->stack > 0);
+	ctx->stack--;
+}
+
+static void
 emit_cf_list(struct ir3_context *ctx, struct exec_list *list)
 {
 	foreach_list_typed(nir_cf_node, node, node, list) {
@@ -2349,10 +2363,14 @@ emit_cf_list(struct ir3_context *ctx, struct exec_list *list)
 			emit_block(ctx, nir_cf_node_as_block(node));
 			break;
 		case nir_cf_node_if:
+			stack_push(ctx);
 			emit_if(ctx, nir_cf_node_as_if(node));
+			stack_pop(ctx);
 			break;
 		case nir_cf_node_loop:
+			stack_push(ctx);
 			emit_loop(ctx, nir_cf_node_as_loop(node));
+			stack_pop(ctx);
 			break;
 		case nir_cf_node_function:
 			ir3_context_error(ctx, "TODO\n");
@@ -2479,9 +2497,13 @@ emit_function(struct ir3_context *ctx, nir_function_impl *impl)
 {
 	nir_metadata_require(impl, nir_metadata_block_index);
 
+	compile_assert(ctx, ctx->stack == 0);
+
 	emit_cf_list(ctx, &impl->body);
 	emit_block(ctx, impl->end_block);
 
+	compile_assert(ctx, ctx->stack == 0);
+
 	/* at this point, we should have a single empty block,
 	 * into which we emit the 'end' instruction.
 	 */
@@ -3079,6 +3101,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 		ir3_print(ir);
 	}
 
+	so->branchstack = ctx->max_stack;
+
 	/* Note that actual_in counts inputs that are not bary.f'd for FS: */
 	if (so->type == MESA_SHADER_VERTEX)
 		so->total_in = actual_in;
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index 63c5d8baaf..99f43cb5ab 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -86,6 +86,11 @@ struct ir3_context {
 
 	unsigned num_arrays;
 
+	/* Tracking for max level of flowcontrol (branchstack) needed
+	 * by a5xx+:
+	 */
+	unsigned stack, max_stack;
+
 	/* a common pattern for indirect addressing is to request the
 	 * same address register multiple times.  To avoid generating
 	 * duplicate instruction sequences (which our backend does not
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index bc47160d6e..418c77ae8b 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -295,6 +295,10 @@ struct ir3_shader_variant {
 	struct ir3_info info;
 	struct ir3 *ir;
 
+	/* Levels of nesting of flow control:
+	 */
+	unsigned branchstack;
+
 	/* the instructions length is in units of instruction groups
 	 * (4 instructions for a3xx, 16 instructions for a4xx.. each
 	 * instruction is 2 dwords):
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
index 97a84b01c0..9c54244457 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
@@ -443,7 +443,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
 			A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
 			0x6 | /* XXX seems to be always set? */
-			A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
+			A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
 			COND(s[VS].v->num_samp > 0, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
 
 	struct ir3_shader_linkage l = {0};
@@ -567,7 +567,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
 			A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
 			A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
 			A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
-			A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
+			A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
 			COND(s[FS].v->num_samp > 0, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));
 
 	OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index 71dadef97e..add2d28b86 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -402,7 +402,7 @@ setup_stateobj(struct fd_ringbuffer *ring,
 	OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(fssz) |
 			A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
 			A6XX_SP_VS_CTRL_REG0_MERGEDREGS |
-			A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
+			A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
 			COND(s[VS].v->num_samp > 0, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
 
 	struct ir3_shader_linkage l = {0};
@@ -524,7 +524,7 @@ setup_stateobj(struct fd_ringbuffer *ring,
 			0x1000000 |
 			A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
 			A6XX_SP_FS_CTRL_REG0_MERGEDREGS |
-			A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
+			A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
 			COND(s[FS].v->num_samp > 0, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
 
 	OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1);




More information about the mesa-commit mailing list