Mesa (master): freedreno/a3xx: deal with optimized tex instructions

Rob Clark robclark at kemper.freedesktop.org
Tue Apr 8 20:07:05 UTC 2014


Module: Mesa
Branch: master
Commit: ee839cc6ef92d37ec6a44e6036e7a2c46172a16a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ee839cc6ef92d37ec6a44e6036e7a2c46172a16a

Author: Rob Clark <robclark at freedesktop.org>
Date:   Tue Apr  8 14:14:43 2014 -0400

freedreno/a3xx: deal with optimized tex instructions

Keep track of whether we actually have any sam instructions in the
resulting shader, rather than relying on TGSI SAMP declarations.  If the
sam instruction is optimized out because its result is not used, we don't
want to emit texture state, etc.  In fact, emitting sampler state and/or
setting the PIXLODENABLE bit when there are no texture fetches seems to
cause a lockup.

In theory this should never happen for a "normal" shader, unless the
state tracker is wonky.  But it is a very real possibility for binning
pass shaders.

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/a3xx/fd3_compiler.c  |   10 +--------
 .../drivers/freedreno/a3xx/fd3_compiler_old.c      |    2 +-
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c      |   18 +++++++++++-----
 src/gallium/drivers/freedreno/a3xx/fd3_program.c   |    6 +++---
 src/gallium/drivers/freedreno/a3xx/fd3_program.h   |    4 ++--
 src/gallium/drivers/freedreno/a3xx/ir3.h           |    4 ++--
 src/gallium/drivers/freedreno/a3xx/ir3_ra.c        |   22 +++++++++++++++++---
 7 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 1d99e5c..911330c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -2054,12 +2054,6 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
 	}
 }
 
-static void
-decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
-{
-	ctx->so->samplers_count++;
-}
-
 /* from TGSI perspective, we actually have inputs.  But most of the "inputs"
  * for a fragment shader are just bary.f instructions.  The *actual* inputs
  * from the hw perspective are the frag_pos and optionally frag_coord and
@@ -2160,8 +2154,6 @@ compile_instructions(struct fd3_compile_context *ctx)
 				decl_out(ctx, decl);
 			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
 				decl_in(ctx, decl);
-			} else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
-				decl_samp(ctx, decl);
 			}
 			break;
 		}
@@ -2320,7 +2312,7 @@ fd3_compile_shader(struct fd3_shader_variant *so,
 	}
 
 	ret = ir3_block_ra(block, so->type, key.half_precision,
-			so->frag_coord, so->frag_face);
+			so->frag_coord, so->frag_face, &so->has_samp);
 	if (ret)
 		goto out;
 
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
index 76de287..ee58591 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
@@ -1417,7 +1417,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
 static void
 decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
 {
-	ctx->so->samplers_count++;
+	ctx->so->has_samp = true;
 }
 
 static void
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 00f1014..b1cf3fd 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -177,7 +177,7 @@ emit_textures(struct fd_ringbuffer *ring,
 				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
 		for (i = 0; i < tex->num_samplers; i++) {
 			static const struct fd3_sampler_stateobj dummy_sampler = {};
-			struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
+			const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
 					fd3_sampler_stateobj(tex->samplers[i]) :
 					&dummy_sampler;
 			OUT_RING(ring, sampler->texsamp0);
@@ -542,11 +542,19 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX))
 		fd_wfi(ctx, ring);
 
-	if (dirty & FD_DIRTY_VERTTEX)
-		emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
+	if (dirty & FD_DIRTY_VERTTEX) {
+		if (vp->has_samp)
+			emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
+		else
+			dirty &= ~FD_DIRTY_VERTTEX;
+	}
 
-	if (dirty & FD_DIRTY_FRAGTEX)
-		emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
+	if (dirty & FD_DIRTY_FRAGTEX) {
+		if (fp->has_samp)
+			emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
+		else
+			dirty &= ~FD_DIRTY_FRAGTEX;
+	}
 
 	ctx->dirty &= ~dirty;
 }
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 09cadf8..b5544e8 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -120,7 +120,7 @@ create_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key)
 			v->inputs_count = 0;
 			v->outputs_count = 0;
 			v->total_in = 0;
-			v->samplers_count = 0;
+			v->has_samp = false;
 			v->immediates_count = 0;
 		}
 	} else {
@@ -397,7 +397,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
 			A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
 			A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
 			A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
-			COND(vp->samplers_count > 0, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
+			COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
 			A3XX_SP_VS_CTRL_REG0_LENGTH(vp->instrlen));
 	OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
 			A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
@@ -475,7 +475,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
 				A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
 				A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
 				A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
-				COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
+				COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
 				A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
 		OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
 				A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index 8d4fd57..e0866c1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -107,8 +107,8 @@ struct fd3_shader_variant {
 
 	unsigned total_in;       /* sum of inputs (scalar) */
 
-	/* samplers: */
-	unsigned samplers_count;
+	/* do we have one or more texture sample instructions: */
+	bool has_samp;
 
 	/* const reg # of first immediate, ie. 1 == c1
 	 * (not regid, because TGSI thinks in terms of vec4 registers,
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.h b/src/gallium/drivers/freedreno/a3xx/ir3.h
index 0905234..872f478 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3.h
+++ b/src/gallium/drivers/freedreno/a3xx/ir3.h
@@ -385,8 +385,8 @@ void ir3_block_sched(struct ir3_block *block);
 
 /* register assignment: */
 int ir3_block_ra(struct ir3_block *block, enum shader_t type,
-		bool half_precision, bool frag_coord, bool frag_face);
-
+		bool half_precision, bool frag_coord, bool frag_face,
+		bool *has_samp);
 
 #ifndef ARRAY_SIZE
 #  define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
index 4e48ede..57c68c7 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
@@ -56,6 +56,7 @@ struct ir3_ra_ctx {
 	bool half_precision;
 	bool frag_coord;
 	bool frag_face;
+	bool has_samp;
 	int cnt;
 	bool error;
 };
@@ -654,8 +655,17 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 		if (is_sfu(n))
 			regmask_set(&needs_ss, n->regs[0]);
 
-		if (is_tex(n))
+		if (is_tex(n)) {
+			/* note that at least one tex instruction is still present
+			 * in the shader; everything else only cares whether there
+			 * are any or not.  We do this here, rather than when we
+			 * encounter a SAMP decl, because (especially in a binning
+			 * pass shader) the samp instruction(s) could get
+			 * eliminated if the result is not used.
+			 */
+			ctx->has_samp = true;
 			regmask_set(&needs_sy, n->regs[0]);
+		}
 
 		/* both tex/sfu appear to not always immediately consume
 		 * their src register(s):
@@ -730,7 +740,8 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 }
 
 int ir3_block_ra(struct ir3_block *block, enum shader_t type,
-		bool half_precision, bool frag_coord, bool frag_face)
+		bool half_precision, bool frag_coord, bool frag_face,
+		bool *has_samp)
 {
 	struct ir3_ra_ctx ctx = {
 			.block = block,
@@ -739,6 +750,11 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type,
 			.frag_coord = frag_coord,
 			.frag_face = frag_face,
 	};
+	int ret;
+
 	ir3_shader_clear_mark(block->shader);
-	return block_ra(&ctx, block);
+	ret = block_ra(&ctx, block);
+	*has_samp = ctx.has_samp;
+
+	return ret;
 }




More information about the mesa-commit mailing list