Mesa (master): freedreno/ir3: large const support

Rob Clark robclark at kemper.freedesktop.org
Wed Oct 15 19:52:02 UTC 2014


Module: Mesa
Branch: master
Commit: 652b8fbbbb0132c634c90e4d1fdbca9497b7cd94
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=652b8fbbbb0132c634c90e4d1fdbca9497b7cd94

Author: Rob Clark <robclark at freedesktop.org>
Date:   Wed Oct 15 13:08:00 2014 -0400

freedreno/ir3: large const support

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/a3xx/fd3_program.c |   13 +++++++-----
 src/gallium/drivers/freedreno/freedreno_screen.c |    6 +++++-
 src/gallium/drivers/freedreno/ir3/ir3.c          |    2 +-
 src/gallium/drivers/freedreno/ir3/ir3.h          |    2 +-
 src/gallium/drivers/freedreno/ir3/ir3_compiler.c |   23 +++++++++++++++++-----
 5 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 8de0008..d674e0c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -186,6 +186,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 	enum a3xx_instrbuffermode fpbuffer, vpbuffer;
 	uint32_t fpbuffersz, vpbuffersz, fsoff;
 	uint32_t pos_regid, posz_regid, psize_regid, color_regid;
+	int constmode;
 	int i, j, k;
 
 	vp = fd3_emit_get_vp(emit);
@@ -241,6 +242,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 		fsoff = 256 - fpbuffersz;
 	}
 
+	/* seems like if vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
+	constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
+
 	pos_regid = find_output_regid(vp,
 		ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
 	posz_regid = find_output_regid(fp,
@@ -256,6 +260,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 
 	OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
 	OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+			A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
 			/* NOTE:  I guess SHADERRESTART and CONSTFULLUPDATE maybe
 			 * flush some caches? I think we only need to set those
 			 * bits if we have updated const or shader..
@@ -275,7 +280,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 			A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));
 
 	OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
-	OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
+	OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
 			COND(emit->key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
 			A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
 			A3XX_SP_SP_CTRL_REG_L0MODE(0));
@@ -381,11 +386,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 				A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) |
 				A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
 
-		/* NOTE: I believe VS.CONSTLEN should be <= FS.CONSTOBJOFFSET*/
-		debug_assert(vp->constlen <= 128);
-
 		OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
-		OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
+		OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
+					MAX2(128, vp->constlen)) |
 				A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
 		OUT_RELOC(ring, fp->bo, 0, 0, 0);  /* SP_FS_OBJ_START_REG */
 	}
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index eb2d954..24f360b 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -354,7 +354,11 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
 	case PIPE_SHADER_CAP_MAX_TEMPS:
 		return 64; /* Max native temporaries. */
 	case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
-		return ((screen->gpu_id >= 300) ? 1024 : 64) * sizeof(float[4]);
+		/* NOTE: the limit for a3xx seems to actually be 512, but
+		 * split between VS and FS.  Use a lower limit of 256 to
+		 * avoid getting into impossible situations:
+		 */
+		return ((screen->gpu_id >= 300) ? 256 : 64) * sizeof(float[4]);
 	case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
 		return 1;
 	case PIPE_SHADER_CAP_MAX_PREDS:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index 3da10fb..70d37ff 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -104,7 +104,7 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
 		val.iim_val = reg->iim_val;
 	} else {
 		int8_t components = util_last_bit(reg->wrmask);
-		int8_t max = (reg->num + repeat + components - 1) >> 2;
+		int16_t max = (reg->num + repeat + components - 1) >> 2;
 
 		val.comp = reg->num & 0x3;
 		val.num  = reg->num >> 2;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index b92a57a..d2d3dca 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -47,7 +47,7 @@ struct ir3_info {
 	 */
 	int8_t   max_reg;   /* highest GPR # used by shader */
 	int8_t   max_half_reg;
-	int8_t   max_const;
+	int16_t  max_const;
 };
 
 struct ir3_register {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index 1a5119c..8c4ec88 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -600,11 +600,6 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
 	struct ir3_register *reg;
 	struct ir3_instruction *orig = NULL;
 
-	/* TODO we need to use a mov to temp for const >= 64.. or maybe
-	 * we could use relative addressing..
-	 */
-	compile_assert(ctx, src->Index < 64);
-
 	switch (src->File) {
 	case TGSI_FILE_IMMEDIATE:
 		/* TODO if possible, use actual immediate instead of const.. but
@@ -632,6 +627,24 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
 		break;
 	}
 
+	/* We seem to have 8 bits (6.2) for dst register always, so I think
+	 * it is safe to assume GPR cannot be >=64
+	 *
+	 * cat3 instructions only have 8 bits for src2, but cannot take a
+	 * const for src2
+	 *
+	 * cat5 and cat6 in some cases only have 8 bits, but cannot take a
+	 * const for any src.
+	 *
+	 * Other than that we seem to have 12 bits to encode const src,
+	 * except for cat1 which may only have 11 bits (but that seems like
+	 * a bug)
+	 */
+	if (flags & IR3_REG_CONST)
+		compile_assert(ctx, src->Index < (1 << 9));
+	else
+		compile_assert(ctx, src->Index < (1 << 6));
+
 	if (src->Absolute)
 		flags |= IR3_REG_ABS;
 	if (src->Negate)




More information about the mesa-commit mailing list