Mesa (master): freedreno/a3xx: use INDIRECT state load for shaders

Rob Clark robclark at kemper.freedesktop.org
Sat Sep 14 17:32:50 UTC 2013


Module: Mesa
Branch: master
Commit: 1a42d4ee34d73cbc3e5bff3dcce5a913cd58aaba
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1a42d4ee34d73cbc3e5bff3dcce5a913cd58aaba

Author: Rob Clark <robclark at freedesktop.org>
Date:   Fri Sep  6 18:21:25 2013 -0400

freedreno/a3xx: use INDIRECT state load for shaders

With a debug option to force DIRECT (mainly to make it easier to
capture cmdstream dumps).  Using INDIRECT for large shaders at least
makes a noticeable reduction in CPU load, which helps for CPU-limited
games.

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/a3xx/fd3_program.c |   35 +++++++++++++++++-----
 src/gallium/drivers/freedreno/freedreno_screen.c |    1 +
 src/gallium/drivers/freedreno/freedreno_util.h   |    1 +
 3 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index d84bbe9..b0eec6e 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -186,7 +186,8 @@ emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
 {
 	struct ir3_shader_info *si = &so->info;
 	enum adreno_state_block sb;
-	uint32_t i, *bin;
+	enum adreno_state_src src;
+	uint32_t i, sz, *bin;
 
 	if (so->type == SHADER_VERTEX) {
 		sb = SB_VERT_SHADER;
@@ -194,17 +195,31 @@ emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
 		sb = SB_FRAG_SHADER;
 	}
 
-	// XXX use SS_INDIRECT
-	bin = fd_bo_map(so->bo);
-	OUT_PKT3(ring, CP_LOAD_STATE, 2 + si->sizedwords);
+	if (fd_mesa_debug & FD_DBG_DIRECT) {
+		sz = si->sizedwords;
+		src = SS_DIRECT;
+		bin = fd_bo_map(so->bo);
+	} else {
+		sz = 0;
+		src = SS_INDIRECT;
+		bin = NULL;
+	}
+
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
-			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_SRC(src) |
 			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
 			CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
-	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
-			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
-	for (i = 0; i < si->sizedwords; i++)
+	if (bin) {
+		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
+	} else {
+		OUT_RELOC(ring, so->bo, 0,
+				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
+	}
+	for (i = 0; i < sz; i++) {
 		OUT_RING(ring, bin[i]);
+	}
 }
 
 void
@@ -223,6 +238,10 @@ fd3_program_emit(struct fd_ringbuffer *ring,
 
 	OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
 	OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+			/* NOTE:  I guess SHADERRESTART and CONSTFULLUPDATE maybe
+			 * flush some caches? I think we only need to set those
+			 * bits if we have updated const or shader..
+			 */
 			A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
 			A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
 	OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 7412e3d..eada1af 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -61,6 +61,7 @@ static const struct debug_named_value debug_options[] = {
 		{"dclear",    FD_DBG_DCLEAR, "Mark all state dirty after clear"},
 		{"dgmem",     FD_DBG_DGMEM,  "Mark all state dirty after GMEM tile pass"},
 		{"dscis",     FD_DBG_DSCIS,  "Disable scissor optimization"},
+		{"direct",    FD_DBG_DIRECT, "Force inline (SS_DIRECT) state loads"},
 		DEBUG_NAMED_VALUE_END
 };
 
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index f867233..4c7c78b 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -57,6 +57,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
 #define FD_DBG_DCLEAR   0x04
 #define FD_DBG_DGMEM    0x08
 #define FD_DBG_DSCIS    0x10
+#define FD_DBG_DIRECT   0x20
 extern int fd_mesa_debug;
 
 #define DBG(fmt, ...) \




More information about the mesa-commit mailing list