Mesa (master): freedreno/ir3: move emit_const to ir3

Rob Clark robclark at kemper.freedesktop.org
Mon Jul 27 17:51:14 UTC 2015


Module: Mesa
Branch: master
Commit: 56462a30080c1f25a81ae566d59a25d2ad6bb809
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=56462a30080c1f25a81ae566d59a25d2ad6bb809

Author: Rob Clark <robclark at freedesktop.org>
Date:   Fri Jul 24 13:07:33 2015 -0400

freedreno/ir3: move emit_const to ir3

Details of the cmdstream packets are different between a3xx and a4xx,
but the logic about the layout of const registers is the same, as that
is dictated by the ir3 shader compiler.  So rather than duplicating
logic that is tightly coupled to ir3 between a3xx and a4xx, move this
into ir3 and use per-generation callbacks to build the cmdstream
packets.

This should make it easier to pass additional const regs (such as for
transform feedback).  And it also keeps the layout internal to ir3 in
case we want to make the layout more dynamic some day.

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/a3xx/fd3_context.c  |    1 +
 src/gallium/drivers/freedreno/a3xx/fd3_draw.c     |    2 +-
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c     |  156 ++++++---------------
 src/gallium/drivers/freedreno/a3xx/fd3_emit.h     |    6 +-
 src/gallium/drivers/freedreno/a4xx/fd4_context.c  |    1 +
 src/gallium/drivers/freedreno/a4xx/fd4_draw.c     |    2 +-
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c     |  156 ++++++---------------
 src/gallium/drivers/freedreno/a4xx/fd4_emit.h     |    6 +-
 src/gallium/drivers/freedreno/freedreno_context.h |    9 +-
 src/gallium/drivers/freedreno/freedreno_util.h    |    1 +
 src/gallium/drivers/freedreno/ir3/ir3_shader.c    |  148 +++++++++++++++++++
 src/gallium/drivers/freedreno/ir3/ir3_shader.h    |    4 +
 12 files changed, 263 insertions(+), 229 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
index 8441898..dc33783 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -121,6 +121,7 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv)
 	fd3_gmem_init(pctx);
 	fd3_texture_init(pctx);
 	fd3_prog_init(pctx);
+	fd3_emit_init(pctx);
 
 	pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv);
 	if (!pctx)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index 070ed43..fc30d48 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -345,7 +345,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
 
 	fd3_emit_vertex_bufs(ring, &emit);
 
-	fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+	fd3_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
 
 	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
 	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 07cc226..9032366 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -43,19 +43,26 @@
 #include "fd3_format.h"
 #include "fd3_zsa.h"
 
+static const enum adreno_state_block sb[] = {
+	[SHADER_VERTEX]   = SB_VERT_SHADER,
+	[SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
 /* regid:          base const register
  * prsc or dwords: buffer containing constant values
  * sizedwords:     size of const value buffer
  */
 void
-fd3_emit_constant(struct fd_ringbuffer *ring,
-		enum adreno_state_block sb,
+fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
 		uint32_t regid, uint32_t offset, uint32_t sizedwords,
 		const uint32_t *dwords, struct pipe_resource *prsc)
 {
 	uint32_t i, sz;
 	enum adreno_state_src src;
 
+	debug_assert((regid % 4) == 0);
+	debug_assert((sizedwords % 4) == 0);
+
 	if (prsc) {
 		sz = 0;
 		src = SS_INDIRECT;
@@ -67,7 +74,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
 			CP_LOAD_STATE_0_STATE_SRC(src) |
-			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
 			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
 	if (prsc) {
 		struct fd_bo *bo = fd_resource(prsc)->bo;
@@ -84,89 +91,31 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
 }
 
 static void
-emit_constants(struct fd_ringbuffer *ring,
-		enum adreno_state_block sb,
-		struct fd_constbuf_stateobj *constbuf,
-		struct ir3_shader_variant *shader,
-		bool emit_immediates)
+fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+		uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
 {
-	uint32_t enabled_mask = constbuf->enabled_mask;
-	uint32_t max_const;
-	int i;
-
-	// XXX TODO only emit dirty consts.. but we need to keep track if
-	// they are clobbered by a clear, gmem2mem, or mem2gmem..
-	constbuf->dirty_mask = enabled_mask;
-
-	/* in particular, with binning shader we may end up with unused
-	 * consts, ie. we could end up w/ constlen that is smaller
-	 * than first_immediate.  In that case truncate the user consts
-	 * early to avoid HLSQ lockup caused by writing too many consts
-	 */
-	max_const = MIN2(shader->first_driver_param, shader->constlen);
-
-	/* emit user constants: */
-	if (enabled_mask & 1) {
-		const unsigned index = 0;
-		struct pipe_constant_buffer *cb = &constbuf->cb[index];
-		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
-
-		// I expect that size should be a multiple of vec4's:
-		assert(size == align(size, 4));
-
-		/* and even if the start of the const buffer is before
-		 * first_immediate, the end may not be:
-		 */
-		size = MIN2(size, 4 * max_const);
-
-		if (size && constbuf->dirty_mask & (1 << index)) {
-			fd3_emit_constant(ring, sb, 0,
-							  cb->buffer_offset, size,
-							  cb->user_buffer, cb->buffer);
-			constbuf->dirty_mask &= ~(1 << index);
-		}
-
-		enabled_mask &= ~(1 << index);
-	}
-
-	if (shader->constlen > shader->first_driver_param) {
-		uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
-		/* emit ubos: */
-		OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
-		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) |
-				 CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
-				 CP_LOAD_STATE_0_STATE_BLOCK(sb) |
-				 CP_LOAD_STATE_0_NUM_UNIT(params * 2));
-		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
-				 CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
-		for (i = 1; i <= params * 4; i++) {
-			struct pipe_constant_buffer *cb = &constbuf->cb[i];
-			assert(!cb->user_buffer);
-			if ((enabled_mask & (1 << i)) && cb->buffer)
-				OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
-			else
-				OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
-		}
-	}
+	uint32_t i;
 
-	/* emit shader immediates: */
-	if (shader && emit_immediates) {
-		int size = shader->immediates_count;
-		uint32_t base = shader->first_immediate;
+	debug_assert((regid % 4) == 0);
+	debug_assert((num % 4) == 0);
 
-		/* truncate size to avoid writing constants that shader
-		 * does not use:
-		 */
-		size = MIN2(size + base, shader->constlen) - base;
-
-		/* convert out of vec4: */
-		base *= 4;
-		size *= 4;
-
-		if (size > 0) {
-			fd3_emit_constant(ring, sb, base,
-				0, size, shader->immediates[0].val, NULL);
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + num);
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
+			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+			CP_LOAD_STATE_0_NUM_UNIT(num/2));
+	OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+			CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+	for (i = 0; i < num; i++) {
+		if (bos[i]) {
+			if (write) {
+				OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
+			} else {
+				OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+			}
+		} else {
+			OUT_RING(ring, 0xbad00000 | (i << 16));
 		}
 	}
 }
@@ -669,33 +618,12 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
 	OUT_RING(ring, HLSQ_FLUSH);
 
-	if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
-			/* evil hack to deal sanely with clear path: */
-			(emit->prog == &ctx->prog)) {
-		fd_wfi(ctx, ring);
-		emit_constants(ring,  SB_VERT_SHADER,
-				&ctx->constbuf[PIPE_SHADER_VERTEX],
-				vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
-		if (!emit->key.binning_pass) {
-			emit_constants(ring, SB_FRAG_SHADER,
-					&ctx->constbuf[PIPE_SHADER_FRAGMENT],
-					fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
-		}
-	}
-
-	/* emit driver params every time */
-	if (emit->info && emit->prog == &ctx->prog) {
-		uint32_t vertex_params[4] = {
-			emit->info->indexed ? emit->info->index_bias : emit->info->start,
-			0,
-			0,
-			0
-		};
-		if (vp->constlen >= vp->first_driver_param + 4) {
-			fd3_emit_constant(ring, SB_VERT_SHADER,
-							  (vp->first_driver_param + 4) * 4,
-							  0, 4, vertex_params, NULL);
-		}
+	if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
+		ir3_emit_consts(vp, ring, emit->info, dirty);
+		if (!emit->key.binning_pass)
+			ir3_emit_consts(fp, ring, emit->info, dirty);
+		/* mark clean after emitting consts: */
+		ctx->prog.dirty = 0;
 	}
 
 	if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
@@ -930,3 +858,11 @@ fd3_emit_restore(struct fd_context *ctx)
 
 	ctx->needs_rb_fbd = true;
 }
+
+void
+fd3_emit_init(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->emit_const = fd3_emit_const;
+	ctx->emit_const_bo = fd3_emit_const_bo;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index 8f21919..7956547 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -37,10 +37,8 @@
 #include "ir3_shader.h"
 
 struct fd_ringbuffer;
-enum adreno_state_block;
 
-void fd3_emit_constant(struct fd_ringbuffer *ring,
-		enum adreno_state_block sb,
+void fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
 		uint32_t regid, uint32_t offset, uint32_t sizedwords,
 		const uint32_t *dwords, struct pipe_resource *prsc);
 
@@ -90,4 +88,6 @@ void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
 void fd3_emit_restore(struct fd_context *ctx);
 
+void fd3_emit_init(struct pipe_context *pctx);
+
 #endif /* FD3_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
index 6e109b6..e172d35 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
@@ -119,6 +119,7 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv)
 	fd4_gmem_init(pctx);
 	fd4_texture_init(pctx);
 	fd4_prog_init(pctx);
+	fd4_emit_init(pctx);
 
 	pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv);
 	if (!pctx)
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index d070f5f..ff1dfdc 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -295,7 +295,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
 	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW3 */
 
 	/* until fastclear works: */
-	fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+	fd4_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
 
 	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index f3e1cce..4462a82 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -43,19 +43,26 @@
 #include "fd4_format.h"
 #include "fd4_zsa.h"
 
+static const enum adreno_state_block sb[] = {
+	[SHADER_VERTEX]   = SB_VERT_SHADER,
+	[SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
 /* regid:          base const register
  * prsc or dwords: buffer containing constant values
  * sizedwords:     size of const value buffer
  */
 void
-fd4_emit_constant(struct fd_ringbuffer *ring,
-		enum adreno_state_block sb,
+fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
 		uint32_t regid, uint32_t offset, uint32_t sizedwords,
 		const uint32_t *dwords, struct pipe_resource *prsc)
 {
 	uint32_t i, sz;
 	enum adreno_state_src src;
 
+	debug_assert((regid % 4) == 0);
+	debug_assert((sizedwords % 4) == 0);
+
 	if (prsc) {
 		sz = 0;
 		src = 0x2;  // TODO ??
@@ -67,7 +74,7 @@ fd4_emit_constant(struct fd_ringbuffer *ring,
 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
 			CP_LOAD_STATE_0_STATE_SRC(src) |
-			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
 			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
 	if (prsc) {
 		struct fd_bo *bo = fd_resource(prsc)->bo;
@@ -84,89 +91,31 @@ fd4_emit_constant(struct fd_ringbuffer *ring,
 }
 
 static void
-emit_constants(struct fd_ringbuffer *ring,
-		enum adreno_state_block sb,
-		struct fd_constbuf_stateobj *constbuf,
-		struct ir3_shader_variant *shader,
-		bool emit_immediates)
+fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+		uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
 {
-	uint32_t enabled_mask = constbuf->enabled_mask;
-	uint32_t max_const;
-	int i;
-
-	// XXX TODO only emit dirty consts.. but we need to keep track if
-	// they are clobbered by a clear, gmem2mem, or mem2gmem..
-	constbuf->dirty_mask = enabled_mask;
-
-	/* in particular, with binning shader we may end up with unused
-	 * consts, ie. we could end up w/ constlen that is smaller
-	 * than first_immediate.  In that case truncate the user consts
-	 * early to avoid HLSQ lockup caused by writing too many consts
-	 */
-	max_const = MIN2(shader->first_driver_param, shader->constlen);
-
-	/* emit user constants: */
-	if (enabled_mask & 1) {
-		const unsigned index = 0;
-		struct pipe_constant_buffer *cb = &constbuf->cb[index];
-		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
-
-		// I expect that size should be a multiple of vec4's:
-		assert(size == align(size, 4));
-
-		/* and even if the start of the const buffer is before
-		 * first_immediate, the end may not be:
-		 */
-		size = MIN2(size, 4 * max_const);
-
-		if (size && (constbuf->dirty_mask & (1 << index))) {
-			fd4_emit_constant(ring, sb, 0,
-					cb->buffer_offset, size,
-					cb->user_buffer, cb->buffer);
-			constbuf->dirty_mask &= ~(1 << index);
-		}
+	uint32_t i;
 
-		enabled_mask &= ~(1 << index);
-	}
-
-	/* emit ubos: */
-	if (shader->constlen > shader->first_driver_param) {
-		uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
-		OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
-		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) |
-				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
-				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
-				CP_LOAD_STATE_0_NUM_UNIT(params));
-		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
-				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
-		for (i = 1; i <= params * 4; i++) {
-			struct pipe_constant_buffer *cb = &constbuf->cb[i];
-			assert(!cb->user_buffer);
-			if ((enabled_mask & (1 << i)) && cb->buffer)
-				OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
-			else
-				OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
-		}
-	}
+	debug_assert((regid % 4) == 0);
+	debug_assert((num % 4) == 0);
 
-	/* emit shader immediates: */
-	if (shader && emit_immediates) {
-		int size = shader->immediates_count;
-		uint32_t base = shader->first_immediate;
-
-		/* truncate size to avoid writing constants that shader
-		 * does not use:
-		 */
-		size = MIN2(size + base, shader->constlen) - base;
-
-		/* convert out of vec4: */
-		base *= 4;
-		size *= 4;
-
-		if (size > 0) {
-			fd4_emit_constant(ring, sb, base,
-				0, size, shader->immediates[0].val, NULL);
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + num);
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
+			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+			CP_LOAD_STATE_0_NUM_UNIT(num/4));
+	OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+			CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+	for (i = 0; i < num; i++) {
+		if (bos[i]) {
+			if (write) {
+				OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
+			} else {
+				OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+			}
+		} else {
+			OUT_RING(ring, 0xbad00000 | (i << 16));
 		}
 	}
 }
@@ -520,33 +469,12 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	if (dirty & FD_DIRTY_PROG)
 		fd4_program_emit(ring, emit);
 
-	if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
-			/* evil hack to deal sanely with clear path: */
-			(emit->prog == &ctx->prog)) {
-		fd_wfi(ctx, ring);
-		emit_constants(ring,  SB_VERT_SHADER,
-				&ctx->constbuf[PIPE_SHADER_VERTEX],
-				vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
-		if (!emit->key.binning_pass) {
-			emit_constants(ring, SB_FRAG_SHADER,
-					&ctx->constbuf[PIPE_SHADER_FRAGMENT],
-					fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
-		}
-	}
-
-	/* emit driver params every time */
-	if (emit->info && emit->prog == &ctx->prog) {
-		uint32_t vertex_params[4] = {
-			emit->info->indexed ? emit->info->index_bias : emit->info->start,
-			0,
-			0,
-			0
-		};
-		if (vp->constlen >= vp->first_driver_param + 4) {
-			fd4_emit_constant(ring, SB_VERT_SHADER,
-							  (vp->first_driver_param + 4) * 4,
-							  0, 4, vertex_params, NULL);
-		}
+	if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
+		ir3_emit_consts(vp, ring, emit->info, dirty);
+		if (!emit->key.binning_pass)
+			ir3_emit_consts(fp, ring, emit->info, dirty);
+		/* mark clean after emitting consts: */
+		ctx->prog.dirty = 0;
 	}
 
 	if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
@@ -767,3 +695,11 @@ fd4_emit_restore(struct fd_context *ctx)
 
 	ctx->needs_rb_fbd = true;
 }
+
+void
+fd4_emit_init(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->emit_const = fd4_emit_const;
+	ctx->emit_const_bo = fd4_emit_const_bo;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
index 7d059f8..7debee5 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
@@ -37,10 +37,8 @@
 #include "ir3_shader.h"
 
 struct fd_ringbuffer;
-enum adreno_state_block;
 
-void fd4_emit_constant(struct fd_ringbuffer *ring,
-		enum adreno_state_block sb,
+void fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
 		uint32_t regid, uint32_t offset, uint32_t sizedwords,
 		const uint32_t *dwords, struct pipe_resource *prsc);
 
@@ -96,4 +94,6 @@ void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
 void fd4_emit_restore(struct fd_context *ctx);
 
+void fd4_emit_init(struct pipe_context *pctx);
+
 #endif /* FD4_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index c2d9834..bc5267a 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -351,9 +351,16 @@ struct fd_context {
 	void (*emit_sysmem_prep)(struct fd_context *ctx);
 
 	/* draw: */
-	void (*draw_vbo)(struct fd_context *pctx, const struct pipe_draw_info *info);
+	void (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info);
 	void (*clear)(struct fd_context *ctx, unsigned buffers,
 			const union pipe_color_union *color, double depth, unsigned stencil);
+
+	/* constant emit:  (note currently not used/needed for a2xx) */
+	void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type,
+			uint32_t regid, uint32_t offset, uint32_t sizedwords,
+			const uint32_t *dwords, struct pipe_resource *prsc);
+	void (*emit_const_bo)(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+			uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets);
 };
 
 static inline struct fd_context *
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 1b78763..6aec258 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -40,6 +40,7 @@
 #include "util/u_dynarray.h"
 #include "util/u_pack_color.h"
 
+#include "disasm.h"
 #include "adreno_common.xml.h"
 #include "adreno_pm4.xml.h"
 
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index d402772..75425e9 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -412,3 +412,151 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
 
 	debug_printf("\n");
 }
+
+/* This has to reach into the fd_context a bit more than the rest of
+ * ir3, but it needs to be aligned with the compiler, so both agree
+ * on which const regs hold what.  And the logic is identical between
+ * a3xx/a4xx, the only difference is small details in the actual
+ * CP_LOAD_STATE packets (which is handled inside the generation
+ * specific ctx->emit_const(_bo)() fxns)
+ */
+
+#include "freedreno_resource.h"
+
+static void
+emit_user_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		struct fd_constbuf_stateobj *constbuf)
+{
+	struct fd_context *ctx = fd_context(v->shader->pctx);
+	const unsigned index = 0;     /* user consts are index 0 */
+	/* TODO save/restore dirty_mask for binning pass instead: */
+	uint32_t dirty_mask = constbuf->enabled_mask;
+
+	if (dirty_mask & (1 << index)) {
+		struct pipe_constant_buffer *cb = &constbuf->cb[index];
+		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+
+		/* in particular, with binning shader we may end up with
+		 * unused consts, ie. we could end up w/ constlen that is
+		 * smaller than first_driver_param.  In that case truncate
+		 * the user consts early to avoid HLSQ lockup caused by
+		 * writing too many consts
+		 */
+		uint32_t max_const = MIN2(v->first_driver_param, v->constlen);
+
+		// I expect that size should be a multiple of vec4's:
+		assert(size == align(size, 4));
+
+		/* and even if the start of the const buffer is before
+		 * first_immediate, the end may not be:
+		 */
+		size = MIN2(size, 4 * max_const);
+
+		if (size > 0) {
+			fd_wfi(ctx, ring);
+			ctx->emit_const(ring, v->type, 0,
+					cb->buffer_offset, size,
+					cb->user_buffer, cb->buffer);
+			constbuf->dirty_mask &= ~(1 << index);
+		}
+	}
+}
+
+static void
+emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		struct fd_constbuf_stateobj *constbuf)
+{
+	if (v->constlen > v->first_driver_param) {
+		struct fd_context *ctx = fd_context(v->shader->pctx);
+		uint32_t offset = v->first_driver_param;  /* UBOs after user consts */
+		uint32_t params = MIN2(4, v->constlen - v->first_driver_param) * 4;
+		uint32_t offsets[params];
+		struct fd_bo *bos[params];
+
+		for (uint32_t i = 0; i < params; i++) {
+			const uint32_t index = i + 1;   /* UBOs start at index 1 */
+			struct pipe_constant_buffer *cb = &constbuf->cb[index];
+			assert(!cb->user_buffer);
+
+			if ((constbuf->enabled_mask & (1 << index)) && cb->buffer) {
+				offsets[i] = cb->buffer_offset;
+				bos[i] = fd_resource(cb->buffer)->bo;
+			} else {
+				offsets[i] = 0;
+				bos[i] = NULL;
+			}
+		}
+
+		fd_wfi(ctx, ring);
+		ctx->emit_const_bo(ring, v->type, false, offset * 4, params, bos, offsets);
+	}
+}
+
+static void
+emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
+{
+	struct fd_context *ctx = fd_context(v->shader->pctx);
+	int size = v->immediates_count;
+	uint32_t base = v->first_immediate;
+
+	/* truncate size to avoid writing constants that shader
+	 * does not use:
+	 */
+	size = MIN2(size + base, v->constlen) - base;
+
+	/* convert out of vec4: */
+	base *= 4;
+	size *= 4;
+
+	if (size > 0) {
+		fd_wfi(ctx, ring);
+		ctx->emit_const(ring, v->type, base,
+			0, size, v->immediates[0].val, NULL);
+	}
+}
+
+void
+ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		const struct pipe_draw_info *info, uint32_t dirty)
+{
+	struct fd_context *ctx = fd_context(v->shader->pctx);
+
+	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
+		struct fd_constbuf_stateobj *constbuf;
+		bool shader_dirty;
+
+		if (v->type == SHADER_VERTEX) {
+			constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX];
+			shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_VP);
+		} else if (v->type == SHADER_FRAGMENT) {
+			constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT];
+			shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_FP);
+		} else {
+			unreachable("bad shader type");
+			return;
+		}
+
+		emit_user_consts(v, ring, constbuf);
+		emit_ubos(v, ring, constbuf);
+		if (shader_dirty)
+			emit_immediates(v, ring);
+	}
+
+	/* emit driver params every time: */
+	/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
+	if (info && (v->type == SHADER_VERTEX)) {
+		uint32_t offset = v->first_driver_param + 4;  /* driver params after UBOs */
+		if (v->constlen >= offset) {
+			uint32_t vertex_params[4] = {
+				info->indexed ? info->index_bias : info->start,
+				0,
+				0,
+				0
+			};
+
+			fd_wfi(ctx, ring);
+			ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
+					ARRAY_SIZE(vertex_params), vertex_params, NULL);
+		}
+	}
+}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 5365d56..ef16d7b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -224,6 +224,10 @@ struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
 		struct ir3_shader_key key);
 void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin);
 
+struct fd_ringbuffer;
+void ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		const struct pipe_draw_info *info, uint32_t dirty);
+
 static inline const char *
 ir3_shader_stage(struct ir3_shader *shader)
 {




More information about the mesa-commit mailing list