Mesa (master): freedreno/a6xx: de-duplicate vinterp/vpsrepl state building

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jun 24 22:54:58 UTC 2020


Module: Mesa
Branch: master
Commit: 68d6aa3dd02a33b650aeb66fbca83072a52202a1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=68d6aa3dd02a33b650aeb66fbca83072a52202a1

Author: Rob Clark <robdclark at chromium.org>
Date:   Mon Jun 22 08:39:12 2020 -0700

freedreno/a6xx: de-duplicate vinterp/vpsrepl state building

When we flip the texcoord patch, we'll set up the PNTC input slot in the
pre-built interp stateobj, rather than in a stateobj that is built at
draw time (the slow path).  But rather than duplicating more of the
slow-path logic, refactor it out into a helper that is reused in both
cases.

Signed-off-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5595>

---

 src/gallium/drivers/freedreno/a6xx/fd6_program.c | 163 ++++++++++-------------
 1 file changed, 71 insertions(+), 92 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index be561a040c6..7b2ec2388c7 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -856,42 +856,15 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
 		fd6_emit_immediates(screen, fs, ring);
 }
 
+static void emit_interp_state(struct fd_ringbuffer *ring, struct ir3_shader_variant *fs,
+		bool rasterflat, bool sprite_coord_mode, uint32_t sprite_coord_enable);
+
 static struct fd_ringbuffer *
 create_interp_stateobj(struct fd_context *ctx, struct fd6_program_state *state)
 {
-	const struct ir3_shader_variant *fs = state->fs;
 	struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 18 * 4);
-	uint32_t vinterp[8] = {0};
 
-	/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
-	for (int j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
-		/* NOTE: varyings are packed, so if compmask is 0xb
-		 * then first, third, and fourth component occupy
-		 * three consecutive varying slots:
-		 */
-		unsigned compmask = fs->inputs[j].compmask;
-
-		uint32_t inloc = fs->inputs[j].inloc;
-
-		if (fs->inputs[j].interpolate == INTERP_MODE_FLAT) {
-			uint32_t loc = inloc;
-
-			for (int i = 0; i < 4; i++) {
-				if (compmask & (1 << i)) {
-					vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
-					loc++;
-				}
-			}
-		}
-	}
-
-	OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
-	for (int i = 0; i < 8; i++)
-		OUT_RING(ring, vinterp[i]);    /* VPC_VARYING_INTERP[i].MODE */
-
-	OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
-	for (int i = 0; i < 8; i++)
-		OUT_RING(ring, 0x00000000);    /* VPC_VARYING_PS_REPL[i] */
+	emit_interp_state(ring, state->fs, false, false, 0);
 
 	return ring;
 }
@@ -912,83 +885,89 @@ fd6_program_interp_state(struct fd6_emit *emit)
 		struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
 				emit->ctx->batch->submit, 18 * 4, FD_RINGBUFFER_STREAMING);
 
-		/* slow-path: */
-		struct ir3_shader_variant *fs = state->fs;
-		uint32_t vinterp[8], vpsrepl[8];
+		emit_interp_state(ring, state->fs, emit->rasterflat,
+				emit->sprite_coord_mode, emit->sprite_coord_enable);
 
-		memset(vinterp, 0, sizeof(vinterp));
-		memset(vpsrepl, 0, sizeof(vpsrepl));
+		return ring;
+	}
+}
 
-		for (int j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
+static void
+emit_interp_state(struct fd_ringbuffer *ring, struct ir3_shader_variant *fs,
+		bool rasterflat, bool sprite_coord_mode, uint32_t sprite_coord_enable)
+{
+	uint32_t vinterp[8], vpsrepl[8];
 
-			/* NOTE: varyings are packed, so if compmask is 0xb
-			 * then first, third, and fourth component occupy
-			 * three consecutive varying slots:
-			 */
-			unsigned compmask = fs->inputs[j].compmask;
+	memset(vinterp, 0, sizeof(vinterp));
+	memset(vpsrepl, 0, sizeof(vpsrepl));
 
-			uint32_t inloc = fs->inputs[j].inloc;
+	for (int j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
 
-			if ((fs->inputs[j].interpolate == INTERP_MODE_FLAT) ||
-					(fs->inputs[j].rasterflat && emit->rasterflat)) {
-				uint32_t loc = inloc;
+		/* NOTE: varyings are packed, so if compmask is 0xb
+		 * then first, third, and fourth component occupy
+		 * three consecutive varying slots:
+		 */
+		unsigned compmask = fs->inputs[j].compmask;
+
+		uint32_t inloc = fs->inputs[j].inloc;
 
-				for (int i = 0; i < 4; i++) {
-					if (compmask & (1 << i)) {
-						vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
-						loc++;
-					}
+		if ((fs->inputs[j].interpolate == INTERP_MODE_FLAT) ||
+				(fs->inputs[j].rasterflat && rasterflat)) {
+			uint32_t loc = inloc;
+
+			for (int i = 0; i < 4; i++) {
+				if (compmask & (1 << i)) {
+					vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
+					loc++;
 				}
 			}
+		}
 
-			gl_varying_slot slot = fs->inputs[j].slot;
+		gl_varying_slot slot = fs->inputs[j].slot;
 
-			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
-			if (slot >= VARYING_SLOT_VAR0) {
-				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
-				/* Replace the .xy coordinates with S/T from the point sprite. Set
-				 * interpolation bits for .zw such that they become .01
+		/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
+		if (slot >= VARYING_SLOT_VAR0) {
+			unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
+			/* Replace the .xy coordinates with S/T from the point sprite. Set
+			 * interpolation bits for .zw such that they become .01
+			 */
+			if (sprite_coord_enable & texmask) {
+				/* mask is two 2-bit fields, where:
+				 *   '01' -> S
+				 *   '10' -> T
+				 *   '11' -> 1 - T  (flip mode)
 				 */
-				if (emit->sprite_coord_enable & texmask) {
-					/* mask is two 2-bit fields, where:
-					 *   '01' -> S
-					 *   '10' -> T
-					 *   '11' -> 1 - T  (flip mode)
-					 */
-					unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001;
-					uint32_t loc = inloc;
-					if (compmask & 0x1) {
-						vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
-						loc++;
-					}
-					if (compmask & 0x2) {
-						vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
-						loc++;
-					}
-					if (compmask & 0x4) {
-						/* .z <- 0.0f */
-						vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
-						loc++;
-					}
-					if (compmask & 0x8) {
-						/* .w <- 1.0f */
-						vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
-						loc++;
-					}
+				unsigned mask = sprite_coord_mode ? 0b1101 : 0b1001;
+				uint32_t loc = inloc;
+				if (compmask & 0x1) {
+					vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
+					loc++;
+				}
+				if (compmask & 0x2) {
+					vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
+					loc++;
+				}
+				if (compmask & 0x4) {
+					/* .z <- 0.0f */
+					vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
+					loc++;
+				}
+				if (compmask & 0x8) {
+					/* .w <- 1.0f */
+					vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
+					loc++;
 				}
 			}
 		}
+	}
 
-		OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
-		for (int i = 0; i < 8; i++)
-			OUT_RING(ring, vinterp[i]);     /* VPC_VARYING_INTERP[i].MODE */
-
-		OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
-		for (int i = 0; i < 8; i++)
-			OUT_RING(ring, vpsrepl[i]);     /* VPC_VARYING_PS_REPL[i] */
+	OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
+	for (int i = 0; i < 8; i++)
+		OUT_RING(ring, vinterp[i]);     /* VPC_VARYING_INTERP[i].MODE */
 
-		return ring;
-	}
+	OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
+	for (int i = 0; i < 8; i++)
+		OUT_RING(ring, vpsrepl[i]);     /* VPC_VARYING_PS_REPL[i] */
 }
 
 static struct ir3_program_state *



More information about the mesa-commit mailing list