Mesa (master): freedreno/a3xx: rework vinterp/vpsrepl

Thu Nov 26 17:49:51 UTC 2015

Module: Mesa
Branch: master
Commit: 5adf4a5cdaec98c9195a66485120e8e00c517fc2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5adf4a5cdaec98c9195a66485120e8e00c517fc2

Author: Rob Clark <robclark at freedesktop.org>
Date:   Thu Nov 26 12:18:14 2015 -0500

freedreno/a3xx: rework vinterp/vpsrepl

Make the interpolation / point-sprite replacement mode setup deal with
varying packing.

In a later commit, we switch to packing just the varying components that
are actually used by the frag shader, so we can no longer assume that
every varying is a full vec4 aligned to a vec4 boundary.  Such packed
varyings would badly confuse the previous vinterp/vpsrepl logic.

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/a3xx/fd3_program.c |   49 ++++++++++++++++------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 4ed04b3..01daa0f 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -387,23 +387,27 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 
 		/* figure out VARYING_INTERP / FLAT_SHAD register values: */
 		for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
+			/* NOTE: varyings are packed, so if compmask is 0xb
+			 * then first, second, and fourth component occupy
+			 * three consecutive varying slots:
+			 */
+			unsigned compmask = fp->inputs[j].compmask;
 
 			/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
 			 * instead.. rather than -8 everywhere else..
 			 */
 			uint32_t inloc = fp->inputs[j].inloc - 8;
 
-			/* currently assuming varyings aligned to 4 (not
-			 * packed):
-			 */
-			debug_assert((inloc % 4) == 0);
-
 			if ((fp->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) ||
 					(fp->inputs[j].rasterflat && emit->rasterflat)) {
 				uint32_t loc = inloc;
-				for (i = 0; i < 4; i++, loc++) {
-					vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
-					flatshade[loc / 32] |= 1 << (loc % 32);
+
+				for (i = 0; i < 4; i++) {
+					if (compmask & (1 << i)) {
+						vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
+						flatshade[loc / 32] |= 1 << (loc % 32);
+						loc++;
+					}
 				}
 			}
 
@@ -416,10 +420,31 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 				 * interpolation bits for .zw such that they become .01
 				 */
 				if (emit->sprite_coord_enable & texmask) {
-					vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
-							<< ((inloc % 16) * 2);
-					vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
-					vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+					/* mask is two 2-bit fields, where:
+					 *   '01' -> S
+					 *   '10' -> T
+					 *   '11' -> 1 - T  (flip mode)
+					 */
+					unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001;
+					uint32_t loc = inloc;
+					if (compmask & 0x1) {
+						vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
+						loc++;
+					}
+					if (compmask & 0x2) {
+						vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
+						loc++;
+					}
+					if (compmask & 0x4) {
+						/* .z <- 0.0f */
+						vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
+						loc++;
+					}
+					if (compmask & 0x8) {
+						/* .w <- 1.0f */
+						vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
+						loc++;
+					}
 				}
 			}
 		}