Mesa (master): panfrost/midgard: Broadcast swizzle

Thu Jun 20 18:52:54 UTC 2019

Module: Mesa
Branch: master
Commit: 195e297a92a51c9f6ad4ff75b37b8120d70e61d5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=195e297a92a51c9f6ad4ff75b37b8120d70e61d5

Author: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Date:   Wed Jun 19 07:23:27 2019 -0700

panfrost/midgard: Broadcast swizzle

Fixes regression in shaders using ball/etc by explicitly passing through
the number of channels in the NIR op and broadcasting the last
components of the channel appropriately, as the Midgard ops are all vec4
implicitly but NIR can be vec2/3.

v2: Don't also regress every other swizzle in Equestria.

v3: Don't regress the swizzles at Canterlot High either.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Acked-by: Tomeu Vizoso <tomeu.vizoso at collabora.com>

---

 .../drivers/panfrost/midgard/midgard_compile.c     | 50 ++++++++++++++++------
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
index 1cc551b603c..baf637d666a 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@@ -139,10 +139,23 @@ mask_of(unsigned nr_comp)
  * the corresponding Midgard source */
 
 static midgard_vector_alu_src
-vector_alu_modifiers(nir_alu_src *src, bool is_int)
+vector_alu_modifiers(nir_alu_src *src, bool is_int, unsigned broadcast_count)
 {
         if (!src) return blank_alu_src;
 
+        /* Figure out how many components there are so we can adjust the
+         * swizzle.  Specifically we want to broadcast the last channel so
+         * things like ball2/3 work
+         */
+
+        if (broadcast_count) {
+                uint8_t last_component = src->swizzle[broadcast_count - 1];
+
+                for (unsigned c = broadcast_count; c < NIR_MAX_VEC_COMPONENTS; ++c) {
+                        src->swizzle[c] = last_component;
+                }
+        }
+
         midgard_vector_alu_src alu_src = {
                 .rep_low = 0,
                 .rep_high = 0,
@@ -698,6 +711,12 @@ emit_indirect_offset(compiler_context *ctx, nir_src *src)
 	case nir_op_##nir: \
 		op = midgard_alu_op_##_op; \
 		break;
+
+#define ALU_CASE_BCAST(nir, _op, count) \
+        case nir_op_##nir: \
+                op = midgard_alu_op_##_op; \
+                broadcast_swizzle = count; \
+                break;
 static bool
 nir_is_fzero_constant(nir_src src)
 {
@@ -730,6 +749,13 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 
         unsigned op;
 
+        /* Number of components valid to check for the instruction (the rest
+         * will be forced to the last), or 0 to use as-is. Relevant as
+         * ball-type instructions have a channel count in NIR but are all vec4
+         * in Midgard */
+
+        unsigned broadcast_swizzle = 0;
+
         switch (instr->op) {
                 ALU_CASE(fadd, fadd);
                 ALU_CASE(fmul, fmul);
@@ -805,20 +831,20 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
                 ALU_CASE(ishr, iasr);
                 ALU_CASE(ushr, ilsr);
 
-                ALU_CASE(b32all_fequal2, fball_eq);
-                ALU_CASE(b32all_fequal3, fball_eq);
+                ALU_CASE_BCAST(b32all_fequal2, fball_eq, 2);
+                ALU_CASE_BCAST(b32all_fequal3, fball_eq, 3);
                 ALU_CASE(b32all_fequal4, fball_eq);
 
-                ALU_CASE(b32any_fnequal2, fbany_neq);
-                ALU_CASE(b32any_fnequal3, fbany_neq);
+                ALU_CASE_BCAST(b32any_fnequal2, fbany_neq, 2);
+                ALU_CASE_BCAST(b32any_fnequal3, fbany_neq, 3);
                 ALU_CASE(b32any_fnequal4, fbany_neq);
 
-                ALU_CASE(b32all_iequal2, iball_eq);
-                ALU_CASE(b32all_iequal3, iball_eq);
+                ALU_CASE_BCAST(b32all_iequal2, iball_eq, 2);
+                ALU_CASE_BCAST(b32all_iequal3, iball_eq, 3);
                 ALU_CASE(b32all_iequal4, iball_eq);
 
-                ALU_CASE(b32any_inequal2, ibany_neq);
-                ALU_CASE(b32any_inequal3, ibany_neq);
+                ALU_CASE_BCAST(b32any_inequal2, ibany_neq, 2);
+                ALU_CASE_BCAST(b32any_inequal3, ibany_neq, 3);
                 ALU_CASE(b32any_inequal4, ibany_neq);
 
                 /* Source mods will be shoved in later */
@@ -967,8 +993,8 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
                 /* Writemask only valid for non-SSA NIR */
                 .mask = expand_writemask(mask_of(nr_components)),
 
-                .src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[0], is_int)),
-                .src2 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[1], is_int)),
+                .src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[0], is_int, broadcast_swizzle)),
+                .src2 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[1], is_int, broadcast_swizzle)),
         };
 
         /* Apply writemask if non-SSA, keeping in mind that we can't write to components that don't exist */
@@ -1033,7 +1059,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
                         for (int j = 0; j < 4; ++j)
                                 nirmods[0]->swizzle[j] = original_swizzle[i]; /* Pull from the correct component */
 
-                        ins.alu.src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[0], is_int));
+                        ins.alu.src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[0], is_int, broadcast_swizzle));
                         emit_mir_instruction(ctx, ins);
                 }
         } else {