Mesa (master): pan/mdg: fix midgard writemask encoding for stores

Wed Apr 21 21:39:45 UTC 2021

Module: Mesa
Branch: master
Commit: 9703ca56991c0a74aeb240477a21c074ff156244
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9703ca56991c0a74aeb240477a21c074ff156244

Author: Italo Nicola <italonicola at collabora.com>
Date:   Wed Apr  7 00:05:35 2021 +0000

pan/mdg: fix midgard writemask encoding for stores

This commit fixes a regression caused by commit
616394cf31cecc0165857dd032a316da5b0a2440.

Fixes dEQP-GLES31.functional.compute.shared_var.atomic.min.*,
dEQP-GLES31.functional.ssbo.atomic.min.*,
dEQP-GLES31.functional.compute.shared_var.atomic.max.* and
dEQP-GLES31.functional.ssbo.atomic.max.*.

Midgard's non-image and non-varying store operations have a unique
behavior for the load/store writemask, with each bit being responsible
for 1/4th of the size of the total write operation. This means that we
have to pack the writemask differently since we stopped using st_u128
for everything.

Signed-off-by: Italo Nicola <italonicola at collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10067>

---

 src/panfrost/midgard/helpers.h      |  5 ++
 src/panfrost/midgard/midgard_emit.c | 96 ++++++++++++++++++++++++++++++-------
 2 files changed, 84 insertions(+), 17 deletions(-)

diff --git a/src/panfrost/midgard/helpers.h b/src/panfrost/midgard/helpers.h
index e3fce61e01e..f89e7520454 100644
--- a/src/panfrost/midgard/helpers.h
+++ b/src/panfrost/midgard/helpers.h
@@ -79,6 +79,11 @@
                 OP_IS_UNSIGNED_CMP(op) \
         )
 
+#define OP_IS_COMMON_STORE(op) ( \
+                op >= midgard_op_st_u8 && \
+                op <= midgard_op_st_u128 \
+        )
+
 /* ALU control words are single bit fields with a lot of space */
 
 #define ALU_ENAB_VEC_MUL  (1 << 17)
diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c
index 7f320a98226..53e227d8ed0 100644
--- a/src/panfrost/midgard/midgard_emit.c
+++ b/src/panfrost/midgard/midgard_emit.c
@@ -401,32 +401,94 @@ mir_pack_tex_ooo(midgard_block *block, midgard_bundle *bundle, midgard_instructi
         ins->texture.out_of_order = count;
 }
 
-/* Load store masks are 4-bits. Load/store ops pack for that. vec4 is the
- * natural mask width; vec8 is constrained to be in pairs, vec2 is duplicated. TODO: 8-bit?
+/* Load store masks are 4-bits. Load/store ops pack for that.
+ * For most operations, vec4 is the natural mask width; vec8 is constrained to
+ * be in pairs, vec2 is duplicated. TODO: 8-bit?
+ * For common stores (i.e. ST.*), each bit masks a single byte in the 32-bit
+ * case, 2 bytes in the 64-bit case and 4 bytes in the 128-bit case.
  */
 
+static unsigned
+midgard_pack_common_store_mask(midgard_instruction *ins) {
+        unsigned comp_sz = nir_alu_type_get_type_size(ins->dest_type);
+        unsigned mask = ins->mask;
+        unsigned packed = 0;
+        unsigned nr_comp;
+
+        switch (ins->op) {
+                case midgard_op_st_u8:
+                        packed |= mask & 1;
+                        break;
+                case midgard_op_st_u16:
+                        nr_comp = 16 / comp_sz;
+                        for (int i = 0; i < nr_comp; i++) {
+                                if (mask & (1 << i)) {
+                                        if (comp_sz == 16)
+                                                packed |= 0x3;
+                                        else if (comp_sz == 8)
+                                                packed |= 1 << i;
+                                }
+                        }
+                        break;
+                case midgard_op_st_u32:
+                case midgard_op_st_u64:
+                case midgard_op_st_u128: {
+                        unsigned total_sz = 32;
+                        if (ins->op == midgard_op_st_u128)
+                                total_sz = 128;
+                        else if (ins->op == midgard_op_st_u64)
+                                total_sz = 64;
+
+                        nr_comp = total_sz / comp_sz;
+
+                        /* Each writemask bit masks 1/4th of the value to be stored. */
+                        assert(comp_sz >= total_sz / 4);
+
+                        for (int i = 0; i < nr_comp; i++) {
+                                if (mask & (1 << i)) {
+                                        if (comp_sz == total_sz)
+                                                packed |= 0xF;
+                                        else if (comp_sz == total_sz / 2)
+                                                packed |= 0x3 << i;
+                                        else if (comp_sz == total_sz / 4)
+                                                packed |= 0x1 << i;
+                                }
+                        }
+                        break;
+                }
+                default:
+                        unreachable("unexpected ldst opcode");
+        }
+
+        return packed;
+}
+
 static void
 mir_pack_ldst_mask(midgard_instruction *ins)
 {
         unsigned sz = nir_alu_type_get_type_size(ins->dest_type);
         unsigned packed = ins->mask;
 
-        if (sz == 64) {
-                packed = ((ins->mask & 0x2) ? (0x8 | 0x4) : 0) |
-                         ((ins->mask & 0x1) ? (0x2 | 0x1) : 0);
-        } else if (sz == 16) {
-                packed = 0;
-
-                for (unsigned i = 0; i < 4; ++i) {
-                        /* Make sure we're duplicated */
-                        bool u = (ins->mask & (1 << (2*i + 0))) != 0;
-                        ASSERTED bool v = (ins->mask & (1 << (2*i + 1))) != 0;
-                        assert(u == v);
-
-                        packed |= (u << i);
-                }
+        if (OP_IS_COMMON_STORE(ins->op)) {
+                packed = midgard_pack_common_store_mask(ins);
         } else {
-                assert(sz == 32);
+                if (sz == 64) {
+                        packed = ((ins->mask & 0x2) ? (0x8 | 0x4) : 0) |
+                                ((ins->mask & 0x1) ? (0x2 | 0x1) : 0);
+                } else if (sz == 16) {
+                        packed = 0;
+
+                        for (unsigned i = 0; i < 4; ++i) {
+                                /* Make sure we're duplicated */
+                                bool u = (ins->mask & (1 << (2*i + 0))) != 0;
+                                ASSERTED bool v = (ins->mask & (1 << (2*i + 1))) != 0;
+                                assert(u == v);
+
+                                packed |= (u << i);
+                        }
+                } else {
+                        assert(sz == 32);
+                }
         }
 
         ins->load_store.mask = packed;