Mesa (main): panfrost: Rewrite the clear colour packing code

Mon Aug 16 16:50:52 UTC 2021

Module: Mesa
Branch: main
Commit: b9c095cc2c6874625a50805b7914cca74b8742bb
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b9c095cc2c6874625a50805b7914cca74b8742bb

Author: Alyssa Rosenzweig <alyssa at collabora.com>
Date:   Fri Aug 13 17:36:59 2021 +0000

panfrost: Rewrite the clear colour packing code

At the beginning of a render pass, the hardware will fill the tilebuffer
with an arbitrary 128-bit word. To implement colour clears, the driver
must pack the API-specific clear colour according to the 128-bit layout
of the tilebuffer. This layout depends only on the render target format.

The existing code to handle this was based on loose guesswork. It works
for the format / clear colour combinations tested in dEQP-GLES3, but it
is severely deficient in the general case.  It works by matching on the
PIPE format of the render target (not the layout of the tilebuffer). For
special-cased PIPE formats, it open-codes a buggy pack routine.
Otherwise, it defaults to util_pack_color in the hope that it will work.
Since util_pack_color doesn't know anything about Mali tilebuffer
layouts, that means it's defaulting to wrong behaviour.

Now that we understand internal tilebuffer layouts, let's rewrite the
packing code. Instead of matching PIPE formats, map the PIPE format to
the internal tilebuffer layout using the common table, ensuring the
mapping remains in sync with the render target descriptor. Then for
blendable tilebuffer formats, pack using a common float -> fixed point
path supporting optional sRGB translation. Raw formats use
util_pack_color as before.

For formats with less than 8 bits per channel, the new code uses the
fractional bits of the fixed-point representation. This is required for
correct dithering if the clear colour is not exactly representable in
the final low precision format.

In summary, at least the following bugs in the old code are fixed:

   * Swapped R/B channels with sRGB
   * Swapped R/B channels with some missing formats
   * Incorrect dithering with RGB565, RGB5_A1

Fixes the following test cases:

   dEQP-EGL.functional.wide_color.window_8888_colorspace_srgb
   dEQP-EGL.functional.wide_color.pbuffer_8888_colorspace_srgb
   dEQP-EGL.functional.wide_color.window_888_colorspace_srgb
   dEQP-EGL.functional.wide_color.pbuffer_888_colorspace_srgb

Later in the series, unit tests are added for the new implementation.

Signed-off-by: Alyssa Rosenzweig <alyssa at collabora.com>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12365>

---

 src/gallium/drivers/panfrost/pan_job.c |  87 -----------------
 src/panfrost/lib/meson.build           |   1 +
 src/panfrost/lib/pan_clear.c           | 166 +++++++++++++++++++++++++++++++++
 src/panfrost/lib/pan_util.h            |   9 ++
 4 files changed, 176 insertions(+), 87 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index f940557a113..3fd8c5ce28e 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -922,93 +922,6 @@ panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
         }
 }
 
-/* Helper to smear a 32-bit color across 128-bit components */
-
-static void
-pan_pack_color_32(uint32_t *packed, uint32_t v)
-{
-        for (unsigned i = 0; i < 4; ++i)
-                packed[i] = v;
-}
-
-static void
-pan_pack_color_64(uint32_t *packed, uint32_t lo, uint32_t hi)
-{
-        for (unsigned i = 0; i < 4; i += 2) {
-                packed[i + 0] = lo;
-                packed[i + 1] = hi;
-        }
-}
-
-static void
-pan_pack_color(uint32_t *packed, const union pipe_color_union *color, enum pipe_format format)
-{
-        /* Alpha magicked to 1.0 if there is no alpha */
-
-        bool has_alpha = util_format_has_alpha(format);
-        float clear_alpha = has_alpha ? color->f[3] : 1.0f;
-
-        /* Packed color depends on the framebuffer format */
-
-        const struct util_format_description *desc =
-                util_format_description(format);
-
-        if (util_format_is_rgba8_variant(desc) && desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) {
-                pan_pack_color_32(packed,
-                                  ((uint32_t) float_to_ubyte(clear_alpha) << 24) |
-                                  ((uint32_t) float_to_ubyte(color->f[2]) << 16) |
-                                  ((uint32_t) float_to_ubyte(color->f[1]) <<  8) |
-                                  ((uint32_t) float_to_ubyte(color->f[0]) <<  0));
-        } else if (format == PIPE_FORMAT_B5G6R5_UNORM) {
-                /* First, we convert the components to R5, G6, B5 separately */
-                unsigned r5 = _mesa_roundevenf(SATURATE(color->f[0]) * 31.0);
-                unsigned g6 = _mesa_roundevenf(SATURATE(color->f[1]) * 63.0);
-                unsigned b5 = _mesa_roundevenf(SATURATE(color->f[2]) * 31.0);
-
-                /* Then we pack into a sparse u32. TODO: Why these shifts? */
-                pan_pack_color_32(packed, (b5 << 25) | (g6 << 14) | (r5 << 5));
-        } else if (format == PIPE_FORMAT_B4G4R4A4_UNORM) {
-                /* Convert to 4-bits */
-                unsigned r4 = _mesa_roundevenf(SATURATE(color->f[0]) * 15.0);
-                unsigned g4 = _mesa_roundevenf(SATURATE(color->f[1]) * 15.0);
-                unsigned b4 = _mesa_roundevenf(SATURATE(color->f[2]) * 15.0);
-                unsigned a4 = _mesa_roundevenf(SATURATE(clear_alpha) * 15.0);
-
-                /* Pack on *byte* intervals */
-                pan_pack_color_32(packed, (a4 << 28) | (b4 << 20) | (g4 << 12) | (r4 << 4));
-        } else if (format == PIPE_FORMAT_B5G5R5A1_UNORM) {
-                /* Scale as expected but shift oddly */
-                unsigned r5 = _mesa_roundevenf(SATURATE(color->f[0]) * 31.0);
-                unsigned g5 = _mesa_roundevenf(SATURATE(color->f[1]) * 31.0);
-                unsigned b5 = _mesa_roundevenf(SATURATE(color->f[2]) * 31.0);
-                unsigned a1 = _mesa_roundevenf(SATURATE(clear_alpha) * 1.0);
-
-                pan_pack_color_32(packed, (a1 << 31) | (b5 << 25) | (g5 << 15) | (r5 << 5));
-        } else {
-                /* Otherwise, it's generic subject to replication */
-
-                union util_color out = { 0 };
-                unsigned size = util_format_get_blocksize(format);
-
-                util_pack_color(color->f, format, &out);
-
-                if (size == 1) {
-                        unsigned b = out.ui[0];
-                        unsigned s = b | (b << 8);
-                        pan_pack_color_32(packed, s | (s << 16));
-                } else if (size == 2)
-                        pan_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16));
-                else if (size == 3 || size == 4)
-                        pan_pack_color_32(packed, out.ui[0]);
-                else if (size == 6 || size == 8)
-                        pan_pack_color_64(packed, out.ui[0], out.ui[1]);
-                else if (size == 12 || size == 16)
-                        memcpy(packed, out.ui, 16);
-                else
-                        unreachable("Unknown generic format size packing clear colour");
-        }
-}
-
 void
 panfrost_batch_clear(struct panfrost_batch *batch,
                      unsigned buffers,
diff --git a/src/panfrost/lib/meson.build b/src/panfrost/lib/meson.build
index f01cb6c19b2..551e6304677 100644
--- a/src/panfrost/lib/meson.build
+++ b/src/panfrost/lib/meson.build
@@ -56,6 +56,7 @@ libpanfrost_lib_files = files(
   'pan_bo.c',
   'pan_blend.c',
   'pan_blitter.c',
+  'pan_clear.c',
   'pan_cs.c',
   'pan_indirect_dispatch.c',
   'pan_indirect_draw.c',
diff --git a/src/panfrost/lib/pan_clear.c b/src/panfrost/lib/pan_clear.c
new file mode 100644
index 00000000000..6247348565d
--- /dev/null
+++ b/src/panfrost/lib/pan_clear.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2019-2021 Collabora, Ltd.
+ * Copyright (C) 2019 Alyssa Rosenzweig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <string.h>
+#include "pan_util.h"
+#include "pan_format.h"
+#include "gallium/auxiliary/util/u_pack_color.h"
+#include "util/rounding.h"
+#include "util/format_srgb.h"
+
+/* Clear colours are packed as the internal format of the tilebuffer, looked up
+ * in the blendable formats table given the render target format.
+ *
+ * Raw formats may emulate arbitrary formats with blend shaders. For these, we
+ * defer to util_pack_color to pack in the API format.
+ *
+ * Blendable formats, on the other hand, include extra "fractional" bits in the
+ * tilebuffer for dithering. These have a packed fixed-point representation:
+ * for a channel with m integer bits and n fractional bits, multiply by ((2^m)
+ * - 1) * 2^n and round to the nearest even.
+ */
+
+/* Replicate a 32-bit value v into all four words of the 128-bit output */
+
+static void
+pan_pack_color_32(uint32_t *packed, uint32_t v)
+{
+        for (unsigned i = 0; i < 4; ++i) /* writes packed[0..3] */
+                packed[i] = v;
+}
+
+/* For m = bits_int integer bits and n = bits_frac fractional bits, scale f by
+ * ((2^m) - 1) * 2^n and round to even. f is not clamped by this helper. */
+
+static inline uint32_t
+float_to_fixed(float f, unsigned bits_int, unsigned bits_frac)
+{
+        float factor = ((1 << bits_int) - 1) << bits_frac; /* 0 if bits_int == 0 */
+        return _mesa_roundevenf(f * factor);
+}
+
+/* These values are shared across hardware versions. Don't include GenXML. */
+enum mali_color_buffer_internal_format {
+        MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW = 0, /* packed by pan_pack_raw */
+        MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8 = 1,
+        MALI_COLOR_BUFFER_INTERNAL_FORMAT_R10G10B10A2 = 2,
+        MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A2 = 3,
+        MALI_COLOR_BUFFER_INTERNAL_FORMAT_R4G4B4A4 = 4,
+        MALI_COLOR_BUFFER_INTERNAL_FORMAT_R5G6B5A0 = 5,
+        MALI_COLOR_BUFFER_INTERNAL_FORMAT_R5G5B5A1 = 6,
+        MALI_COLOR_BUFFER_NUM_FORMATS, /* count of the above; sizes tib_layouts */
+};
+
+struct mali_tib_layout { /* per-channel bit widths within one 32-bit word */
+        unsigned int_r, frac_r; /* integer / fractional bit counts for red */
+        unsigned int_g, frac_g; /* integer / fractional bit counts for green */
+        unsigned int_b, frac_b; /* integer / fractional bit counts for blue */
+        unsigned int_a, frac_a; /* integer / fractional bit counts for alpha */
+};
+
+static const struct mali_tib_layout tib_layouts[MALI_COLOR_BUFFER_NUM_FORMATS] = {
+        [MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8] = { 8, 0, 8, 0, 8, 0, 8, 0 },
+        [MALI_COLOR_BUFFER_INTERNAL_FORMAT_R10G10B10A2] = { 10, 0, 10, 0, 10, 0, 2, 0 },
+        [MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A2] = { 8, 2, 8, 2, 8, 2, 2, 0 },
+        [MALI_COLOR_BUFFER_INTERNAL_FORMAT_R4G4B4A4] = { 4, 4, 4, 4, 4, 4, 4, 4 },
+        [MALI_COLOR_BUFFER_INTERNAL_FORMAT_R5G6B5A0] = { 5, 5, 6, 4, 5, 5, 0, 2 },
+        [MALI_COLOR_BUFFER_INTERNAL_FORMAT_R5G5B5A1] = { 5, 5, 5, 5, 5, 5, 1, 1 },
+}; /* rows are int/frac bit pairs in R, G, B, A order; RAW is left zeroed */
+
+/* Raw values are stored as-is but replicated (to 128 bits) for multisampling */
+
+static void
+pan_pack_raw(uint32_t *packed, const union pipe_color_union *color, enum pipe_format format)
+{
+        union util_color out = { 0 };
+        unsigned size = util_format_get_blocksize(format);
+        assert(size <= 16); /* at most 128 bits are written to packed below */
+
+        util_pack_color(color->f, format, &out);
+
+        if (size == 1) { /* smear the low byte across the 32-bit word */
+                unsigned s = out.ui[0] | (out.ui[0] << 8);
+                pan_pack_color_32(packed, s | (s << 16));
+        } else if (size == 2) /* duplicate the 16-bit value into both halves */
+                pan_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16));
+        else if (size <= 4) /* 3 or 4 bytes: replicate the first word as-is */
+                pan_pack_color_32(packed, out.ui[0]);
+        else if (size <= 8) { /* 5 to 8 bytes: repeat the first 64 bits twice */
+                memcpy(packed + 0, out.ui, 8);
+                memcpy(packed + 2, out.ui, 8);
+        } else { /* 9 to 16 bytes: copy all 128 bits, no replication */
+                memcpy(packed, out.ui, 16);
+        }
+}
+
+void
+pan_pack_color(uint32_t *packed, const union pipe_color_union *color, enum pipe_format format)
+{
+        /* Set of blendable formats is common across versions. TODO: v9 */
+        enum mali_color_buffer_internal_format internal =
+                panfrost_blendable_formats_v7[format].internal;
+
+        if (internal == MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW) { /* no tib_layouts entry */
+                pan_pack_raw(packed, color, format);
+                return;
+        }
+
+        /* Saturate to [0, 1] by definition of UNORM. Prevents overflow. */
+        float r = SATURATE(color->f[0]);
+        float g = SATURATE(color->f[1]);
+        float b = SATURATE(color->f[2]);
+        float a = SATURATE(color->f[3]);
+
+        /* Fill in alpha = 1.0 by default */
+        if (!util_format_has_alpha(format))
+                a = 1.0;
+
+        /* Convert colourspace while we still have floats (alpha unchanged) */
+        if (util_format_is_srgb(format)) {
+                r = util_format_linear_to_srgb_float(r);
+                g = util_format_linear_to_srgb_float(g);
+                b = util_format_linear_to_srgb_float(b);
+        }
+
+        /* Look up the layout of the tilebuffer */
+        assert(internal < MALI_COLOR_BUFFER_NUM_FORMATS);
+        struct mali_tib_layout l = tib_layouts[internal];
+
+        unsigned count_r = l.int_r + l.frac_r; /* bit offset of green */
+        unsigned count_g = l.int_g + l.frac_g + count_r; /* bit offset of blue */
+        unsigned count_b = l.int_b + l.frac_b + count_g; /* bit offset of alpha */
+        ASSERTED unsigned count_a = l.int_a + l.frac_a + count_b; /* total bits */
+
+        /* Must fill the word */
+        assert(count_a == 32);
+
+        /* Convert the transformed float colour to the given layout, R in LSBs */
+        uint32_t ur = float_to_fixed(r, l.int_r, l.frac_r) << 0;
+        uint32_t ug = float_to_fixed(g, l.int_g, l.frac_g) << count_r;
+        uint32_t ub = float_to_fixed(b, l.int_b, l.frac_b) << count_g;
+        uint32_t ua = float_to_fixed(a, l.int_a, l.frac_a) << count_b;
+
+        pan_pack_color_32(packed, ur | ug | ub | ua); /* same word in all four slots */
+}
diff --git a/src/panfrost/lib/pan_util.h b/src/panfrost/lib/pan_util.h
index 3a13b5e11d5..099f4d75076 100644
--- a/src/panfrost/lib/pan_util.h
+++ b/src/panfrost/lib/pan_util.h
@@ -28,6 +28,10 @@
 #ifndef PAN_UTIL_H
 #define PAN_UTIL_H
 
+#include <stdint.h>
+#include <stdbool.h>
+#include "util/format/u_format.h"
+
 #define PAN_DBG_PERF            0x0001
 #define PAN_DBG_TRACE           0x0002
 #define PAN_DBG_DEQP            0x0004
@@ -43,6 +47,8 @@
 #define PAN_DBG_LINEAR          0x1000
 #define PAN_DBG_NO_CACHE        0x2000
 
+struct panfrost_device;
+
 unsigned
 panfrost_translate_swizzle_4(const unsigned char swizzle[4]);
 
@@ -54,4 +60,7 @@ panfrost_format_to_bifrost_blend(const struct panfrost_device *dev,
                                  enum pipe_format format,
                                  bool dithered);
 
+void
+pan_pack_color(uint32_t *packed, const union pipe_color_union *color, enum pipe_format format);
+
 #endif /* PAN_UTIL_H */