Mesa (master): util: Fix big-endian handling of z/s formats.

Wed Apr 21 20:26:03 UTC 2021

Module: Mesa
Branch: master
Commit: 8bd91d1368815c5bbf975044df8bfcf3186ac3b7
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=8bd91d1368815c5bbf975044df8bfcf3186ac3b7

Author: Eric Anholt <eric at anholt.net>
Date:   Mon Apr 19 15:58:06 2021 -0700

util: Fix big-endian handling of z/s formats.

channel datatypes in Mesa are the host's endianness.  For example,
PIPE_FORMAT_R32_UINT doesn't do a bswap in and out in u_format_table.c's
pack/unpack functions.  So, z32_unorm shouldn't be byte swapping either,
and neither should z24s8 which is also a packed format, and once you've
got those it becomes clear that all of the swaps in this file were
mistaken.

Things would mostly work out because it's unusual to read/write Z/S data
through the GL API, and even for drivers like softpipe as long as the pack
and unpack both swap it could work anyway.  However, the bug would be
visible in glReadPixels() with the matching datatype which would hit the
memcpy fastpath without doing another swap.

Caught by a mesa/main unit test on transitioning to using these
pack/unpack functions.

Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
Reviewed-by: Adam Jackson <ajax at redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10336>

---

 src/util/format/u_format_zs.c | 97 ++++++++++++++++---------------------------
 1 file changed, 35 insertions(+), 62 deletions(-)

diff --git a/src/util/format/u_format_zs.c b/src/util/format/u_format_zs.c
index 442a66f7a06..2ebf0ca3af5 100644
--- a/src/util/format/u_format_zs.c
+++ b/src/util/format/u_format_zs.c
@@ -146,8 +146,7 @@ util_format_z16_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_strid
       float *dst = dst_row;
       const uint16_t *src = (const uint16_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint16_t value = util_cpu_to_le16(*src++);
-         *dst++ = z16_unorm_to_z32_float(value);
+         *dst++ = z16_unorm_to_z32_float(*src++);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -164,9 +163,7 @@ util_format_z16_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_strid
       const float *src = src_row;
       uint16_t *dst = (uint16_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint16_t value;
-         value = z32_float_to_z16_unorm(*src++);
-         *dst++ = util_le16_to_cpu(value);
+         *dst++ = z32_float_to_z16_unorm(*src++);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -183,8 +180,7 @@ util_format_z16_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned dst_
       uint32_t *dst = dst_row;
       const uint16_t *src = (const uint16_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint16_t value = util_cpu_to_le16(*src++);
-         *dst++ = z16_unorm_to_z32_unorm(value);
+         *dst++ = z16_unorm_to_z32_unorm(*src++);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -201,9 +197,7 @@ util_format_z16_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_str
       const uint32_t *src = src_row;
       uint16_t *dst = (uint16_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint16_t value;
-         value = z32_unorm_to_z16_unorm(*src++);
-         *dst++ = util_le16_to_cpu(value);
+         *dst++ = z32_unorm_to_z16_unorm(*src++);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -220,8 +214,7 @@ util_format_z32_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_strid
       float *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z32_unorm_to_z32_float(value);
+         *dst++ = z32_unorm_to_z32_float(*src++);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -238,9 +231,7 @@ util_format_z32_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_strid
       const float *src = src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = z32_float_to_z32_unorm(*src++);
-         *dst++ = util_le32_to_cpu(value);
+         *dst++ =z32_float_to_z32_unorm(*src++);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -392,8 +383,7 @@ util_format_z24_unorm_s8_uint_unpack_z_float(float *restrict dst_row, unsigned d
       float *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value =  util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_float(value & 0xffffff);
+         *dst++ = z24_unorm_to_z32_float((*src++) & 0xffffff);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -410,10 +400,10 @@ util_format_z24_unorm_s8_uint_pack_z_float(uint8_t *restrict dst_row, unsigned d
       const float *src = src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_le32_to_cpu(*dst);
+         uint32_t value = *dst;
          value &= 0xff000000;
          value |= z32_float_to_z24_unorm(*src++);
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -431,8 +421,7 @@ util_format_z24_unorm_s8_uint_unpack_z24(uint8_t *restrict dst_row, unsigned dst
       uint32_t *dst = (uint32_t *)dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value =  util_cpu_to_le32(*src++);
-         *dst++ = (value & 0xffffff);
+         *dst++ = ((*src++) & 0xffffff);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -449,11 +438,11 @@ util_format_z24_unorm_s8_uint_pack_z24(uint8_t *restrict dst_row, unsigned dst_s
       const uint32_t *src = (const uint32_t *)src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_le32_to_cpu(*dst);
+         uint32_t value = *dst;
          value &= 0xff000000;
          value |= *src & 0xffffff;
          src++;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -470,8 +459,7 @@ util_format_z24_unorm_s8_uint_unpack_z_32unorm(uint32_t *restrict dst_row, unsig
       uint32_t *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_unorm(value & 0xffffff);
+         *dst++ = z24_unorm_to_z32_unorm((*src++) & 0xffffff);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -488,10 +476,10 @@ util_format_z24_unorm_s8_uint_pack_z_32unorm(uint8_t *restrict dst_row, unsigned
       const uint32_t *src = src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_le32_to_cpu(*dst);
+         uint32_t value = *dst;
          value &= 0xff000000;
          value |= z32_unorm_to_z24_unorm(*src++);
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -508,8 +496,7 @@ util_format_z24_unorm_s8_uint_unpack_s_8uint(uint8_t *restrict dst_row, unsigned
       uint8_t *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = value >> 24;
+         *dst++ = (*src++) >> 24;
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -529,7 +516,7 @@ util_format_z24_unorm_s8_uint_pack_s_8uint(uint8_t *restrict dst_row, unsigned d
          uint32_t value = util_le32_to_cpu(*dst);
          value &= 0x00ffffff;
          value |= (uint32_t)*src++ << 24;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -566,8 +553,7 @@ util_format_s8_uint_z24_unorm_unpack_z_float(float *restrict dst_row, unsigned d
       float *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_float(value >> 8);
+         *dst++ = z24_unorm_to_z32_float((*src++) >> 8);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -584,10 +570,10 @@ util_format_s8_uint_z24_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned d
       const float *src = src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_le32_to_cpu(*dst);
+         uint32_t value = *dst;
          value &= 0x000000ff;
          value |= z32_float_to_z24_unorm(*src++) << 8;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -604,7 +590,7 @@ util_format_s8_uint_z24_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsig
       uint32_t *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
+         uint32_t value = *src++;
          *dst++ = z24_unorm_to_z32_unorm(value >> 8);
       }
       src_row += src_stride/sizeof(*src_row);
@@ -622,10 +608,10 @@ util_format_s8_uint_z24_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned
       const uint32_t *src = src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_le32_to_cpu(*dst);
+         uint32_t value = *dst;
          value &= 0x000000ff;
          value |= *src++ & 0xffffff00;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -642,8 +628,7 @@ util_format_s8_uint_z24_unorm_unpack_s_8uint(uint8_t *restrict dst_row, unsigned
       uint8_t *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = value & 0xff;
+         *dst++ = (*src++) & 0xff;
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -660,10 +645,10 @@ util_format_s8_uint_z24_unorm_pack_s_8uint(uint8_t *restrict dst_row, unsigned d
       const uint8_t *src = src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_le32_to_cpu(*dst);
+         uint32_t value = *dst;
          value &= 0xffffff00;
          value |= *src++;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -680,8 +665,7 @@ util_format_z24x8_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_str
       float *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_float(value & 0xffffff);
+         *dst++ = z24_unorm_to_z32_float((*src++) & 0xffffff);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -698,9 +682,7 @@ util_format_z24x8_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_str
       const float *src = src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = z32_float_to_z24_unorm(*src++);
-         *dst++ = util_le32_to_cpu(value);
+         *dst++ = z32_float_to_z24_unorm(*src++);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -717,8 +699,7 @@ util_format_z24x8_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned ds
       uint32_t *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_unorm(value & 0xffffff);
+         *dst++ = z24_unorm_to_z32_unorm((*src++) & 0xffffff);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -735,9 +716,7 @@ util_format_z24x8_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_s
       const uint32_t *src = src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = z32_unorm_to_z24_unorm(*src++);
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = z32_unorm_to_z24_unorm(*src++);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -754,8 +733,7 @@ util_format_x8z24_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_str
       float *dst = dst_row;
       const uint32_t *src = (uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_float(value >> 8);
+         *dst++ = z24_unorm_to_z32_float((*src++) >> 8);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -772,9 +750,7 @@ util_format_x8z24_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_str
       const float *src = src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = z32_float_to_z24_unorm(*src++) << 8;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = z32_float_to_z24_unorm(*src++) << 8;
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -791,8 +767,7 @@ util_format_x8z24_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned ds
       uint32_t *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_unorm(value >> 8);
+         *dst++ = z24_unorm_to_z32_unorm((*src++) >> 8);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -809,9 +784,7 @@ util_format_x8z24_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_s
       const uint32_t *src = src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = z32_unorm_to_z24_unorm(*src++) << 8;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = z32_unorm_to_z24_unorm(*src++) << 8;
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -921,7 +894,7 @@ util_format_z32_float_s8x24_uint_pack_s_8uint(uint8_t *restrict dst_row, unsigne
       const uint8_t *src = src_row;
       uint32_t *dst = ((uint32_t *)dst_row) + 1;
       for(x = 0; x < width; ++x) {
-         *dst = util_cpu_to_le32(*src);
+         *dst = *src;
          src += 1;
          dst += 2;
       }