Mesa (master): util: Switch the non-block formats to unpacking rgba rows instead of rects.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Apr 19 18:24:14 UTC 2021


Module: Mesa
Branch: master
Commit: 2b5178ee4820d69386dfa0ecb28fe9fe7c4f0964
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2b5178ee4820d69386dfa0ecb28fe9fe7c4f0964

Author: Eric Anholt <eric at anholt.net>
Date:   Fri Apr  2 14:35:01 2021 -0700

util: Switch the non-block formats to unpacking rgba rows instead of rects.

We have only a few callers of unpack that do rects, so add a helper that
iterates over y adding the strides.  This saves us 36kb of generated code
and means that adding cpu-specific variants for RGBA format unpack will be
much simpler.

Reviewed-by: Jesse Natalie <jenatali at microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10014>

---

 .../auxiliary/translate/translate_generic.c        |   7 +-
 src/gallium/frontends/nine/device9.c               |   8 +-
 src/mesa/main/pack.c                               |   2 +-
 src/util/format/u_format.c                         |  67 +++++--
 src/util/format/u_format.h                         |  44 ++++-
 src/util/format/u_format_bptc.c                    |  62 +++++--
 src/util/format/u_format_other.c                   | 203 +++++++++------------
 src/util/format/u_format_other.h                   |  36 ++--
 src/util/format/u_format_pack.py                   |  17 +-
 src/util/format/u_format_table.py                  |  11 +-
 src/util/tests/format/u_format_test.c              |  19 +-
 11 files changed, 272 insertions(+), 204 deletions(-)

diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 123f077386f..a0c18531caa 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -50,9 +50,8 @@ struct translate_generic {
    struct {
       enum translate_element_type type;
 
-      void (*fetch)(void *restrict dst, unsigned dst_stride,
-                    const uint8_t *restrict src, unsigned src_stride,
-                    unsigned width, unsigned height);
+      void (*fetch)(void *restrict dst, const uint8_t *restrict src,
+                    unsigned width);
       unsigned buffer;
       unsigned input_offset;
       unsigned instance_divisor;
@@ -625,7 +624,7 @@ generic_run_one(struct translate_generic *tg,
          if (likely(copy_size >= 0)) {
             memcpy(dst, src, copy_size);
          } else {
-            tg->attrib[attr].fetch(data, 0, src, 0, 1, 1);
+            tg->attrib[attr].fetch(data, src, 1);
 
             if (0)
                debug_printf("Fetch linear attr %d  from %p  stride %d  index %d: "
diff --git a/src/gallium/frontends/nine/device9.c b/src/gallium/frontends/nine/device9.c
index ec2e9f4bc71..527d40d62ec 100644
--- a/src/gallium/frontends/nine/device9.c
+++ b/src/gallium/frontends/nine/device9.c
@@ -848,16 +848,13 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
     {
         D3DLOCKED_RECT lock;
         HRESULT hr;
-        const struct util_format_unpack_description *unpack =
-            util_format_unpack_description(surf->base.info.format);
-        assert(unpack);
 
         hr = NineSurface9_LockRect(surf, &lock, NULL, D3DLOCK_READONLY);
         if (FAILED(hr))
             ret_err("Failed to map cursor source image.\n",
                     D3DERR_DRIVERINTERNALERROR);
 
-        unpack->unpack_rgba_8unorm(ptr, transfer->stride,
+        util_format_unpack_rgba_8unorm_rect(surf->base.info.format, ptr, transfer->stride,
                                    lock.pBits, lock.Pitch,
                                    This->cursor.w, This->cursor.h);
 
@@ -865,7 +862,8 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
             void *data = lock.pBits;
             /* SetCursor assumes 32x32 argb with pitch 128 */
             if (lock.Pitch != 128) {
-                unpack->unpack_rgba_8unorm(This->cursor.hw_upload_temp, 128,
+                util_format_unpack_rgba_8unorm_rect(surf->base.info.format,
+                                           This->cursor.hw_upload_temp, 128,
                                            lock.pBits, lock.Pitch,
                                            32, 32);
                 data = This->cursor.hw_upload_temp;
diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 4e8db1b7952..257f9dd93e8 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -1641,7 +1641,7 @@ _mesa_unpack_ubyte_rgba_row(mesa_format format, uint32_t n,
       util_format_unpack_description((enum pipe_format)format);
 
    if (unpack->unpack_rgba_8unorm) {
-      unpack->unpack_rgba_8unorm((uint8_t *)dst, 0, src, 0, n, 1);
+      unpack->unpack_rgba_8unorm((uint8_t *)dst, src, n);
    } else {
       /* get float values, convert to ubyte */
       {
diff --git a/src/util/format/u_format.c b/src/util/format/u_format.c
index 43f57caa882..1c3ca57fc76 100644
--- a/src/util/format/u_format.c
+++ b/src/util/format/u_format.c
@@ -359,6 +359,47 @@ util_get_depth_format_mrd(const struct util_format_description *desc)
    return mrd;
 }
 
+void
+util_format_unpack_rgba_rect(enum pipe_format format,
+                   void *dst, unsigned dst_stride,
+                   const void *src, unsigned src_stride,
+                   unsigned w, unsigned h)
+{
+   const struct util_format_unpack_description *unpack =
+      util_format_unpack_description(format);
+
+   /* Optimized function for block-compressed formats */
+   if (unpack->unpack_rgba_rect) {
+      unpack->unpack_rgba_rect(dst, dst_stride, src, src_stride, w, h);
+   } else {
+     for (unsigned y = 0; y < h; y++) {
+        unpack->unpack_rgba(dst, src, w);
+        src = (const char *)src + src_stride;
+        dst = (char *)dst + dst_stride;
+     }
+  }
+}
+
+void
+util_format_unpack_rgba_8unorm_rect(enum pipe_format format,
+                   void *dst, unsigned dst_stride,
+                   const void *src, unsigned src_stride,
+                   unsigned w, unsigned h)
+{
+   const struct util_format_unpack_description *unpack =
+      util_format_unpack_description(format);
+
+   /* Optimized function for block-compressed formats */
+   if (unpack->unpack_rgba_8unorm_rect) {
+      unpack->unpack_rgba_8unorm_rect(dst, dst_stride, src, src_stride, w, h);
+   } else {
+     for (unsigned y = 0; y < h; y++) {
+        unpack->unpack_rgba_8unorm(dst, src, w);
+        src = (const char *)src + src_stride;
+        dst = (char *)dst + dst_stride;
+     }
+  }
+}
 
 void
 util_format_read_4(enum pipe_format format,
@@ -367,8 +408,6 @@ util_format_read_4(enum pipe_format format,
                    unsigned x, unsigned y, unsigned w, unsigned h)
 {
    const struct util_format_description *format_desc;
-   const struct util_format_unpack_description *unpack =
-      util_format_unpack_description(format);
    const uint8_t *src_row;
 
    format_desc = util_format_description(format);
@@ -378,7 +417,7 @@ util_format_read_4(enum pipe_format format,
 
    src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8);
 
-   unpack->unpack_rgba(dst, dst_stride, src_row, src_stride, w, h);
+   util_format_unpack_rgba_rect(format, dst, dst_stride, src_row, src_stride, w, h);
 }
 
 
@@ -413,10 +452,7 @@ void
 util_format_read_4ub(enum pipe_format format, uint8_t *dst, unsigned dst_stride, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)
 {
    const struct util_format_description *format_desc;
-   const struct util_format_unpack_description *unpack =
-      util_format_unpack_description(format);
    const uint8_t *src_row;
-   uint8_t *dst_row;
 
    format_desc = util_format_description(format);
 
@@ -424,9 +460,8 @@ util_format_read_4ub(enum pipe_format format, uint8_t *dst, unsigned dst_stride,
    assert(y % format_desc->block.height == 0);
 
    src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8);
-   dst_row = dst;
 
-   unpack->unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, w, h);
+   util_format_unpack_rgba_8unorm_rect(format, dst, dst_stride, src_row, src_stride, w, h);
 }
 
 
@@ -715,7 +750,7 @@ util_format_translate(enum pipe_format dst_format,
          return FALSE;
 
       while (height >= y_step) {
-         unpack->unpack_rgba_8unorm(tmp_row, tmp_stride, src_row, src_stride, width, y_step);
+         util_format_unpack_rgba_8unorm_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, y_step);
          pack->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step);
 
          dst_row += dst_step;
@@ -724,7 +759,7 @@ util_format_translate(enum pipe_format dst_format,
       }
 
       if (height) {
-         unpack->unpack_rgba_8unorm(tmp_row, tmp_stride, src_row, src_stride, width, height);
+         util_format_unpack_rgba_8unorm_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, height);
          pack->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, height);
       }
 
@@ -746,7 +781,7 @@ util_format_translate(enum pipe_format dst_format,
          return FALSE;
 
       while (height >= y_step) {
-         unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, y_step);
+         util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, y_step);
          pack->pack_rgba_sint(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step);
 
          dst_row += dst_step;
@@ -755,7 +790,7 @@ util_format_translate(enum pipe_format dst_format,
       }
 
       if (height) {
-         unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, height);
+         util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, height);
          pack->pack_rgba_sint(dst_row, dst_stride, tmp_row, tmp_stride, width, height);
       }
 
@@ -777,7 +812,7 @@ util_format_translate(enum pipe_format dst_format,
          return FALSE;
 
       while (height >= y_step) {
-         unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, y_step);
+         util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, y_step);
          pack->pack_rgba_uint(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step);
 
          dst_row += dst_step;
@@ -786,7 +821,7 @@ util_format_translate(enum pipe_format dst_format,
       }
 
       if (height) {
-         unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, height);
+         util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, height);
          pack->pack_rgba_uint(dst_row, dst_stride, tmp_row, tmp_stride, width, height);
       }
 
@@ -807,7 +842,7 @@ util_format_translate(enum pipe_format dst_format,
          return FALSE;
 
       while (height >= y_step) {
-         unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, y_step);
+         util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, y_step);
          pack->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step);
 
          dst_row += dst_step;
@@ -816,7 +851,7 @@ util_format_translate(enum pipe_format dst_format,
       }
 
       if (height) {
-         unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, height);
+         util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, height);
          pack->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, height);
       }
 
diff --git a/src/util/format/u_format.h b/src/util/format/u_format.h
index 9911ff5a40d..a1b5ec1ecbc 100644
--- a/src/util/format/u_format.h
+++ b/src/util/format/u_format.h
@@ -315,10 +315,20 @@ struct util_format_unpack_description {
     * Unpack pixel blocks to R8G8B8A8_UNORM.
     * Note: strides are in bytes.
     *
-    * Only defined for non-depth-stencil formats.
+    * Only defined for non-block non-depth-stencil formats.
     */
    void
-   (*unpack_rgba_8unorm)(uint8_t *restrict dst, unsigned dst_stride,
+   (*unpack_rgba_8unorm)(uint8_t *restrict dst, const uint8_t *restrict src,
+                         unsigned width);
+
+   /**
+    * Unpack pixel blocks to R8G8B8A8_UNORM.
+    * Note: strides are in bytes.
+    *
+    * Only defined for block non-depth-stencil formats.
+    */
+   void
+   (*unpack_rgba_8unorm_rect)(uint8_t *restrict dst, unsigned dst_stride,
                          const uint8_t *restrict src, unsigned src_stride,
                          unsigned width, unsigned height);
 
@@ -338,10 +348,22 @@ struct util_format_unpack_description {
     *
     * Note: strides are in bytes.
     *
-    * Only defined for non-depth-stencil formats.
+    * Only defined for non-block non-depth-stencil formats.
+    */
+   void
+   (*unpack_rgba)(void *restrict dst, const uint8_t *restrict src,
+                  unsigned width);
+
+   /**
+    * Unpack pixel blocks to R32G32B32A32_UINT/_INT/_FLOAT based on whether the
+    * type is pure uint, int, or other.
+    *
+    * Note: strides are in bytes.
+    *
+    * Only defined for block non-depth-stencil formats.
     */
    void
-   (*unpack_rgba)(void *restrict dst, unsigned dst_stride,
+   (*unpack_rgba_rect)(void *restrict dst, unsigned dst_stride,
                   const uint8_t *restrict src, unsigned src_stride,
                   unsigned width, unsigned height);
 
@@ -1477,7 +1499,7 @@ util_format_unpack_rgba(enum pipe_format format, void *dst,
    const struct util_format_unpack_description *desc =
       util_format_unpack_description(format);
 
-   desc->unpack_rgba(dst, 0, (const uint8_t *)src, 0, w, 1);
+   desc->unpack_rgba(dst, (const uint8_t *)src, w);
 }
 
 static inline void
@@ -1558,6 +1580,18 @@ util_format_write_4ub(enum pipe_format format,
                       void *dst, unsigned dst_stride, 
                       unsigned x, unsigned y, unsigned w, unsigned h);
 
+void
+util_format_unpack_rgba_rect(enum pipe_format format,
+                             void *dst, unsigned dst_stride,
+                             const void *src, unsigned src_stride,
+                             unsigned w, unsigned h);
+
+void
+util_format_unpack_rgba_8unorm_rect(enum pipe_format format,
+                                    void *dst, unsigned dst_stride,
+                                    const void *src, unsigned src_stride,
+                                    unsigned w, unsigned h);
+
 /*
  * Generic format conversion;
  */
diff --git a/src/util/format/u_format_bptc.c b/src/util/format/u_format_bptc.c
index 7358124dde2..7cf9cf40e56 100644
--- a/src/util/format/u_format_bptc.c
+++ b/src/util/format/u_format_bptc.c
@@ -62,10 +62,14 @@ util_format_bptc_rgba_unorm_unpack_rgba_float(void *restrict dst_row, unsigned d
    decompress_rgba_unorm(width, height,
                          src_row, src_stride,
                          temp_block, width * 4 * sizeof(uint8_t));
-   util_format_r8g8b8a8_unorm_unpack_rgba_float(
-                      dst_row, dst_stride,
-                      temp_block, width * 4 * sizeof(uint8_t),
-                      width, height);
+   /* Direct call to row unpack instead of util_format_unpack_rgba_rect()
+    * to avoid table lookup that would pull in all unpack symbols.
+    */
+   for (int y = 0; y < height; y++) {
+      util_format_r8g8b8a8_unorm_unpack_rgba_float((char *)dst_row + dst_stride * y,
+                                                    temp_block + 4 * width * y,
+                                                    width);
+   }
    free((void *) temp_block);
 }
 
@@ -76,10 +80,15 @@ util_format_bptc_rgba_unorm_pack_rgba_float(uint8_t *restrict dst_row, unsigned
 {
    uint8_t *temp_block;
    temp_block = malloc(width * height * 4 * sizeof(uint8_t));
-   util_format_r32g32b32a32_float_unpack_rgba_8unorm(
-                        temp_block, width * 4 * sizeof(uint8_t),
-                        (uint8_t *)src_row, src_stride,
-                        width, height);
+   /* Direct call to row unpack instead of util_format_unpack_rgba_8unorm_rect()
+    * to avoid table lookup that would pull in all unpack symbols.
+    */
+   for (int y = 0; y < height; y++) {
+      util_format_r32g32b32a32_float_unpack_rgba_8unorm(
+                        temp_block + 4 * width * y,
+                        (uint8_t *)src_row + src_stride * y,
+                        width);
+   }
    compress_rgba_unorm(width, height,
                        temp_block, width * 4 * sizeof(uint8_t),
                        dst_row, dst_stride);
@@ -131,9 +140,15 @@ util_format_bptc_srgba_unpack_rgba_float(void *restrict dst_row, unsigned dst_st
    decompress_rgba_unorm(width, height,
                          src_row, src_stride,
                          temp_block, width * 4 * sizeof(uint8_t));
-   util_format_r8g8b8a8_srgb_unpack_rgba_float(dst_row, dst_stride,
-                                               temp_block, width * 4 * sizeof(uint8_t),
-                                               width, height);
+
+   /* Direct call to row unpack instead of util_format_unpack_rgba_rect()
+    * to avoid table lookup that would pull in all unpack symbols.
+    */
+   for (int y = 0; y < height; y++) {
+      util_format_r8g8b8a8_srgb_unpack_rgba_float((char *)dst_row + dst_stride * y,
+                                                  temp_block + width * 4 * y,
+                                                  width);
+   }
 
    free((void *) temp_block);
 }
@@ -171,10 +186,15 @@ util_format_bptc_rgb_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigne
                         src_row, src_stride,
                         temp_block, width * 4 * sizeof(float),
                         true);
-   util_format_r32g32b32a32_float_unpack_rgba_8unorm(
-                        dst_row, dst_stride,
-                        (const uint8_t *)temp_block, width * 4 * sizeof(float),
-                        width, height);
+   /* Direct call to row unpack instead of util_format_unpack_rgba_8unorm_rect()
+    * to avoid table lookup that would pull in all unpack symbols.
+    */
+   for (int y = 0; y < height; y++) {
+      util_format_r32g32b32a32_float_unpack_rgba_8unorm(
+          dst_row + dst_stride * y,
+          (const uint8_t *)temp_block + width * 4 * sizeof(float) * y,
+          width);
+   }
    free((void *) temp_block);
 }
 
@@ -229,10 +249,14 @@ util_format_bptc_rgb_ufloat_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsign
                         src_row, src_stride,
                         temp_block, width * 4 * sizeof(float),
                         false);
-   util_format_r32g32b32a32_float_unpack_rgba_8unorm(
-                        dst_row, dst_stride,
-                        (const uint8_t *)temp_block, width * 4 * sizeof(float),
-                        width, height);
+   /* Direct call to row unpack instead of util_format_unpack_rgba_8unorm_rect()
+    * to avoid table lookup that would pull in all unpack symbols.
+    */
+   for (int y = 0; y < height; y++) {
+      util_format_r32g32b32a32_float_unpack_rgba_8unorm(dst_row + dst_stride * y,
+                                                        (void *)(temp_block + 4 * width * y),
+                                                        width);
+   }
    free((void *) temp_block);
 }
 
diff --git a/src/util/format/u_format_other.c b/src/util/format/u_format_other.c
index 795ad8cce29..ce4b9d0976a 100644
--- a/src/util/format/u_format_other.c
+++ b/src/util/format/u_format_other.c
@@ -33,23 +33,19 @@
 
 
 void
-util_format_r9g9b9e5_float_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride,
-                                        const uint8_t *restrict src_row, unsigned src_stride,
-                                        unsigned width, unsigned height)
+util_format_r9g9b9e5_float_unpack_rgba_float(void *restrict dst_row,
+                                        const uint8_t *restrict src_row,
+                                        unsigned width)
 {
-   unsigned x, y;
-   for(y = 0; y < height; y += 1) {
-      float *dst = dst_row;
-      const uint8_t *src = src_row;
-      for(x = 0; x < width; x += 1) {
-         uint32_t value = util_cpu_to_le32(*(const uint32_t *)src);
-         rgb9e5_to_float3(value, dst);
-         dst[3] = 1; /* a */
-         src += 4;
-         dst += 4;
-      }
-      src_row += src_stride;
-      dst_row = (uint8_t *)dst_row + dst_stride;
+   unsigned x;
+   float *dst = dst_row;
+   const uint8_t *src = src_row;
+   for(x = 0; x < width; x += 1) {
+      uint32_t value = util_cpu_to_le32(*(const uint32_t *)src);
+      rgb9e5_to_float3(value, dst);
+      dst[3] = 1; /* a */
+      src += 4;
+      dst += 4;
    }
 }
 
@@ -85,27 +81,23 @@ util_format_r9g9b9e5_float_fetch_rgba(void *restrict in_dst, const uint8_t *rest
 
 
 void
-util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
-                                         const uint8_t *restrict src_row, unsigned src_stride,
-                                         unsigned width, unsigned height)
+util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *restrict dst_row,
+                                         const uint8_t *restrict src_row,
+                                         unsigned width)
 {
-   unsigned x, y;
+   unsigned x;
    float p[3];
-   for(y = 0; y < height; y += 1) {
-      uint8_t *dst = dst_row;
-      const uint8_t *src = src_row;
-      for(x = 0; x < width; x += 1) {
-         uint32_t value = util_cpu_to_le32(*(const uint32_t *)src);
-         rgb9e5_to_float3(value, p);
-         dst[0] = float_to_ubyte(p[0]); /* r */
-         dst[1] = float_to_ubyte(p[1]); /* g */
-         dst[2] = float_to_ubyte(p[2]); /* b */
-         dst[3] = 255; /* a */
-         src += 4;
-         dst += 4;
-      }
-      src_row += src_stride;
-      dst_row += dst_stride/sizeof(*dst_row);
+   uint8_t *dst = dst_row;
+   const uint8_t *src = src_row;
+   for(x = 0; x < width; x += 1) {
+      uint32_t value = util_cpu_to_le32(*(const uint32_t *)src);
+      rgb9e5_to_float3(value, p);
+      dst[0] = float_to_ubyte(p[0]); /* r */
+      dst[1] = float_to_ubyte(p[1]); /* g */
+      dst[2] = float_to_ubyte(p[2]); /* b */
+      dst[3] = 255; /* a */
+      src += 4;
+      dst += 4;
    }
 }
 
@@ -137,23 +129,19 @@ util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned
 
 
 void
-util_format_r11g11b10_float_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride,
-                                        const uint8_t *restrict src_row, unsigned src_stride,
-                                        unsigned width, unsigned height)
+util_format_r11g11b10_float_unpack_rgba_float(void *restrict dst_row,
+                                        const uint8_t *restrict src_row,
+                                        unsigned width)
 {
-   unsigned x, y;
-   for(y = 0; y < height; y += 1) {
-      float *dst = dst_row;
-      const uint8_t *src = src_row;
-      for(x = 0; x < width; x += 1) {
-         uint32_t value = util_cpu_to_le32(*(const uint32_t *)src);
-         r11g11b10f_to_float3(value, dst);
-         dst[3] = 1; /* a */
-         src += 4;
-         dst += 4;
-      }
-      src_row += src_stride;
-      dst_row = (uint8_t *)dst_row + dst_stride;
+   unsigned x;
+   float *dst = dst_row;
+   const uint8_t *src = src_row;
+   for(x = 0; x < width; x += 1) {
+      uint32_t value = util_cpu_to_le32(*(const uint32_t *)src);
+      r11g11b10f_to_float3(value, dst);
+      dst[3] = 1; /* a */
+      src += 4;
+      dst += 4;
    }
 }
 
@@ -189,27 +177,23 @@ util_format_r11g11b10_float_fetch_rgba(void *restrict in_dst, const uint8_t *res
 
 
 void
-util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
-                                         const uint8_t *restrict src_row, unsigned src_stride,
-                                         unsigned width, unsigned height)
+util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *restrict dst_row,
+                                         const uint8_t *restrict src_row,
+                                         unsigned width)
 {
-   unsigned x, y;
+   unsigned x;
    float p[3];
-   for(y = 0; y < height; y += 1) {
-      uint8_t *dst = dst_row;
-      const uint8_t *src = src_row;
-      for(x = 0; x < width; x += 1) {
-         uint32_t value = util_cpu_to_le32(*(const uint32_t *)src);
-         r11g11b10f_to_float3(value, p);
-         dst[0] = float_to_ubyte(p[0]); /* r */
-         dst[1] = float_to_ubyte(p[1]); /* g */
-         dst[2] = float_to_ubyte(p[2]); /* b */
-         dst[3] = 255; /* a */
-         src += 4;
-         dst += 4;
-      }
-      src_row += src_stride;
-      dst_row += dst_stride/sizeof(*dst_row);
+   uint8_t *dst = dst_row;
+   const uint8_t *src = src_row;
+   for(x = 0; x < width; x += 1) {
+      uint32_t value = util_cpu_to_le32(*(const uint32_t *)src);
+      r11g11b10f_to_float3(value, p);
+      dst[0] = float_to_ubyte(p[0]); /* r */
+      dst[1] = float_to_ubyte(p[1]); /* g */
+      dst[2] = float_to_ubyte(p[2]); /* b */
+      dst[3] = 255; /* a */
+      src += 4;
+      dst += 4;
    }
 }
 
@@ -256,58 +240,47 @@ r8g8bx_derive(int16_t r, int16_t g)
 }
 
 void
-util_format_r8g8bx_snorm_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride,
-                                      const uint8_t *restrict src_row, unsigned src_stride,
-                                      unsigned width, unsigned height)
+util_format_r8g8bx_snorm_unpack_rgba_float(void *restrict dst_row,
+                                      const uint8_t *restrict src_row, unsigned width)
 {
-   unsigned x, y;
-
-   for(y = 0; y < height; y += 1) {
-      float *dst = dst_row;
-      const uint16_t *src = (const uint16_t *)src_row;
-      for(x = 0; x < width; x += 1) {
-         uint16_t value = util_cpu_to_le16(*src++);
-         int16_t r, g;
-
-         r = ((int16_t)(value << 8)) >> 8;
-         g = ((int16_t)(value << 0)) >> 8;
-
-         dst[0] = (float)(r * (1.0f/0x7f)); /* r */
-         dst[1] = (float)(g * (1.0f/0x7f)); /* g */
-         dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */
-         dst[3] = 1.0f; /* a */
-         dst += 4;
-      }
-      src_row += src_stride;
-      dst_row = (uint8_t *)dst_row + dst_stride;
+   unsigned x;
+   float *dst = dst_row;
+   const uint16_t *src = (const uint16_t *)src_row;
+   for(x = 0; x < width; x += 1) {
+      uint16_t value = util_cpu_to_le16(*src++);
+      int16_t r, g;
+
+      r = ((int16_t)(value << 8)) >> 8;
+      g = ((int16_t)(value << 0)) >> 8;
+
+      dst[0] = (float)(r * (1.0f/0x7f)); /* r */
+      dst[1] = (float)(g * (1.0f/0x7f)); /* g */
+      dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */
+      dst[3] = 1.0f; /* a */
+      dst += 4;
    }
 }
 
 
 void
-util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
-                                       const uint8_t *restrict src_row, unsigned src_stride,
-                                       unsigned width, unsigned height)
+util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *restrict dst,
+                                       const uint8_t *restrict src_row,
+                                       unsigned width)
 {
-   unsigned x, y;
-   for(y = 0; y < height; y += 1) {
-      uint8_t *dst = dst_row;
-      const uint16_t *src = (const uint16_t *)src_row;
-      for(x = 0; x < width; x += 1) {
-         uint16_t value = util_cpu_to_le16(*src++);
-         int16_t r, g;
-
-         r = ((int16_t)(value << 8)) >> 8;
-         g = ((int16_t)(value << 0)) >> 8;
-
-         dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */
-         dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */
-         dst[2] = r8g8bx_derive(r, g); /* b */
-         dst[3] = 255; /* a */
-         dst += 4;
-      }
-      src_row += src_stride;
-      dst_row += dst_stride/sizeof(*dst_row);
+   unsigned x;
+   const uint16_t *src = (const uint16_t *)src_row;
+   for(x = 0; x < width; x += 1) {
+      uint16_t value = util_cpu_to_le16(*src++);
+      int16_t r, g;
+
+      r = ((int16_t)(value << 8)) >> 8;
+      g = ((int16_t)(value << 0)) >> 8;
+
+      dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */
+      dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */
+      dst[2] = r8g8bx_derive(r, g); /* b */
+      dst[3] = 255; /* a */
+      dst += 4;
    }
 }
 
diff --git a/src/util/format/u_format_other.h b/src/util/format/u_format_other.h
index 4f656c148f5..3547e9e673f 100644
--- a/src/util/format/u_format_other.h
+++ b/src/util/format/u_format_other.h
@@ -34,9 +34,9 @@
 
 
 void
-util_format_r9g9b9e5_float_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride,
-                                        const uint8_t *restrict src_row, unsigned src_stride,
-                                        unsigned width, unsigned height);
+util_format_r9g9b9e5_float_unpack_rgba_float(void *restrict dst_row,
+                                        const uint8_t *restrict src_row,
+                                        unsigned width);
 
 void
 util_format_r9g9b9e5_float_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride,
@@ -48,9 +48,9 @@ util_format_r9g9b9e5_float_fetch_rgba(void *restrict dst, const uint8_t *restric
                                        unsigned i, unsigned j);
 
 void
-util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
-                                         const uint8_t *restrict src_row, unsigned src_stride,
-                                         unsigned width, unsigned height);
+util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *restrict dst_row,
+                                         const uint8_t *restrict src_row,
+                                         unsigned width);
 
 void
 util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
@@ -59,9 +59,9 @@ util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned
 
 
 void
-util_format_r11g11b10_float_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride,
-                                        const uint8_t *restrict src_row, unsigned src_stride,
-                                        unsigned width, unsigned height);
+util_format_r11g11b10_float_unpack_rgba_float(void *restrict dst_row,
+                                        const uint8_t *restrict src_row,
+                                        unsigned width);
 
 void
 util_format_r11g11b10_float_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride,
@@ -73,9 +73,9 @@ util_format_r11g11b10_float_fetch_rgba(void *restrict dst, const uint8_t *restri
                                        unsigned i, unsigned j);
 
 void
-util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
-                                         const uint8_t *restrict src_row, unsigned src_stride,
-                                         unsigned width, unsigned height);
+util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *restrict dst_row,
+                                         const uint8_t *restrict src_row,
+                                         unsigned width);
 
 void
 util_format_r11g11b10_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
@@ -84,9 +84,9 @@ util_format_r11g11b10_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned
 
 
 void
-util_format_r8g8bx_snorm_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride,
-                                      const uint8_t *restrict src_row, unsigned src_stride,
-                                      unsigned width, unsigned height);
+util_format_r8g8bx_snorm_unpack_rgba_float(void *restrict dst_row,
+                                      const uint8_t *restrict src_row,
+                                      unsigned width);
 
 void
 util_format_r8g8bx_snorm_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride,
@@ -98,9 +98,9 @@ util_format_r8g8bx_snorm_fetch_rgba(void *restrict dst, const uint8_t *restrict
                                      unsigned i, unsigned j);
 
 void
-util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
-                                       const uint8_t *restrict src_row, unsigned src_stride,
-                                       unsigned width, unsigned height);
+util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *restrict dst_row,
+                                       const uint8_t *restrict src_row,
+                                       unsigned width);
 
 void
 util_format_r8g8bx_snorm_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
diff --git a/src/util/format/u_format_pack.py b/src/util/format/u_format_pack.py
index 35e8d341ec4..e32c01f8a08 100644
--- a/src/util/format/u_format_pack.py
+++ b/src/util/format/u_format_pack.py
@@ -617,7 +617,7 @@ def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix):
     else:
         dst_proto_type = 'void'
 
-    proto = 'util_format_%s_unpack_%s(%s *restrict dst_row, unsigned dst_stride, const uint8_t *restrict src_row, unsigned src_stride, unsigned width, unsigned height)' % (
+    proto = 'util_format_%s_unpack_%s(%s *restrict dst_row, const uint8_t *restrict src, unsigned width)' % (
         name, dst_suffix, dst_proto_type)
     print('void %s;' % proto, file=sys.stdout2)
 
@@ -626,19 +626,14 @@ def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix):
     print('{')
 
     if is_format_supported(format):
-        print('   unsigned x, y;')
-        print('   for(y = 0; y < height; y += %u) {' % (format.block_height,))
-        print('      %s *dst = dst_row;' % (dst_native_type))
-        print('      const uint8_t *src = src_row;')
-        print('      for(x = 0; x < width; x += %u) {' % (format.block_width,))
+        print('   %s *dst = dst_row;' % (dst_native_type))
+        print(
+            '   for (unsigned x = 0; x < width; x += %u) {' % (format.block_width,))
         
         generate_unpack_kernel(format, dst_channel, dst_native_type)
     
-        print('         src += %u;' % (format.block_size() / 8,))
-        print('         dst += 4;')
-        print('      }')
-        print('      src_row += src_stride;')
-        print('      dst_row = (uint8_t *)dst_row + dst_stride;')
+        print('      src += %u;' % (format.block_size() / 8,))
+        print('      dst += 4;')
         print('   }')
 
     print('}')
diff --git a/src/util/format/u_format_table.py b/src/util/format/u_format_table.py
index 607c999f46a..b9176956cc3 100644
--- a/src/util/format/u_format_table.py
+++ b/src/util/format/u_format_table.py
@@ -255,10 +255,17 @@ def write_format_table(formats):
         print("   [%s] = {" % (format.name,))
 
         if format.colorspace != ZS and not format.is_pure_color():
-            print("      .unpack_rgba_8unorm = &util_format_%s_unpack_rgba_8unorm," % sn)
             if format.layout == 's3tc' or format.layout == 'rgtc':
                 print("      .fetch_rgba_8unorm = &util_format_%s_fetch_rgba_8unorm," % sn)
-            print("      .unpack_rgba = &util_format_%s_unpack_rgba_float," % sn)
+            if format.block_width > 1:
+                print(
+                    "      .unpack_rgba_8unorm_rect = &util_format_%s_unpack_rgba_8unorm," % sn)
+                print(
+                    "      .unpack_rgba_rect = &util_format_%s_unpack_rgba_float," % sn)
+            else:
+                print(
+                    "      .unpack_rgba_8unorm = &util_format_%s_unpack_rgba_8unorm," % sn)
+                print("      .unpack_rgba = &util_format_%s_unpack_rgba_float," % sn)
 
         if format.has_depth():
             print("      .unpack_z_32unorm = &util_format_%s_unpack_z_32unorm," % sn)
diff --git a/src/util/tests/format/u_format_test.c b/src/util/tests/format/u_format_test.c
index e6473c2bf6d..9f14b671ff8 100644
--- a/src/util/tests/format/u_format_test.c
+++ b/src/util/tests/format/u_format_test.c
@@ -241,13 +241,11 @@ static boolean
 test_format_unpack_rgba(const struct util_format_description *format_desc,
                         const struct util_format_test_case *test)
 {
-   const struct util_format_unpack_description *unpack =
-      util_format_unpack_description(format_desc->format);
    float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } };
    unsigned i, j, k;
    boolean success;
 
-   unpack->unpack_rgba(&unpacked[0][0][0], sizeof unpacked[0],
+   util_format_unpack_rgba_rect(format_desc->format, &unpacked[0][0][0], sizeof unpacked[0],
                        test->packed, 0,
                        format_desc->block.width, format_desc->block.height);
 
@@ -361,8 +359,6 @@ static boolean
 test_format_unpack_rgba_8unorm(const struct util_format_description *format_desc,
                                const struct util_format_test_case *test)
 {
-   const struct util_format_unpack_description *unpack =
-      util_format_unpack_description(format_desc->format);
    uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } };
    uint8_t expected[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } };
    unsigned i, j, k;
@@ -371,7 +367,7 @@ test_format_unpack_rgba_8unorm(const struct util_format_description *format_desc
    if (util_format_is_pure_integer(format_desc->format))
       return FALSE;
 
-   unpack->unpack_rgba_8unorm(&unpacked[0][0][0], sizeof unpacked[0],
+   util_format_unpack_rgba_8unorm_rect(format_desc->format, &unpacked[0][0][0], sizeof unpacked[0],
                               test->packed, 0,
                               format_desc->block.width, format_desc->block.height);
 
@@ -814,6 +810,13 @@ test_all(void)
          } \
       }
 
+#     define TEST_ONE_UNPACK_RECT_FUNC(name) \
+      if (util_format_unpack_description(format)->name || util_format_unpack_description(format)->name##_rect) {               \
+         if (!test_one_func(format_desc, &test_format_##name, #name)) { \
+           success = FALSE; \
+         } \
+      }
+
 #     define TEST_FORMAT_METADATA(name) \
       if (!test_format_metadata(format_desc, &test_format_##name, #name)) { \
          success = FALSE; \
@@ -825,9 +828,9 @@ test_all(void)
       }
 
       TEST_ONE_PACK_FUNC(pack_rgba_float);
-      TEST_ONE_UNPACK_FUNC(unpack_rgba);
+      TEST_ONE_UNPACK_RECT_FUNC(unpack_rgba);
       TEST_ONE_PACK_FUNC(pack_rgba_8unorm);
-      TEST_ONE_UNPACK_FUNC(unpack_rgba_8unorm);
+      TEST_ONE_UNPACK_RECT_FUNC(unpack_rgba_8unorm);
 
       TEST_ONE_UNPACK_FUNC(unpack_z_32unorm);
       TEST_ONE_PACK_FUNC(pack_z_32unorm);



More information about the mesa-commit mailing list