Mesa (lp-surface-tiling): llvmpipe: implement tiled/linear conversion for Z/stencil images

Tue Mar 23 22:19:45 UTC 2010

Module: Mesa
Branch: lp-surface-tiling
Commit: 6605fa28c147f30df351da0e4413cab33e4db5da
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6605fa28c147f30df351da0e4413cab33e4db5da

Author: Brian Paul <brianp at vmware.com>
Date:   Tue Mar 23 16:06:41 2010 -0600

llvmpipe: implement tiled/linear conversion for Z/stencil images

---

 src/gallium/drivers/llvmpipe/lp_texture.c    |   21 +---
 src/gallium/drivers/llvmpipe/lp_tile_image.c |  190 ++++++++++++++++++++------
 src/gallium/drivers/llvmpipe/lp_tile_image.h |   16 +--
 3 files changed, 153 insertions(+), 74 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 6d23cc2..6e4a65c 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -176,19 +176,6 @@ llvmpipe_texture_destroy(struct pipe_texture *pt)
 }
 
 
-static unsigned
-tiled_stride(unsigned width, unsigned height)
-{
-   /* size in tiles */
-   unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE;
-   /*unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE;*/
-
-   unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;
-
-   return tiled_stride;
-}
-
-
 /**
  * Map a texture for read/write (rendering).  Without any synchronization.
  */
@@ -453,7 +440,6 @@ llvmpipe_transfer_map( struct pipe_context *pipe,
 {
    struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
    ubyte *map;
-   struct llvmpipe_texture *lpt;
    enum pipe_format format;
    enum lp_texture_usage tex_usage;
 
@@ -465,8 +451,7 @@ llvmpipe_transfer_map( struct pipe_context *pipe,
    }
 
    assert(transfer->texture);
-   lpt = llvmpipe_texture(transfer->texture);
-   format = lpt->base.format;
+   format = transfer->texture->format;
 
    /*
     * Transfers, like other pipe operations, must happen in order, so flush the
@@ -594,13 +579,11 @@ llvmpipe_get_texture_image(struct llvmpipe_texture *lpt,
       if (layout == LP_TEXTURE_LINEAR)
          lp_tiled_to_linear(other_data, target_data,
                             width, height, lpt->base.format,
-                            tiled_stride(width, height),
                             lpt->stride[level]);
       else
          lp_linear_to_tiled(other_data, target_data,
                             width, height, lpt->base.format,
-                            lpt->stride[level], 
-                            tiled_stride(width, height));
+                            lpt->stride[level]);
 
       /* target image is now equal to the other image */
       target_img->timestamp = other_img->timestamp;
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c
index c1980b3..f893eca 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_image.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c
@@ -25,6 +25,14 @@
  **************************************************************************/
 
 
+/**
+ * Code to convert images from tiled to linear and back.
+ * XXX there are quite a few assumptions about color and z/stencil being
+ * 32bpp.
+ */
+
+
+#include "util/u_format.h"
 #include "lp_tile_soa.h"
 #include "lp_tile_image.h"
 
@@ -33,33 +41,102 @@
 
 
 /**
+ * Untile a 4x4 block of 32-bit words (all contiguous) to linear layout
+ * at dst, with dst_stride words between rows.
+ */
+static void
+untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride)
+{
+   uint32_t *d0 = dst;
+   uint32_t *d1 = d0 + dst_stride;
+   uint32_t *d2 = d1 + dst_stride;
+   uint32_t *d3 = d2 + dst_stride;
+
+   d0[0] = src[0];   d0[1] = src[1];   d0[2] = src[4];   d0[3] = src[5];
+   d1[0] = src[2];   d1[1] = src[3];   d1[2] = src[6];   d1[3] = src[7];
+   d2[0] = src[8];   d2[1] = src[9];   d2[2] = src[12];  d2[3] = src[13];
+   d3[0] = src[10];  d3[1] = src[11];  d3[2] = src[14];  d3[3] = src[15];
+}
+
+
+
+/**
+ * Convert a 4x4 rect of 32-bit words from a linear layout into tiled
+ * layout (in which all 16 words are contiguous).
+ */
+static void
+tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride)
+{
+   const uint32_t *s0 = src;
+   const uint32_t *s1 = s0 + src_stride;
+   const uint32_t *s2 = s1 + src_stride;
+   const uint32_t *s3 = s2 + src_stride;
+
+   dst[0] = s0[0];   dst[1] = s0[1];   dst[4] = s0[2];   dst[5] = s0[3];
+   dst[2] = s1[0];   dst[3] = s1[1];   dst[6] = s1[2];   dst[7] = s1[3];
+   dst[8] = s2[0];   dst[9] = s2[1];   dst[12] = s2[2];  dst[13] = s2[3];
+   dst[10] = s3[0];  dst[11] = s3[1];  dst[14] = s3[2];  dst[15] = s3[3];
+}
+
+
+
+/**
  * Convert a tiled image into a linear image.
  * \param src_stride  source row stride in bytes (bytes per row of tiles)
  * \param dst_stride  dest row stride in bytes
  */
 void
-lp_tiled_to_linear(const uint8_t *src,
-                   uint8_t *dst,
+lp_tiled_to_linear(const void *src, void *dst,
                    unsigned width, unsigned height,
-                   enum pipe_format format,
-                   unsigned src_stride,
-                   unsigned dst_stride)
+                   enum pipe_format format, unsigned dst_stride)
 {
-   const unsigned tiles_per_row = src_stride / BYTES_PER_TILE;
-   unsigned i, j;
-
-   for (j = 0; j < height; j += TILE_SIZE) {
-      for (i = 0; i < width; i += TILE_SIZE) {
-         unsigned tile_offset =
-            ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE);
-         unsigned byte_offset = tile_offset * BYTES_PER_TILE;
-         const uint8_t *src_tile = src + byte_offset;
-
-         lp_tile_write_4ub(format,
-                           src_tile,
-                           dst,
-                           dst_stride,
-                           i, j, TILE_SIZE, TILE_SIZE);
+   /* Note that Z/stencil surfaces use a different tiling size than
+    * color surfaces.
+    */
+   if (util_format_is_depth_or_stencil(format)) {
+      const uint src_stride = dst_stride * TILE_VECTOR_WIDTH;
+      const uint bpp = 4;
+      const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
+      const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp);
+      const uint32_t *src32 = (const uint32_t *) src;
+      uint32_t *dst32 = (uint32_t *) dst;
+      uint i, j;
+
+      dst_stride /= bpp;   /* convert from bytes to words */
+
+      for (j = 0; j < height; j += tile_h) {
+         for (i = 0; i < width; i += tile_w) {
+            /* compute offsets in 32-bit words */
+            uint src_offset =
+               (j / tile_h * tiles_per_row + i / tile_w) * (tile_w * tile_h);
+            uint dst_offset = j * dst_stride + i;
+            untile_4_4_uint32(src32 + src_offset,
+                              dst32 + dst_offset,
+                              dst_stride);
+         }
+      }
+   }
+   else {
+      /* color image */
+      const uint bpp = 4;
+      const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
+      const uint bytes_per_tile = tile_w * tile_h * bpp;
+      const uint src_stride = dst_stride * tile_w;
+      const uint tiles_per_row = src_stride / bytes_per_tile;
+      uint i, j;
+
+      for (j = 0; j < height; j += tile_h) {
+         for (i = 0; i < width; i += tile_w) {
+            uint tile_offset =
+               ((j / tile_h) * tiles_per_row + i / tile_w);
+            uint byte_offset = tile_offset * bytes_per_tile;
+            const uint8_t *src_tile = (uint8_t *) src + byte_offset;
+
+            lp_tile_write_4ub(format,
+                              src_tile,
+                              dst, dst_stride,
+                              i, j, tile_w, tile_h);
+         }
       }
    }
 }
@@ -71,28 +148,53 @@ lp_tiled_to_linear(const uint8_t *src,
  * \param dst_stride  dest row stride in bytes (bytes per row of tiles)
  */
 void
-lp_linear_to_tiled(const uint8_t *src,
-                   uint8_t *dst,
+lp_linear_to_tiled(const void *src, void *dst,
                    unsigned width, unsigned height,
-                   enum pipe_format format,
-                   unsigned src_stride,
-                   unsigned dst_stride)
+                   enum pipe_format format, unsigned src_stride)
 {
-   const unsigned tiles_per_row = dst_stride / BYTES_PER_TILE;
-   unsigned i, j;
-
-   for (j = 0; j < height; j += TILE_SIZE) {
-      for (i = 0; i < width; i += TILE_SIZE) {
-         unsigned tile_offset =
-            ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE);
-         unsigned byte_offset = tile_offset * BYTES_PER_TILE;
-         uint8_t *dst_tile = dst + byte_offset;
-
-         lp_tile_read_4ub(format,
-                          dst_tile,
-                          src,
-                          src_stride,
-                          i, j, TILE_SIZE, TILE_SIZE);
+   if (util_format_is_depth_or_stencil(format)) {
+      const uint dst_stride = src_stride * TILE_VECTOR_WIDTH;
+      const uint bpp = 4;
+      const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
+      const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp);
+      const uint32_t *src32 = (const uint32_t *) src;
+      uint32_t *dst32 = (uint32_t *) dst;
+      uint i, j;
+
+      src_stride /= bpp;   /* convert from bytes to words */
+
+      for (j = 0; j < height; j += tile_h) {
+         for (i = 0; i < width; i += tile_w) {
+            /* compute offsets in 32-bit words */
+            uint src_offset = j * src_stride + i;
+            uint dst_offset =
+               (j / tile_h * tiles_per_row + i / tile_w) * (tile_w * tile_h);
+            tile_4_4_uint32(src32 + src_offset,
+                            dst32 + dst_offset,
+                            src_stride);
+         }
+      }
+   }
+   else {
+      const uint bpp = 4;
+      const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
+      const uint bytes_per_tile = tile_w * tile_h * bpp;
+      const uint dst_stride = src_stride * tile_w;
+      const uint tiles_per_row = dst_stride / bytes_per_tile;
+      uint i, j;
+
+      for (j = 0; j < height; j += TILE_SIZE) {
+         for (i = 0; i < width; i += TILE_SIZE) {
+            uint tile_offset =
+               ((j / tile_h) * tiles_per_row + i / tile_w);
+            uint byte_offset = tile_offset * bytes_per_tile;
+            uint8_t *dst_tile = (uint8_t *) dst + byte_offset;
+
+            lp_tile_read_4ub(format,
+                             dst_tile,
+                             src, src_stride,
+                             i, j, tile_w, tile_h);
+         }
       }
    }
 }
@@ -102,7 +204,7 @@ lp_linear_to_tiled(const uint8_t *src,
  * For testing only.
  */
 void
-test_tiled_linear_conversion(uint8_t *data,
+test_tiled_linear_conversion(void *data,
                              enum pipe_format format,
                              unsigned width, unsigned height,
                              unsigned stride)
@@ -113,13 +215,13 @@ test_tiled_linear_conversion(uint8_t *data,
 
    uint8_t *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4);
 
-   unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;
+   /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/
 
    lp_linear_to_tiled(data, tiled, width, height, format,
-                      stride, tiled_stride);
+                      stride);
 
    lp_tiled_to_linear(tiled, data, width, height, format,
-                      tiled_stride, stride);
+                      stride);
 
    free(tiled);
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.h b/src/gallium/drivers/llvmpipe/lp_tile_image.h
index 60d472e..ea50c55 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_image.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_image.h
@@ -30,25 +30,19 @@
 
 
 void
-lp_tiled_to_linear(const uint8_t *src,
-                   uint8_t *dst,
+lp_tiled_to_linear(const void *src, void *dst,
                    unsigned width, unsigned height,
-                   enum pipe_format format,
-                   unsigned src_stride,
-                   unsigned dst_stride);
+                   enum pipe_format format, unsigned dst_stride);
 
 
 void
-lp_linear_to_tiled(const uint8_t *src,
-                   uint8_t *dst,
+lp_linear_to_tiled(const void *src, void *dst,
                    unsigned width, unsigned height,
-                   enum pipe_format format,
-                   unsigned src_stride,
-                   unsigned dst_stride);
+                   enum pipe_format format, unsigned src_stride);
 
 
 void
-test_tiled_linear_conversion(uint8_t *data,
+test_tiled_linear_conversion(void *data,
                              enum pipe_format format,
                              unsigned width, unsigned height,
                              unsigned stride);