[Mesa-dev] [PATCH 08/22] i965: Support very simple expansion options for tiled memcpy

Chris Wilson chris at chris-wilson.co.uk
Sat Aug 5 09:40:00 UTC 2017


A big limitation of the current direct memcpy routine is that it only
recognises a couple of (admittedly) common colour types, and cannot do
any inline conversion. If we pass the mesa_format down to memcpy and
tell it the direction of the transfer, we can start accepting a few
mixed transfers and be less picky overall.
---
 src/mesa/drivers/dri/i965/intel_pixel_read.c   |  20 +--
 src/mesa/drivers/dri/i965/intel_tex_image.c    |  16 +--
 src/mesa/drivers/dri/i965/intel_tex_subimage.c |  14 +-
 src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 186 +++++++++++++++++--------
 src/mesa/drivers/dri/i965/intel_tiled_memcpy.h |  17 ++-
 5 files changed, 162 insertions(+), 91 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index cd4fbab097..eb3166ca82 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -84,17 +84,13 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
    /* The miptree's buffer. */
    struct brw_bo *bo;
 
-   uint32_t cpp;
-   mem_copy_fn mem_copy = NULL;
+   mem_copy_fn mem_copy;
 
    /* This fastpath is restricted to specific renderbuffer types:
     * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
     * more types.
     */
-   if (!brw->has_llc ||
-       !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
-       pixels == NULL ||
-       _mesa_is_bufferobj(pack->BufferObj) ||
+   if (!brw->has_llc || pixels == NULL || _mesa_is_bufferobj(pack->BufferObj) ||
        pack->Alignment > 4 ||
        pack->SkipPixels > 0 ||
        pack->SkipRows > 0 ||
@@ -115,15 +111,8 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
    if (rb->NumSamples > 1)
       return false;
 
-   /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
-    * function doesn't set the last channel to 1. Note this checks BaseFormat
-    * rather than TexFormat in case the RGBX format is being simulated with an
-    * RGBA format.
-    */
-   if (rb->_BaseFormat == GL_RGB)
-      return false;
-
-   if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp))
+   mem_copy = intel_get_memcpy(rb->Format, format, type, INTEL_DOWNLOAD);
+   if (mem_copy == NULL)
       return false;
 
    if (!irb->mt ||
@@ -196,6 +185,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
        pack->Alignment, pack->RowLength, pack->SkipPixels,
        pack->SkipRows);
 
+   uint32_t cpp = _mesa_get_format_bytes(rb->Format);
    tiled_to_linear(
       xoffset * cpp, (xoffset + width) * cpp,
       yoffset, yoffset + height,
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index beed1609bd..770a9a78a8 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -387,8 +387,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
    /* The miptree's buffer. */
    struct brw_bo *bo;
 
-   uint32_t cpp;
-   mem_copy_fn mem_copy = NULL;
+   mem_copy_fn mem_copy;
 
    /* This fastpath is restricted to specific texture types:
     * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
@@ -400,7 +399,6 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
     * we need tests.
     */
    if (!brw->has_llc ||
-       !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
        !(texImage->TexObject->Target == GL_TEXTURE_2D ||
          texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
        pixels == NULL ||
@@ -414,15 +412,8 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
        packing->Invert)
       return false;
 
-   /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
-    * function doesn't set the last channel to 1. Note this checks BaseFormat
-    * rather than TexFormat in case the RGBX format is being simulated with an
-    * RGBA format.
-    */
-   if (texImage->_BaseFormat == GL_RGB)
-      return false;
-
-   if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp))
+   mem_copy = intel_get_memcpy(texImage->TexFormat, format, type, INTEL_DOWNLOAD);
+   if (mem_copy == NULL)
       return false;
 
    /* If this is a nontrivial texture view, let another path handle it instead. */
@@ -486,6 +477,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
    xoffset += level_x;
    yoffset += level_y;
 
+   uint32_t cpp = _mesa_get_format_bytes(texImage->TexFormat);
    tiled_to_linear(
       xoffset * cpp, (xoffset + width) * cpp,
       yoffset, yoffset + height,
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index a34203087f..d6fcc93b98 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -27,6 +27,7 @@
 #include "main/image.h"
 #include "main/macros.h"
 #include "main/mtypes.h"
+#include "main/glformats.h"
 #include "main/pbo.h"
 #include "main/texobj.h"
 #include "main/texstore.h"
@@ -86,8 +87,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
    /* The miptree's buffer. */
    struct brw_bo *bo;
 
-   uint32_t cpp;
-   mem_copy_fn mem_copy = NULL;
+   mem_copy_fn mem_copy;
 
    /* This fastpath is restricted to specific texture types:
     * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
@@ -98,8 +98,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
     * with _mesa_image_row_stride. However, before removing the restrictions
     * we need tests.
     */
-   if (!(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
-       !(texImage->TexObject->Target == GL_TEXTURE_2D ||
+   if (!(texImage->TexObject->Target == GL_TEXTURE_2D ||
          texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
        pixels == NULL ||
        _mesa_is_bufferobj(packing->BufferObj) ||
@@ -116,7 +115,11 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
    if (ctx->_ImageTransferState)
       return false;
 
-   if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp))
+   if (format == GL_COLOR_INDEX)
+      return false;
+
+   mem_copy = intel_get_memcpy(texImage->TexFormat, format, type, INTEL_UPLOAD);
+   if (mem_copy == NULL)
       return false;
 
    /* If this is a nontrivial texture view, let another path handle it instead. */
@@ -193,6 +196,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
    xoffset += level_x;
    yoffset += level_y;
 
+   uint32_t cpp = _mesa_get_format_bytes(texImage->TexFormat);
    linear_to_tiled(
       xoffset * cpp, (xoffset + width) * cpp,
       yoffset, yoffset + height,
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 53a5679691..65dd950c08 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -33,6 +33,11 @@
 
 #include "util/macros.h"
 
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/mtypes.h"
+#include "main/glformats.h"
+
 #include "brw_context.h"
 #include "intel_tiled_memcpy.h"
 
@@ -213,6 +218,37 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
    return dst;
 }
 
+static inline void *
+rgbx8(void *dst, const void *src, size_t bytes) /* returns dst */
+{
+   uint8_t *d = dst;
+   uint8_t const *s = src;
+
+   while (bytes >= 4) { /* whole 4-byte pixels only; any tail is not copied */
+      d[0] = s[2]; /* first and third byte trade places ... */
+      d[1] = s[1];
+      d[2] = s[0];
+      d[3] = 0xff; /* ... and the fourth byte is forced to 0xff */
+      d += 4;
+      s += 4;
+      bytes -= 4;
+   }
+   return dst;
+}
+
+static inline void *
+bgrx8(void *dst, const void *src, size_t bytes) /* returns dst */
+{
+   uint32_t *d = dst; /* NOTE(review): 32-bit accesses; confirm alignment */
+   uint32_t const *s = src;
+
+   while (bytes >= 4) { /* whole 32-bit pixels only; any tail is not copied */
+      *d++ = *s++ | 0xff000000; /* keep low 24 bits, set the top 8 bits */
+      bytes -= 4;
+   }
+   return dst;
+}
+
 /**
  * Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3).
  * These ranges are in bytes, i.e. pixels * bytes-per-pixel.
@@ -473,7 +509,9 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                                  dst, src, src_pitch, swizzle_bit,
                                  rgba8_copy, rgba8_copy_aligned_dst);
       else
-         unreachable("not reached");
+         return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
+                                 dst, src, src_pitch, swizzle_bit,
+                                 mem_copy, mem_copy); /* never plain memcpy */
    } else {
       if (mem_copy == memcpy)
          return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
@@ -484,10 +522,11 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                                  dst, src, src_pitch, swizzle_bit,
                                  rgba8_copy, rgba8_copy_aligned_dst);
       else
-         unreachable("not reached");
+         return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
+                                 dst, src, src_pitch, swizzle_bit,
+                                 mem_copy, mem_copy);
    }
-   linear_to_xtiled(x0, x1, x2, x3, y0, y1,
-                    dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
+   unreachable("not reached");
 }
 
 /**
@@ -516,7 +555,9 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                                  dst, src, src_pitch, swizzle_bit,
                                  rgba8_copy, rgba8_copy_aligned_dst);
       else
-         unreachable("not reached");
+         return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
+                                 dst, src, src_pitch, swizzle_bit,
+                                 mem_copy, mem_copy);
    } else {
       if (mem_copy == memcpy)
          return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
@@ -526,10 +567,11 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                                  dst, src, src_pitch, swizzle_bit,
                                  rgba8_copy, rgba8_copy_aligned_dst);
       else
-         unreachable("not reached");
+         return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
+                                 dst, src, src_pitch, swizzle_bit,
+                                 mem_copy, mem_copy);
    }
-   linear_to_ytiled(x0, x1, x2, x3, y0, y1,
-                    dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
+   unreachable("not reached");
 }
 
 /**
@@ -558,7 +600,9 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                                  dst, src, dst_pitch, swizzle_bit,
                                  rgba8_copy, rgba8_copy_aligned_src);
       else
-         unreachable("not reached");
+         return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
+                                 dst, src, dst_pitch, swizzle_bit,
+                                 mem_copy, mem_copy);
    } else {
       if (mem_copy == memcpy)
          return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
@@ -568,10 +612,11 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                                  dst, src, dst_pitch, swizzle_bit,
                                  rgba8_copy, rgba8_copy_aligned_src);
       else
-         unreachable("not reached");
+         return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
+                                 dst, src, dst_pitch, swizzle_bit,
+                                 mem_copy, mem_copy);
    }
-   xtiled_to_linear(x0, x1, x2, x3, y0, y1,
-                    dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
+   unreachable("not reached");
 }
 
 /**
@@ -600,7 +645,9 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                                  dst, src, dst_pitch, swizzle_bit,
                                  rgba8_copy, rgba8_copy_aligned_src);
       else
-         unreachable("not reached");
+         return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
+                                 dst, src, dst_pitch, swizzle_bit,
+                                 mem_copy, mem_copy);
    } else {
       if (mem_copy == memcpy)
          return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
@@ -610,10 +657,11 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                                  dst, src, dst_pitch, swizzle_bit,
                                  rgba8_copy, rgba8_copy_aligned_src);
       else
-         unreachable("not reached");
+         return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
+                                 dst, src, dst_pitch, swizzle_bit,
+                                 mem_copy, mem_copy);
    }
-   ytiled_to_linear(x0, x1, x2, x3, y0, y1,
-                    dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
+   unreachable("not reached");
 }
 
 /**
@@ -812,51 +860,75 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
  * \param[in]  tiledFormat The format of the tiled image
  * \param[in]  format      The GL format of the client data
  * \param[in]  type        The GL type of the client data
- * \param[out] mem_copy    Will be set to one of either the standard
- *                         library's memcpy or a different copy function
- *                         that performs an RGBA to BGRA conversion
- * \param[out] cpp         Number of bytes per channel
  *
- * \return true if the format and type combination are valid
+ * \return the copy function to use, or NULL if the combination is unsupported
  */
-bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
-                      GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp)
+mem_copy_fn intel_get_memcpy(mesa_format tiledFormat,
+                             GLenum format, GLenum type,
+                             enum intel_memcpy_direction direction)
 {
-   if (type == GL_UNSIGNED_INT_8_8_8_8_REV &&
-       !(format == GL_RGBA || format == GL_BGRA))
-      return false; /* Invalid type/format combination */
-
-   if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) ||
-       (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) {
-      *cpp = 1;
-      *mem_copy = memcpy;
-   } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) ||
-              (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) ||
-              (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) ||
-              (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) {
-      *cpp = 4;
-      if (format == GL_BGRA) {
-         *mem_copy = memcpy;
-      } else if (format == GL_RGBA) {
-         *mem_copy = rgba8_copy;
-      }
-   } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) ||
-              (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) ||
-              (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) ||
-              (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) {
-      *cpp = 4;
-      if (format == GL_BGRA) {
-         /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can
-          * use the same function.
-          */
-         *mem_copy = rgba8_copy;
-      } else if (format == GL_RGBA) {
-         *mem_copy = memcpy;
-      }
+   mesa_format user_format;
+   mem_copy_fn fn = NULL;
+
+   if (type == GL_BITMAP)
+      return NULL;
+
+   /* Stencil tiling is a lie, though we could do similar manual detiling */
+   switch ((int)tiledFormat) {
+   case MESA_FORMAT_S_UINT8:
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+   case MESA_FORMAT_Z24_UNORM_X8_UINT:
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+      return NULL;
    }
 
-   if (!(*mem_copy))
-      return false;
+   if (_mesa_is_format_compressed(tiledFormat))
+      return NULL;
+
+   user_format = _mesa_format_from_format_and_type(format, type);
+   if (_mesa_format_is_mesa_array_format(user_format))
+      user_format = _mesa_format_from_array_format(user_format);
+
+   if (user_format == tiledFormat) {
+      /* memcpy only if the base formats agree: no implicit conversion */
+      if (_mesa_unpack_format_to_base_format(format) ==
+          _mesa_get_format_base_format(tiledFormat))
+         fn = memcpy;
+   } else switch ((int)tiledFormat) {
+   case MESA_FORMAT_B8G8R8A8_UNORM:
+      if (user_format == MESA_FORMAT_R8G8B8A8_UNORM)
+         fn = rgba8_copy;
+      else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM)
+         fn = direction == INTEL_UPLOAD ? rgbx8 : rgba8_copy;
+      else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM)
+         fn = direction == INTEL_UPLOAD ? bgrx8 : memcpy;
+      break;
+   case MESA_FORMAT_B8G8R8X8_UNORM:
+      if (user_format == MESA_FORMAT_B8G8R8A8_UNORM)
+         fn = direction == INTEL_UPLOAD ? memcpy : bgrx8;
+      else if (user_format == MESA_FORMAT_R8G8B8A8_UNORM)
+         fn = direction == INTEL_UPLOAD ? rgba8_copy : rgbx8;
+      else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM)
+         fn = rgba8_copy;
+      break;
+   /* The RGBA/RGBX tiled layouts mirror the BGRA/BGRX cases above */
+   case MESA_FORMAT_R8G8B8A8_UNORM:
+      if (user_format == MESA_FORMAT_B8G8R8A8_UNORM)
+         fn = rgba8_copy;
+      else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM)
+         fn = direction == INTEL_UPLOAD ? rgbx8 : rgba8_copy;
+      else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM)
+         fn = direction == INTEL_UPLOAD ? bgrx8 : memcpy;
+      break;
+   case MESA_FORMAT_R8G8B8X8_UNORM:
+      if (user_format == MESA_FORMAT_R8G8B8A8_UNORM)
+         fn = direction == INTEL_UPLOAD ? memcpy : bgrx8;
+      else if (user_format == MESA_FORMAT_B8G8R8A8_UNORM)
+         fn = direction == INTEL_UPLOAD ? rgba8_copy : rgbx8;
+      else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM)
+         fn = rgba8_copy;
+      break;
+   }
 
-   return true;
+   return fn;
 }
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
index 62ec8847fb..e9c43920a1 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
@@ -55,7 +55,20 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
                 enum isl_tiling tiling,
                 mem_copy_fn mem_copy);
 
-bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
-                      GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp);
+/* Tells intel_get_memcpy() whether the memcpy() is
+ *
+ *  - an upload to the GPU with an aligned destination and a potentially
+ *    unaligned source; or
+ *  - a download from the GPU with an aligned source and a potentially
+ *    unaligned destination.
+ */
+enum intel_memcpy_direction {
+   INTEL_UPLOAD,  /* used by the linear_to_tiled() caller (texsubimage) */
+   INTEL_DOWNLOAD /* used by the tiled_to_linear() callers (readpixels, gettexsubimage) */
+};
+
+mem_copy_fn intel_get_memcpy(mesa_format tiledFormat,
+                             GLenum format, GLenum type,
+                             enum intel_memcpy_direction direction);
 
 #endif /* INTEL_TILED_MEMCPY */
-- 
2.13.3



More information about the mesa-dev mailing list