[Mesa-dev] [PATCH 08/22] i965: Support very simple expansion options for tiled memcpy
Chris Wilson
chris at chris-wilson.co.uk
Sat Aug 5 09:40:00 UTC 2017
A big limitation of the current direct memcpy routine is that it only
recognises a couple of (admittedly) common colour types, and cannot do
any inline conversion. If we pass the mesa_format down to memcpy and
tell it the direction of the transfer, we can start accepting a few
mixed transfers and be less picky overall.
---
src/mesa/drivers/dri/i965/intel_pixel_read.c | 20 +--
src/mesa/drivers/dri/i965/intel_tex_image.c | 16 +--
src/mesa/drivers/dri/i965/intel_tex_subimage.c | 14 +-
src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 186 +++++++++++++++++--------
src/mesa/drivers/dri/i965/intel_tiled_memcpy.h | 17 ++-
5 files changed, 162 insertions(+), 91 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index cd4fbab097..eb3166ca82 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -84,17 +84,13 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
/* The miptree's buffer. */
struct brw_bo *bo;
- uint32_t cpp;
- mem_copy_fn mem_copy = NULL;
+ mem_copy_fn mem_copy;
/* This fastpath is restricted to specific renderbuffer types:
* a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
* more types.
*/
- if (!brw->has_llc ||
- !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
- pixels == NULL ||
- _mesa_is_bufferobj(pack->BufferObj) ||
+ if (!brw->has_llc || pixels == NULL || _mesa_is_bufferobj(pack->BufferObj) ||
pack->Alignment > 4 ||
pack->SkipPixels > 0 ||
pack->SkipRows > 0 ||
@@ -115,15 +111,8 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
if (rb->NumSamples > 1)
return false;
- /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
- * function doesn't set the last channel to 1. Note this checks BaseFormat
- * rather than TexFormat in case the RGBX format is being simulated with an
- * RGBA format.
- */
- if (rb->_BaseFormat == GL_RGB)
- return false;
-
- if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp))
+ mem_copy = intel_get_memcpy(rb->Format, format, type, INTEL_DOWNLOAD);
+ if (mem_copy == NULL)
return false;
if (!irb->mt ||
@@ -196,6 +185,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
pack->Alignment, pack->RowLength, pack->SkipPixels,
pack->SkipRows);
+ uint32_t cpp = _mesa_get_format_bytes(rb->Format);
tiled_to_linear(
xoffset * cpp, (xoffset + width) * cpp,
yoffset, yoffset + height,
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index beed1609bd..770a9a78a8 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -387,8 +387,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
/* The miptree's buffer. */
struct brw_bo *bo;
- uint32_t cpp;
- mem_copy_fn mem_copy = NULL;
+ mem_copy_fn mem_copy;
/* This fastpath is restricted to specific texture types:
* a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
@@ -400,7 +399,6 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
* we need tests.
*/
if (!brw->has_llc ||
- !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
!(texImage->TexObject->Target == GL_TEXTURE_2D ||
texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
pixels == NULL ||
@@ -414,15 +412,8 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
packing->Invert)
return false;
- /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
- * function doesn't set the last channel to 1. Note this checks BaseFormat
- * rather than TexFormat in case the RGBX format is being simulated with an
- * RGBA format.
- */
- if (texImage->_BaseFormat == GL_RGB)
- return false;
-
- if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp))
+ mem_copy = intel_get_memcpy(texImage->TexFormat, format, type, INTEL_DOWNLOAD);
+ if (mem_copy == NULL)
return false;
/* If this is a nontrivial texture view, let another path handle it instead. */
@@ -486,6 +477,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
xoffset += level_x;
yoffset += level_y;
+ uint32_t cpp = _mesa_get_format_bytes(texImage->TexFormat);
tiled_to_linear(
xoffset * cpp, (xoffset + width) * cpp,
yoffset, yoffset + height,
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index a34203087f..d6fcc93b98 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -27,6 +27,7 @@
#include "main/image.h"
#include "main/macros.h"
#include "main/mtypes.h"
+#include "main/glformats.h"
#include "main/pbo.h"
#include "main/texobj.h"
#include "main/texstore.h"
@@ -86,8 +87,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
/* The miptree's buffer. */
struct brw_bo *bo;
- uint32_t cpp;
- mem_copy_fn mem_copy = NULL;
+ mem_copy_fn mem_copy;
/* This fastpath is restricted to specific texture types:
* a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
@@ -98,8 +98,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
* with _mesa_image_row_stride. However, before removing the restrictions
* we need tests.
*/
- if (!(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
- !(texImage->TexObject->Target == GL_TEXTURE_2D ||
+ if (!(texImage->TexObject->Target == GL_TEXTURE_2D ||
texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
pixels == NULL ||
_mesa_is_bufferobj(packing->BufferObj) ||
@@ -116,7 +115,11 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
if (ctx->_ImageTransferState)
return false;
- if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp))
+ if (format == GL_COLOR_INDEX)
+ return false;
+
+ mem_copy = intel_get_memcpy(texImage->TexFormat, format, type, INTEL_UPLOAD);
+ if (mem_copy == NULL)
return false;
/* If this is a nontrivial texture view, let another path handle it instead. */
@@ -193,6 +196,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
xoffset += level_x;
yoffset += level_y;
+ uint32_t cpp = _mesa_get_format_bytes(texImage->TexFormat);
linear_to_tiled(
xoffset * cpp, (xoffset + width) * cpp,
yoffset, yoffset + height,
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 53a5679691..65dd950c08 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -33,6 +33,11 @@
#include "util/macros.h"
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/mtypes.h"
+#include "main/glformats.h"
+
#include "brw_context.h"
#include "intel_tiled_memcpy.h"
@@ -213,6 +218,37 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
return dst;
}
+/**
+ * Copy 4-byte pixels from src to dst, exchanging bytes 0 and 2 of every
+ * pixel and storing 0xff in byte 3 (byte 3 of the source is ignored).
+ *
+ * Only whole pixels are copied: a trailing remainder of fewer than four
+ * bytes is left untouched. Returns dst, like memcpy().
+ */
+static inline void *
+rgbx8(void *dst, const void *src, size_t bytes)
+{
+ const uint8_t *in = src;
+ uint8_t *out = dst;
+ size_t npixels = bytes / 4;
+
+ for (size_t p = 0; p < npixels; p++, in += 4, out += 4) {
+ out[0] = in[2];
+ out[1] = in[1];
+ out[2] = in[0];
+ out[3] = 0xff;
+ }
+ return dst;
+}
+
+/**
+ * Copy 4-byte pixels from src to dst unchanged, except that byte 3 of every
+ * pixel is forced to 0xff (the same byte rgbx8() fills, and the byte the
+ * former 0xff000000 mask hit on little-endian hosts).
+ *
+ * Pixels are moved a byte at a time: intel_get_memcpy() hands this function
+ * out for uploads and for downloads, so either src or dst may be a client
+ * pointer without 4-byte alignment, and reading or writing it through a
+ * uint32_t pointer would be undefined behaviour.
+ *
+ * Only whole pixels are copied: a trailing remainder of fewer than four
+ * bytes is left untouched. Returns dst, like memcpy().
+ */
+static inline void *
+bgrx8(void *dst, const void *src, size_t bytes)
+{
+ uint8_t *d = dst;
+ uint8_t const *s = src;
+
+ while (bytes >= 4) {
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d[3] = 0xff;
+ d += 4;
+ s += 4;
+ bytes -= 4;
+ }
+ return dst;
+}
+
/**
* Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3).
* These ranges are in bytes, i.e. pixels * bytes-per-pixel.
@@ -473,7 +509,9 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, src_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_dst);
else
- unreachable("not reached");
+ return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
+ dst, src, src_pitch, swizzle_bit,
+ mem_copy, mem_copy); /* whole tile is the aligned span: it must be converted too */
} else {
if (mem_copy == memcpy)
return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
@@ -484,10 +522,11 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, src_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_dst);
else
- unreachable("not reached");
+ return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
+ dst, src, src_pitch, swizzle_bit,
+ mem_copy, mem_copy);
}
- linear_to_xtiled(x0, x1, x2, x3, y0, y1,
- dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
+ unreachable("not reached");
}
/**
@@ -516,7 +555,9 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, src_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_dst);
else
- unreachable("not reached");
+ return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
+ dst, src, src_pitch, swizzle_bit,
+ mem_copy, mem_copy);
} else {
if (mem_copy == memcpy)
return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
@@ -526,10 +567,11 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, src_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_dst);
else
- unreachable("not reached");
+ return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
+ dst, src, src_pitch, swizzle_bit,
+ mem_copy, mem_copy);
}
- linear_to_ytiled(x0, x1, x2, x3, y0, y1,
- dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
+ unreachable("not reached");
}
/**
@@ -558,7 +600,9 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
else
- unreachable("not reached");
+ return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
+ dst, src, dst_pitch, swizzle_bit,
+ mem_copy, mem_copy);
} else {
if (mem_copy == memcpy)
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
@@ -568,10 +612,11 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
else
- unreachable("not reached");
+ return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
+ dst, src, dst_pitch, swizzle_bit,
+ mem_copy, mem_copy);
}
- xtiled_to_linear(x0, x1, x2, x3, y0, y1,
- dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
+ unreachable("not reached");
}
/**
@@ -600,7 +645,9 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
else
- unreachable("not reached");
+ return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
+ dst, src, dst_pitch, swizzle_bit,
+ mem_copy, mem_copy);
} else {
if (mem_copy == memcpy)
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
@@ -610,10 +657,11 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
else
- unreachable("not reached");
+ return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
+ dst, src, dst_pitch, swizzle_bit,
+ mem_copy, mem_copy);
}
- ytiled_to_linear(x0, x1, x2, x3, y0, y1,
- dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
+ unreachable("not reached");
}
/**
@@ -812,51 +860,75 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
* \param[in] tiledFormat The format of the tiled image
* \param[in] format The GL format of the client data
* \param[in] type The GL type of the client data
- * \param[out] mem_copy Will be set to one of either the standard
- * library's memcpy or a different copy function
- * that performs an RGBA to BGRA conversion
- * \param[out] cpp Number of bytes per channel
*
- * \return true if the format and type combination are valid
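+ * \param[in] direction Whether the copy is an upload to, or a download
+ * from, the tiled image
+ *
+ * \return the copy function to use, or NULL if this combination of formats,
+ * type and direction is not supported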
*/
-bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
- GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp)
+mem_copy_fn intel_get_memcpy(mesa_format tiledFormat,
+ GLenum format, GLenum type,
+ enum intel_memcpy_direction direction)
{
- if (type == GL_UNSIGNED_INT_8_8_8_8_REV &&
- !(format == GL_RGBA || format == GL_BGRA))
- return false; /* Invalid type/format combination */
-
- if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) ||
- (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) {
- *cpp = 1;
- *mem_copy = memcpy;
- } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) ||
- (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) ||
- (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) ||
- (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) {
- *cpp = 4;
- if (format == GL_BGRA) {
- *mem_copy = memcpy;
- } else if (format == GL_RGBA) {
- *mem_copy = rgba8_copy;
- }
- } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) ||
- (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) ||
- (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) ||
- (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) {
- *cpp = 4;
- if (format == GL_BGRA) {
- /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can
- * use the same function.
- */
- *mem_copy = rgba8_copy;
- } else if (format == GL_RGBA) {
- *mem_copy = memcpy;
- }
+ mesa_format user_format;
+ mem_copy_fn fn = NULL;
+
+ if (type == GL_BITMAP)
+ return NULL;
+
+ /* Stencil tiling is a lie, though we could do similar manual detiling */
+ switch ((int)tiledFormat) {
+ case MESA_FORMAT_S_UINT8:
+ case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case MESA_FORMAT_Z24_UNORM_X8_UINT:
+ case MESA_FORMAT_Z24_UNORM_S8_UINT:
+ return NULL;
}
- if (!(*mem_copy))
- return false;
+ if (_mesa_is_format_compressed(tiledFormat))
+ return NULL;
+
+ /* Describe the client data as a mesa_format so it can be compared
+ * directly against the format of the tiled image.
+ */
+ user_format = _mesa_format_from_format_and_type(format, type);
+ if (_mesa_format_is_mesa_array_format(user_format))
+ user_format = _mesa_format_from_array_format(user_format);
+
+ /* NOTE(review): the previous implementation also accepted the *_SRGB
+ * variants of the 8888 formats; they only reach the fast path now if
+ * user_format can equal them - confirm this is intended.
+ *
+ * In the table below a function is chosen per (tiled, client) pair:
+ * memcpy - same byte order, no alpha fix-up needed
+ * bgrx8 - same byte order, destination needs alpha forced to 0xff
+ * rgba8_copy - red/blue swapped, alpha/X byte copied as is
+ * rgbx8 - red/blue swapped, destination needs alpha forced to 0xff
+ * For an upload the client is the source, for a download the tiled image.
+ */
+ if (user_format == tiledFormat) {
+ /* Prevent any implicit conversions: a raw copy is only valid when the
+ * client's base format matches that of the tiled image.
+ */
+ if (_mesa_unpack_format_to_base_format(format) ==
+ _mesa_get_format_base_format(tiledFormat))
+ fn = memcpy;
+ } else switch ((int)tiledFormat) {
+ case MESA_FORMAT_B8G8R8A8_UNORM:
+ if (user_format == MESA_FORMAT_R8G8B8A8_UNORM)
+ fn = rgba8_copy;
+ else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM)
+ fn = direction == INTEL_UPLOAD ? rgbx8 : rgba8_copy;
+ else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM)
+ fn = direction == INTEL_UPLOAD ? bgrx8 : memcpy;
+ break;
+ case MESA_FORMAT_B8G8R8X8_UNORM:
+ if (user_format == MESA_FORMAT_B8G8R8A8_UNORM)
+ fn = direction == INTEL_UPLOAD ? memcpy : bgrx8;
+ else if (user_format == MESA_FORMAT_R8G8B8A8_UNORM)
+ fn = direction == INTEL_UPLOAD ? rgba8_copy : rgbx8;
+ else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM)
+ fn = rgba8_copy;
+ break;
+
+ case MESA_FORMAT_R8G8B8A8_UNORM:
+ if (user_format == MESA_FORMAT_B8G8R8A8_UNORM)
+ fn = rgba8_copy;
+ else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM)
+ fn = direction == INTEL_UPLOAD ? rgbx8 : rgba8_copy;
+ else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM)
+ fn = direction == INTEL_UPLOAD ? bgrx8 : memcpy;
+ break;
+ case MESA_FORMAT_R8G8B8X8_UNORM:
+ if (user_format == MESA_FORMAT_R8G8B8A8_UNORM)
+ fn = direction == INTEL_UPLOAD ? memcpy : bgrx8;
+ else if (user_format == MESA_FORMAT_B8G8R8A8_UNORM)
+ fn = direction == INTEL_UPLOAD ? rgba8_copy : rgbx8;
+ else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM)
+ fn = rgba8_copy;
+ break;
+ }
- return true;
+ return fn;
}
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
index 62ec8847fb..e9c43920a1 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
@@ -55,7 +55,20 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
enum isl_tiling tiling,
mem_copy_fn mem_copy);
-bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
- GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp);
+/* Direction of a tiled copy. It is passed to intel_get_memcpy(), which uses
+ * it to choose the copy function for a given pair of formats. The copy is
+ * either
+ *
+ * - an upload to the GPU with an aligned destination and a potentially
+ * unaligned source; or
+ * - a download from the GPU with an aligned source and a potentially
+ * unaligned destination.
+ */
+enum intel_memcpy_direction {
+ INTEL_UPLOAD, /* client data -> tiled image (linear_to_tiled) */
+ INTEL_DOWNLOAD /* tiled image -> client data (tiled_to_linear) */
+};
+
+mem_copy_fn intel_get_memcpy(mesa_format tiledFormat,
+ GLenum format, GLenum type,
+ enum intel_memcpy_direction direction);
#endif /* INTEL_TILED_MEMCPY */
--
2.13.3
More information about the mesa-dev
mailing list