[Mesa-dev] [PATCH 1/3] i965: Support very simple expansion options for tiled memcpy
Chris Wilson
chris at chris-wilson.co.uk
Wed Oct 18 11:11:12 UTC 2017
A big limitation of the current direct memcpy routine is that it only
recognises a couple of (admittedly) common colour types, and cannot do
any inline conversion. If we pass the mesa_format down to memcpy and
tell it the direction of the transfer, we can start accepting a few
mixed transfers and be less picky overall.
The principal benefit code-wise is that this pushes the decision about
handled formats from the multiple callers to the tiled-memcpy backend.
Cc: Matt Turner <mattst88 at gmail.com>
Cc: Kenneth Graunke <kenneth at whitecape.org>
---
src/mesa/drivers/dri/i965/intel_pixel_read.c | 26 +---
src/mesa/drivers/dri/i965/intel_tex_image.c | 47 +++----
src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 185 +++++++++++++++++--------
src/mesa/drivers/dri/i965/intel_tiled_memcpy.h | 17 ++-
4 files changed, 167 insertions(+), 108 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 4528d6d265..9cb48e5ed2 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -82,20 +82,12 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
return false;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
- int dst_pitch;
-
- /* The miptree's buffer. */
- struct brw_bo *bo;
-
- uint32_t cpp;
- mem_copy_fn mem_copy = NULL;
/* This fastpath is restricted to specific renderbuffer types:
* a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
* more types.
*/
if (!devinfo->has_llc ||
- !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
pixels == NULL ||
_mesa_is_bufferobj(pack->BufferObj) ||
pack->Alignment > 4 ||
@@ -118,15 +110,9 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
if (rb->NumSamples > 1)
return false;
- /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
- * function doesn't set the last channel to 1. Note this checks BaseFormat
- * rather than TexFormat in case the RGBX format is being simulated with an
- * RGBA format.
- */
- if (rb->_BaseFormat == GL_RGB)
- return false;
-
- if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp))
+ mem_copy_fn mem_copy =
+ intel_get_memcpy(rb->Format, format, type, INTEL_DOWNLOAD);
+ if (mem_copy == NULL)
return false;
if (!irb->mt ||
@@ -152,7 +138,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
*/
intel_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, false);
- bo = irb->mt->bo;
+ struct brw_bo *bo = irb->mt->bo;
if (brw_batch_references(&brw->batch, bo)) {
perf_debug("Flushing before mapping a referenced bo.\n");
@@ -171,7 +157,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
xoffset += slice_offset_x;
yoffset += slice_offset_y;
- dst_pitch = _mesa_image_row_stride(pack, width, format, type);
+ int dst_pitch = _mesa_image_row_stride(pack, width, format, type);
/* For a window-system renderbuffer, the buffer is actually flipped
* vertically, so we need to handle that. Since the detiling function
@@ -199,6 +185,8 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
pack->Alignment, pack->RowLength, pack->SkipPixels,
pack->SkipRows);
+ uint32_t cpp = _mesa_get_format_bytes(rb->Format);
+
tiled_to_linear(
xoffset * cpp, (xoffset + width) * cpp,
yoffset, yoffset + height,
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 37c8e24f03..1039d80bf3 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -187,13 +187,6 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
struct brw_context *brw = brw_context(ctx);
const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct intel_texture_image *image = intel_texture_image(texImage);
- int src_pitch;
-
- /* The miptree's buffer. */
- struct brw_bo *bo;
-
- uint32_t cpp;
- mem_copy_fn mem_copy = NULL;
/* This fastpath is restricted to specific texture types:
* a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
@@ -205,7 +198,6 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
* we need tests.
*/
if (!devinfo->has_llc ||
- !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
!(texImage->TexObject->Target == GL_TEXTURE_2D ||
texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
pixels == NULL ||
@@ -223,7 +215,12 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
if (ctx->_ImageTransferState)
return false;
- if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp))
+ if (format == GL_COLOR_INDEX)
+ return false;
+
+ mem_copy_fn mem_copy =
+ intel_get_memcpy(texImage->TexFormat, format, type, INTEL_UPLOAD);
+ if (mem_copy == NULL)
return false;
/* If this is a nontrivial texture view, let another path handle it instead. */
@@ -258,7 +255,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
intel_miptree_access_raw(brw, image->mt, level, 0, true);
- bo = image->mt->bo;
+ struct brw_bo *bo = image->mt->bo;
if (brw_batch_references(&brw->batch, bo)) {
perf_debug("Flushing before mapping a referenced bo.\n");
@@ -271,7 +268,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
return false;
}
- src_pitch = _mesa_image_row_stride(packing, width, format, type);
+ int src_pitch = _mesa_image_row_stride(packing, width, format, type);
/* We postponed printing this message until having committed to executing
* the function.
@@ -290,6 +287,8 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
xoffset += level_x;
yoffset += level_y;
+ uint32_t cpp = _mesa_get_format_bytes(texImage->TexFormat);
+
linear_to_tiled(
xoffset * cpp, (xoffset + width) * cpp,
yoffset, yoffset + height,
@@ -627,13 +626,6 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
struct brw_context *brw = brw_context(ctx);
const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct intel_texture_image *image = intel_texture_image(texImage);
- int dst_pitch;
-
- /* The miptree's buffer. */
- struct brw_bo *bo;
-
- uint32_t cpp;
- mem_copy_fn mem_copy = NULL;
/* This fastpath is restricted to specific texture types:
* a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
@@ -645,7 +637,6 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
* we need tests.
*/
if (!devinfo->has_llc ||
- !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
!(texImage->TexObject->Target == GL_TEXTURE_2D ||
texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
pixels == NULL ||
@@ -659,15 +650,9 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
packing->Invert)
return false;
- /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
- * function doesn't set the last channel to 1. Note this checks BaseFormat
- * rather than TexFormat in case the RGBX format is being simulated with an
- * RGBA format.
- */
- if (texImage->_BaseFormat == GL_RGB)
- return false;
-
- if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp))
+ mem_copy_fn mem_copy =
+ intel_get_memcpy(texImage->TexFormat, format, type, INTEL_DOWNLOAD);
+ if (mem_copy == NULL)
return false;
/* If this is a nontrivial texture view, let another path handle it instead. */
@@ -702,7 +687,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
intel_miptree_access_raw(brw, image->mt, level, 0, true);
- bo = image->mt->bo;
+ struct brw_bo *bo = image->mt->bo;
if (brw_batch_references(&brw->batch, bo)) {
perf_debug("Flushing before mapping a referenced bo.\n");
@@ -715,7 +700,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
return false;
}
- dst_pitch = _mesa_image_row_stride(packing, width, format, type);
+ int dst_pitch = _mesa_image_row_stride(packing, width, format, type);
DBG("%s: level=%d x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
"mesa_format=0x%x tiling=%d "
@@ -731,6 +716,8 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
xoffset += level_x;
yoffset += level_y;
+ uint32_t cpp = _mesa_get_format_bytes(texImage->TexFormat);
+
tiled_to_linear(
xoffset * cpp, (xoffset + width) * cpp,
yoffset, yoffset + height,
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 53a5679691..dff6976bdd 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -33,6 +33,11 @@
#include "util/macros.h"
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/mtypes.h"
+#include "main/glformats.h"
+
#include "brw_context.h"
#include "intel_tiled_memcpy.h"
@@ -213,6 +218,37 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
return dst;
}
+static inline void *
+rgbx8(void *dst, const void *src, size_t bytes)
+{
+ uint8_t *d = dst;
+ uint8_t const *s = src;
+ /* Per 4-byte pixel: swap bytes 0 and 2, force byte 3 to 0xff. */
+ while (bytes >= 4) {
+ d[0] = s[2];
+ d[1] = s[1];
+ d[2] = s[0];
+ d[3] = 0xff;
+ d += 4;
+ s += 4;
+ bytes -= 4;
+ }
+ return dst; /* any trailing (bytes % 4) bytes are left untouched */
+}
+
+static inline void *
+bgrx8(void *dst, const void *src, size_t bytes)
+{
+ uint32_t *d = dst; /* NOTE(review): word access; confirm unaligned is OK */
+ uint32_t const *s = src;
+ /* Copy 32-bit words unchanged except for setting the top 8 bits. */
+ while (bytes >= 4) {
+ *d++ = *s++ | 0xff000000;
+ bytes -= 4;
+ }
+ return dst; /* any trailing (bytes % 4) bytes are left untouched */
+}
+
/**
* Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3).
* These ranges are in bytes, i.e. pixels * bytes-per-pixel.
@@ -473,7 +509,9 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, src_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_dst);
else
- unreachable("not reached");
+ return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
+ dst, src, src_pitch, swizzle_bit,
+ mem_copy, mem_copy);
} else {
if (mem_copy == memcpy)
return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
@@ -484,10 +522,11 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, src_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_dst);
else
- unreachable("not reached");
+ return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
+ dst, src, src_pitch, swizzle_bit,
+ mem_copy, mem_copy);
}
- linear_to_xtiled(x0, x1, x2, x3, y0, y1,
- dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
+ unreachable("not reached");
}
/**
@@ -516,7 +555,9 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, src_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_dst);
else
- unreachable("not reached");
+ return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
+ dst, src, src_pitch, swizzle_bit,
+ mem_copy, mem_copy);
} else {
if (mem_copy == memcpy)
return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
@@ -526,10 +567,11 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, src_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_dst);
else
- unreachable("not reached");
+ return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
+ dst, src, src_pitch, swizzle_bit,
+ mem_copy, mem_copy);
}
- linear_to_ytiled(x0, x1, x2, x3, y0, y1,
- dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
+ unreachable("not reached");
}
/**
@@ -558,7 +600,9 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
else
- unreachable("not reached");
+ return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
+ dst, src, dst_pitch, swizzle_bit,
+ mem_copy, mem_copy);
} else {
if (mem_copy == memcpy)
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
@@ -568,10 +612,11 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
else
- unreachable("not reached");
+ return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
+ dst, src, dst_pitch, swizzle_bit,
+ mem_copy, mem_copy);
}
- xtiled_to_linear(x0, x1, x2, x3, y0, y1,
- dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
+ unreachable("not reached");
}
/**
@@ -600,7 +645,9 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
else
- unreachable("not reached");
+ return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
+ dst, src, dst_pitch, swizzle_bit,
+ mem_copy, mem_copy);
} else {
if (mem_copy == memcpy)
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
@@ -610,10 +657,11 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
else
- unreachable("not reached");
+ return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
+ dst, src, dst_pitch, swizzle_bit,
+ mem_copy, mem_copy);
}
- ytiled_to_linear(x0, x1, x2, x3, y0, y1,
- dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
+ unreachable("not reached");
}
/**
@@ -812,51 +860,74 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
* \param[in] tiledFormat The format of the tiled image
* \param[in] format The GL format of the client data
* \param[in] type The GL type of the client data
- * \param[out] mem_copy Will be set to one of either the standard
- * library's memcpy or a different copy function
- * that performs an RGBA to BGRA conversion
- * \param[out] cpp Number of bytes per channel
*
- * \return true if the format and type combination are valid
+ * \return the mem_copy_fn to use, or NULL if the combination is unsupported
*/
-bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
- GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp)
+mem_copy_fn intel_get_memcpy(mesa_format tiledFormat,
+ GLenum format, GLenum type,
+ enum intel_memcpy_direction direction)
{
- if (type == GL_UNSIGNED_INT_8_8_8_8_REV &&
- !(format == GL_RGBA || format == GL_BGRA))
- return false; /* Invalid type/format combination */
-
- if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) ||
- (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) {
- *cpp = 1;
- *mem_copy = memcpy;
- } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) ||
- (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) ||
- (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) ||
- (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) {
- *cpp = 4;
- if (format == GL_BGRA) {
- *mem_copy = memcpy;
- } else if (format == GL_RGBA) {
- *mem_copy = rgba8_copy;
- }
- } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) ||
- (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) ||
- (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) ||
- (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) {
- *cpp = 4;
- if (format == GL_BGRA) {
- /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can
- * use the same function.
- */
- *mem_copy = rgba8_copy;
- } else if (format == GL_RGBA) {
- *mem_copy = memcpy;
- }
+ if (type == GL_BITMAP)
+ return NULL;
+
+ /* Stencil tiling is a lie, though we could do similar manual detiling */
+ switch ((int)tiledFormat) {
+ case MESA_FORMAT_S_UINT8:
+ case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case MESA_FORMAT_Z24_UNORM_X8_UINT:
+ case MESA_FORMAT_Z24_UNORM_S8_UINT:
+ return NULL;
}
- if (!(*mem_copy))
- return false;
+ if (_mesa_is_format_compressed(tiledFormat))
+ return NULL;
+
+ mesa_format user_format = _mesa_format_from_format_and_type(format, type);
+ if (_mesa_format_is_mesa_array_format(user_format))
+ user_format = _mesa_format_from_array_format(user_format);
+
+ mem_copy_fn fn = NULL;
+ /* rgba8_copy swaps R/B; rgbx8 also sets A to 0xff; bgrx8 only sets A. */
+ if (user_format == tiledFormat) {
+ /* Prevent any implicit conversions */
+ if (_mesa_unpack_format_to_base_format(format) ==
+ _mesa_get_format_base_format(tiledFormat))
+ fn = memcpy;
+ } else switch ((int)tiledFormat) {
+ case MESA_FORMAT_B8G8R8A8_UNORM:
+ if (user_format == MESA_FORMAT_R8G8B8A8_UNORM)
+ fn = rgba8_copy;
+ else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM)
+ fn = direction == INTEL_UPLOAD ? rgbx8 : rgba8_copy;
+ else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM)
+ fn = direction == INTEL_UPLOAD ? bgrx8 : memcpy;
+ break;
+ case MESA_FORMAT_B8G8R8X8_UNORM:
+ if (user_format == MESA_FORMAT_B8G8R8A8_UNORM)
+ fn = direction == INTEL_UPLOAD ? memcpy : bgrx8;
+ else if (user_format == MESA_FORMAT_R8G8B8A8_UNORM)
+ fn = direction == INTEL_UPLOAD ? rgba8_copy : rgbx8;
+ else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM)
+ fn = rgba8_copy;
+ break;
+
+ case MESA_FORMAT_R8G8B8A8_UNORM:
+ if (user_format == MESA_FORMAT_B8G8R8A8_UNORM)
+ fn = rgba8_copy;
+ else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM)
+ fn = direction == INTEL_UPLOAD ? rgbx8 : rgba8_copy;
+ else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM)
+ fn = direction == INTEL_UPLOAD ? bgrx8 : memcpy;
+ break;
+ case MESA_FORMAT_R8G8B8X8_UNORM:
+ if (user_format == MESA_FORMAT_R8G8B8A8_UNORM)
+ fn = direction == INTEL_UPLOAD ? memcpy : bgrx8;
+ else if (user_format == MESA_FORMAT_B8G8R8A8_UNORM)
+ fn = direction == INTEL_UPLOAD ? rgba8_copy : rgbx8;
+ else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM)
+ fn = rgba8_copy;
+ break;
+ }
- return true;
+ return fn;
}
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
index 62ec8847fb..e9c43920a1 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
@@ -55,7 +55,20 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
enum isl_tiling tiling,
mem_copy_fn mem_copy);
-bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
- GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp);
+/* Tells intel_get_memcpy() whether the memcpy() is
+ *
+ * - an upload to the GPU with an aligned destination and a potentially
+ * unaligned source; or
+ * - a download from the GPU with an aligned source and a potentially
+ * unaligned destination.
+ */
+enum intel_memcpy_direction {
+ INTEL_UPLOAD, /* client memory -> tiled buffer (linear_to_tiled callers) */
+ INTEL_DOWNLOAD /* tiled buffer -> client memory (tiled_to_linear callers) */
+};
+
+mem_copy_fn intel_get_memcpy(mesa_format tiledFormat,
+ GLenum format, GLenum type,
+ enum intel_memcpy_direction direction);
#endif /* INTEL_TILED_MEMCPY */
--
2.15.0.rc1
More information about the mesa-dev
mailing list