xf86-video-intel: src/sna/blt.c
Chris Wilson
ickle at kemper.freedesktop.org
Fri Apr 3 03:14:48 PDT 2015
src/sna/blt.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 141 insertions(+)
New commits:
commit de61dae3bb64137db311cc75f3b084f991da2179
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Apr 3 11:11:09 2015 +0100
sna: Implement missing 9^10^11 swizzle mode
As found on my gm45...
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/blt.c b/src/sna/blt.c
index 9df7b2b..8dbac4e 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -746,6 +746,142 @@ memcpy_from_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp,
}
}
+/*
+ * Bit-6 address swizzle for the 9^10^11 mode: bits 9, 10 and 11 of X are
+ * each shifted down onto bit 6 and XORed together, and that single bit is
+ * then XORed into X. Every other bit of X passes through unchanged.
+ * X is expanded several times, so pass an expression without side effects.
+ */
+#define swizzle_9_10_11(X) ((X) ^ ((((X) >> 3) ^ ((X) >> 4) ^ ((X) >> 5)) & 64))
+
+/*
+ * Copy a width x height pixel rectangle out of a linear buffer (src) into
+ * an X-tiled buffer (dst) whose byte addresses are passed through
+ * swizzle_9_10_11().
+ *
+ * (src_x, src_y) and (dst_x, dst_y) give the top-left corner of the
+ * rectangle in each buffer, in pixels of bpp bits; both strides are in
+ * bytes.
+ *
+ * Each scanline is moved in pieces that never cross a 64-byte boundary of
+ * the unswizzled tiled offset: the swizzle only toggles bit 6, so every
+ * aligned 64-byte run remains contiguous after swizzling.
+ *
+ * NOTE(review): the masks below assume 64 / cpp and 512 / cpp are powers
+ * of two -- confirm against the callers' supported bpp values.
+ */
+fast_memcpy static void
+memcpy_to_tiled_x__swizzle_9_10_11(const void *src, void *dst, int bpp,
+				   int32_t src_stride, int32_t dst_stride,
+				   int16_t src_x, int16_t src_y,
+				   int16_t dst_x, int16_t dst_y,
+				   uint16_t width, uint16_t height)
+{
+	/* Tile geometry in bytes: 512 wide by 8 rows, 4096 per tile. */
+	const unsigned tile_width = 512;
+	const unsigned tile_height = 8;
+	const unsigned tile_size = 4096;
+
+	const unsigned cpp = bpp / 8;
+	const unsigned tiles_per_row = dst_stride / tile_width;
+	const unsigned chunk_pixels = 64 / cpp;
+	const unsigned tile_shift = ffs(tile_width / cpp) - 1;
+	const unsigned tile_x_mask = (1 << tile_shift) - 1;
+	const uint8_t *linear;
+	unsigned row;
+
+	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+	/* First source byte of the rectangle. */
+	linear = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+
+	for (row = 0; row < height; row++) {
+		const uint32_t ty = row + dst_y;
+		/* Offset of this scanline inside the first tile of its tile row. */
+		const uint32_t row_base =
+			ty / tile_height * tiles_per_row * tile_size +
+			(ty & (tile_height - 1)) * tile_width;
+		const uint8_t *in = linear + src_stride * row;
+		uint32_t px = dst_x;
+		unsigned remaining = width * cpp; /* bytes still to copy */
+
+		/* Head: run up to the next 64-byte boundary (or the row end). */
+		if (px & (chunk_pixels - 1)) {
+			const uint32_t boundary = ALIGN(px + 1, chunk_pixels);
+			const uint32_t head = min(dst_x + width, boundary) - px;
+			const uint32_t offset =
+				row_base +
+				(px >> tile_shift) * tile_size +
+				(px & tile_x_mask) * cpp;
+
+			memcpy((char *)dst + swizzle_9_10_11(offset), in, head * cpp);
+
+			in += head * cpp;
+			remaining -= head * cpp;
+			px += head;
+		}
+
+		/* Body: whole 64-byte chunks, each swizzled independently. */
+		for (; remaining >= 64; remaining -= 64) {
+			const uint32_t offset =
+				row_base +
+				(px >> tile_shift) * tile_size +
+				(px & tile_x_mask) * cpp;
+
+			memcpy((char *)dst + swizzle_9_10_11(offset), in, 64);
+
+			in += 64;
+			px += chunk_pixels;
+		}
+
+		/* Tail: whatever is left, shorter than one chunk. */
+		if (remaining) {
+			const uint32_t offset =
+				row_base +
+				(px >> tile_shift) * tile_size +
+				(px & tile_x_mask) * cpp;
+
+			memcpy((char *)dst + swizzle_9_10_11(offset), in, remaining);
+		}
+	}
+}
+
+/*
+ * Copy a width x height pixel rectangle out of an X-tiled buffer (src),
+ * whose byte addresses are passed through swizzle_9_10_11(), into a linear
+ * buffer (dst).
+ *
+ * (src_x, src_y) and (dst_x, dst_y) give the top-left corner of the
+ * rectangle in each buffer, in pixels of bpp bits; both strides are in
+ * bytes.
+ *
+ * Each scanline is moved in pieces that never cross a 64-byte boundary of
+ * the unswizzled tiled offset: the swizzle only toggles bit 6, so every
+ * aligned 64-byte run remains contiguous after swizzling.
+ *
+ * NOTE(review): the masks below assume 64 / cpp and 512 / cpp are powers
+ * of two -- confirm against the callers' supported bpp values.
+ */
+fast_memcpy static void
+memcpy_from_tiled_x__swizzle_9_10_11(const void *src, void *dst, int bpp,
+				     int32_t src_stride, int32_t dst_stride,
+				     int16_t src_x, int16_t src_y,
+				     int16_t dst_x, int16_t dst_y,
+				     uint16_t width, uint16_t height)
+{
+	/* Tile geometry in bytes: 512 wide by 8 rows, 4096 per tile. */
+	const unsigned tile_width = 512;
+	const unsigned tile_height = 8;
+	const unsigned tile_size = 4096;
+
+	const unsigned cpp = bpp / 8;
+	const unsigned tiles_per_row = src_stride / tile_width;
+	const unsigned chunk_pixels = 64 / cpp;
+	const unsigned tile_shift = ffs(tile_width / cpp) - 1;
+	const unsigned tile_x_mask = (1 << tile_shift) - 1;
+	uint8_t *linear;
+	unsigned row;
+
+	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+	/* First destination byte of the rectangle. */
+	linear = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+
+	for (row = 0; row < height; row++) {
+		const uint32_t ty = row + src_y;
+		/* Offset of this scanline inside the first tile of its tile row. */
+		const uint32_t row_base =
+			ty / tile_height * tiles_per_row * tile_size +
+			(ty & (tile_height - 1)) * tile_width;
+		uint8_t *out = linear + dst_stride * row;
+		uint32_t px = src_x;
+		unsigned remaining = width * cpp; /* bytes still to copy */
+
+		/* Head: run up to the next 64-byte boundary (or the row end). */
+		if (px & (chunk_pixels - 1)) {
+			const uint32_t boundary = ALIGN(px + 1, chunk_pixels);
+			const uint32_t head = min(src_x + width, boundary) - px;
+			const uint32_t offset =
+				row_base +
+				(px >> tile_shift) * tile_size +
+				(px & tile_x_mask) * cpp;
+
+			memcpy(out, (const char *)src + swizzle_9_10_11(offset), head * cpp);
+
+			out += head * cpp;
+			remaining -= head * cpp;
+			px += head;
+		}
+
+		/* Body: whole 64-byte chunks, each unswizzled independently. */
+		for (; remaining >= 64; remaining -= 64) {
+			const uint32_t offset =
+				row_base +
+				(px >> tile_shift) * tile_size +
+				(px & tile_x_mask) * cpp;
+
+			memcpy(out, (const char *)src + swizzle_9_10_11(offset), 64);
+
+			out += 64;
+			px += chunk_pixels;
+		}
+
+		/* Tail: whatever is left, shorter than one chunk. */
+		if (remaining) {
+			const uint32_t offset =
+				row_base +
+				(px >> tile_shift) * tile_size +
+				(px & tile_x_mask) * cpp;
+
+			memcpy(out, (const char *)src + swizzle_9_10_11(offset), remaining);
+		}
+	}
+}
+
static fast_memcpy void
memcpy_to_tiled_x__gen2(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
@@ -894,6 +1030,11 @@ void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling)
kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_11;
kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_11;
break;
+ case I915_BIT_6_SWIZZLE_9_10_11:
+ DBG(("%s: 6^9^10^11 swizzling\n", __FUNCTION__));
+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_10_11;
+ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_10_11;
+ break;
}
}
More information about the xorg-commit
mailing list