[Pixman] [PATCH 2/3] Add AVX blt and composite_copy_area
Matt Turner
mattst88 at gmail.com
Sun May 15 20:44:39 PDT 2011
Signed-off-by: Matt Turner <mattst88 at gmail.com>
---
pixman/pixman-avx.c | 191 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 191 insertions(+), 0 deletions(-)
diff --git a/pixman/pixman-avx.c b/pixman/pixman-avx.c
index 09b867d..221bf32 100644
--- a/pixman/pixman-avx.c
+++ b/pixman/pixman-avx.c
@@ -94,11 +94,200 @@ create_mask_2x32_256 (uint32_t mask0,
}
#endif
+static pixman_bool_t
+pixman_blt_avx (uint32_t *src_bits,
+                uint32_t *dst_bits,
+                int       src_stride,
+                int       dst_stride,
+                int       src_bpp,
+                int       dst_bpp,
+                int       src_x,
+                int       src_y,
+                int       dst_x,
+                int       dst_y,
+                int       width,
+                int       height)
+{
+    uint8_t *src_bytes;
+    uint8_t *dst_bytes;
+    int byte_width;
+
+    if (src_bpp != dst_bpp)
+        return FALSE;
+
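+    /* Strides arrive in uint32_t units; convert them to pixel units for
+     * the pointer arithmetic that locates the copy rectangle, then to
+     * byte units for stepping between scanlines.
+     */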
+    if (src_bpp == 16)
+    {
+        src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+        dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+        src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+        dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+        byte_width = 2 * width;
+        src_stride *= 2;
+        dst_stride *= 2;
+    }
+    else if (src_bpp == 32)
+    {
+        src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+        dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+        src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+        dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+        byte_width = 4 * width;
+        src_stride *= 4;
+        dst_stride *= 4;
+    }
+    else
+    {
+        return FALSE;
+    }
+
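+    /* Copy the rectangle one scanline at a time. */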
+    while (height--)
+    {
+        int w;
+        uint8_t *s = src_bytes;
+        uint8_t *d = dst_bytes;
+        src_bytes += src_stride;
+        dst_bytes += dst_stride;
+        w = byte_width;
+
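+        /* Use 16-bit copies until the destination is 4-byte aligned. */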
+        while (w >= 2 && ((unsigned long)d & 3))
+        {
+            *(uint16_t *)d = *(uint16_t *)s;
+            w -= 2;
+            s += 2;
+            d += 2;
+        }
+
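+        /* Use 32-bit copies until the destination is 32-byte aligned,
+         * as save_256_aligned () requires.
+         */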
+        while (w >= 4 && ((unsigned long)d & 31))
+        {
+            *(uint32_t *)d = *(uint32_t *)s;
+
+            w -= 4;
+            s += 4;
+            d += 4;
+        }
+
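+        /* Bulk of the scanline: 128 bytes (four 256-bit vectors) per
+         * iteration. The source may still be misaligned, so load it
+         * unaligned; the destination is now 32-byte aligned.
+         */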
+        while (w >= 128)
+        {
+            __m256i ymm0, ymm1, ymm2, ymm3;
+
+            ymm0 = load_256_unaligned ((__m256i*)(s));
+            ymm1 = load_256_unaligned ((__m256i*)(s + 32));
+            ymm2 = load_256_unaligned ((__m256i*)(s + 64));
+            ymm3 = load_256_unaligned ((__m256i*)(s + 96));
+
+            save_256_aligned ((__m256i*)(d), ymm0);
+            save_256_aligned ((__m256i*)(d + 32), ymm1);
+            save_256_aligned ((__m256i*)(d + 64), ymm2);
+            save_256_aligned ((__m256i*)(d + 96), ymm3);
+
+            w -= 128;
+            s += 128;
+            d += 128;
+        }
+
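+        /* Copy any remaining 32-byte chunks. */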
+        while (w >= 32)
+        {
+            save_256_aligned ((__m256i*)d, load_256_unaligned ((__m256i*)s));
+
+            w -= 32;
+            s += 32;
+            d += 32;
+        }
+
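+        /* Finish the tail of the scanline with 32-bit and 16-bit copies. */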
+        while (w >= 4)
+        {
+            *(uint32_t *)d = *(uint32_t *)s;
+
+            w -= 4;
+            s += 4;
+            d += 4;
+        }
+
+        if (w >= 2)
+        {
+            *(uint16_t *)d = *(uint16_t *)s;
+            w -= 2;
+            s += 2;
+            d += 2;
+        }
+    }
+
+    return TRUE;
+}
+
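+/* The fast paths below only match operations that reduce to a plain copy,
+ * so op and the mask arguments can be ignored here.
+ */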
+static void
+avx_composite_copy_area (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         pixman_image_t *         src_image,
+                         pixman_image_t *         mask_image,
+                         pixman_image_t *         dst_image,
+                         int32_t                  src_x,
+                         int32_t                  src_y,
+                         int32_t                  mask_x,
+                         int32_t                  mask_y,
+                         int32_t                  dest_x,
+                         int32_t                  dest_y,
+                         int32_t                  width,
+                         int32_t                  height)
+{
+    pixman_blt_avx (src_image->bits.bits,
+                    dst_image->bits.bits,
+                    src_image->bits.rowstride,
+                    dst_image->bits.rowstride,
+                    PIXMAN_FORMAT_BPP (src_image->bits.format),
+                    PIXMAN_FORMAT_BPP (dst_image->bits.format),
+                    src_x, src_y, dest_x, dest_y, width, height);
+}
+
static const pixman_fast_path_t avx_fast_paths[] =
{
+    /* PIXMAN_OP_OVER: the x8 formats carry no alpha channel, so OVER
+     * from them degenerates to a straight copy.
+     */
+    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, avx_composite_copy_area),
+
+    /* PIXMAN_OP_SRC */
+    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, avx_composite_copy_area),
+
    { PIXMAN_OP_NONE },
};
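+/* Implementation-level blt entry point: try the AVX path and fall back to
+ * the delegate for bit depths pixman_blt_avx does not handle.
+ */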
+static pixman_bool_t
+avx_blt (pixman_implementation_t *imp,
+         uint32_t *               src_bits,
+         uint32_t *               dst_bits,
+         int                      src_stride,
+         int                      dst_stride,
+         int                      src_bpp,
+         int                      dst_bpp,
+         int                      src_x,
+         int                      src_y,
+         int                      dst_x,
+         int                      dst_y,
+         int                      width,
+         int                      height)
+{
+    if (!pixman_blt_avx (
+            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+            src_x, src_y, dst_x, dst_y, width, height))
+    {
+        return _pixman_implementation_blt (
+            imp->delegate,
+            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+            src_x, src_y, dst_x, dst_y, width, height);
+    }
+
+    return TRUE;
+}
+
static uint32_t *
avx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
{
@@ -198,6 +387,8 @@ _pixman_implementation_create_avx (pixman_implementation_t *fallback)
    /* AVX constants */
    mask_ff000000 = create_mask_2x32_256 (0xff000000, 0xff000000);
+    imp->blt = avx_blt;
+
    imp->src_iter_init = avx_src_iter_init;
    return imp;
--
1.7.3.4