[Pixman] [PATCH 2/3] Add AVX blt and composite_copy_area

Matt Turner mattst88 at gmail.com
Sun May 15 20:44:39 PDT 2011


Signed-off-by: Matt Turner <mattst88 at gmail.com>
---
 pixman/pixman-avx.c |  191 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 191 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-avx.c b/pixman/pixman-avx.c
index 09b867d..221bf32 100644
--- a/pixman/pixman-avx.c
+++ b/pixman/pixman-avx.c
@@ -94,11 +94,200 @@ create_mask_2x32_256 (uint32_t mask0,
 }
 #endif
 
+/* Rectangle copy ("blt") using 256-bit AVX loads/stores.
+ *
+ * Copies a width x height pixel rectangle from (src_x, src_y) in
+ * src_bits to (dst_x, dst_y) in dst_bits.  Strides are passed in
+ * uint32_t units (pixman's rowstride convention) and rescaled below.
+ * Returns FALSE -- meaning the caller must fall back to another
+ * implementation -- when the two bpp values differ or are neither
+ * 16 nor 32; returns TRUE once the copy has been performed.
+ *
+ * NOTE(review): the inner loop uses unaligned AVX loads but *aligned*
+ * AVX stores; the scalar head loops align the destination pointer up
+ * to 32 bytes, which assumes the destination row start is at least
+ * 2-byte (16 bpp) / 4-byte (32 bpp) aligned -- confirm this matches
+ * pixman's image allocation guarantees.
+ */
+static pixman_bool_t
+pixman_blt_avx (uint32_t *src_bits,
+                uint32_t *dst_bits,
+                int       src_stride,
+                int       dst_stride,
+                int       src_bpp,
+                int       dst_bpp,
+                int       src_x,
+                int       src_y,
+                int       dst_x,
+                int       dst_y,
+                int       width,
+                int       height)
+{
+    uint8_t *   src_bytes;
+    uint8_t *   dst_bytes;
+    int byte_width;
+
+    /* No format conversion here: both surfaces must have the same
+     * bits per pixel. */
+    if (src_bpp != dst_bpp)
+	return FALSE;
+
+    if (src_bpp == 16)
+    {
+	/* Rescale the uint32-unit strides to uint16_t (pixel) units so
+	 * the (x, y) offset arithmetic below works in pixels. */
+	src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+	src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+	byte_width = 2 * width;
+	/* From here on, strides and widths are measured in bytes. */
+	src_stride *= 2;
+	dst_stride *= 2;
+    }
+    else if (src_bpp == 32)
+    {
+	/* Same as the 16 bpp case, with 4-byte pixels. */
+	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+	byte_width = 4 * width;
+	src_stride *= 4;
+	dst_stride *= 4;
+    }
+    else
+    {
+	/* Unsupported depth (e.g. 1 or 8 bpp): let a fallback handle it. */
+	return FALSE;
+    }
+
+    while (height--)
+    {
+	int w;
+	uint8_t *s = src_bytes;
+	uint8_t *d = dst_bytes;
+	src_bytes += src_stride;
+	dst_bytes += dst_stride;
+	w = byte_width;
+
+	/* Head: copy 2 bytes at a time until the destination is 4-byte
+	 * aligned (only ever iterates for 16 bpp surfaces). */
+	while (w >= 2 && ((unsigned long)d & 3))
+	{
+	    *(uint16_t *)d = *(uint16_t *)s;
+	    w -= 2;
+	    s += 2;
+	    d += 2;
+	}
+
+	/* Head: copy 4 bytes at a time until the destination is 32-byte
+	 * aligned, as required by the aligned AVX stores below. */
+	while (w >= 4 && ((unsigned long)d & 31))
+	{
+	    *(uint32_t *)d = *(uint32_t *)s;
+
+	    w -= 4;
+	    s += 4;
+	    d += 4;
+	}
+
+	/* Main loop: four 32-byte AVX transfers (128 bytes) per
+	 * iteration.  Source loads stay unaligned because only the
+	 * destination was aligned above. */
+	while (w >= 128)
+	{
+	    __m256i ymm0, ymm1, ymm2, ymm3;
+
+	    ymm0 = load_256_unaligned ((__m256i*)(s));
+	    ymm1 = load_256_unaligned ((__m256i*)(s + 32));
+	    ymm2 = load_256_unaligned ((__m256i*)(s + 64));
+	    ymm3 = load_256_unaligned ((__m256i*)(s + 96));
+
+	    save_256_aligned ((__m256i*)(d),    ymm0);
+	    save_256_aligned ((__m256i*)(d + 32), ymm1);
+	    save_256_aligned ((__m256i*)(d + 64), ymm2);
+	    save_256_aligned ((__m256i*)(d + 96), ymm3);
+
+	    w -= 128;
+	    s += 128;
+	    d += 128;
+	}
+
+	/* Remaining whole 32-byte chunks (at most 3). */
+	while (w >= 32)
+	{
+	    save_256_aligned ((__m256i*)d, load_256_unaligned ((__m256i*)s) );
+
+	    w -= 32;
+	    s += 32;
+	    d += 32;
+	}
+
+	/* Scalar tail: up to 7 dword copies ... */
+	while (w >= 4)
+	{
+	    *(uint32_t *)d = *(uint32_t *)s;
+
+	    w -= 4;
+	    s += 4;
+	    d += 4;
+	}
+
+	/* ... and a final 2-byte copy for odd-width 16 bpp rows.
+	 * byte_width is always even, so no single byte can remain. */
+	if (w >= 2)
+	{
+	    *(uint16_t *)d = *(uint16_t *)s;
+	    w -= 2;
+	    s += 2;
+	    d += 2;
+	}
+    }
+
+
+    return TRUE;
+}
+
+/* Composite fast path that is a straight rectangle copy.  Every entry
+ * in avx_fast_paths that points here reduces to moving pixels, so the
+ * work is forwarded directly to pixman_blt_avx.  The mask image and
+ * mask coordinates are unused. */
+static void
+avx_composite_copy_area (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         pixman_image_t *         src_image,
+                         pixman_image_t *         mask_image,
+                         pixman_image_t *         dst_image,
+                         int32_t                  src_x,
+                         int32_t                  src_y,
+                         int32_t                  mask_x,
+                         int32_t                  mask_y,
+                         int32_t                  dest_x,
+                         int32_t                  dest_y,
+                         int32_t                  width,
+                         int32_t                  height)
+{
+    uint32_t *src_bits = src_image->bits.bits;
+    uint32_t *dst_bits = dst_image->bits.bits;
+    int src_stride = src_image->bits.rowstride;
+    int dst_stride = dst_image->bits.rowstride;
+    int src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
+    int dst_bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format);
+
+    /* The fast path table only matches format pairs that
+     * pixman_blt_avx accepts, so the result is not checked. */
+    pixman_blt_avx (src_bits, dst_bits, src_stride, dst_stride,
+                    src_bpp, dst_bpp,
+                    src_x, src_y, dest_x, dest_y, width, height);
+}
+
 static const pixman_fast_path_t avx_fast_paths[] =
 {
+    /* PIXMAN_OP_OVER
+     * x8r8g8b8 / x8b8g8r8 sources carry no alpha channel, so OVER on
+     * them behaves like SRC and can use the plain copy helper. */
+    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, avx_composite_copy_area),
+
+    /* PIXMAN_OP_SRC
+     * SRC is a straight copy whenever source and destination have the
+     * same pixel size and channel order; the a8 -> x8 pairs are
+     * presumably safe because x8 destinations ignore the alpha byte. */
+    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, avx_composite_copy_area),
+    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, avx_composite_copy_area),
+
     { PIXMAN_OP_NONE },
 };
 
+/* pixman_implementation_t::blt entry point.  Attempt the AVX copy
+ * first; when it cannot handle the request (mismatched or unsupported
+ * bpp), hand the call down to the delegate implementation. */
+static pixman_bool_t
+avx_blt (pixman_implementation_t *imp,
+         uint32_t *               src_bits,
+         uint32_t *               dst_bits,
+         int                      src_stride,
+         int                      dst_stride,
+         int                      src_bpp,
+         int                      dst_bpp,
+         int                      src_x,
+         int                      src_y,
+         int                      dst_x,
+         int                      dst_y,
+         int                      width,
+         int                      height)
+{
+    if (pixman_blt_avx (src_bits, dst_bits, src_stride, dst_stride,
+                        src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y,
+                        width, height))
+    {
+	return TRUE;
+    }
+
+    return _pixman_implementation_blt (imp->delegate,
+                                       src_bits, dst_bits,
+                                       src_stride, dst_stride,
+                                       src_bpp, dst_bpp,
+                                       src_x, src_y, dst_x, dst_y,
+                                       width, height);
+}
+
 static uint32_t *
 avx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
 {
@@ -198,6 +387,8 @@ _pixman_implementation_create_avx (pixman_implementation_t *fallback)
     /* AVX constants */
     mask_ff000000 = create_mask_2x32_256 (0xff000000, 0xff000000);
 
+    imp->blt = avx_blt;
+
     imp->src_iter_init = avx_src_iter_init;
 
     return imp;
-- 
1.7.3.4



More information about the Pixman mailing list