[Pixman] [PATCH 3/3] Add AVX fill

Matt Turner mattst88 at gmail.com
Sun May 15 20:44:40 PDT 2011


Signed-off-by: Matt Turner <mattst88 at gmail.com>
---
 pixman/pixman-avx.c |  190 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 190 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-avx.c b/pixman/pixman-avx.c
index 221bf32..0d23dc2 100644
--- a/pixman/pixman-avx.c
+++ b/pixman/pixman-avx.c
@@ -95,6 +95,168 @@ create_mask_2x32_256 (uint32_t mask0,
 #endif
 
 static pixman_bool_t
+pixman_fill_avx (uint32_t *bits,
+                 int       stride,
+                 int       bpp,
+                 int       x,
+                 int       y,
+                 int       width,
+                 int       height,
+                 uint32_t  data)
+{
+    uint32_t byte_width;
+    uint8_t *byte_line;
+
+    __m256i ymm_def;
+
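+    /* Convert the fill region to a byte-addressed span and replicate
+     * the fill value across a full 32-bit word, whatever the bpp.
+     */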
+    if (bpp == 8)
+    {
+	uint8_t b;
+	uint16_t w;
+
+	stride = stride * (int) sizeof (uint32_t) / 1;
+	byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
+	byte_width = width;
+	stride *= 1;
+
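+	/* Replicate the low byte into all four bytes of the 32-bit word. */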
+	b = data & 0xff;
+	w = (b << 8) | b;
+	data = (w << 16) | w;
+    }
+    else if (bpp == 16)
+    {
+	stride = stride * (int) sizeof (uint32_t) / 2;
+	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+	byte_width = 2 * width;
+	stride *= 2;
+
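+	/* Replicate the low 16-bit pixel into both halves of the word. */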
+	data = (data & 0xffff) * 0x00010001;
+    }
+    else if (bpp == 32)
+    {
+	stride = stride * (int) sizeof (uint32_t) / 4;
+	byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
+	byte_width = 4 * width;
+	stride *= 4;
+    }
+    else
+    {
+	return FALSE;
+    }
+
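+    /* Broadcast the 32-bit pattern across the full 256-bit register. */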
+    ymm_def = create_mask_2x32_256 (data, data);
+
+    while (height--)
+    {
+	int w;
+	uint8_t *d = byte_line;
+	byte_line += stride;
+	w = byte_width;
+
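+	/* Write narrow heads until d reaches the 32-byte alignment
+	 * required by the aligned 256-bit stores below.
+	 */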
+	while (w >= 1 && ((unsigned long)d & 1))
+	{
+	    *(uint8_t *)d = data;
+	    w -= 1;
+	    d += 1;
+	}
+
+	while (w >= 2 && ((unsigned long)d & 3))
+	{
+	    *(uint16_t *)d = data;
+	    w -= 2;
+	    d += 2;
+	}
+
+	while (w >= 4 && ((unsigned long)d & 31))
+	{
+	    *(uint32_t *)d = data;
+
+	    w -= 4;
+	    d += 4;
+	}
+
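+	/* Bulk of the scanline: eight aligned 32-byte stores per iteration. */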
+	while (w >= 256)
+	{
+	    save_256_aligned ((__m256i*)(d),        ymm_def);
+	    save_256_aligned ((__m256i*)(d + 32),   ymm_def);
+	    save_256_aligned ((__m256i*)(d + 64),   ymm_def);
+	    save_256_aligned ((__m256i*)(d + 96),   ymm_def);
+	    save_256_aligned ((__m256i*)(d + 128),  ymm_def);
+	    save_256_aligned ((__m256i*)(d + 160),  ymm_def);
+	    save_256_aligned ((__m256i*)(d + 192),  ymm_def);
+	    save_256_aligned ((__m256i*)(d + 224),  ymm_def);
+
+	    d += 256;
+	    w -= 256;
+	}
+
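+	/* Flush any remaining 128-, 64- and 32-byte chunks. */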
+	if (w >= 128)
+	{
+	    save_256_aligned ((__m256i*)(d),      ymm_def);
+	    save_256_aligned ((__m256i*)(d + 32), ymm_def);
+	    save_256_aligned ((__m256i*)(d + 64), ymm_def);
+	    save_256_aligned ((__m256i*)(d + 96), ymm_def);
+
+	    d += 128;
+	    w -= 128;
+	}
+
+	if (w >= 64)
+	{
+	    save_256_aligned ((__m256i*)(d),      ymm_def);
+	    save_256_aligned ((__m256i*)(d + 32), ymm_def);
+
+	    d += 64;
+	    w -= 64;
+	}
+
+	if (w >= 32)
+	{
+	    save_256_aligned ((__m256i*)(d), ymm_def);
+
+	    d += 32;
+	    w -= 32;
+	}
+
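+	/* Finish the tail with progressively narrower writes. */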
+	while (w >= 4)
+	{
+	    *(uint32_t *)d = data;
+
+	    w -= 4;
+	    d += 4;
+	}
+
+	if (w >= 2)
+	{
+	    *(uint16_t *)d = data;
+	    w -= 2;
+	    d += 2;
+	}
+
+	if (w >= 1)
+	{
+	    *(uint8_t *)d = data;
+	    w -= 1;
+	    d += 1;
+	}
+    }
+
+    return TRUE;
+}
+
+static pixman_bool_t
 pixman_blt_avx (uint32_t *src_bits,
                 uint32_t *dst_bits,
                 int       src_stride,
@@ -288,6 +450,33 @@ avx_blt (pixman_implementation_t *imp,
     return TRUE;
 }
 
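+/* On 32-bit x86 the ABI does not guarantee the stack alignment needed for
+ * AVX spills, so realign the stack pointer on entry.
+ */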
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
+static pixman_bool_t
+avx_fill (pixman_implementation_t *imp,
+          uint32_t *               bits,
+          int                      stride,
+          int                      bpp,
+          int                      x,
+          int                      y,
+          int                      width,
+          int                      height,
+          uint32_t xor)
+{
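+    /* pixman_fill_avx () handles only 8, 16 and 32 bpp; delegate otherwise. */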
+    if (!pixman_fill_avx (bits, stride, bpp, x, y, width, height, xor))
+    {
+	return _pixman_implementation_fill (
+	    imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+    }
+
+    return TRUE;
+}
+
 static uint32_t *
 avx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
 {
@@ -388,6 +577,7 @@ _pixman_implementation_create_avx (pixman_implementation_t *fallback)
     mask_ff000000 = create_mask_2x32_256 (0xff000000, 0xff000000);
 
     imp->blt = avx_blt;
+    imp->fill = avx_fill;
 
     imp->src_iter_init = avx_src_iter_init;
 
-- 
1.7.3.4