[Pixman] [PATCH 3/3] Add AVX fill
Matt Turner
mattst88 at gmail.com
Sun May 15 20:44:40 PDT 2011
Signed-off-by: Matt Turner <mattst88 at gmail.com>
---
pixman/pixman-avx.c | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 174 insertions(+), 0 deletions(-)
diff --git a/pixman/pixman-avx.c b/pixman/pixman-avx.c
index 221bf32..0d23dc2 100644
--- a/pixman/pixman-avx.c
+++ b/pixman/pixman-avx.c
@@ -95,6 +95,156 @@ create_mask_2x32_256 (uint32_t mask0,
#endif
static pixman_bool_t
+pixman_fill_avx (uint32_t *bits,
+                 int       stride,
+                 int       bpp,
+                 int       x,
+                 int       y,
+                 int       width,
+                 int       height,
+                 uint32_t  data)
+{
+    uint32_t byte_width;
+    uint8_t *byte_line;
+
+    __m256i ymm_def;
+
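+    /* Convert the stride from uint32_t units to bytes, and for 8- and
+     * 16-bpp widen the fill value so 'data' holds a full 32-bit pattern. */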
+    if (bpp == 8)
+    {
+        uint8_t b;
+        uint16_t w;
+
+        stride = stride * (int) sizeof (uint32_t) / 1;
+        byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
+        byte_width = width;
+        stride *= 1;
+
+        b = data & 0xff;
+        w = (b << 8) | b;
+        data = (w << 16) | w;
+    }
+    else if (bpp == 16)
+    {
+        stride = stride * (int) sizeof (uint32_t) / 2;
+        byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+        byte_width = 2 * width;
+        stride *= 2;
+
+        data = (data & 0xffff) * 0x00010001;
+    }
+    else if (bpp == 32)
+    {
+        stride = stride * (int) sizeof (uint32_t) / 4;
+        byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
+        byte_width = 4 * width;
+        stride *= 4;
+    }
+    else
+    {
+        return FALSE;
+    }
+
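+    /* ymm_def holds the 32-bit pattern replicated across a 256-bit register. */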
+    ymm_def = create_mask_2x32_256 (data, data);
+
+    while (height--)
+    {
+        int w;
+        uint8_t *d = byte_line;
+        byte_line += stride;
+        w = byte_width;
+
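+        /* Advance d to a 32-byte boundary with 1-, 2- and 4-byte stores
+         * so that the wide stores below can use aligned accesses. */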
+        while (w >= 1 && ((unsigned long)d & 1))
+        {
+            *(uint8_t *)d = data;
+            w -= 1;
+            d += 1;
+        }
+
+        while (w >= 2 && ((unsigned long)d & 3))
+        {
+            *(uint16_t *)d = data;
+            w -= 2;
+            d += 2;
+        }
+
+        while (w >= 4 && ((unsigned long)d & 31))
+        {
+            *(uint32_t *)d = data;
+
+            w -= 4;
+            d += 4;
+        }
+
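+        /* Main loop: eight aligned 256-bit stores (256 bytes) per
+         * iteration, then progressively smaller power-of-two tails. */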
+        while (w >= 256)
+        {
+            save_256_aligned ((__m256i*)(d), ymm_def);
+            save_256_aligned ((__m256i*)(d + 32), ymm_def);
+            save_256_aligned ((__m256i*)(d + 64), ymm_def);
+            save_256_aligned ((__m256i*)(d + 96), ymm_def);
+            save_256_aligned ((__m256i*)(d + 128), ymm_def);
+            save_256_aligned ((__m256i*)(d + 160), ymm_def);
+            save_256_aligned ((__m256i*)(d + 192), ymm_def);
+            save_256_aligned ((__m256i*)(d + 224), ymm_def);
+
+            d += 256;
+            w -= 256;
+        }
+
+        if (w >= 128)
+        {
+            save_256_aligned ((__m256i*)(d), ymm_def);
+            save_256_aligned ((__m256i*)(d + 32), ymm_def);
+            save_256_aligned ((__m256i*)(d + 64), ymm_def);
+            save_256_aligned ((__m256i*)(d + 96), ymm_def);
+
+            d += 128;
+            w -= 128;
+        }
+
+        if (w >= 64)
+        {
+            save_256_aligned ((__m256i*)(d), ymm_def);
+            save_256_aligned ((__m256i*)(d + 32), ymm_def);
+
+            d += 64;
+            w -= 64;
+        }
+
+        if (w >= 32)
+        {
+            save_256_aligned ((__m256i*)(d), ymm_def);
+
+            d += 32;
+            w -= 32;
+        }
+
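+        /* Fewer than 32 bytes remain: finish the scanline with scalar
+         * 4-, 2- and 1-byte stores. */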
+        while (w >= 4)
+        {
+            *(uint32_t *)d = data;
+
+            w -= 4;
+            d += 4;
+        }
+
+        if (w >= 2)
+        {
+            *(uint16_t *)d = data;
+            w -= 2;
+            d += 2;
+        }
+
+        if (w >= 1)
+        {
+            *(uint8_t *)d = data;
+            w -= 1;
+            d += 1;
+        }
+    }
+
+    return TRUE;
+}
+
+static pixman_bool_t
pixman_blt_avx (uint32_t *src_bits,
uint32_t *dst_bits,
int src_stride,
@@ -288,6 +438,29 @@ avx_blt (pixman_implementation_t *imp,
return TRUE;
}
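+/* On 32-bit x86 the incoming stack is not guaranteed to be aligned for
+ * vector temporaries, so ask GCC to realign it on entry. */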
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
+static pixman_bool_t
+avx_fill (pixman_implementation_t *imp,
+          uint32_t *               bits,
+          int                      stride,
+          int                      bpp,
+          int                      x,
+          int                      y,
+          int                      width,
+          int                      height,
+          uint32_t                 xor)
+{
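+    /* Fall back to the delegate implementation for depths this path
+     * does not handle. */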
+    if (!pixman_fill_avx (bits, stride, bpp, x, y, width, height, xor))
+    {
+        return _pixman_implementation_fill (
+            imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+    }
+
+    return TRUE;
+}
+
static uint32_t *
avx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
{
@@ -388,6 +561,7 @@ _pixman_implementation_create_avx (pixman_implementation_t *fallback)
mask_ff000000 = create_mask_2x32_256 (0xff000000, 0xff000000);
imp->blt = avx_blt;
+    imp->fill = avx_fill;
imp->src_iter_init = avx_src_iter_init;
--
1.7.3.4