[Pixman] [PATCH 3/3] mmx: Use _mm_shuffle_pi16 when available
Matt Turner
mattst88 at gmail.com
Sun Feb 19 11:59:28 PST 2012
The pshufw x86 instruction is part of Extended 3DNow! and SSE1. The
equivalent ARM wshufh instruction has been available since the first
version of the iwMMXt instruction set.
This instruction is already used in the SSE2 code.
Reduces code size by ~9%.
amd64
   text    data     bss     dec     hex  filename
  29925    2240       0   32165    7da5  .libs/libpixman_mmx_la-pixman-mmx.o
  27237    2240       0   29477    7325  .libs/libpixman_mmx_la-pixman-mmx.o

x86
   text    data     bss     dec     hex  filename
  27677    1792       0   29469    731d  .libs/libpixman_mmx_la-pixman-mmx.o
  24959    1792       0   26751    687f  .libs/libpixman_mmx_la-pixman-mmx.o

arm
   text    data     bss     dec     hex  filename
  30176    1792       0   31968    7ce0  .libs/libpixman_iwmmxt_la-pixman-mmx.o
  27384    1792       0   29176    71f8  .libs/libpixman_iwmmxt_la-pixman-mmx.o
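
For reference, the immediate built by _MM_SHUFFLE packs four 2-bit lane
selectors, one per destination word, so _MM_SHUFFLE (3, 3, 3, 3) == 0xff asks
pshufw/wshufh to copy the source's high word (the alpha of an expanded ARGB
pixel) into every lane. Below is a minimal standalone sketch of that outside
pixman; the file name, the helper union and the test pixel are made up for
the example, and xmmintrin.h already provides _mm_shuffle_pi16 and
_MM_SHUFFLE when SSE is enabled:

/* Illustrative sketch only, not part of the patch.  Shows what the
 * _MM_SHUFFLE (3, 3, 3, 3) selector does for expand_alpha()-style code.
 * Build on x86 with SSE enabled, e.g.:  gcc -msse -O2 shuffle-demo.c */
#include <stdio.h>
#include <stdint.h>
#include <xmmintrin.h>

typedef union { uint64_t u; __m64 m; } m64_bits;

int
main (void)
{
    /* An ARGB pixel expanded to 16 bits per channel; lane 3 (the high
     * word) holds alpha = 0x0080.  The value is made up for the demo. */
    m64_bits pixel = { .u = 0x0080000000ff0000ULL };
    m64_bits alpha;

    /* Selector 0xff == _MM_SHUFFLE (3, 3, 3, 3): every destination word
     * takes source word 3, i.e. alpha is replicated into all four lanes. */
    alpha.m = _mm_shuffle_pi16 (pixel.m, _MM_SHUFFLE (3, 3, 3, 3));
    _mm_empty ();   /* leave MMX state before calling back into libc */

    printf ("%016llx\n", (unsigned long long) alpha.u);  /* 0080008000800080 */
    return 0;
}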
---
pixman/pixman-mmx.c | 25 +++++++++++++++++++++++++
1 files changed, 25 insertions(+), 0 deletions(-)
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 6103474..86bc7eb 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -54,6 +54,10 @@ _mm_empty (void)
{
}
+
+/* ARM's mmintrin.h also doesn't define the _MM_SHUFFLE macro. */
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
#endif
#ifdef USE_X86_MMX_EXT
@@ -65,6 +69,15 @@ _mm_mulhi_pu16 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi16 (__m64 __A, int const __N)
+{
+ return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
+}
+
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
#endif
/* Notes about writing mmx code
@@ -253,6 +266,9 @@ pix_add (__m64 a, __m64 b)
static force_inline __m64
expand_alpha (__m64 pixel)
{
+#if defined USE_X86_MMX_EXT || defined USE_ARM_IWMMXT
+ return _mm_shuffle_pi16(pixel, _MM_SHUFFLE (3, 3, 3, 3));
+#else
__m64 t1, t2;
t1 = shift (pixel, -48);
@@ -262,11 +278,15 @@ expand_alpha (__m64 pixel)
t1 = _mm_or_si64 (t1, t2);
return t1;
+#endif
}
static force_inline __m64
expand_alpha_rev (__m64 pixel)
{
+#if defined USE_X86_MMX_EXT || defined USE_ARM_IWMMXT
+ return _mm_shuffle_pi16(pixel, _MM_SHUFFLE (0, 0, 0, 0));
+#else
__m64 t1, t2;
/* move alpha to low 16 bits and zero the rest */
@@ -279,11 +299,15 @@ expand_alpha_rev (__m64 pixel)
t1 = _mm_or_si64 (t1, t2);
return t1;
+#endif
}
static force_inline __m64
invert_colors (__m64 pixel)
{
+#if defined USE_X86_MMX_EXT || defined USE_ARM_IWMMXT
+ return _mm_shuffle_pi16(pixel, _MM_SHUFFLE (3, 0, 1, 2));
+#else
__m64 x, y, z;
x = y = z = pixel;
@@ -299,6 +323,7 @@ invert_colors (__m64 pixel)
x = _mm_or_si64 (x, z);
return x;
+#endif
}
static force_inline __m64
--
1.7.3.4
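
A note on the invert_colors() hunk above: the selector reads right to left,
so _MM_SHUFFLE (3, 0, 1, 2) == 0xc6 sends source word 2 to destination word
0, leaves word 1 alone, sends word 0 to word 2 and keeps word 3 (alpha) in
place; in other words, a single pshufw/wshufh swaps the red and blue words.
The same idea spelled out as a standalone sketch (the function name is
hypothetical, and it assumes _mm_shuffle_pi16/_MM_SHUFFLE are visible, either
from xmmintrin.h with SSE enabled or from the fallbacks this patch adds):

/* Not from the patch: the invert_colors() selector, spelled out.  With
 * 16-bit ARGB lanes [3]=A [2]=R [1]=G [0]=B:
 *   dest word 3 <- src word 3   (alpha unchanged)
 *   dest word 2 <- src word 0   (the red lane takes the blue value)
 *   dest word 1 <- src word 1   (green unchanged)
 *   dest word 0 <- src word 2   (the blue lane takes the red value)
 */
static __m64
swap_red_blue (__m64 pixel)
{
    return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 0, 1, 2));
}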