[Pixman] [PATCH 2/3] mmx: Use _mm_mulhi_pu16 when available

Matt Turner mattst88 at gmail.com
Sun Feb 19 11:59:27 PST 2012


The pmulhuw x86 instruction is part of Extended 3DNow! and SSE1. The
equivalent ARM wmulum instruction was available from the first iwMMXt
instruction set.

This instruction is already used in the SSE2 code.

Reduces code size by ~5%.

amd64
  text    data     bss     dec     hex filename
 31325    2240       0   33565    831d .libs/libpixman_mmx_la-pixman-mmx.o
 29925    2240       0   32165    7da5 .libs/libpixman_mmx_la-pixman-mmx.o

x86
  text    data     bss     dec     hex filename
 29165    1792       0   30957    78ed .libs/libpixman_mmx_la-pixman-mmx.o
 27677    1792       0   29469    731d .libs/libpixman_mmx_la-pixman-mmx.o

arm
  text    data     bss     dec     hex filename
 31632    1792       0   33424    8290 .libs/libpixman_iwmmxt_la-pixman-mmx.o
 30176    1792       0   31968    7ce0 .libs/libpixman_iwmmxt_la-pixman-mmx.o

Signed-off-by: Matt Turner <mattst88 at gmail.com>
---
 pixman/pixman-mmx.c |   22 +++++++++++++++++++++-
 1 files changed, 21 insertions(+), 1 deletions(-)

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 937ce8f..6103474 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -56,6 +56,17 @@ _mm_empty (void)
 }
 #endif
 
+#ifdef USE_X86_MMX_EXT
+/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
+ * instructions to be generated that we don't want. Just duplicate the
+ * functions we want to use.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
+}
+#endif
+
 /* Notes about writing mmx code
  *
  * give memory operands as the second operand. If you give it as the
@@ -128,6 +139,9 @@ typedef struct
     mmxdatafield mmx_ffff0000ffff0000;
     mmxdatafield mmx_0000ffff00000000;
     mmxdatafield mmx_000000000000ffff;
+#if defined USE_X86_MMX_EXT || defined USE_ARM_IWMMXT
+    mmxdatafield mmx_4x0101;
+#endif
 } mmx_data_t;
 
 #if defined(_MSC_VER)
@@ -155,6 +169,9 @@ static const mmx_data_t c =
     MMXDATA_INIT (.mmx_ffff0000ffff0000,         0xffff0000ffff0000),
     MMXDATA_INIT (.mmx_0000ffff00000000,         0x0000ffff00000000),
     MMXDATA_INIT (.mmx_000000000000ffff,         0x000000000000ffff),
+#if defined USE_X86_MMX_EXT || defined USE_ARM_IWMMXT
+    MMXDATA_INIT (.mmx_4x0101,                   0x0101010101010101),
+#endif
 };
 
 #ifdef USE_CVT_INTRINSICS
@@ -218,9 +235,12 @@ pix_multiply (__m64 a, __m64 b)
 
     res = _mm_mullo_pi16 (a, b);
     res = _mm_adds_pu16 (res, MC (4x0080));
+#if defined USE_X86_MMX_EXT || defined USE_ARM_IWMMXT
+    res = _mm_mulhi_pu16 (res, MC (4x0101));
+#else
     res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
     res = _mm_srli_pi16 (res, 8);
-
+#endif
     return res;
 }
 
-- 
1.7.3.4



More information about the Pixman mailing list