[Pixman] [PATCH 4/5] loongson: optimize _mm_set_pi* functions with shuffle instructions

Matt Turner mattst88 at gmail.com
Wed Jun 27 19:38:43 PDT 2012


---
 pixman/loongson-mmintrin.h |   43 +++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 43 insertions(+), 0 deletions(-)

diff --git a/pixman/loongson-mmintrin.h b/pixman/loongson-mmintrin.h
index f0931ac..086c6e0 100644
--- a/pixman/loongson-mmintrin.h
+++ b/pixman/loongson-mmintrin.h
@@ -182,9 +182,34 @@ _mm_packs_pi32 (__m64 __m1, __m64 __m2)
 	return ret;
 }
 
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))	/* packs four selectors into one immediate: fp0 at bit 0, fp1 at bit 2, fp2 at bit 4, fp3 at bit 6; assumes each argument is in 0..3 so the fields do not overlap -- TODO confirm */
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0)
 {
+	if (__builtin_constant_p (__w3) &&
+	    __builtin_constant_p (__w2) &&
+	    __builtin_constant_p (__w1) &&
+	    __builtin_constant_p (__w0))
+	{
+		uint64_t val = ((uint64_t)__w3 << 48)
+			     | ((uint64_t)__w2 << 32)
+			     | ((uint64_t)__w1 << 16)
+			     | ((uint64_t)__w0 <<  0);
+		return *(__m64 *)&val;
+	}
+	else if (__w3 == __w2 && __w2 == __w1 && __w1 == __w0)
+	{
+		/* TODO: handle other cases */
+		uint64_t val = __w3;
+		uint64_t imm = _MM_SHUFFLE (0, 0, 0, 0);
+		__m64 ret;
+		asm("pshufh %0, %1, %2\n\t"
+		    : "=f" (ret)
+		    : "f" (*(__m64 *)&val), "f" (*(__m64 *)&imm)
+		);
+		return ret;
+	}
 	uint64_t val = ((uint64_t)__w3 << 48)
 		     | ((uint64_t)__w2 << 32)
 		     | ((uint64_t)__w1 << 16)
@@ -195,10 +220,28 @@ _mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_set_pi32 (unsigned __i1, unsigned __i0)
 {
+	if (__builtin_constant_p (__i1) &&	/* fast path: both halves are compile-time constants */
+	    __builtin_constant_p (__i0))
+	{
+		uint64_t val = ((uint64_t)__i1 << 32)	/* __i1 becomes the upper 32 bits */
+			     | ((uint64_t)__i0 <<  0);	/* __i0 becomes the lower 32 bits */
+		return *(__m64 *)&val;	/* reinterpret the 64-bit integer as __m64 */
+	}
+	else if (__i1 == __i0)	/* non-constant arguments whose two halves are equal */
+	{
+		uint64_t imm = _MM_SHUFFLE (1, 0, 1, 0);	/* selector fields 1,0,1,0 pack to 0x44 */
+		__m64 ret;
+		asm("pshufh %0, %1, %2\n\t"	/* NOTE(review): presumably picks halfwords 1,0,1,0 of %1, i.e. repeats its low 32 bits in both halves -- confirm against the Loongson MMI reference */
+		    : "=f" (ret)
+		    : "f" (*(__m32 *)&__i1), "f" (*(__m64 *)&imm)	/* %1 = __i1 viewed as __m32, %2 = imm viewed as __m64; all operands use the "f" constraint */
+		);
+		return ret;
+	}
 	uint64_t val = ((uint64_t)__i1 << 32)
 		     | ((uint64_t)__i0 <<  0);
 	return *(__m64 *)&val;
 }
+#undef _MM_SHUFFLE
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_shuffle_pi16 (__m64 __m, int64_t __n)
-- 
1.7.3.4



More information about the Pixman mailing list