[Pixman] [PATCH 7/8] mmx: add ARM/iwmmxt inline assembly blit code

Matt Turner <mattst88 at gmail.com>
Fri Sep 23 11:54:16 PDT 2011


Signed-off-by: Matt Turner <mattst88 at gmail.com>
---
 pixman/pixman-mmx.c |   51 ++++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index f848ab4..4e384c1 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -2933,8 +2933,12 @@ pixman_blt_mmx (uint32_t *src_bits,
 
 	while (w >= 64)
 	{
-#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX
+#ifndef USE_X86_MMX
+	    __m64 v0, v1, v2, v3, v4, v5, v6, v7;
+#endif
+#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)))
 	    __asm__ (
+# ifdef USE_X86_MMX
 	        "movq	  (%1),	  %%mm0\n"
 	        "movq	 8(%1),	  %%mm1\n"
 	        "movq	16(%1),	  %%mm2\n"
@@ -2953,19 +2957,44 @@ pixman_blt_mmx (uint32_t *src_bits,
 	        "movq	%%mm6,	48(%0)\n"
 	        "movq	%%mm7,	56(%0)\n"
 		:
+# elif defined USE_ARM_IWMMXT
+		"wldrd	%0,	[%9]\n"
+		"wldrd	%1,	[%9,  #8]\n"
+		"wldrd	%2,	[%9, #16]\n"
+		"wldrd	%3,	[%9, #24]\n"
+		"wldrd	%4,	[%9, #32]\n"
+		"wldrd	%5,	[%9, #40]\n"
+		"wldrd	%6,	[%9, #48]\n"
+		"wldrd	%7,	[%9, #56]\n"
+
+		"wstrd	%0,	[%8]\n"
+		"wstrd	%1,	[%8,  #8]\n"
+		"wstrd	%2,	[%8, #16]\n"
+		"wstrd	%3,	[%8, #24]\n"
+		"wstrd	%4,	[%8, #32]\n"
+		"wstrd	%5,	[%8, #40]\n"
+		"wstrd	%6,	[%8, #48]\n"
+		"wstrd	%7,	[%8, #56]\n"
+		: "=&y" (v0), "=&y" (v1), "=&y" (v2), "=&y" (v3),
+		  "=&y" (v4), "=&y" (v5), "=&y" (v6), "=&y" (v7)
+# endif
 		: "r" (d), "r" (s)
-		: "memory",
+		: "memory"
+# ifdef USE_X86_MMX
+		  ,
 		  "%mm0", "%mm1", "%mm2", "%mm3",
-		  "%mm4", "%mm5", "%mm6", "%mm7");
+		  "%mm4", "%mm5", "%mm6", "%mm7"
+# endif
+		);
 #else
-	    __m64 v0 = ldq_u((uint64_t *)(s + 0));
-	    __m64 v1 = ldq_u((uint64_t *)(s + 8));
-	    __m64 v2 = ldq_u((uint64_t *)(s + 16));
-	    __m64 v3 = ldq_u((uint64_t *)(s + 24));
-	    __m64 v4 = ldq_u((uint64_t *)(s + 32));
-	    __m64 v5 = ldq_u((uint64_t *)(s + 40));
-	    __m64 v6 = ldq_u((uint64_t *)(s + 48));
-	    __m64 v7 = ldq_u((uint64_t *)(s + 56));
+	    v0 = *(__m64 *)(s + 0);
+	    v1 = *(__m64 *)(s + 8);
+	    v2 = *(__m64 *)(s + 16);
+	    v3 = *(__m64 *)(s + 24);
+	    v4 = *(__m64 *)(s + 32);
+	    v5 = *(__m64 *)(s + 40);
+	    v6 = *(__m64 *)(s + 48);
+	    v7 = *(__m64 *)(s + 56);
 	    *(__m64 *)(d + 0)  = v0;
 	    *(__m64 *)(d + 8)  = v1;
 	    *(__m64 *)(d + 16) = v2;
-- 
1.7.3.4
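
For readers who have not seen the iwmmxt constraint syntax before, below is a
minimal standalone sketch of the pattern the second hunk adds: copying one
64-byte block by staging it through the eight WMMX data registers with GCC
extended asm. Everything in the sketch is illustrative rather than taken from
pixman: the name copy64 is made up, uint64_t stands in for __m64, and the
__IWMMXT__ guard assumes a GCC build with -march=iwmmxt (any other target
falls back to memcpy so the sketch compiles everywhere).

    #include <stdint.h>
    #include <string.h>
    #include <assert.h>

    /* Copy one 64-byte block from s to d, staged through the eight WMMX
     * data registers on an iwmmxt target; plain memcpy elsewhere. */
    static void
    copy64 (uint8_t *d, const uint8_t *s)
    {
    #if defined (__GNUC__) && defined (__IWMMXT__)
	uint64_t v0, v1, v2, v3, v4, v5, v6, v7;

	__asm__ (
	    /* eight independent 64-bit loads into wR registers */
	    "wldrd	%0,	[%9]\n"
	    "wldrd	%1,	[%9,  #8]\n"
	    "wldrd	%2,	[%9, #16]\n"
	    "wldrd	%3,	[%9, #24]\n"
	    "wldrd	%4,	[%9, #32]\n"
	    "wldrd	%5,	[%9, #40]\n"
	    "wldrd	%6,	[%9, #48]\n"
	    "wldrd	%7,	[%9, #56]\n"
	    /* eight 64-bit stores back out */
	    "wstrd	%0,	[%8]\n"
	    "wstrd	%1,	[%8,  #8]\n"
	    "wstrd	%2,	[%8, #16]\n"
	    "wstrd	%3,	[%8, #24]\n"
	    "wstrd	%4,	[%8, #32]\n"
	    "wstrd	%5,	[%8, #40]\n"
	    "wstrd	%6,	[%8, #48]\n"
	    "wstrd	%7,	[%8, #56]\n"
	    /* "=&y": WMMX data-register outputs, marked early-clobber
	     * because they are written before the address inputs %8/%9
	     * are last read */
	    : "=&y" (v0), "=&y" (v1), "=&y" (v2), "=&y" (v3),
	      "=&y" (v4), "=&y" (v5), "=&y" (v6), "=&y" (v7)
	    : "r" (d), "r" (s)
	    : "memory");
    #else
	memcpy (d, s, 64);
    #endif
    }

    int
    main (void)
    {
	uint8_t src[64], dst[64];
	int i;

	for (i = 0; i < 64; i++)
	    src[i] = (uint8_t) i;

	copy64 (dst, src);
	assert (memcmp (dst, src, 64) == 0);
	return 0;
    }

The shape of the asm block, in both the patch and this sketch, issues all
eight wldrd loads before any wstrd store, so the loads are independent and
can overlap in the pipeline, while the "=&y" early-clobber outputs keep GCC
from reusing a register before the asm is finished with it.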