[Pixman] [PATCH 7/8] mmx: add ARM/iwmmxt inline assembly blit code
Matt Turner
mattst88 at gmail.com
Fri Sep 23 11:54:16 PDT 2011
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
pixman/pixman-mmx.c | 51 ++++++++++++++++++++++++++++++++++++++++-----------
1 files changed, 40 insertions(+), 11 deletions(-)
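
For reviewers unfamiliar with iWMMXt, here is a minimal, stand-alone sketch of
the wldrd/wstrd copy pattern the new path relies on. It is only a sketch: it
assumes GCC targeting ARM with iWMMXt enabled (e.g. -mcpu=iwmmxt), where
mmintrin.h provides __m64 and the "y" constraint selects the wR0-wR15
register file; the helper name is made up for illustration.

    #include <mmintrin.h>
    #include <stdint.h>

    /* Copy 16 bytes through two iWMMXt registers: wldrd loads a 64-bit
     * doubleword from memory into a wR register, wstrd stores one back. */
    static void
    copy16_iwmmxt (uint8_t *d, const uint8_t *s)
    {
        __m64 v0, v1;

        __asm__ (
            "wldrd %0, [%3]\n"      /* v0 = *(s + 0) */
            "wldrd %1, [%3, #8]\n"  /* v1 = *(s + 8) */
            "wstrd %0, [%2]\n"      /* *(d + 0) = v0 */
            "wstrd %1, [%2, #8]\n"  /* *(d + 8) = v1 */
            : "=&y" (v0), "=&y" (v1)
            : "r" (d), "r" (s)
            : "memory");
    }

The "&" early-clobber on the outputs mirrors the patch below: the destination
registers are written before the source pointer operand is last read, so the
compiler must not assign them to overlap an input.
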
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index f848ab4..4e384c1 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -2933,8 +2933,12 @@ pixman_blt_mmx (uint32_t *src_bits,
while (w >= 64)
{
-#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX
+#ifndef USE_X86_MMX
+ __m64 v0, v1, v2, v3, v4, v5, v6, v7;
+#endif
+#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)))
__asm__ (
+# ifdef USE_X86_MMX
"movq (%1), %%mm0\n"
"movq 8(%1), %%mm1\n"
"movq 16(%1), %%mm2\n"
@@ -2953,19 +2957,44 @@ pixman_blt_mmx (uint32_t *src_bits,
"movq %%mm6, 48(%0)\n"
"movq %%mm7, 56(%0)\n"
:
+# elif defined USE_ARM_IWMMXT
+ "wldrd %0, [%9]\n"
+ "wldrd %1, [%9, #8]\n"
+ "wldrd %2, [%9, #16]\n"
+ "wldrd %3, [%9, #24]\n"
+ "wldrd %4, [%9, #32]\n"
+ "wldrd %5, [%9, #40]\n"
+ "wldrd %6, [%9, #48]\n"
+ "wldrd %7, [%9, #56]\n"
+
+ "wstrd %0, [%8]\n"
+ "wstrd %1, [%8, #8]\n"
+ "wstrd %2, [%8, #16]\n"
+ "wstrd %3, [%8, #24]\n"
+ "wstrd %4, [%8, #32]\n"
+ "wstrd %5, [%8, #40]\n"
+ "wstrd %6, [%8, #48]\n"
+ "wstrd %7, [%8, #56]\n"
+ : "=&y" (v0), "=&y" (v1), "=&y" (v2), "=&y" (v3),
+ "=&y" (v4), "=&y" (v5), "=&y" (v6), "=&y" (v7)
+# endif
: "r" (d), "r" (s)
- : "memory",
+ : "memory"
+# ifdef USE_X86_MMX
+ ,
"%mm0", "%mm1", "%mm2", "%mm3",
- "%mm4", "%mm5", "%mm6", "%mm7");
+ "%mm4", "%mm5", "%mm6", "%mm7"
+# endif
+ );
#else
- __m64 v0 = ldq_u((uint64_t *)(s + 0));
- __m64 v1 = ldq_u((uint64_t *)(s + 8));
- __m64 v2 = ldq_u((uint64_t *)(s + 16));
- __m64 v3 = ldq_u((uint64_t *)(s + 24));
- __m64 v4 = ldq_u((uint64_t *)(s + 32));
- __m64 v5 = ldq_u((uint64_t *)(s + 40));
- __m64 v6 = ldq_u((uint64_t *)(s + 48));
- __m64 v7 = ldq_u((uint64_t *)(s + 56));
+ v0 = *(__m64 *)(s + 0);
+ v1 = *(__m64 *)(s + 8);
+ v2 = *(__m64 *)(s + 16);
+ v3 = *(__m64 *)(s + 24);
+ v4 = *(__m64 *)(s + 32);
+ v5 = *(__m64 *)(s + 40);
+ v6 = *(__m64 *)(s + 48);
+ v7 = *(__m64 *)(s + 56);
*(__m64 *)(d + 0) = v0;
*(__m64 *)(d + 8) = v1;
*(__m64 *)(d + 16) = v2;
--
1.7.3.4