[Pixman] [PATCH 2/3] mmx: add a8 fetcher
Matt Turner
mattst88 at gmail.com
Wed Apr 18 17:37:37 PDT 2012
oprofile of xfce4-terminal-a1
210535 9.0407 libpixman-1.so.0.25.3 fetch_scanline_a8
144802 6.0054 libpixman-1.so.0.25.3 mmx_fetch_a8
Loongson:
add_8_8_8 = L1: 17.98 L2: 17.28 M: 14.28 ( 19.79%) HT: 11.11 VT: 10.38 R: 9.97 RT: 5.14 ( 55Kops/s)
add_8_8_8 = L1: 20.44 L2: 19.65 M: 15.62 ( 21.53%) HT: 12.86 VT: 11.98 R: 11.32 RT: 6.13 ( 64Kops/s)
src_8888_8_0565 = L1: 19.97 L2: 18.59 M: 13.42 ( 32.55%) HT: 11.46 VT: 10.78 R: 10.33 RT: 5.87 ( 61Kops/s)
src_8888_8_0565 = L1: 21.16 L2: 19.68 M: 13.94 ( 33.64%) HT: 12.31 VT: 11.52 R: 11.02 RT: 6.54 ( 68Kops/s)
src_x888_8_x888 = L1: 20.54 L2: 18.88 M: 13.07 ( 40.74%) HT: 11.05 VT: 10.36 R: 10.02 RT: 5.68 ( 60Kops/s)
src_x888_8_x888 = L1: 21.92 L2: 20.15 M: 13.35 ( 41.42%) HT: 11.70 VT: 10.95 R: 10.53 RT: 6.18 ( 65Kops/s)
over_x888_8_0565 = L1: 10.32 L2: 9.85 M: 7.63 ( 21.13%) HT: 6.56 VT: 6.30 R: 6.12 RT: 3.80 ( 43Kops/s)
over_x888_8_0565 = L1: 10.64 L2: 10.17 M: 7.74 ( 21.35%) HT: 6.83 VT: 6.55 R: 6.34 RT: 4.03 ( 46Kops/s)
ARM/iwMMXt:
add_8_8_8 = L1: 13.10 L2: 11.67 M: 10.74 ( 13.46%) HT: 8.62 VT: 8.15 R: 7.94 RT: 4.39 ( 44Kops/s)
add_8_8_8 = L1: 13.81 L2: 12.79 M: 11.63 ( 13.93%) HT: 9.33 VT: 9.20 R: 9.04 RT: 5.43 ( 52Kops/s)
src_8888_8_0565 = L1: 16.62 L2: 15.07 M: 12.52 ( 27.46%) HT: 10.07 VT: 10.17 R: 9.95 RT: 5.64 ( 54Kops/s)
src_8888_8_0565 = L1: 16.84 L2: 16.11 M: 13.22 ( 27.71%) HT: 11.74 VT: 10.90 R: 10.80 RT: 6.66 ( 62Kops/s)
src_x888_8_x888 = L1: 17.49 L2: 16.22 M: 13.73 ( 38.73%) HT: 10.10 VT: 10.33 R: 9.55 RT: 5.21 ( 52Kops/s)
src_x888_8_x888 = L1: 19.33 L2: 17.66 M: 14.26 ( 38.43%) HT: 11.53 VT: 10.83 R: 10.57 RT: 6.12 ( 58Kops/s)
over_x888_8_0565 = L1: 7.57 L2: 7.29 M: 6.37 ( 15.97%) HT: 5.53 VT: 5.33 R: 5.21 RT: 3.22 ( 35Kops/s)
over_x888_8_0565 = L1: 8.15 L2: 7.56 M: 6.50 ( 15.58%) HT: 5.73 VT: 5.49 R: 5.50 RT: 3.53 ( 38Kops/s)
---
pixman/loongson-mmintrin.h | 22 +++++++++++++++++++++
pixman/pixman-mmx.c | 46 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 0 deletions(-)
diff --git a/pixman/loongson-mmintrin.h b/pixman/loongson-mmintrin.h
index 508366c..76ae892 100644
--- a/pixman/loongson-mmintrin.h
+++ b/pixman/loongson-mmintrin.h
@@ -183,6 +183,17 @@ _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("punpckhhw %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
{
__m64 ret;
@@ -207,6 +218,17 @@ _mm_unpacklo_pi8_f (__m32 __m1, __m64 __m2)
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("punpcklhw %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si64 (__m64 __m1, __m64 __m2)
{
__m64 ret;
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 23fcc17..c127f4a 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -3254,6 +3254,51 @@ mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
return iter->buffer;
}
+static uint32_t *
+mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+ int w = iter->width;
+ uint32_t *dst = iter->buffer;
+ uint8_t *src = iter->bits;
+
+ iter->bits += iter->stride;
+
+ while (w && (((unsigned long)dst) & 15))
+ {
+ *dst++ = *(src++) << 24;
+ w--;
+ }
+
+ while (w >= 8)
+ {
+ __m64 mm0 = ldq_u ((__m64 *)src);
+
+ __m64 mm1 = _mm_unpacklo_pi8 (_mm_setzero_si64(), mm0);
+ __m64 mm2 = _mm_unpackhi_pi8 (_mm_setzero_si64(), mm0);
+ __m64 mm3 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm1);
+ __m64 mm4 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm1);
+ __m64 mm5 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm2);
+ __m64 mm6 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm2);
+
+ *(__m64 *)(dst + 0) = mm3;
+ *(__m64 *)(dst + 2) = mm4;
+ *(__m64 *)(dst + 4) = mm5;
+ *(__m64 *)(dst + 6) = mm6;
+
+ dst += 8;
+ src += 8;
+ w -= 8;
+ }
+
+ while (w)
+ {
+ *dst++ = *(src++) << 24;
+ w--;
+ }
+
+ return iter->buffer;
+}
+
typedef struct
{
pixman_format_code_t format;
@@ -3263,6 +3308,7 @@ typedef struct
static const fetcher_info_t fetchers[] =
{
{ PIXMAN_r5g6b5, mmx_fetch_r5g6b5 },
+ { PIXMAN_a8, mmx_fetch_a8 },
{ PIXMAN_null }
};
--
1.7.3.4
More information about the Pixman
mailing list