[Pixman] [PATCH 2/2] ARM: added 'neon_composite_src_8888_8_0565' fast path

Siarhei Siamashka siarhei.siamashka at gmail.com
Tue Aug 24 14:57:31 PDT 2010


From: Siarhei Siamashka <siarhei.siamashka at nokia.com>

---
 pixman/pixman-arm-neon-asm.S |   49 ++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-neon.c     |    4 +++
 2 files changed, 53 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 325f6e7..00cfde3 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -917,6 +917,55 @@ generate_composite_function \
 
 /******************************************************************************/
 
+.macro pixman_composite_src_n_8_0565_process_pixblock_head
+    /* in: scale the three 8-bit planes d0-d2 by the 8-bit mask d24, i.e. a rounded x * m / 255 per lane; d3 is not read */
+    vmull.u8    q15, d24, d2  /* q15 = d2 * mask, widened to 16 bits per lane */
+    vmull.u8    q3,  d24, d1  /* q3  = d1 * mask */
+    vmull.u8    q2,  d24, d0  /* q2  = d0 * mask */
+    vrshr.u16   q12, q15, #8  /* q12 = (q15 + 128) >> 8; overwrites d24/d25, safe because the mask was consumed above */
+    vrshr.u16   q11, q3,  #8
+    vrshr.u16   q10, q2,  #8
+    vraddhn.u16 d16, q15, q12 /* d16 = (q15 + q12 + 128) >> 8, narrowed back to 8 bits */
+    vraddhn.u16 d19, q3,  q11 /* d19 = scaled d1 plane */
+    vraddhn.u16 d18, q2,  q10 /* d18 = scaled d0 plane */
+.endm
+
+.macro pixman_composite_src_n_8_0565_process_pixblock_tail
+    /* convert to r5g6b5: pack the 8-bit planes d16, d19, d18 into eight 16-bit pixels in q14 */
+    vshll.u8    q14, d16, #8  /* d16 plane into bits 15..8 of each 16-bit lane */
+    vshll.u8    q8,  d19, #8  /* overwrites d16/d17, which is why d16 is consumed first */
+    vshll.u8    q9,  d18, #8  /* overwrites d18/d19; d19 was already consumed above */
+    vsri.u16    q14, q8,  #5  /* keep the top 5 bits of q14, insert the d19 plane below them */
+    vsri.u16    q14, q9,  #11 /* keep the top 11 bits, insert the top 5 bits of the d18 plane into bits 4..0 */
+.endm
+
+/* TODO: expand macros and improve instruction scheduling */
+.macro pixman_composite_src_8888_8_0565_process_pixblock_tail_head
+    pixman_composite_src_n_8_0565_process_pixblock_tail  /* finish the previous block: packed pixels end up in q14 */
+    vld4.8      {d0, d1, d2, d3}, [SRC]!  /* load 32 source bytes, de-interleaved into four byte planes; SRC is post-incremented */
+    cache_preload 8, 8
+    vld1.8      {d24}, [MASK]!  /* load 8 mask bytes; MASK is post-incremented */
+    pixman_composite_src_n_8_0565_process_pixblock_head  /* start the next block; does not write q14 */
+    vst1.16     {d28, d29}, [DST_W, :128]!  /* store q14 (the previous block's result) with a 128-bit alignment hint; DST_W is post-incremented */
+.endm
+
+generate_composite_function \
+    pixman_composite_src_8888_8_0565_asm_neon, 32, 8, 16, /* presumably src, mask, dst bits per pixel - confirm against the macro definition */ \
+    FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_src_n_8_0565_process_pixblock_head, \
+    pixman_composite_src_n_8_0565_process_pixblock_tail, \
+    pixman_composite_src_8888_8_0565_process_pixblock_tail_head, \
+    28, /* dst_w_basereg: d28, the first register stored by tail_head */ \
+    4,  /* dst_r_basereg */ \
+    0,  /* src_basereg: d0, the first register loaded from SRC in tail_head */ \
+    24  /* mask_basereg: d24, the register loaded from MASK in tail_head */
+
+/******************************************************************************/
+
 .macro pixman_composite_src_0565_0565_process_pixblock_head
 .endm
 
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index dc88f50..30358d5 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -90,6 +90,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
                                         uint32_t, 1, uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, src_8888_8_0565,
+                                        uint32_t, 1, uint8_t, 1, uint16_t, 1) /* NOTE(review): presumably emits the neon_composite_src_8888_8_0565 wrapper used in arm_neon_fast_paths[] - confirm against the macro definition */
 
 void
 pixman_composite_src_n_8_asm_neon (int32_t   w,
@@ -198,6 +200,8 @@ pixman_blt_neon (uint32_t *src_bits,
 
 static const pixman_fast_path_t arm_neon_fast_paths[] =
 {
+    PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, a8,       r5g6b5,   neon_composite_src_8888_8_0565),
+    PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, a8,       b5g6r5,   neon_composite_src_8888_8_0565),
     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     r5g6b5,   neon_composite_src_0565_0565),
     PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     b5g6r5,   neon_composite_src_0565_0565),
     PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
-- 
1.6.4.4



More information about the Pixman mailing list