pixman: Branch 'master' - 8 commits

Wed Dec 9 02:03:44 PST 2009

pixman/pixman-arm-neon-asm.S |  285 +++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-neon.c     |  279 +++++++++++++++++++++++++-----------------
 2 files changed, 456 insertions(+), 108 deletions(-)

New commits:
commit 78a60047ac0f85423e0474ef54930e1f537f646b
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Wed Dec 9 11:29:13 2009 +0200

    ARM: added 'neon_composite_over_n_8888' fast path

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 57680bb..691a194 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -630,6 +630,36 @@ generate_composite_function \
 
 /******************************************************************************/
 
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_8888_process_pixblock_tail_head
+    pixman_composite_over_8888_8888_process_pixblock_tail
+    vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
+    vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
+    pixman_composite_over_8888_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_n_8888_init
+    add         DUMMY, sp, #ARGS_STACK_OFFSET
+    vld1.32     {d3[0]}, [DUMMY]
+    vdup.8      d0, d3[0]
+    vdup.8      d1, d3[1]
+    vdup.8      d2, d3[2]
+    vdup.8      d3, d3[3]
+.endm
+
+generate_composite_function \
+    pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    pixman_composite_over_n_8888_init, \
+    default_cleanup, \
+    pixman_composite_over_8888_8888_process_pixblock_head, \
+    pixman_composite_over_8888_8888_process_pixblock_tail, \
+    pixman_composite_over_n_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
 .macro pixman_composite_over_n_8_0565_process_pixblock_head
     /* in */
     vmull.u8    q0, d24, d8
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 8ae79ae..fef98a1 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -259,6 +259,7 @@ BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
 BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
 
 BIND_N_NULL_DST(over_n_0565, uint16_t, 1)
+BIND_N_NULL_DST(over_n_8888, uint32_t, 1)
 
 BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
 BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
@@ -403,6 +404,8 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, neon_composite_over_n_8_8888    },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888    },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_over_n_0565      },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_a8r8g8b8, neon_composite_over_n_8888      },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_over_n_8888      },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid,    PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888 },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid,    PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888 },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_8888_8_8888 },
commit 96fd17488f0966d2df53623195810dc640bf5ca6
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Wed Dec 9 11:02:04 2009 +0200

    ARM: added 'neon_composite_over_n_0565' fast path

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index bffc676..57680bb 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -344,6 +344,75 @@ generate_composite_function \
 
 /******************************************************************************/
 
+.macro pixman_composite_over_n_0565_process_pixblock_head
+    /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
+       and put data into d6 - red, d7 - green, d30 - blue */
+    vshrn.u16   d6, q2, #8
+    vshrn.u16   d7, q2, #3
+    vsli.u16    q2, q2, #5
+    vsri.u8     d6, d6, #5
+    vsri.u8     d7, d7, #6
+    vshrn.u16   d30, q2, #2
+    /* now do alpha blending, storing results in 8-bit planar format
+       into d16 - red, d19 - green, d18 - blue */
+    vmull.u8    q10, d3, d6
+    vmull.u8    q11, d3, d7
+    vmull.u8    q12, d3, d30
+    vrshr.u16   q13, q10, #8
+    vrshr.u16   q3, q11, #8
+    vrshr.u16   q15, q12, #8
+    vraddhn.u16 d20, q10, q13
+    vraddhn.u16 d23, q11, q3
+    vraddhn.u16 d22, q12, q15
+.endm
+
+.macro pixman_composite_over_n_0565_process_pixblock_tail
+    /* ... continue alpha blending */
+    vqadd.u8    d16, d2, d20
+    vqadd.u8    q9, q0, q11
+    /* convert the result to r5g6b5 and store it into {d28, d29} */
+    vshll.u8    q14, d16, #8
+    vshll.u8    q8, d19, #8
+    vshll.u8    q9, d18, #8
+    vsri.u16    q14, q8, #5
+    vsri.u16    q14, q9, #11
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_0565_process_pixblock_tail_head
+    pixman_composite_over_n_0565_process_pixblock_tail
+    vld1.16     {d4, d5}, [DST_R, :128]!
+    vst1.16     {d28, d29}, [DST_W, :128]!
+    pixman_composite_over_n_0565_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_n_0565_init
+    add         DUMMY, sp, #ARGS_STACK_OFFSET
+    vld1.32     {d3[0]}, [DUMMY]
+    vdup.8      d0, d3[0]
+    vdup.8      d1, d3[1]
+    vdup.8      d2, d3[2]
+    vdup.8      d3, d3[3]
+    vmvn.8      d3, d3      /* invert source alpha */
+.endm
+
+generate_composite_function \
+    pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \
+    FLAG_DST_READWRITE, \
+    8, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    pixman_composite_over_n_0565_init, \
+    default_cleanup, \
+    pixman_composite_over_n_0565_process_pixblock_head, \
+    pixman_composite_over_n_0565_process_pixblock_tail, \
+    pixman_composite_over_n_0565_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    0,  /* src_basereg   */ \
+    24  /* mask_basereg  */
+
+/******************************************************************************/
+
 .macro pixman_composite_src_8888_0565_process_pixblock_head
     vshll.u8    q8, d1, #8
     vshll.u8    q14, d2, #8
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 4894285..8ae79ae 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -71,6 +71,46 @@ neon_composite_##name (pixman_implementation_t *imp,                    \
                                         src_line, src_stride);          \
 }
 
+#define BIND_N_NULL_DST(name, dst_type, dst_cnt)                        \
+void                                                                    \
+pixman_composite_##name##_asm_neon (int32_t    w,                       \
+                                    int32_t    h,                       \
+                                    dst_type  *dst,                     \
+                                    int32_t    dst_stride,              \
+                                    uint32_t   src);                    \
+                                                                        \
+static void                                                             \
+neon_composite_##name (pixman_implementation_t *imp,                    \
+                       pixman_op_t              op,                     \
+                       pixman_image_t *         src_image,              \
+                       pixman_image_t *         mask_image,             \
+                       pixman_image_t *         dst_image,              \
+                       int32_t                  src_x,                  \
+                       int32_t                  src_y,                  \
+                       int32_t                  mask_x,                 \
+                       int32_t                  mask_y,                 \
+                       int32_t                  dest_x,                 \
+                       int32_t                  dest_y,                 \
+                       int32_t                  width,                  \
+                       int32_t                  height)                 \
+{                                                                       \
+    dst_type  *dst_line;                                                \
+    int32_t    dst_stride;                                              \
+    uint32_t   src;                                                     \
+                                                                        \
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);  \
+                                                                        \
+    if (src == 0)                                                       \
+	return;                                                         \
+                                                                        \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);              \
+                                                                        \
+    pixman_composite_##name##_asm_neon (width, height,                  \
+                                        dst_line, dst_stride,           \
+                                        src);                           \
+}
+
 #define BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt)   \
 void                                                                    \
 pixman_composite_##name##_asm_neon (int32_t    w,                       \
@@ -218,6 +258,8 @@ BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
 BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
 BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
 
+BIND_N_NULL_DST(over_n_0565, uint16_t, 1)
+
 BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
 BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
 
@@ -360,6 +402,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, neon_composite_over_n_8_8888    },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, neon_composite_over_n_8_8888    },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888    },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_over_n_0565      },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid,    PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888 },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid,    PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888 },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_8888_8_8888 },
commit 2d332c7a569803107e11b41c7b2c020b4050e26e
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Wed Dec 9 10:33:01 2009 +0200

    ARM: added 'neon_composite_src_0565_8888' fast path

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index d35704d..bffc676 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -387,6 +387,41 @@ generate_composite_function \
 
 /******************************************************************************/
 
+.macro pixman_composite_src_0565_8888_process_pixblock_head
+    vshrn.u16   d30, q0, #8
+    vshrn.u16   d29, q0, #3
+    vsli.u16    q0, q0, #5
+    vmov.u8     d31, #255
+    vsri.u8     d30, d30, #5
+    vsri.u8     d29, d29, #6
+    vshrn.u16   d28, q0, #2
+.endm
+
+.macro pixman_composite_src_0565_8888_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_src_0565_8888_process_pixblock_tail_head
+    pixman_composite_src_0565_8888_process_pixblock_tail
+    vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
+    vld1.16    {d0, d1}, [SRC]!
+    pixman_composite_src_0565_8888_process_pixblock_head
+    cache_preload 8, 8
+.endm
+
+generate_composite_function \
+    pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \
+    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    10, /* prefetch distance */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_src_0565_8888_process_pixblock_head, \
+    pixman_composite_src_0565_8888_process_pixblock_tail, \
+    pixman_composite_src_0565_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
 .macro pixman_composite_add_8000_8000_process_pixblock_head
     vqadd.u8    q14, q0, q2
     vqadd.u8    q15, q1, q3
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index c7f0870..4894285 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -214,6 +214,7 @@ BIND_SRC_NULL_DST(src_8888_8888, uint32_t, 1, uint32_t, 1)
 BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
 BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
 BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
+BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
 BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
 BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
 
@@ -344,6 +345,10 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
     { PIXMAN_OP_SRC,  PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_8888_0565    },
     { PIXMAN_OP_SRC,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_8888_0565    },
     { PIXMAN_OP_SRC,  PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_8888_0565    },
+    { PIXMAN_OP_SRC,  PIXMAN_r5g6b5,   PIXMAN_null,     PIXMAN_a8r8g8b8, neon_composite_src_0565_8888    },
+    { PIXMAN_OP_SRC,  PIXMAN_r5g6b5,   PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_src_0565_8888    },
+    { PIXMAN_OP_SRC,  PIXMAN_b5g6r5,   PIXMAN_null,     PIXMAN_a8b8g8r8, neon_composite_src_0565_8888    },
+    { PIXMAN_OP_SRC,  PIXMAN_b5g6r5,   PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_src_0565_8888    },
     { PIXMAN_OP_SRC,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_src_8888_8888    },
     { PIXMAN_OP_SRC,  PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_src_8888_8888    },
     { PIXMAN_OP_SRC,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_src_8888_8888    },
commit 062da411d81c7d970a302dd2c283ef5327b867da
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Tue Dec 8 15:04:41 2009 +0200

    ARM: added 'neon_composite_add_8888_8888_8888' fast path

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 81f0b92..d35704d 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -983,6 +983,53 @@ generate_composite_function \
 
 /******************************************************************************/
 
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_head
+    /* expecting source data in {d0, d1, d2, d3} */
+    /* destination data in {d4, d5, d6, d7} */
+    /* mask in {d24, d25, d26, d27} */
+    vmull.u8    q8, d27, d0
+    vmull.u8    q9, d27, d1
+    vmull.u8    q10, d27, d2
+    vmull.u8    q11, d27, d3
+    vrshr.u16   q0, q8, #8
+    vrshr.u16   q1, q9, #8
+    vrshr.u16   q12, q10, #8
+    vrshr.u16   q13, q11, #8
+    vraddhn.u16 d0, q0, q8
+    vraddhn.u16 d1, q1, q9
+    vraddhn.u16 d2, q12, q10
+    vraddhn.u16 d3, q13, q11
+    vqadd.u8    q14, q0, q2
+    vqadd.u8    q15, q1, q3
+.endm
+
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+    pixman_composite_add_8888_8888_8888_process_pixblock_tail
+    vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
+    vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
+    vld4.8      {d24, d25, d26, d27}, [MASK]!
+    vld4.8      {d0, d1, d2, d3}, [SRC]!
+    cache_preload 8, 8
+    pixman_composite_add_8888_8888_8888_process_pixblock_head
+.endm
+
+generate_composite_function \
+    pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    10, /* prefetch distance */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
 .macro pixman_composite_over_8888_n_8888_process_pixblock_head
     /* expecting source data in {d0, d1, d2, d3} */
     /* destination data in {d4, d5, d6, d7} */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index fe444f2..c7f0870 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -227,6 +227,7 @@ BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
 BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
 
 BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
+BIND_SRC_MASK_DST(add_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
 BIND_SRC_MASK_DST(over_8888_8_8888, uint32_t, 1, uint8_t, 1, uint32_t, 1)
 BIND_SRC_MASK_DST(over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
 
@@ -369,6 +370,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_over_8888_8888   },
     { PIXMAN_OP_ADD,  PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8,       neon_composite_add_n_8_8        },
     { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_a8,       PIXMAN_a8,       neon_composite_add_8_8_8        },
+    { PIXMAN_OP_ADD,  PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, neon_composite_add_8888_8888_8888 },
     { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_null,     PIXMAN_a8,       neon_composite_add_8000_8000    },
     { PIXMAN_OP_ADD,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, neon_composite_add_8888_8888    },
     { PIXMAN_OP_ADD,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, neon_composite_add_8888_8888    },
commit 3d0eedb5d9af97fed68e2da03d6aee40197e2a76
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Tue Dec 8 14:39:41 2009 +0200

    ARM: added 'neon_composite_add_8888_8888' fast path

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 49d840c..81f0b92 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -427,6 +427,38 @@ generate_composite_function \
 
 /******************************************************************************/
 
+.macro pixman_composite_add_8888_8888_process_pixblock_tail_head
+    vld1.8      {d0, d1, d2, d3}, [SRC]!
+                                    PF add PF_X, PF_X, #8
+                                    PF tst PF_CTL, #0xF
+    vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
+                                    PF addne PF_X, PF_X, #8
+                                    PF subne PF_CTL, PF_CTL, #1
+        vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
+                                    PF cmp PF_X, ORIG_W
+                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+                                    PF subge PF_X, PF_X, ORIG_W
+                                    PF subges PF_CTL, PF_CTL, #0x10
+    vqadd.u8    q14, q0, q2
+                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+    vqadd.u8    q15, q1, q3
+.endm
+
+generate_composite_function \
+    pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \
+    FLAG_DST_READWRITE, \
+    8, /* number of pixels, processed in a single block */ \
+    10, /* prefetch distance */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_add_8000_8000_process_pixblock_head, \
+    pixman_composite_add_8000_8000_process_pixblock_tail, \
+    pixman_composite_add_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
 .macro pixman_composite_over_8888_8888_process_pixblock_head
     vmvn.8      d24, d3  /* get inverted alpha */
     /* do alpha blending */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 675b64f..fe444f2 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -215,6 +215,7 @@ BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
 BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
 BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
 BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
+BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
 
 BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
 BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
@@ -369,6 +370,8 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
     { PIXMAN_OP_ADD,  PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8,       neon_composite_add_n_8_8        },
     { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_a8,       PIXMAN_a8,       neon_composite_add_8_8_8        },
     { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_null,     PIXMAN_a8,       neon_composite_add_8000_8000    },
+    { PIXMAN_OP_ADD,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, neon_composite_add_8888_8888    },
+    { PIXMAN_OP_ADD,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, neon_composite_add_8888_8888    },
     { PIXMAN_OP_NONE },
 };
 
commit 86b54c6701666d087f0234047128fbf0fd6468b6
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Mon Dec 7 22:53:30 2009 +0200

    ARM: added 'neon_composite_over_8888_8_8888' fast path

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 9f3b566..49d840c 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1060,6 +1060,42 @@ generate_composite_function \
 
 /******************************************************************************/
 
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head
+    vld4.8     {d4, d5, d6, d7}, [DST_R, :128]!
+    pixman_composite_over_8888_n_8888_process_pixblock_tail
+    vld4.8     {d0, d1, d2, d3}, [SRC]!
+    cache_preload 8, 8
+    vld1.8     {d15}, [MASK]!
+    pixman_composite_over_8888_n_8888_process_pixblock_head
+    vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_8888_8_8888_init
+    vpush       {d8-d15}
+.endm
+
+.macro pixman_composite_over_8888_8_8888_cleanup
+    vpop        {d8-d15}
+.endm
+
+generate_composite_function \
+    pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    pixman_composite_over_8888_8_8888_init, \
+    pixman_composite_over_8888_8_8888_cleanup, \
+    pixman_composite_over_8888_n_8888_process_pixblock_head, \
+    pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+    pixman_composite_over_8888_8_8888_process_pixblock_tail_head \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    0,  /* src_basereg   */ \
+    15  /* mask_basereg  */
+
+/******************************************************************************/
+
 .macro pixman_composite_src_0888_0888_process_pixblock_head
 .endm
 
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 62d82f6..675b64f 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -226,6 +226,7 @@ BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
 BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
 
 BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
+BIND_SRC_MASK_DST(over_8888_8_8888, uint32_t, 1, uint8_t, 1, uint32_t, 1)
 BIND_SRC_MASK_DST(over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
 
 void
@@ -354,6 +355,10 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888    },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid,    PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888 },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid,    PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_8888_8_8888 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, neon_composite_over_8888_8_8888 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,       PIXMAN_a8b8g8r8, neon_composite_over_8888_8_8888 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, neon_composite_over_8888_8_8888 },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888_8888 },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_over_8888_0565   },
     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_over_8888_0565   },
commit aec1524e773758369ab627553dc5c23d18619a85
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Mon Dec 7 22:42:17 2009 +0200

    ARM: added 'neon_composite_over_8888_8888_8888' fast path

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index e66fb86..9f3b566 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1024,6 +1024,42 @@ generate_composite_function \
 
 /******************************************************************************/
 
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head
+    vld4.8     {d4, d5, d6, d7}, [DST_R, :128]!
+    pixman_composite_over_8888_n_8888_process_pixblock_tail
+    vld4.8     {d0, d1, d2, d3}, [SRC]!
+    cache_preload 8, 8
+    vld4.8     {d12, d13, d14, d15}, [MASK]!
+    pixman_composite_over_8888_n_8888_process_pixblock_head
+    vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_8888_8888_8888_init
+    vpush       {d8-d15}
+.endm
+
+.macro pixman_composite_over_8888_8888_8888_cleanup
+    vpop        {d8-d15}
+.endm
+
+generate_composite_function \
+    pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    pixman_composite_over_8888_8888_8888_init, \
+    pixman_composite_over_8888_8888_8888_cleanup, \
+    pixman_composite_over_8888_n_8888_process_pixblock_head, \
+    pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+    pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    0,  /* src_basereg   */ \
+    12  /* mask_basereg  */
+
+/******************************************************************************/
+
 .macro pixman_composite_src_0888_0888_process_pixblock_head
 .endm
 
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 39d7bf9..62d82f6 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -226,6 +226,7 @@ BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
 BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
 
 BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
+BIND_SRC_MASK_DST(over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
 
 void
 pixman_composite_src_n_8_asm_neon (int32_t   w,
@@ -353,6 +354,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888    },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid,    PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888 },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid,    PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888_8888 },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_over_8888_0565   },
     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_over_8888_0565   },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, neon_composite_over_8888_8888   },
commit ba59d53d0b61effc422c4004a9f0e6cf848598d8
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Tue Dec 8 14:13:12 2009 +0200

    ARM: minor source formatting changes
    
    Now it's a bit harder to exceed 80 characters line limit
    when binding assembly functions.

diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 9194924..39d7bf9 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -35,27 +35,27 @@
 
 #define BIND_SRC_NULL_DST(name, src_type, src_cnt, dst_type, dst_cnt)   \
 void                                                                    \
-pixman_##name##_asm_neon (int32_t   w,                                  \
-                          int32_t   h,                                  \
-                          dst_type *dst,                                \
-                          int32_t   dst_stride,                         \
-                          src_type *src,                                \
-                          int32_t   src_stride);                        \
+pixman_composite_##name##_asm_neon (int32_t   w,                        \
+                                    int32_t   h,                        \
+                                    dst_type *dst,                      \
+                                    int32_t   dst_stride,               \
+                                    src_type *src,                      \
+                                    int32_t   src_stride);              \
                                                                         \
 static void                                                             \
-neon_##name (pixman_implementation_t *imp,                              \
-             pixman_op_t              op,                               \
-             pixman_image_t *         src_image,                        \
-             pixman_image_t *         mask_image,                       \
-             pixman_image_t *         dst_image,                        \
-             int32_t                  src_x,                            \
-             int32_t                  src_y,                            \
-             int32_t                  mask_x,                           \
-             int32_t                  mask_y,                           \
-             int32_t                  dest_x,                           \
-             int32_t                  dest_y,                           \
-             int32_t                  width,                            \
-             int32_t                  height)                           \
+neon_composite_##name (pixman_implementation_t *imp,                    \
+                       pixman_op_t              op,                     \
+                       pixman_image_t *         src_image,              \
+                       pixman_image_t *         mask_image,             \
+                       pixman_image_t *         dst_image,              \
+                       int32_t                  src_x,                  \
+                       int32_t                  src_y,                  \
+                       int32_t                  mask_x,                 \
+                       int32_t                  mask_y,                 \
+                       int32_t                  dest_x,                 \
+                       int32_t                  dest_y,                 \
+                       int32_t                  width,                  \
+                       int32_t                  height)                 \
 {                                                                       \
     dst_type *dst_line;                                                 \
     src_type *src_line;                                                 \
@@ -66,36 +66,36 @@ neon_##name (pixman_implementation_t *imp,                              \
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
                            dst_stride, dst_line, dst_cnt);              \
                                                                         \
-    pixman_##name##_asm_neon (width, height,                            \
-                              dst_line, dst_stride,                     \
-                              src_line, src_stride);                    \
+    pixman_composite_##name##_asm_neon (width, height,                  \
+                                        dst_line, dst_stride,           \
+                                        src_line, src_stride);          \
 }
 
 #define BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt)   \
 void                                                                    \
-pixman_##name##_asm_neon (int32_t    w,                                 \
-                          int32_t    h,                                 \
-                          dst_type  *dst,                               \
-                          int32_t    dst_stride,                        \
-                          uint32_t   src,                               \
-                          int32_t    unused,                            \
-                          mask_type *mask,                              \
-                          int32_t    mask_stride);                      \
+pixman_composite_##name##_asm_neon (int32_t    w,                       \
+                                    int32_t    h,                       \
+                                    dst_type  *dst,                     \
+                                    int32_t    dst_stride,              \
+                                    uint32_t   src,                     \
+                                    int32_t    unused,                  \
+                                    mask_type *mask,                    \
+                                    int32_t    mask_stride);            \
                                                                         \
 static void                                                             \
-neon_##name (pixman_implementation_t *imp,                              \
-             pixman_op_t              op,                               \
-             pixman_image_t *         src_image,                        \
-             pixman_image_t *         mask_image,                       \
-             pixman_image_t *         dst_image,                        \
-             int32_t                  src_x,                            \
-             int32_t                  src_y,                            \
-             int32_t                  mask_x,                           \
-             int32_t                  mask_y,                           \
-             int32_t                  dest_x,                           \
-             int32_t                  dest_y,                           \
-             int32_t                  width,                            \
-             int32_t                  height)                           \
+neon_composite_##name (pixman_implementation_t *imp,                    \
+                       pixman_op_t              op,                     \
+                       pixman_image_t *         src_image,              \
+                       pixman_image_t *         mask_image,             \
+                       pixman_image_t *         dst_image,              \
+                       int32_t                  src_x,                  \
+                       int32_t                  src_y,                  \
+                       int32_t                  mask_x,                 \
+                       int32_t                  mask_y,                 \
+                       int32_t                  dest_x,                 \
+                       int32_t                  dest_y,                 \
+                       int32_t                  width,                  \
+                       int32_t                  height)                 \
 {                                                                       \
     dst_type  *dst_line;                                                \
     mask_type *mask_line;                                               \
@@ -112,36 +112,36 @@ neon_##name (pixman_implementation_t *imp,                              \
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
                            mask_stride, mask_line, mask_cnt);           \
                                                                         \
-    pixman_##name##_asm_neon (width, height,                            \
-                              dst_line, dst_stride,                     \
-                              src, 0,                                   \
-                              mask_line, mask_stride);                  \
+    pixman_composite_##name##_asm_neon (width, height,                  \
+                                        dst_line, dst_stride,           \
+                                        src, 0,                         \
+                                        mask_line, mask_stride);        \
 }
 
 #define BIND_SRC_N_DST(name, src_type, src_cnt, dst_type, dst_cnt)      \
 void                                                                    \
-pixman_##name##_asm_neon (int32_t    w,                                 \
-                          int32_t    h,                                 \
-                          dst_type  *dst,                               \
-                          int32_t    dst_stride,                        \
-                          src_type  *src,                               \
-                          int32_t    src_stride,                        \
-                          uint32_t   mask);                             \
+pixman_composite_##name##_asm_neon (int32_t    w,                       \
+                                    int32_t    h,                       \
+                                    dst_type  *dst,                     \
+                                    int32_t    dst_stride,              \
+                                    src_type  *src,                     \
+                                    int32_t    src_stride,              \
+                                    uint32_t   mask);                   \
                                                                         \
 static void                                                             \
-neon_##name (pixman_implementation_t *imp,                              \
-             pixman_op_t              op,                               \
-             pixman_image_t *         src_image,                        \
-             pixman_image_t *         mask_image,                       \
-             pixman_image_t *         dst_image,                        \
-             int32_t                  src_x,                            \
-             int32_t                  src_y,                            \
-             int32_t                  mask_x,                           \
-             int32_t                  mask_y,                           \
-             int32_t                  dest_x,                           \
-             int32_t                  dest_y,                           \
-             int32_t                  width,                            \
-             int32_t                  height)                           \
+neon_composite_##name (pixman_implementation_t *imp,                    \
+                       pixman_op_t              op,                     \
+                       pixman_image_t *         src_image,              \
+                       pixman_image_t *         mask_image,             \
+                       pixman_image_t *         dst_image,              \
+                       int32_t                  src_x,                  \
+                       int32_t                  src_y,                  \
+                       int32_t                  mask_x,                 \
+                       int32_t                  mask_y,                 \
+                       int32_t                  dest_x,                 \
+                       int32_t                  dest_y,                 \
+                       int32_t                  width,                  \
+                       int32_t                  height)                 \
 {                                                                       \
     dst_type  *dst_line;                                                \
     src_type  *src_line;                                                \
@@ -158,38 +158,38 @@ neon_##name (pixman_implementation_t *imp,                              \
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
                            src_stride, src_line, src_cnt);              \
                                                                         \
-    pixman_##name##_asm_neon (width, height,                            \
-                              dst_line, dst_stride,                     \
-                              src_line, src_stride,                     \
-                              mask);                                    \
+    pixman_composite_##name##_asm_neon (width, height,                  \
+                                        dst_line, dst_stride,           \
+                                        src_line, src_stride,           \
+                                        mask);                          \
 }
 
 #define BIND_SRC_MASK_DST(name, src_type, src_cnt, mask_type, mask_cnt, \
                           dst_type, dst_cnt)                            \
 void                                                                    \
-pixman_##name##_asm_neon (int32_t    w,                                 \
-                          int32_t    h,                                 \
-                          dst_type  *dst,                               \
-                          int32_t    dst_stride,                        \
-                          src_type  *src,                               \
-                          int32_t    src_stride,                        \
-                          mask_type *mask,                              \
-                          int32_t    mask_stride);                      \
+pixman_composite_##name##_asm_neon (int32_t    w,                       \
+                                    int32_t    h,                       \
+                                    dst_type  *dst,                     \
+                                    int32_t    dst_stride,              \
+                                    src_type  *src,                     \
+                                    int32_t    src_stride,              \
+                                    mask_type *mask,                    \
+                                    int32_t    mask_stride);            \
                                                                         \
 static void                                                             \
-neon_##name (pixman_implementation_t *imp,                              \
-             pixman_op_t              op,                               \
-             pixman_image_t *         src_image,                        \
-             pixman_image_t *         mask_image,                       \
-             pixman_image_t *         dst_image,                        \
-             int32_t                  src_x,                            \
-             int32_t                  src_y,                            \
-             int32_t                  mask_x,                           \
-             int32_t                  mask_y,                           \
-             int32_t                  dest_x,                           \
-             int32_t                  dest_y,                           \
-             int32_t                  width,                            \
-             int32_t                  height)                           \
+neon_composite_##name (pixman_implementation_t *imp,                    \
+                       pixman_op_t              op,                     \
+                       pixman_image_t *         src_image,              \
+                       pixman_image_t *         mask_image,             \
+                       pixman_image_t *         dst_image,              \
+                       int32_t                  src_x,                  \
+                       int32_t                  src_y,                  \
+                       int32_t                  mask_x,                 \
+                       int32_t                  mask_y,                 \
+                       int32_t                  dest_x,                 \
+                       int32_t                  dest_y,                 \
+                       int32_t                  width,                  \
+                       int32_t                  height)                 \
 {                                                                       \
     dst_type  *dst_line;                                                \
     src_type  *src_line;                                                \
@@ -203,29 +203,29 @@ neon_##name (pixman_implementation_t *imp,                              \
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
                            mask_stride, mask_line, mask_cnt);           \
                                                                         \
-    pixman_##name##_asm_neon (width, height,                            \
-                              dst_line, dst_stride,                     \
-                              src_line, src_stride,                     \
-                              mask_line, mask_stride);                  \
+    pixman_composite_##name##_asm_neon (width, height,                  \
+                                        dst_line, dst_stride,           \
+                                        src_line, src_stride,           \
+                                        mask_line, mask_stride);        \
 }
 
 
-BIND_SRC_NULL_DST(composite_src_8888_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(composite_src_0565_0565, uint16_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(composite_src_0888_0888, uint8_t, 3, uint8_t, 3)
-BIND_SRC_NULL_DST(composite_src_8888_0565, uint32_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(composite_add_8000_8000, uint8_t, 1, uint8_t, 1)
+BIND_SRC_NULL_DST(src_8888_8888, uint32_t, 1, uint32_t, 1)
+BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
+BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
+BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
+BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
 
-BIND_SRC_NULL_DST(composite_over_8888_0565, uint32_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(composite_over_8888_8888, uint32_t, 1, uint32_t, 1)
+BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
+BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
 
-BIND_N_MASK_DST(composite_over_n_8_0565, uint8_t, 1, uint16_t, 1)
-BIND_N_MASK_DST(composite_over_n_8_8888, uint8_t, 1, uint32_t, 1)
-BIND_N_MASK_DST(composite_add_n_8_8, uint8_t, 1, uint8_t, 1)
+BIND_N_MASK_DST(over_n_8_0565, uint8_t, 1, uint16_t, 1)
+BIND_N_MASK_DST(over_n_8_8888, uint8_t, 1, uint32_t, 1)
+BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
 
-BIND_SRC_N_DST(composite_over_8888_n_8888, uint32_t, 1, uint32_t, 1)
+BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
 
-BIND_SRC_MASK_DST(composite_add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
+BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
 
 void
 pixman_composite_src_n_8_asm_neon (int32_t   w,