pixman: Branch 'master' - 8 commits
Siarhei Siamashka
siamashka at kemper.freedesktop.org
Wed Dec 9 02:03:44 PST 2009
pixman/pixman-arm-neon-asm.S | 285 +++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-arm-neon.c | 279 +++++++++++++++++++++++++-----------------
2 files changed, 456 insertions(+), 108 deletions(-)
New commits:
commit 78a60047ac0f85423e0474ef54930e1f537f646b
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Wed Dec 9 11:29:13 2009 +0200
ARM: added 'neon_composite_over_n_8888' fast path
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 57680bb..691a194 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -630,6 +630,36 @@ generate_composite_function \
/******************************************************************************/
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_8888_process_pixblock_tail_head
+ pixman_composite_over_8888_8888_process_pixblock_tail
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ pixman_composite_over_8888_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_n_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d3[0]}, [DUMMY]
+ vdup.8 d0, d3[0]
+ vdup.8 d1, d3[1]
+ vdup.8 d2, d3[2]
+ vdup.8 d3, d3[3]
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8888_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_n_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
.macro pixman_composite_over_n_8_0565_process_pixblock_head
/* in */
vmull.u8 q0, d24, d8
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 8ae79ae..fef98a1 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -259,6 +259,7 @@ BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
BIND_N_NULL_DST(over_n_0565, uint16_t, 1)
+BIND_N_NULL_DST(over_n_8888, uint32_t, 1)
BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
@@ -403,6 +404,8 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, neon_composite_over_n_8_8888 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_n_0565 },
+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_over_n_8888 },
+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_over_n_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_8888_8_8888 },
commit 96fd17488f0966d2df53623195810dc640bf5ca6
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Wed Dec 9 11:02:04 2009 +0200
ARM: added 'neon_composite_over_n_0565' fast path
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index bffc676..57680bb 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -344,6 +344,75 @@ generate_composite_function \
/******************************************************************************/
+.macro pixman_composite_over_n_0565_process_pixblock_head
+ /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
+ and put data into d6 - red, d7 - green, d30 - blue */
+ vshrn.u16 d6, q2, #8
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vsri.u8 d6, d6, #5
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ /* now do alpha blending, storing results in 8-bit planar format
+ into d16 - red, d19 - green, d18 - blue */
+ vmull.u8 q10, d3, d6
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ vrshr.u16 q13, q10, #8
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ vraddhn.u16 d22, q12, q15
+.endm
+
+.macro pixman_composite_over_n_0565_process_pixblock_tail
+ /* ... continue alpha blending */
+ vqadd.u8 d16, d2, d20
+ vqadd.u8 q9, q0, q11
+ /* convert the result to r5g6b5 and store it into {d28, d29} */
+ vshll.u8 q14, d16, #8
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_0565_process_pixblock_tail_head
+ pixman_composite_over_n_0565_process_pixblock_tail
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ pixman_composite_over_n_0565_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_n_0565_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d3[0]}, [DUMMY]
+ vdup.8 d0, d3[0]
+ vdup.8 d1, d3[1]
+ vdup.8 d2, d3[2]
+ vdup.8 d3, d3[3]
+ vmvn.8 d3, d3 /* invert source alpha */
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_0565_init, \
+ default_cleanup, \
+ pixman_composite_over_n_0565_process_pixblock_head, \
+ pixman_composite_over_n_0565_process_pixblock_tail, \
+ pixman_composite_over_n_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+/******************************************************************************/
+
.macro pixman_composite_src_8888_0565_process_pixblock_head
vshll.u8 q8, d1, #8
vshll.u8 q14, d2, #8
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 4894285..8ae79ae 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -71,6 +71,46 @@ neon_composite_##name (pixman_implementation_t *imp, \
src_line, src_stride); \
}
+#define BIND_N_NULL_DST(name, dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_neon (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ uint32_t src); \
+ \
+static void \
+neon_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ int32_t dst_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
+ \
+ if (src == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ pixman_composite_##name##_asm_neon (width, height, \
+ dst_line, dst_stride, \
+ src); \
+}
+
#define BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt) \
void \
pixman_composite_##name##_asm_neon (int32_t w, \
@@ -218,6 +258,8 @@ BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
+BIND_N_NULL_DST(over_n_0565, uint16_t, 1)
+
BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
@@ -360,6 +402,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, neon_composite_over_n_8_8888 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, neon_composite_over_n_8_8888 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888 },
+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_n_0565 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_8888_8_8888 },
commit 2d332c7a569803107e11b41c7b2c020b4050e26e
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Wed Dec 9 10:33:01 2009 +0200
ARM: added 'neon_composite_src_0565_8888' fast path
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index d35704d..bffc676 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -387,6 +387,41 @@ generate_composite_function \
/******************************************************************************/
+.macro pixman_composite_src_0565_8888_process_pixblock_head
+ vshrn.u16 d30, q0, #8
+ vshrn.u16 d29, q0, #3
+ vsli.u16 q0, q0, #5
+ vmov.u8 d31, #255
+ vsri.u8 d30, d30, #5
+ vsri.u8 d29, d29, #6
+ vshrn.u16 d28, q0, #2
+.endm
+
+.macro pixman_composite_src_0565_8888_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_src_0565_8888_process_pixblock_tail_head
+ pixman_composite_src_0565_8888_process_pixblock_tail
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld1.16 {d0, d1}, [SRC]!
+ pixman_composite_src_0565_8888_process_pixblock_head
+ cache_preload 8, 8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0565_8888_process_pixblock_head, \
+ pixman_composite_src_0565_8888_process_pixblock_tail, \
+ pixman_composite_src_0565_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
.macro pixman_composite_add_8000_8000_process_pixblock_head
vqadd.u8 q14, q0, q2
vqadd.u8 q15, q1, q3
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index c7f0870..4894285 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -214,6 +214,7 @@ BIND_SRC_NULL_DST(src_8888_8888, uint32_t, 1, uint32_t, 1)
BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
+BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
@@ -344,6 +345,10 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
{ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_8888_0565 },
{ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_8888_0565 },
{ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_8888_0565 },
+ { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_src_0565_8888 },
+ { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_src_0565_8888 },
+ { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_a8b8g8r8, neon_composite_src_0565_8888 },
+ { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_src_0565_8888 },
{ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_src_8888_8888 },
{ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_src_8888_8888 },
{ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_src_8888_8888 },
commit 062da411d81c7d970a302dd2c283ef5327b867da
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Tue Dec 8 15:04:41 2009 +0200
ARM: added 'neon_composite_add_8888_8888_8888' fast path
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 81f0b92..d35704d 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -983,6 +983,53 @@ generate_composite_function \
/******************************************************************************/
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_head
+ /* expecting source data in {d0, d1, d2, d3} */
+ /* destination data in {d4, d5, d6, d7} */
+ /* mask in {d24, d25, d26, d27} */
+ vmull.u8 q8, d27, d0
+ vmull.u8 q9, d27, d1
+ vmull.u8 q10, d27, d2
+ vmull.u8 q11, d27, d3
+ vrshr.u16 q0, q8, #8
+ vrshr.u16 q1, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q9
+ vraddhn.u16 d2, q12, q10
+ vraddhn.u16 d3, q13, q11
+ vqadd.u8 q14, q0, q2
+ vqadd.u8 q15, q1, q3
+.endm
+
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vld4.8 {d24, d25, d26, d27}, [MASK]!
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+ pixman_composite_add_8888_8888_8888_process_pixblock_head
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
.macro pixman_composite_over_8888_n_8888_process_pixblock_head
/* expecting source data in {d0, d1, d2, d3} */
/* destination data in {d4, d5, d6, d7} */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index fe444f2..c7f0870 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -227,6 +227,7 @@ BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
+BIND_SRC_MASK_DST(add_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
BIND_SRC_MASK_DST(over_8888_8_8888, uint32_t, 1, uint8_t, 1, uint32_t, 1)
BIND_SRC_MASK_DST(over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
@@ -369,6 +370,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
{ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_over_8888_8888 },
{ PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, neon_composite_add_n_8_8 },
{ PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_a8, PIXMAN_a8, neon_composite_add_8_8_8 },
+ { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, neon_composite_add_8888_8888_8888 },
{ PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, neon_composite_add_8000_8000 },
{ PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_add_8888_8888 },
{ PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, neon_composite_add_8888_8888 },
commit 3d0eedb5d9af97fed68e2da03d6aee40197e2a76
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Tue Dec 8 14:39:41 2009 +0200
ARM: added 'neon_composite_add_8888_8888' fast path
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 49d840c..81f0b92 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -427,6 +427,38 @@ generate_composite_function \
/******************************************************************************/
+.macro pixman_composite_add_8888_8888_process_pixblock_tail_head
+ vld1.8 {d0, d1, d2, d3}, [SRC]!
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ vqadd.u8 q14, q0, q2
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vqadd.u8 q15, q1, q3
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8000_8000_process_pixblock_head, \
+ pixman_composite_add_8000_8000_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
.macro pixman_composite_over_8888_8888_process_pixblock_head
vmvn.8 d24, d3 /* get inverted alpha */
/* do alpha blending */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 675b64f..fe444f2 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -215,6 +215,7 @@ BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
+BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
@@ -369,6 +370,8 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
{ PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, neon_composite_add_n_8_8 },
{ PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_a8, PIXMAN_a8, neon_composite_add_8_8_8 },
{ PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, neon_composite_add_8000_8000 },
+ { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_add_8888_8888 },
+ { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, neon_composite_add_8888_8888 },
{ PIXMAN_OP_NONE },
};
commit 86b54c6701666d087f0234047128fbf0fd6468b6
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Mon Dec 7 22:53:30 2009 +0200
ARM: added 'neon_composite_over_8888_8_8888' fast path
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 9f3b566..49d840c 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1060,6 +1060,42 @@ generate_composite_function \
/******************************************************************************/
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_over_8888_n_8888_process_pixblock_tail
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+ vld1.8 {d15}, [MASK]!
+ pixman_composite_over_8888_n_8888_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_8888_8_8888_init
+ vpush {d8-d15}
+.endm
+
+.macro pixman_composite_over_8888_8_8888_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_8888_8_8888_init, \
+ pixman_composite_over_8888_8_8888_cleanup, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8_8888_process_pixblock_tail_head \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 15 /* mask_basereg */
+
+/******************************************************************************/
+
.macro pixman_composite_src_0888_0888_process_pixblock_head
.endm
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 62d82f6..675b64f 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -226,6 +226,7 @@ BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
+BIND_SRC_MASK_DST(over_8888_8_8888, uint32_t, 1, uint8_t, 1, uint32_t, 1)
BIND_SRC_MASK_DST(over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
void
@@ -354,6 +355,10 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888 },
+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_8888_8_8888 },
+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, neon_composite_over_8888_8_8888 },
+ { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, neon_composite_over_8888_8_8888 },
+ { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, neon_composite_over_8888_8_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_8888_0565 },
{ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_over_8888_0565 },
commit aec1524e773758369ab627553dc5c23d18619a85
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Mon Dec 7 22:42:17 2009 +0200
ARM: added 'neon_composite_over_8888_8888_8888' fast path
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index e66fb86..9f3b566 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1024,6 +1024,42 @@ generate_composite_function \
/******************************************************************************/
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_over_8888_n_8888_process_pixblock_tail
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+ vld4.8 {d12, d13, d14, d15}, [MASK]!
+ pixman_composite_over_8888_n_8888_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_8888_8888_8888_init
+ vpush {d8-d15}
+.endm
+
+.macro pixman_composite_over_8888_8888_8888_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_8888_8888_8888_init, \
+ pixman_composite_over_8888_8888_8888_cleanup, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 12 /* mask_basereg */
+
+/******************************************************************************/
+
.macro pixman_composite_src_0888_0888_process_pixblock_head
.endm
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 39d7bf9..62d82f6 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -226,6 +226,7 @@ BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
+BIND_SRC_MASK_DST(over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
void
pixman_composite_src_n_8_asm_neon (int32_t w,
@@ -353,6 +354,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888 },
+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_8888_0565 },
{ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_over_8888_0565 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888 },
commit ba59d53d0b61effc422c4004a9f0e6cf848598d8
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Tue Dec 8 14:13:12 2009 +0200
ARM: minor source formatting changes
Now it's a bit harder to exceed 80 characters line limit
when binding assembly functions.
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 9194924..39d7bf9 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -35,27 +35,27 @@
#define BIND_SRC_NULL_DST(name, src_type, src_cnt, dst_type, dst_cnt) \
void \
-pixman_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride); \
+pixman_composite_##name##_asm_neon (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride); \
\
static void \
-neon_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
+neon_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
{ \
dst_type *dst_line; \
src_type *src_line; \
@@ -66,36 +66,36 @@ neon_##name (pixman_implementation_t *imp, \
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
dst_stride, dst_line, dst_cnt); \
\
- pixman_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride); \
+ pixman_composite_##name##_asm_neon (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride); \
}
#define BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt) \
void \
-pixman_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- uint32_t src, \
- int32_t unused, \
- mask_type *mask, \
- int32_t mask_stride); \
+pixman_composite_##name##_asm_neon (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ uint32_t src, \
+ int32_t unused, \
+ mask_type *mask, \
+ int32_t mask_stride); \
\
static void \
-neon_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
+neon_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
{ \
dst_type *dst_line; \
mask_type *mask_line; \
@@ -112,36 +112,36 @@ neon_##name (pixman_implementation_t *imp, \
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
mask_stride, mask_line, mask_cnt); \
\
- pixman_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src, 0, \
- mask_line, mask_stride); \
+ pixman_composite_##name##_asm_neon (width, height, \
+ dst_line, dst_stride, \
+ src, 0, \
+ mask_line, mask_stride); \
}
#define BIND_SRC_N_DST(name, src_type, src_cnt, dst_type, dst_cnt) \
void \
-pixman_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride, \
- uint32_t mask); \
+pixman_composite_##name##_asm_neon (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride, \
+ uint32_t mask); \
\
static void \
-neon_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
+neon_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
{ \
dst_type *dst_line; \
src_type *src_line; \
@@ -158,38 +158,38 @@ neon_##name (pixman_implementation_t *imp, \
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
src_stride, src_line, src_cnt); \
\
- pixman_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride, \
- mask); \
+ pixman_composite_##name##_asm_neon (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride, \
+ mask); \
}
#define BIND_SRC_MASK_DST(name, src_type, src_cnt, mask_type, mask_cnt, \
dst_type, dst_cnt) \
void \
-pixman_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride, \
- mask_type *mask, \
- int32_t mask_stride); \
+pixman_composite_##name##_asm_neon (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride, \
+ mask_type *mask, \
+ int32_t mask_stride); \
\
static void \
-neon_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
+neon_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
{ \
dst_type *dst_line; \
src_type *src_line; \
@@ -203,29 +203,29 @@ neon_##name (pixman_implementation_t *imp, \
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
mask_stride, mask_line, mask_cnt); \
\
- pixman_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride, \
- mask_line, mask_stride); \
+ pixman_composite_##name##_asm_neon (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride, \
+ mask_line, mask_stride); \
}
-BIND_SRC_NULL_DST(composite_src_8888_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(composite_src_0565_0565, uint16_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(composite_src_0888_0888, uint8_t, 3, uint8_t, 3)
-BIND_SRC_NULL_DST(composite_src_8888_0565, uint32_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(composite_add_8000_8000, uint8_t, 1, uint8_t, 1)
+BIND_SRC_NULL_DST(src_8888_8888, uint32_t, 1, uint32_t, 1)
+BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
+BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
+BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
+BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
-BIND_SRC_NULL_DST(composite_over_8888_0565, uint32_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(composite_over_8888_8888, uint32_t, 1, uint32_t, 1)
+BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
+BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
-BIND_N_MASK_DST(composite_over_n_8_0565, uint8_t, 1, uint16_t, 1)
-BIND_N_MASK_DST(composite_over_n_8_8888, uint8_t, 1, uint32_t, 1)
-BIND_N_MASK_DST(composite_add_n_8_8, uint8_t, 1, uint8_t, 1)
+BIND_N_MASK_DST(over_n_8_0565, uint8_t, 1, uint16_t, 1)
+BIND_N_MASK_DST(over_n_8_8888, uint8_t, 1, uint32_t, 1)
+BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
-BIND_SRC_N_DST(composite_over_8888_n_8888, uint32_t, 1, uint32_t, 1)
+BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_MASK_DST(composite_add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
+BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
void
pixman_composite_src_n_8_asm_neon (int32_t w,
More information about the xorg-commit
mailing list