pixman: Branch 'master' - 3 commits
Siarhei Siamashka
siamashka at kemper.freedesktop.org
Wed Dec 9 05:54:28 PST 2009
pixman/pixman-arm-neon-asm.S | 130 +++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-arm-neon.c | 6 +
2 files changed, 136 insertions(+)
New commits:
commit ce78288d7783a27700223c39e23880f4f425f70b
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Fri Nov 6 02:25:47 2009 +0200
ARM: added 'neon_composite_src_pixbuf_8888' fast path
This is an ARM NEON optimized conversion of the native non-premultiplied
RGBA format used by GTK/GDK into the native premultiplied 32bpp a8r8g8b8 format used by cairo/pixman.
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 8010e80..db1833d 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1408,3 +1408,60 @@ generate_composite_function \
0, /* dst_r_basereg */ \
0, /* src_basereg */ \
0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_head /* first half of the premultiply: form 16-bit products of d0..d2 with d3 */
+ vmull.u8 q8, d3, d0 /* q8 = d3 * d0, each u8 lane widened to u16; d3 is presumably the alpha plane of an RGBA source - confirm against the loader */
+ vmull.u8 q9, d3, d1 /* q9 = d3 * d1 */
+ vmull.u8 q10, d3, d2 /* q10 = d3 * d2 */
+.endm
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail /* second half of the premultiply: rounded division of q8..q10 by 255, narrowed into d30..d28 */
+ vrshr.u16 q11, q8, #8 /* q11 = (q8 + 128) >> 8 */
+ vswp d3, d31 /* exchange d3 and d31, so d31 (4th output plane) now holds the unmodified d3 */
+ vrshr.u16 q12, q9, #8 /* q12 = (q9 + 128) >> 8 */
+ vrshr.u16 q13, q10, #8 /* q13 = (q10 + 128) >> 8 */
+ vraddhn.u16 d30, q11, q8 /* d30 = (q8 + q11 + 128) >> 8, i.e. q8 / 255 rounded to nearest */
+ vraddhn.u16 d29, q12, q9 /* d29 = q9 / 255 rounded to nearest */
+ vraddhn.u16 d28, q13, q10 /* d28 = q10 / 255 rounded; the d0 product lands in d30 and the d2 product in d28, so planes 0 and 2 swap places */
+.endm
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head /* same instructions as tail, then head, interleaved with the load of the next block, the store of the finished block and PF lines */
+ vrshr.u16 q11, q8, #8 /* tail: q11 = (q8 + 128) >> 8 for the products computed earlier */
+ vswp d3, d31 /* tail: d31 receives the current d3 before d3 is overwritten by the load below */
+ vrshr.u16 q12, q9, #8
+ vrshr.u16 q13, q10, #8
+ vld4.8 {d0, d1, d2, d3}, [SRC]! /* load 32 bytes de-interleaved: byte k of each 4-byte pixel goes to dk (8 pixels); SRC is post-incremented */
+ vraddhn.u16 d30, q11, q8 /* tail: d30 = q8 / 255 rounded (q8 still holds the previous block's product) */
+ PF add PF_X, PF_X, #8 /* PF-prefixed lines: presumably prefetch bookkeeping; PF and its operands are defined outside this hunk - confirm there */
+ PF tst PF_CTL, #0xF
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d29, q12, q9
+ vraddhn.u16 d28, q13, q10
+ vmull.u8 q8, d3, d0 /* head: 16-bit products for the freshly loaded block */
+ vmull.u8 q9, d3, d1
+ vmull.u8 q10, d3, d2
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! /* store the 8 finished pixels re-interleaved from d28..d31; :128 specifies a 128-bit aligned address; DST_W is post-incremented */
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+.endm
+
+generate_composite_function /* instantiates the function from the three pixblock macros above; the generator macro itself is defined outside this hunk */ \
+ pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, /* presumably: function name, src bpp, mask bpp, dst bpp - confirm against the macro definition */ \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_pixbuf_8888_process_pixblock_head, \
+ pixman_composite_src_pixbuf_8888_process_pixblock_tail, \
+ pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg; consistent with tail_head storing from d28..d31 */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg; consistent with tail_head loading into d0..d3 */ \
+ 0 /* mask_basereg */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 21ed436..00237da 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -257,6 +257,7 @@ BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
BIND_SRC_NULL_DST(src_0888_8888_rev, uint8_t, 3, uint32_t, 1)
BIND_SRC_NULL_DST(src_0888_0565_rev, uint8_t, 3, uint16_t, 1)
+BIND_SRC_NULL_DST(src_pixbuf_8888, uint32_t, 1, uint32_t, 1)
BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
@@ -401,6 +402,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
{ PIXMAN_OP_SRC, PIXMAN_r8g8b8, PIXMAN_null, PIXMAN_r8g8b8, neon_composite_src_0888_0888 },
{ PIXMAN_OP_SRC, PIXMAN_b8g8r8, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_src_0888_8888_rev },
{ PIXMAN_OP_SRC, PIXMAN_b8g8r8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_0888_0565_rev },
+ { PIXMAN_OP_SRC, PIXMAN_pixbuf, PIXMAN_pixbuf, PIXMAN_a8r8g8b8, neon_composite_src_pixbuf_8888 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, neon_composite_over_n_8_0565 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, neon_composite_over_n_8_0565 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888 },
commit a732d3baeb0697b91a713fd6b51b68ee7ca68e03
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Thu Nov 5 20:27:38 2009 +0200
ARM: added 'neon_composite_src_0888_0565_rev' fast path
This is ARM NEON optimized conversion of native RGB format used by
GTK/GDK into r5g6b5 format.
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index bb68be6..8010e80 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1370,3 +1370,41 @@ generate_composite_function \
0, /* dst_r_basereg */ \
0, /* src_basereg */ \
0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_head /* pre-position planes d1 and d2 in the top byte of 16-bit lanes for the bit inserts done in tail */
+ vshll.u8 q8, d1, #8 /* q8 = d1 << 8, each u8 lane widened to u16 */
+ vshll.u8 q9, d2, #8 /* q9 = d2 << 8, each u8 lane widened to u16 */
+.endm
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail /* pack the three 8-bit planes into 5:6:5 bit fields of the 16-bit lanes of q14 */
+ vshll.u8 q14, d0, #8 /* q14 = d0 << 8: d0 occupies bits 15..8 */
+ vsri.u16 q14, q8, #5 /* insert q8 >> 5 below the top 5 bits: bits 15..11 keep d0's top 5 bits, d1 now starts at bit 10 */
+ vsri.u16 q14, q9, #11 /* insert q9 >> 11 below the top 11 bits: bits 10..5 keep d1's top 6 bits, bits 4..0 = d2's top 5 bits */
+.endm
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head /* same instructions as tail, then head, interleaved with the load of the next block and the store of the finished one */
+ vshll.u8 q14, d0, #8 /* tail: consumes d0 before the load below overwrites it */
+ vld3.8 {d0, d1, d2}, [SRC]! /* load 24 bytes de-interleaved: byte k of each 3-byte pixel goes to dk (8 pixels); SRC is post-incremented */
+ vsri.u16 q14, q8, #5 /* tail: q8 still holds the previous block's d1 << 8 */
+ vsri.u16 q14, q9, #11 /* tail: q9 still holds the previous block's d2 << 8; q14 is now complete */
+ vshll.u8 q8, d1, #8 /* head: for the freshly loaded block */
+ vst1.16 {d28, d29}, [DST_W, :128]! /* store q14 (d28, d29) = 8 packed 16-bit pixels; :128 specifies a 128-bit aligned address; DST_W is post-incremented */
+ vshll.u8 q9, d2, #8 /* head: for the freshly loaded block */
+.endm
+
+generate_composite_function /* instantiates the function from the three pixblock macros above; the generator macro itself is defined outside this hunk */ \
+ pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, /* presumably: function name, src bpp, mask bpp, dst bpp - confirm against the macro definition */ \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0888_0565_rev_process_pixblock_head, \
+ pixman_composite_src_0888_0565_rev_process_pixblock_tail, \
+ pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg; consistent with tail_head storing from d28, d29 */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg; consistent with tail_head loading into d0..d2 */ \
+ 0 /* mask_basereg */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index ac0c558..21ed436 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -256,6 +256,7 @@ BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
BIND_SRC_NULL_DST(src_0888_8888_rev, uint8_t, 3, uint32_t, 1)
+BIND_SRC_NULL_DST(src_0888_0565_rev, uint8_t, 3, uint16_t, 1)
BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
@@ -399,6 +400,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
{ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_src_8888_8888 },
{ PIXMAN_OP_SRC, PIXMAN_r8g8b8, PIXMAN_null, PIXMAN_r8g8b8, neon_composite_src_0888_0888 },
{ PIXMAN_OP_SRC, PIXMAN_b8g8r8, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_src_0888_8888_rev },
+ { PIXMAN_OP_SRC, PIXMAN_b8g8r8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_0888_0565_rev },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, neon_composite_over_n_8_0565 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, neon_composite_over_n_8_0565 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888 },
commit a1386a1ceb0c50d2e23cf30be30ea165d2d2ea7c
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Thu Nov 5 19:43:09 2009 +0200
ARM: added 'neon_composite_src_0888_8888_rev' fast path
This is ARM NEON optimized conversion of native RGB format used by
GTK/GDK into native 32bpp RGB format used by cairo/pixman.
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 691a194..bb68be6 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1335,3 +1335,38 @@ generate_composite_function \
0, /* dst_r_basereg */ \
0, /* src_basereg */ \
0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_head /* the whole per-block computation is one register swap */
+ vswp d0, d2 /* exchange planes 0 and 2, which reverses the order of the three colour bytes */
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail /* intentionally empty: head already leaves the result in d0..d2 */
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head /* store the finished block, then load and process the next one */
+ vst4.8 {d0, d1, d2, d3}, [DST_W]! /* store 8 pixels of 4 bytes interleaved from d0..d3; DST_W is post-incremented; no alignment specifier on this store */
+ vld3.8 {d0, d1, d2}, [SRC]! /* load 24 bytes de-interleaved into d0..d2 (8 pixels); d3 is not written by this load; SRC is post-incremented */
+ vswp d0, d2 /* head: exchange planes 0 and 2 of the freshly loaded block */
+ cache_preload 8, 8 /* macro defined outside this hunk; presumably issues the prefetch for this step - confirm there */
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_init /* passed as the init argument of generate_composite_function below */
+ veor d3, d3, d3 /* d3 = 0; the pixblock macros above never write d3, so the 4th byte they store for each pixel is zero */
+.endm
+
+generate_composite_function /* instantiates the function from the macros above; the generator macro itself is defined outside this hunk */ \
+ pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, /* presumably: function name, src bpp, mask bpp, dst bpp - confirm against the macro definition */ \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ pixman_composite_src_0888_8888_rev_init, /* custom init (zeroes d3) in place of default_init */ \
+ default_cleanup, \
+ pixman_composite_src_0888_8888_rev_process_pixblock_head, \
+ pixman_composite_src_0888_8888_rev_process_pixblock_tail, \
+ pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg; consistent with tail_head storing from d0..d3 */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg; consistent with tail_head loading into d0..d2 */ \
+ 0 /* mask_basereg */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index fef98a1..ac0c558 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -255,6 +255,7 @@ BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
+BIND_SRC_NULL_DST(src_0888_8888_rev, uint8_t, 3, uint32_t, 1)
BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
@@ -397,6 +398,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
{ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_src_8888_8888 },
{ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_src_8888_8888 },
{ PIXMAN_OP_SRC, PIXMAN_r8g8b8, PIXMAN_null, PIXMAN_r8g8b8, neon_composite_src_0888_0888 },
+ { PIXMAN_OP_SRC, PIXMAN_b8g8r8, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_src_0888_8888_rev },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, neon_composite_over_n_8_0565 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, neon_composite_over_n_8_0565 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888 },
More information about the xorg-commit
mailing list