[Pixman] [PATCH 14/32] armv6: Add in_n_8888 fast path
Ben Avison
bavison at riscosopen.org
Thu Aug 7 09:50:10 PDT 2014
lowlevel-blt-bench results:
      Before          After
      Mean   StdDev   Mean   StdDev  Confidence  Change
L1    18.8   0.1      63.9   0.9     100.0%      +239.0%
L2    16.0   0.4      58.5   1.3     100.0%      +265.8%
M     13.1   0.0      56.8   0.1     100.0%      +332.6%
HT    11.6   0.0      31.3   0.3     100.0%      +169.6%
VT    11.4   0.0      27.2   0.2     100.0%      +139.2%
R     11.0   0.1      28.2   0.2     100.0%      +156.1%
RT     6.8   0.1      12.9   0.2     100.0%      +89.0%
---
pixman/pixman-arm-simd-asm.S | 77 ++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-arm-simd.c | 4 ++
2 files changed, 81 insertions(+), 0 deletions(-)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index f4b3a3e..ca34b5e 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -1612,3 +1612,80 @@ generate_composite_function \
over_8888_x_0565_process_tail
/******************************************************************************/
+
+.macro in_n_8888_init /* one-time setup: loop-invariant registers and the GE flags used by the pixel macros */
+ /* Source components and constant half are loop invariants */
+ ldr SRC, [sp, #ARGS_STACK_OFFSET] /* SRC = word read from the stack at ARGS_STACK_OFFSET; presumably the solid source colour - confirm against caller */
+ ldr MASK, =0x00800080 /* MASK = 0x0080 in each 16-bit lane: the "constant half" added to every product */
+ uxtb16 STRIDE_S, SRC @ rb: bytes 0 and 2 of SRC, zero-extended into the two halfwords of STRIDE_S
+ uxtb16 SRC, SRC, ror #8 @ ag: bytes 1 and 3 of SRC, likewise; SRC no longer holds the packed word
+ /* Set GE[3:0] to 0101 so SEL instructions do what we want: bytes 0 and 2 from the first operand, bytes 1 and 3 from the second */
+ uadd8 SCRATCH, MASK, MASK /* 0x80 + 0x80 carries out of byte lanes 0 and 2 only; the sum in SCRATCH is not used by this macro */
+.endm
+
+.macro in_n_8888_1pixel dst, rb, ag, a /* dst = the colour held as rb/ag halves, times a, divided by 255 with rounding; clobbers a; needs a <= 255 and GE = 0101 */
+ mla dst, rb, a, MASK /* dst = rb * a + MASK in one multiply; with MASK = 0x00800080 and a <= 255 neither 16-bit lane overflows */
+ mla a, ag, a, MASK /* same for the ag half; the alpha in a is overwritten by the product */
+ uxtab16 dst, dst, dst, ror #8 /* each lane += its own high byte, i.e. t + (t >> 8); the quotient is now the high byte of each lane */
+ uxtab16 a, a, a, ror #8 /* likewise for the ag lanes */
+ mov dst, dst, ror #8 /* bring the rb results down from bytes 1 and 3 to bytes 0 and 2 */
+ sel dst, dst, a /* with GE = 0101: bytes 0 and 2 from dst (rb), bytes 1 and 3 from a (ag) */
+.endm
+
+.macro in_n_8888_2pixels dst0, dst1, rb, ag, a0, a1 /* two-pixel form of in_n_8888_1pixel with the two dependency chains interleaved; clobbers a0 and a1 */
+ mla dst0, rb, a0, MASK /* pixel 0: rb * a0 + MASK */
+ mla a0, ag, a0, MASK /* pixel 0: ag * a0 + MASK, overwriting a0 */
+ mla dst1, rb, a1, MASK /* pixel 1: rb * a1 + MASK */
+ mla a1, ag, a1, MASK /* pixel 1: ag * a1 + MASK, overwriting a1 */
+ uxtab16 dst0, dst0, dst0, ror #8 /* each lane += its own high byte, i.e. t + (t >> 8); result is in the high byte of each lane */
+ uxtab16 a0, a0, a0, ror #8
+ uxtab16 dst1, dst1, dst1, ror #8
+ uxtab16 a1, a1, a1, ror #8
+ mov dst0, dst0, ror #8 /* move the rb results down to bytes 0 and 2 */
+ mov dst1, dst1, ror #8
+ sel dst0, dst0, a0 /* with GE = 0101: bytes 0 and 2 from dstN (rb), bytes 1 and 3 from aN (ag) */
+ sel dst1, dst1, a1
+.endm
+
+.macro in_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* fetch byte 3 of each destination pixel in this numbytes group and step DST past the group */
+ .if numbytes == 4
+ ldrb SCRATCH, [DST, #3] /* byte 3 of the pixel; presumably its alpha channel - confirm against the pixel format */
+ add DST, DST, #4 /* step DST past the pixel; the tail stores via pixst afterwards */
+ .elseif numbytes == 8
+ ldrb SCRATCH, [DST, #3] /* byte 3 of pixel 0 */
+ ldrb STRIDE_M, [DST, #7] /* byte 3 of pixel 1 */
+ add DST, DST, #8 /* step DST past both pixels */
+ .else // numbytes == 16
+ ldrb SCRATCH, [DST, #3] @ it's OK, SCRATCH isn't used for prefetch of dest buffer
+ ldrb STRIDE_M, [DST, #7] /* byte 3 of pixel 1 */
+ pixld , 16, 0, DST, 0 /* presumably loads the four pixels into WK0-WK3 and advances DST; the tail reads WK2/WK3 - confirm against pixld */
+ .endif
+.endm
+
+.macro in_n_8888_process_tail cond, numbytes, firstreg /* scale the rb/ag halves in STRIDE_S/SRC by each byte fetched in the head, then store the results */
+ .if numbytes == 4
+ in_n_8888_1pixel WK3, STRIDE_S, SRC, SCRATCH /* result in WK3; SCRATCH is consumed */
+ pixst , 4, 3, DST /* NOTE(review): DST was already advanced in the head, so pixst presumably addresses relative to the updated pointer - confirm against pixst */
+ .elseif numbytes == 8
+ in_n_8888_2pixels WK2, WK3, STRIDE_S, SRC, SCRATCH, STRIDE_M /* results in WK2 and WK3 */
+ pixst , 8, 2, DST
+ .else // numbytes == 16
+ in_n_8888_2pixels WK0, WK1, STRIDE_S, SRC, SCRATCH, STRIDE_M /* pixels 0 and 1; WK0/WK1 are overwritten with the results */
+ uxtb SCRATCH, WK2, ror #24 /* SCRATCH = top byte (bits 31:24) of WK2, for pixel 2 */
+ uxtb STRIDE_M, WK3, ror #24 /* STRIDE_M = top byte (bits 31:24) of WK3, for pixel 3 */
+ in_n_8888_2pixels WK2, WK3, STRIDE_S, SRC, SCRATCH, STRIDE_M /* pixels 2 and 3; results replace WK2 and WK3 */
+ pixst , 16, 0, DST
+ .endif
+.endm
+
+generate_composite_function /* instantiate the armv6 in_n_8888 routine from the in_n_8888_* macros */ \
+ pixman_composite_in_n_8888_asm_armv6, 0, 0, 32, /* function name, then presumably src/mask/dst bits per pixel - confirm against the macro definition */ \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE, /* flags; the tail macro issues its own pixst */ \
+ 2, /* prefetch distance */ \
+ in_n_8888_init, /* init */ \
+ nop_macro, /* newline */ \
+ nop_macro, /* cleanup */ \
+ in_n_8888_process_head, /* process_head */ \
+ in_n_8888_process_tail /* process_tail */
+
+/******************************************************************************/
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 9b9b926..8bdda82 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -59,6 +59,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_0565,
uint16_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, in_n_8888,
+ uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
uint32_t, 1, uint32_t, 1)
@@ -273,6 +275,8 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (IN, a8r8g8b8, null, a8, armv6_composite_in_8888_8),
PIXMAN_STD_FAST_PATH (IN, a8b8g8r8, null, a8, armv6_composite_in_8888_8),
+ PIXMAN_STD_FAST_PATH (IN, solid, null, a8r8g8b8, armv6_composite_in_n_8888),
+ PIXMAN_STD_FAST_PATH (IN, solid, null, a8b8g8r8, armv6_composite_in_n_8888),
PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, a8r8g8b8, armv6_composite_in_reverse_8888_8888),
PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, x8r8g8b8, armv6_composite_in_reverse_8888_8888),
PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, a8b8g8r8, armv6_composite_in_reverse_8888_8888),
--
1.7.5.4
More information about the Pixman
mailing list