[Pixman] [PATCH 11/12] ARMv6: Add fast path for add_8888_8888
Tomeu Vizoso
tomeu at tomeuvizoso.net
Wed Mar 12 01:25:06 PDT 2014
From: Ben Avison <bavison at riscosopen.org>
lowlevel-blt-bench results:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 27.6 0.1 125.9 0.8 100.0% +356.0%
L2 14.0 0.5 30.8 1.6 100.0% +120.3%
M 12.2 0.0 26.7 0.1 100.0% +118.8%
HT 10.2 0.1 17.0 0.1 100.0% +67.1%
VT 10.0 0.0 16.6 0.1 100.0% +65.7%
R 9.7 0.0 15.9 0.1 100.0% +64.8%
RT 5.8 0.1 7.6 0.1 100.0% +30.5%
Trimmed cairo-perf-trace results:
Before After
Mean StdDev Mean StdDev Confidence Change
t-xfce4-terminal-a1 18.6 0.1 18.4 0.1 100.0% +1.0%
---
pixman/pixman-arm-simd-asm.S | 58 ++++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-arm-simd.c | 8 ++++++
2 files changed, 66 insertions(+)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index 4f9a015..158de73 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -350,6 +350,64 @@ generate_composite_function \
/******************************************************************************/
+.macro test_zero numregs, reg1, reg2, reg3, reg4 /* leaves Z set only if the first numregs of the named WK registers are all zero; corrupts the flags */
+ teq WK&reg1, #0 /* Z = (first register == 0) */
+ .if numregs >= 2
+ teqeq WK&reg2, #0 /* EQ-conditional: only executed while every earlier register was zero */
+ .if numregs >= 3
+ teqeq WK&reg3, #0
+ .if numregs == 4
+ teqeq WK&reg4, #0
+ .endif
+ .endif
+ .endif
+.endm
+
+.macro add_8888_8888_2pixels dst1, dst2 /* adds MASK into WK register dst1 and STRIDE_M into WK register dst2, in place */
+ uqadd8 WK&dst1, WK&dst1, MASK /* UQADD8: four independent unsigned 8-bit adds, each saturating at 0xFF */
+ uqadd8 WK&dst2, WK&dst2, STRIDE_M /* second word uses STRIDE_M as its other operand */
+.endm
+
+.macro add_8888_8888_1pixel dst /* adds MASK into WK register dst, in place */
+ uqadd8 WK&dst, WK&dst, MASK /* UQADD8: four independent unsigned 8-bit adds, each saturating at 0xFF */
+.endm
+
+.macro add_8888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* cond, unaligned_src, unaligned_mask and preload are not referenced in this body */
+ pixld , numbytes, firstreg, SRC, 0 /* presumably loads numbytes from SRC into WK registers starting at firstreg - TODO confirm against pixld */
+ add DST, DST, #numbytes /* step DST forward now; the matching tail macro reads the destination at negative offsets from DST */
+.endm
+
+.macro add_8888_8888_process_tail cond, numbytes, firstreg /* saturating-adds destination words into the WK registers and stores them, unless all those WK registers are zero; cond is not referenced */
+ test_zero %(numbytes/4), firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3) /* one WK register per 4 bytes */
+ beq 01f /* every tested register was zero: skip the destination load, the add and the store */
+ .if numbytes == 16
+ ldrd MASK, STRIDE_M, [DST, #-16] /* MASK/STRIDE_M act as temporaries holding two destination words */
+ add_8888_8888_2pixels firstreg, %(firstreg+1)
+ ldrd MASK, STRIDE_M, [DST, #-8] /* reuse the same temporaries for the next two destination words */
+ add_8888_8888_2pixels %(firstreg+2), %(firstreg+3)
+ .elseif numbytes == 8
+ ldrd MASK, STRIDE_M, [DST, #-8] /* two destination words just behind DST */
+ add_8888_8888_2pixels firstreg, %(firstreg+1)
+ .else
+ ldr MASK, [DST, #-4] /* remaining case handles a single word */
+ add_8888_8888_1pixel firstreg
+ .endif
+ pixst , numbytes, firstreg, DST /* presumably writes the summed WK registers back to the destination - TODO confirm against pixst */
+01: /* target of the all-zero skip above */
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8888_8888_asm_armv6, 32, 0, 32, /* presumably src, mask, dst bits per pixel - TODO confirm against generate_composite_function */ \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH | FLAG_NO_PRELOAD_DST, \
+ 2, /* prefetch distance */ \
+ nop_macro, /* init */ \
+ nop_macro, /* newline */ \
+ nop_macro, /* cleanup */ \
+ add_8888_8888_process_head, /* loads from SRC and advances DST */ \
+ add_8888_8888_process_tail /* tests for zero, then adds and stores */
+
+/******************************************************************************/
+
.macro over_8888_8888_init
/* Hold loop invariant in MASK */
ldr MASK, =0x00800080
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 855b703..d227065 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -44,6 +44,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_8888,
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8888_8888,
+ uint32_t, 1, uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
uint32_t, 1, uint32_t, 1)
@@ -238,6 +240,12 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888),
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, armv6_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, x8r8g8b8, armv6_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, x8r8g8b8, null, x8r8g8b8, armv6_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, armv6_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, x8b8g8r8, armv6_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, x8b8g8r8, null, x8b8g8r8, armv6_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888),
--
1.8.5.3
More information about the Pixman
mailing list