[Pixman] [PATCH 4/4] ARMv6: add platform-specific fast path for over_n_8888
y
y
Tue Feb 5 16:33:08 PST 2013
From: Ben Avison <bavison at riscosopen.org>
Cairo-perf-traces benefits even more from a platform-specific fast path for
over_n_8888:
[ # ] backend test min(s) median(s) stddev. count
[ # ] image: pixman 0.29.3
[ 0] image t-firefox-chalkboard 6.525 6.541 0.44% 6/6
t-firefox-chalkboard speedup is 1.25x (5.47x for cumulative patches so far)
lowlevel-blt-bench results - this patch:
          Before            After
      Mean   StdDev    Mean   StdDev   Confidence   Change
L1    21.3   0.1       45.8   0.2      100.0%       +114.6%
L2    17.5   0.5       43.3   1.1      100.0%       +148.4%
M     14.1   0.0       44.4   0.1      100.0%       +215.8%
HT    12.7   0.1       26.9   0.2      100.0%       +111.2%
VT    12.4   0.1       23.5   0.2      100.0%       +89.4%
R     12.0   0.1       24.6   0.2      100.0%       +104.4%
RT     8.2   0.1       12.1   0.3      100.0%       +48.8%
or cumulative:
          Before            After
      Mean   StdDev    Mean   StdDev   Confidence   Change
L1    11.3   0.1       45.8   0.2      100.0%       +305.6%
L2    10.0   0.1       43.3   1.1      100.0%       +332.0%
M      8.6   0.0       44.4   0.1      100.0%       +414.7%
HT     5.1   0.0       26.9   0.2      100.0%       +425.5%
VT     4.9   0.0       23.5   0.2      100.0%       +376.1%
R      4.8   0.0       24.6   0.2      100.0%       +408.1%
RT     2.1   0.0       12.1   0.3      100.0%       +480.4%
---
pixman/pixman-arm-simd-asm.S | 48 ++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-arm-simd-asm.h | 20 ++++++++++-------
pixman/pixman-arm-simd.c | 8 +++++++
3 files changed, 68 insertions(+), 8 deletions(-)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index c209688..dd77a1a 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -611,3 +611,51 @@ generate_composite_function \
/******************************************************************************/
+.macro over_n_8888_init
+ ldr SRC, [sp, #ARGS_STACK_OFFSET] /* One-time setup. SRC is presumably the solid source colour: the entry stub tests the alpha of the word at [sp] before anything is pushed - confirm ARGS_STACK_OFFSET addresses that same argument */
+ /* Hold loop invariant 0x00800080 in MASK; it is also handed to mul_8888_8 for every pixel */
+ ldr MASK, =0x00800080
+ /* Hold multiplier for destination in STRIDE_M: 255 minus the top byte (alpha) of SRC */
+ mov STRIDE_M, #255
+ sub STRIDE_M, STRIDE_M, SRC, lsr #24
+ /* Set GE[3:0] to 0101 so SEL instructions do what we want: in MASK+MASK bytes 0 and 2 (0x80+0x80) carry out, bytes 1 and 3 (0+0) do not */
+ uadd8 SCRATCH, MASK, MASK
+.endm
+
+.macro over_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+ pixld , numbytes, firstreg, DST, 0 /* Head stage: only numbytes and firstreg are used, the other parameters are accepted but ignored. Presumably loads numbytes bytes of destination from DST into WK registers starting at firstreg - confirm against pixld */
+.endm
+
+.macro over_n_8888_1pixel dst
+ mul_8888_8 WK&dst, STRIDE_M, SCRATCH, MASK /* Blend one pixel held in the WK register numbered by the argument. Presumably scales each byte of it by STRIDE_M (255 - source alpha) using SCRATCH and MASK - confirm against mul_8888_8 */
+ uqadd8 WK&dst, WK&dst, SRC /* then add SRC byte by byte with unsigned saturation */
+.endm
+
+.macro over_n_8888_process_tail cond, numbytes, firstreg
+ .set PROCESS_REG, firstreg /* Tail stage (cond is unused). PROCESS_REG is an assembly-time counter: number of the next WK register to blend */
+ .rept numbytes / 4 /* one expansion per 4 bytes of the group, i.e. per 8888 pixel */
+ over_n_8888_1pixel %(PROCESS_REG)
+ .set PROCESS_REG, PROCESS_REG+1
+ .endr
+ pixst , numbytes, firstreg, DST /* presumably writes the blended WK registers back through DST - confirm against pixst */
+.endm
+
+startfunc pixman_composite_over_n_8888_asm_armv6
+ ldr ip, [sp] /* Entry stub: fetch the first stacked argument, treated below as the source colour */
+ /* Zero source is already filtered out in armv6_composite_over_n_8888() */
+ mvns ip, ip, asr #24 /* Source alpha = 0xff? (asr #24 yields all ones only when the top byte is 0xff, so the inverted value is zero and Z is set) */
+ beq pixman_composite_src_n_8888_asm_armv6 /* opaque source: plain branch (no link) to the src_n_8888 routine instead of blending */
+ /* else drop through... into the _helper function generated immediately below, so nothing may be inserted between the two */
+ .endfunc
+generate_composite_function \
+ pixman_composite_over_n_8888_asm_armv6_helper, 0, 0, 32 \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE \
+ 2, /* prefetch distance */ \
+ over_n_8888_init, \
+ nop_macro, /* newline */ \
+ nop_macro, /* cleanup */ \
+ over_n_8888_process_head, \
+ over_n_8888_process_tail
+
+/******************************************************************************/
+
diff --git a/pixman/pixman-arm-simd-asm.h b/pixman/pixman-arm-simd-asm.h
index 6543606..d267252 100644
--- a/pixman/pixman-arm-simd-asm.h
+++ b/pixman/pixman-arm-simd-asm.h
@@ -92,6 +92,17 @@
.set PREFETCH_TYPE_NONE, 0
.set PREFETCH_TYPE_STANDARD, 1
+.macro startfunc fname
+ .func fname /* Emits the preamble and entry label for function fname; the user writes the matching .endfunc */
+ .global fname /* make the symbol visible to the linker */
+ /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname: /* the entry label itself, so instructions can follow the macro directly */
+.endm
+
/*
* Definitions of macros for load/store of pixel data.
*/
@@ -561,13 +572,7 @@
process_tail, \
process_inner_loop
- .func fname
- .global fname
- /* For ELF format also set function visibility to hidden */
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
+ startfunc fname
/*
* Make some macro arguments globally visible and accessible
@@ -679,7 +684,6 @@
SCRATCH .req r12
ORIG_W .req r14 /* width (pixels) */
-fname:
push {r4-r11, lr} /* save all registers */
subs Y, Y, #1
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index af062e1..454c6c0 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -47,6 +47,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888, /* the assembly entry point assumes a zero source never reaches it; presumably SKIP_ZERO_SRC is what filters it out - confirm in the binding macro */
+ uint32_t, 1)
+
PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
uint32_t, 1, uint32_t, 1)
@@ -225,6 +228,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
+
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
--
1.7.5.4
More information about the Pixman
mailing list