[Pixman] [PATCH 09/14] ARMv6: add platform-specific fast path for over_n_8888
Ben Avison
bavison at riscosopen.org
Tue Oct 1 16:00:29 PDT 2013
lowlevel-blt-bench results - this patch:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 21.3 0.1 45.8 0.2 100.0% +114.6%
L2 17.5 0.5 43.3 1.1 100.0% +148.4%
M 14.1 0.0 44.4 0.1 100.0% +215.8%
HT 12.7 0.1 26.9 0.2 100.0% +111.2%
VT 12.4 0.1 23.5 0.2 100.0% +89.4%
R 12.0 0.1 24.6 0.2 100.0% +104.4%
RT 8.2 0.1 12.1 0.3 100.0% +48.8%
or cumulative with preceding patch:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 11.3 0.1 45.8 0.2 100.0% +305.6%
L2 10.0 0.1 43.3 1.1 100.0% +332.0%
M 8.6 0.0 44.4 0.1 100.0% +414.7%
HT 5.1 0.0 26.9 0.2 100.0% +425.5%
VT 4.9 0.0 23.5 0.2 100.0% +376.1%
R 4.8 0.0 24.6 0.2 100.0% +408.1%
RT 2.1 0.0 12.1 0.3 100.0% +480.4%
Trimmed cairo-perf-trace does not show any significant change for this patch,
reflecting the fact that over_n_8888 is barely used in the traces.
---
pixman/pixman-arm-simd-asm.S | 48 ++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-arm-simd.c | 8 +++++++
2 files changed, 56 insertions(+)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index 259fb88..e85d036 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -611,6 +611,54 @@ generate_composite_function \
/******************************************************************************/
+.macro over_n_8888_init /* one-off setup of SRC, MASK, STRIDE_M and the GE flags used by the per-pixel code */
+ ldr SRC, [sp, #ARGS_STACK_OFFSET] /* SRC = solid source pixel; presumably the same word the entry stub reads at [sp] - confirm */
+ /* Hold loop invariant in MASK */
+ ldr MASK, =0x00800080 /* passed to mul_8888_8 as its last operand for every pixel */
+ /* Hold multiplier for destination in STRIDE_M */
+ mov STRIDE_M, #255
+ sub STRIDE_M, STRIDE_M, SRC, lsr #24 /* STRIDE_M = 255 - top byte of SRC (the source alpha) */
+ /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+ uadd8 SCRATCH, MASK, MASK /* 0x80 + 0x80 carries out of byte lanes 0 and 2 only, so only GE[0] and GE[2] are set */
+.endm
+
+.macro over_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* head stage: a single pixld on DST; cond, unaligned_src, unaligned_mask and preload are not referenced */
+ pixld , numbytes, firstreg, DST, 0 /* condition operand left empty; presumably loads numbytes bytes from DST into WK registers starting at firstreg - confirm against pixld */
+.endm
+
+.macro over_n_8888_1pixel dst /* blend one destination pixel held in register WK<dst>, in place */
+ mul_8888_8 WK&dst, STRIDE_M, SCRATCH, MASK /* presumably scales each channel of WK<dst> by STRIDE_M (255 - source alpha); mul_8888_8 is defined elsewhere - confirm */
+ uqadd8 WK&dst, WK&dst, SRC /* per-byte unsigned saturating add of the solid source */
+.endm
+
+.macro over_n_8888_process_tail cond, numbytes, firstreg /* tail stage: run over_n_8888_1pixel on each register, then pixst; cond is not referenced */
+ .set PROCESS_REG, firstreg /* assembly-time counter: number of the WK register to process next */
+ .rept numbytes / 4 /* one expansion per 4 bytes of numbytes */
+ over_n_8888_1pixel %(PROCESS_REG)
+ .set PROCESS_REG, PROCESS_REG+1
+ .endr
+ pixst , numbytes, firstreg, DST /* condition operand left empty; presumably writes the processed registers back to DST - confirm against pixst */
+.endm
+
+startfunc pixman_composite_over_n_8888_asm_armv6 /* entry stub: sources with alpha 0xff are diverted to the src_n_8888 routine */
+ ldr ip, [sp] /* ip = solid source pixel, read from the word at the top of the stack */
+ /* Zero source is already filtered out in armv6_composite_over_n_8888() */
+ mvns ip, ip, asr #24 /* Source alpha = 0xff? (only then is ip asr #24 all ones, so MVNS gives 0 and sets Z) */
+ beq pixman_composite_src_n_8888_asm_armv6 /* yes: plain branch (no link) to the src_n_8888 routine */
+ /* else drop through into the code that follows .endfunc */
+ .endfunc
+generate_composite_function /* emitted directly after the entry stub, which drops through into it */ \
+ pixman_composite_over_n_8888_asm_armv6_helper, 0, 0, 32 /* name, then presumably src, mask and dst bits per pixel - confirm against generate_composite_function */ \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE \
+ 2, /* prefetch distance */ \
+ over_n_8888_init, /* init */ \
+ nop_macro, /* newline */ \
+ nop_macro, /* cleanup */ \
+ over_n_8888_process_head, /* process head */ \
+ over_n_8888_process_tail /* process tail */
+
+/******************************************************************************/
+
#ifdef PROFILING
.p2align 9
#endif
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index af062e1..454c6c0 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -47,6 +47,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888,
+ uint32_t, 1) /* SKIP_ZERO_SRC: the assembly entry stub assumes a zero source has already been filtered out before it runs */
+
PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
uint32_t, 1, uint32_t, 1)
@@ -225,6 +228,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
+
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
--
1.7.10.4
More information about the Pixman
mailing list