[Pixman] [PATCH 10/12] ARMv6: Add fast path for over_reverse_n_8888
Tomeu Vizoso
tomeu at tomeuvizoso.net
Wed Mar 12 01:25:05 PDT 2014
From: Ben Avison <bavison at riscosopen.org>
lowlevel-blt-bench results:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 15.0 0.1 276.2 4.0 100.0% +1743.3%
L2 13.4 0.3 154.8 17.4 100.0% +1058.0%
M 11.4 0.0 73.7 0.8 100.0% +549.4%
HT 10.2 0.0 25.6 0.2 100.0% +150.9%
VT 10.0 0.0 23.0 0.3 100.0% +129.4%
R 9.8 0.1 22.9 0.2 100.0% +134.3%
RT 6.4 0.1 11.6 0.3 100.0% +80.8%
Trimmed cairo-perf-trace results:
Before After
Mean StdDev Mean StdDev Confidence Change
t-poppler 11.8 0.1 8.8 0.1 100.0% +34.6%
---
pixman/pixman-arm-simd-asm.S | 78 ++++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-arm-simd.c | 6 ++++
2 files changed, 84 insertions(+)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index 20ad05a..4f9a015 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -979,6 +979,84 @@ generate_composite_function \
/******************************************************************************/
+.macro over_reverse_n_8888_init
+ ldr SRC, [sp, #ARGS_STACK_OFFSET]
+ ldr MASK, =0x00800080
+ /* Split source pixel into RB/AG parts */
+ uxtb16 STRIDE_S, SRC
+ uxtb16 STRIDE_M, SRC, ror #8
+ /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+ uadd8 SCRATCH, MASK, MASK
+ line_saved_regs STRIDE_D, ORIG_W
+.endm
+
+.macro over_reverse_n_8888_newline
+ mov STRIDE_D, #0xFF
+.endm
+
+.macro over_reverse_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+ pixld , numbytes, firstreg, DST, 0
+.endm
+
+.macro over_reverse_n_8888_1pixel d, is_only
+ teq WK&d, #0
+ beq 8f /* replace with source */
+ bics ORIG_W, STRIDE_D, WK&d, lsr #24
+ .if is_only == 1
+ beq 49f /* skip store */
+ .else
+ beq 9f /* write same value back */
+ .endif
+ mla SCRATCH, STRIDE_S, ORIG_W, MASK /* red/blue */
+ mla ORIG_W, STRIDE_M, ORIG_W, MASK /* alpha/green */
+ uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
+ uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8
+ mov SCRATCH, SCRATCH, ror #8
+ sel ORIG_W, SCRATCH, ORIG_W
+ uqadd8 WK&d, WK&d, ORIG_W
+ b 9f
+8: mov WK&d, SRC
+9:
+.endm
+
+.macro over_reverse_n_8888_tail numbytes, reg1, reg2, reg3, reg4
+ .if numbytes == 4
+ over_reverse_n_8888_1pixel reg1, 1
+ .else
+ and SCRATCH, WK®1, WK®2
+ .if numbytes == 16
+ and SCRATCH, SCRATCH, WK®3
+ and SCRATCH, SCRATCH, WK®4
+ .endif
+ mvns SCRATCH, SCRATCH, asr #24
+ beq 49f /* skip store if all opaque */
+ over_reverse_n_8888_1pixel reg1, 0
+ over_reverse_n_8888_1pixel reg2, 0
+ .if numbytes == 16
+ over_reverse_n_8888_1pixel reg3, 0
+ over_reverse_n_8888_1pixel reg4, 0
+ .endif
+ .endif
+ pixst , numbytes, reg1, DST
+49:
+.endm
+
+.macro over_reverse_n_8888_process_tail cond, numbytes, firstreg
+ over_reverse_n_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
+.endm
+
+generate_composite_function \
+ pixman_composite_over_reverse_n_8888_asm_armv6, 0, 0, 32 \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
+ 3, /* prefetch distance */ \
+ over_reverse_n_8888_init, \
+ over_reverse_n_8888_newline, \
+ nop_macro, /* cleanup */ \
+ over_reverse_n_8888_process_head, \
+ over_reverse_n_8888_process_tail
+
+/******************************************************************************/
+
#ifdef PROFILING
.p2align 9
#endif
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 5a50098..855b703 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -50,6 +50,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888,
uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
+ uint32_t, 1)
+
PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
uint32_t, 1, uint32_t, 1)
@@ -231,6 +234,9 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888),
+
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
--
1.8.5.3
More information about the Pixman
mailing list