[Pixman] [PATCH 4/4] ARMv6: add platform-specific fast path for over_n_8888

y y
Tue Feb 5 16:33:08 PST 2013


From: Ben Avison <bavison at riscosopen.org>

Cairo-perf-traces benefits even more from a platform-specific fast path for
over_n_8888:

[ # ]  backend                         test   min(s) median(s) stddev. count
[ # ]    image: pixman 0.29.3
[  0]    image         t-firefox-chalkboard    6.525    6.541   0.44%    6/6

The t-firefox-chalkboard speedup from this patch alone is 1.25x (5.47x cumulatively for the patches so far)

lowlevel-blt-bench results - this patch:
    Before          After
    Mean   StdDev   Mean   StdDev  Confidence  Change
L1  21.3   0.1      45.8   0.2     100.0%      +114.6%
L2  17.5   0.5      43.3   1.1     100.0%      +148.4%
M   14.1   0.0      44.4   0.1     100.0%      +215.8%
HT  12.7   0.1      26.9   0.2     100.0%      +111.2%
VT  12.4   0.1      23.5   0.2     100.0%      +89.4%
R   12.0   0.1      24.6   0.2     100.0%      +104.4%
RT  8.2    0.1      12.1   0.3     100.0%      +48.8%

or cumulatively, for the patches so far:
    Before          After
    Mean   StdDev   Mean   StdDev  Confidence  Change
L1  11.3   0.1      45.8   0.2     100.0%      +305.6%
L2  10.0   0.1      43.3   1.1     100.0%      +332.0%
M   8.6    0.0      44.4   0.1     100.0%      +414.7%
HT  5.1    0.0      26.9   0.2     100.0%      +425.5%
VT  4.9    0.0      23.5   0.2     100.0%      +376.1%
R   4.8    0.0      24.6   0.2     100.0%      +408.1%
RT  2.1    0.0      12.1   0.3     100.0%      +480.4%
---
 pixman/pixman-arm-simd-asm.S |   48 ++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-simd-asm.h |   20 ++++++++++-------
 pixman/pixman-arm-simd.c     |    8 +++++++
 3 files changed, 68 insertions(+), 8 deletions(-)

diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index c209688..dd77a1a 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -611,3 +611,51 @@ generate_composite_function \
 
 /******************************************************************************/
 
+.macro over_n_8888_init
+        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
+        /* Hold loop invariant in MASK */
+        ldr     MASK, =0x00800080
+        /* Hold multiplier for destination in STRIDE_M */
+        mov     STRIDE_M, #255
+        sub     STRIDE_M, STRIDE_M, SRC, lsr #24
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        uadd8   SCRATCH, MASK, MASK
+.endm
+
+.macro over_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+        pixld   , numbytes, firstreg, DST, 0
+.endm
+
+.macro over_n_8888_1pixel dst
+        mul_8888_8  WK&dst, STRIDE_M, SCRATCH, MASK
+        uqadd8  WK&dst, WK&dst, SRC
+.endm
+
+.macro over_n_8888_process_tail  cond, numbytes, firstreg
+ .set PROCESS_REG, firstreg
+ .rept numbytes / 4
+        over_n_8888_1pixel %(PROCESS_REG)
+  .set PROCESS_REG, PROCESS_REG+1
+ .endr
+        pixst   , numbytes, firstreg, DST
+.endm
+
+startfunc pixman_composite_over_n_8888_asm_armv6
+        ldr     ip, [sp]
+        /* Zero source is already filtered out in armv6_composite_over_n_8888() */
+        mvns    ip, ip, asr #24 /* Source alpha = 0xff? */
+        beq     pixman_composite_src_n_8888_asm_armv6
+        /* else drop through... */
+ .endfunc
+generate_composite_function \
+    pixman_composite_over_n_8888_asm_armv6_helper, 0, 0, 32 \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE \
+    2, /* prefetch distance */ \
+    over_n_8888_init, \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    over_n_8888_process_head, \
+    over_n_8888_process_tail
+
+/******************************************************************************/
+
diff --git a/pixman/pixman-arm-simd-asm.h b/pixman/pixman-arm-simd-asm.h
index 6543606..d267252 100644
--- a/pixman/pixman-arm-simd-asm.h
+++ b/pixman/pixman-arm-simd-asm.h
@@ -92,6 +92,17 @@
 .set PREFETCH_TYPE_NONE,       0
 .set PREFETCH_TYPE_STANDARD,   1
 
+.macro startfunc fname
+ .func fname
+ .global fname
+ /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname:
+.endm
+
 /*
  * Definitions of macros for load/store of pixel data.
  */
@@ -561,13 +572,7 @@
                                    process_tail, \
                                    process_inner_loop
 
- .func fname
- .global fname
- /* For ELF format also set function visibility to hidden */
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
+ startfunc fname
 
 /*
  * Make some macro arguments globally visible and accessible
@@ -679,7 +684,6 @@
     SCRATCH     .req    r12
     ORIG_W      .req    r14 /* width (pixels) */
 
-fname:
         push    {r4-r11, lr}        /* save all registers */
 
         subs    Y, Y, #1
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index af062e1..454c6c0 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -47,6 +47,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
                                    uint32_t, 1, uint32_t, 1)
 
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888,
+                                 uint32_t, 1)
+
 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
                                      uint32_t, 1, uint32_t, 1)
 
@@ -225,6 +228,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
 
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
+
     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
 
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
-- 
1.7.5.4



More information about the Pixman mailing list