[Pixman] [PATCH 09/14] ARMv6: add platform-specific fast path for over_n_8888

Ben Avison bavison at riscosopen.org
Tue Oct 1 16:00:29 PDT 2013


lowlevel-blt-bench results - this patch:
    Before          After
    Mean   StdDev   Mean   StdDev  Confidence  Change
L1  21.3   0.1      45.8   0.2     100.0%      +114.6%
L2  17.5   0.5      43.3   1.1     100.0%      +148.4%
M   14.1   0.0      44.4   0.1     100.0%      +215.8%
HT  12.7   0.1      26.9   0.2     100.0%      +111.2%
VT  12.4   0.1      23.5   0.2     100.0%      +89.4%
R   12.0   0.1      24.6   0.2     100.0%      +104.4%
RT  8.2    0.1      12.1   0.3     100.0%      +48.8%

or cumulative with preceding patch:
    Before          After
    Mean   StdDev   Mean   StdDev  Confidence  Change
L1  11.3   0.1      45.8   0.2     100.0%      +305.6%
L2  10.0   0.1      43.3   1.1     100.0%      +332.0%
M   8.6    0.0      44.4   0.1     100.0%      +414.7%
HT  5.1    0.0      26.9   0.2     100.0%      +425.5%
VT  4.9    0.0      23.5   0.2     100.0%      +376.1%
R   4.8    0.0      24.6   0.2     100.0%      +408.1%
RT  2.1    0.0      12.1   0.3     100.0%      +480.4%

Trimmed cairo-perf-trace does not show any significant change for this patch,
reflecting the fact that over_n_8888 is barely used in the traces.
---
 pixman/pixman-arm-simd-asm.S |   48 ++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-simd.c     |    8 +++++++
 2 files changed, 56 insertions(+)

diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index 259fb88..e85d036 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -611,6 +611,54 @@ generate_composite_function \
 
 /******************************************************************************/
 
+.macro over_n_8888_init
+        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
+        /* Hold the loop-invariant constant 0x00800080 in MASK (passed to mul_8888_8 for every pixel) */
+        ldr     MASK, =0x00800080
+        /* Hold multiplier for destination, 255 - (SRC >> 24) i.e. 255 minus source alpha, in STRIDE_M */
+        mov     STRIDE_M, #255
+        sub     STRIDE_M, STRIDE_M, SRC, lsr #24
+        /* Set GE[3:0] to 0101 (0x80+0x80 carries out of bytes 0 and 2 only) so SEL instructions do what we want; the sum left in SCRATCH is not the point */
+        uadd8   SCRATCH, MASK, MASK
+.endm
+
+.macro over_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+        pixld   , numbytes, firstreg, DST, 0 /* presumably loads numbytes of destination pixels into WK registers from firstreg on - pixld is defined elsewhere, confirm; the solid source stays in SRC */
+.endm
+
+.macro over_n_8888_1pixel dst
+        mul_8888_8  WK&dst, STRIDE_M, SCRATCH, MASK /* presumably scales each channel of the destination pixel by STRIDE_M (255 - source alpha) - mul_8888_8 is defined elsewhere, confirm */
+        uqadd8  WK&dst, WK&dst, SRC /* per-byte unsigned saturating add of the solid source colour */
+.endm
+
+.macro over_n_8888_process_tail  cond, numbytes, firstreg
+ .set PROCESS_REG, firstreg
+ .rept numbytes / 4
+        over_n_8888_1pixel %(PROCESS_REG) /* blend one 4-byte pixel held in the WK register numbered PROCESS_REG */
+  .set PROCESS_REG, PROCESS_REG+1
+ .endr
+        pixst   , numbytes, firstreg, DST /* the tail stores the results itself, matching FLAG_PROCESS_DOES_STORE on the generated function */
+.endm
+
+startfunc pixman_composite_over_n_8888_asm_armv6
+        ldr     ip, [sp]
+        /* Zero source is already filtered out in armv6_composite_over_n_8888() */
+        mvns    ip, ip, asr #24 /* Source alpha = 0xff? If so, asr #24 gives all ones, so MVNS yields 0 and sets Z */
+        beq     pixman_composite_src_n_8888_asm_armv6 /* fully opaque source: hand over to the src_n_8888 routine instead of blending */
+        /* else drop through into the _helper function generated immediately below... */
+ .endfunc
+generate_composite_function \
+    pixman_composite_over_n_8888_asm_armv6_helper, 0, 0, 32 \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE \
+    2, /* prefetch distance */ \
+    over_n_8888_init, \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    over_n_8888_process_head, \
+    over_n_8888_process_tail
+
+/******************************************************************************/
+
 #ifdef PROFILING
 .p2align 9
 #endif
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index af062e1..454c6c0 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -47,6 +47,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
                                    uint32_t, 1, uint32_t, 1)
 
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888,
+                                 uint32_t, 1)
+
 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
                                      uint32_t, 1, uint32_t, 1)
 
@@ -225,6 +228,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
 
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
+
     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
 
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
-- 
1.7.10.4



More information about the Pixman mailing list