[Pixman] [PATCH 07/32] armv6: Add over_n_8888 fast path

Ben Avison bavison at riscosopen.org
Thu Aug 7 09:50:03 PDT 2014


This is used instead of the equivalent C fast path.

lowlevel-blt-bench results, compared to no fast path at all:

    Before          After
    Mean   StdDev   Mean   StdDev  Confidence  Change
L1  11.9   0.1      43.7   0.6     100.0%      +265.8%
L2  10.6   0.2      41.7   0.3     100.0%      +293.6%
M   9.4    0.0      42.2   0.1     100.0%      +350.6%
HT  8.4    0.0      25.6   0.4     100.0%      +202.6%
VT  8.3    0.0      22.6   0.3     100.0%      +170.7%
R   8.1    0.0      23.3   0.4     100.0%      +186.8%
RT  5.4    0.1      11.6   0.3     100.0%      +112.6%
---
 pixman/pixman-arm-simd-asm.S |   41 +++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-simd.c     |    6 ++++++
 2 files changed, 47 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index 7b0727b..a74a0a8 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -1136,3 +1136,44 @@ generate_composite_function \
     in_reverse_8888_8888_process_tail
 
 /******************************************************************************/
+
+.macro over_n_8888_init
+        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
+        /* Hold loop invariant in MASK */
+        ldr     MASK, =0x00800080
+        /* Hold multiplier for destination in STRIDE_M */
+        mov     STRIDE_M, #255
+        sub     STRIDE_M, STRIDE_M, SRC, lsr #24
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        uadd8   SCRATCH, MASK, MASK
+.endm
+
+.macro over_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+        pixld   , numbytes, firstreg, DST, 0
+.endm
+
+.macro over_n_8888_1pixel dst
+        mul_8888_8  WK&dst, STRIDE_M, SCRATCH, MASK
+        uqadd8  WK&dst, WK&dst, SRC
+.endm
+
+.macro over_n_8888_process_tail  cond, numbytes, firstreg
+ .set PROCESS_REG, firstreg
+ .rept numbytes / 4
+        over_n_8888_1pixel %(PROCESS_REG)
+  .set PROCESS_REG, PROCESS_REG+1
+ .endr
+        pixst   , numbytes, firstreg, DST
+.endm
+
+generate_composite_function \
+    pixman_composite_over_n_8888_asm_armv6, 0, 0, 32 \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE \
+    2, /* prefetch distance */ \
+    over_n_8888_init, \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    over_n_8888_process_head, \
+    over_n_8888_process_tail
+
+/******************************************************************************/
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index fa1ab5c..3223010 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -51,6 +51,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888,
                                    uint32_t, 1, uint32_t, 1)
 
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888,
+                                 uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
                                  uint32_t, 1)
 
@@ -240,6 +242,10 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
 
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888),
     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888),
 
-- 
1.7.5.4



More information about the Pixman mailing list