[Pixman] [PATCH 13/14] ARMv6: Add fast path for add_8888_8888

Ben Avison bavison at riscosopen.org
Tue Oct 1 16:00:33 PDT 2013


lowlevel-blt-bench results:

    Before          After
    Mean   StdDev   Mean   StdDev  Confidence  Change
L1  27.6   0.1      125.9  0.8     100.0%      +356.0%
L2  14.0   0.5      30.8   1.6     100.0%      +120.3%
M   12.2   0.0      26.7   0.1     100.0%      +118.8%
HT  10.2   0.1      17.0   0.1     100.0%      +67.1%
VT  10.0   0.0      16.6   0.1     100.0%      +65.7%
R   9.7    0.0      15.9   0.1     100.0%      +64.8%
RT  5.8    0.1      7.6    0.1     100.0%      +30.5%

Trimmed cairo-perf-trace results:

                        Before          After
                        Mean   StdDev   Mean   StdDev  Confidence  Change
t-xfce4-terminal-a1     20.7   0.0      19.9   0.0     100.0%      +3.8%
---
 pixman/pixman-arm-simd-asm.S |   58 ++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-simd.c     |    8 ++++++
 2 files changed, 66 insertions(+)

diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index b54a3d6..5a99264 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -350,6 +350,64 @@ generate_composite_function \
 
 /******************************************************************************/
 
+.macro test_zero numregs, reg1, reg2, reg3, reg4
+        teq     WK&reg1, #0          /* Chain result: Z is set (EQ) only if all numregs listed WK registers are zero */
+ .if numregs >= 2
+        teqeq   WK&reg2, #0          /* Conditional on EQ: executed only while every earlier register tested zero */
+  .if numregs >= 3
+        teqeq   WK&reg3, #0
+   .if numregs == 4
+        teqeq   WK&reg4, #0
+   .endif
+  .endif
+ .endif
+.endm
+
+.macro add_8888_8888_2pixels  dst1, dst2
+        uqadd8  WK&dst1, WK&dst1, MASK      /* Per-byte unsigned saturating add of MASK into the first WK register */
+        uqadd8  WK&dst2, WK&dst2, STRIDE_M  /* Same operation on the second WK register, with STRIDE_M as the other operand */
+.endm
+
+.macro add_8888_8888_1pixel  dst
+        uqadd8  WK&dst, WK&dst, MASK        /* Per-byte unsigned saturating add of MASK into the given WK register */
+.endm
+
+.macro add_8888_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+        pixld   , numbytes, firstreg, SRC, 0   /* NOTE(review): pixld is defined elsewhere; presumably loads numbytes from SRC into WK registers from firstreg on - confirm. cond and unaligned_src are not forwarded (empty condition, literal 0). */
+        add     DST, DST, #numbytes            /* Advance DST here; the matching tail macro reads this group at negative offsets from DST */
+.endm
+
+.macro add_8888_8888_process_tail  cond, numbytes, firstreg
+        test_zero %(numbytes/4), firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)  /* Tests numbytes/4 WK registers, starting at firstreg */
+        beq     01f                             /* Every tested register was zero: skip the loads, the adds and the store */
+ .if numbytes == 16
+        ldrd    MASK, STRIDE_M, [DST, #-16]     /* Two words from 16 bytes below DST (the head macro already advanced DST by numbytes) */
+        add_8888_8888_2pixels  firstreg, %(firstreg+1)
+        ldrd    MASK, STRIDE_M, [DST, #-8]      /* Next two words, reusing MASK and STRIDE_M as temporaries */
+        add_8888_8888_2pixels  %(firstreg+2), %(firstreg+3)
+ .elseif numbytes == 8
+        ldrd    MASK, STRIDE_M, [DST, #-8]      /* Two words from 8 bytes below DST */
+        add_8888_8888_2pixels  firstreg, %(firstreg+1)
+ .else
+        ldr     MASK, [DST, #-4]                /* Remaining case: a single word just below DST */
+        add_8888_8888_1pixel  firstreg
+ .endif
+        pixst   , numbytes, firstreg, DST       /* NOTE(review): pixst is defined elsewhere; presumably writes the summed WK registers back to the destination - confirm */
+01:
+.endm
+
+generate_composite_function \
+    pixman_composite_add_8888_8888_asm_armv6, 32, 0, 32, /* NOTE(review): presumably source, mask and destination bits per pixel - confirm against generate_composite_function */ \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH | FLAG_NO_PRELOAD_DST, \
+    2, /* prefetch distance */ \
+    nop_macro, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    add_8888_8888_process_head, \
+    add_8888_8888_process_tail
+
+/******************************************************************************/
+
 .macro over_8888_8888_init
         /* Hold loop invariant in MASK */
         ldr     MASK, =0x00800080
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index f80f0e4..dd11838 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -44,6 +44,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_8888,
 
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
                                    uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8888_8888,
+                                   uint32_t, 1, uint32_t, 1) /* NOTE(review): presumably defines the armv6_composite_add_8888_8888 wrapper named in the fast-path table - confirm */
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
                                    uint32_t, 1, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888,
@@ -244,6 +246,12 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888),
 
     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
+    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, armv6_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, x8r8g8b8, armv6_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD, x8r8g8b8, null, x8r8g8b8, armv6_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, armv6_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, x8b8g8r8, armv6_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD, x8b8g8r8, null, x8b8g8r8, armv6_composite_add_8888_8888),
 
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888),
-- 
1.7.10.4



More information about the Pixman mailing list