[Pixman] [PATCH 10/12] ARMv6: Add fast path for over_reverse_n_8888

Ben Avison bavison at riscosopen.org
Mon Mar 4 09:42:27 PST 2013


lowlevel-blt-bench results:

    Before          After
    Mean   StdDev   Mean   StdDev  Confidence  Change
L1  15.0   0.1      276.2  4.0     100.0%      +1743.3%
L2  13.4   0.3      154.8  17.4    100.0%      +1058.0%
M   11.4   0.0      73.7   0.8     100.0%      +549.4%
HT  10.2   0.0      25.6   0.2     100.0%      +150.9%
VT  10.0   0.0      23.0   0.3     100.0%      +129.4%
R   9.8    0.1      22.9   0.2     100.0%      +134.3%
RT  6.4    0.1      11.6   0.3     100.0%      +80.8%

Trimmed cairo-perf-trace results:

                        Before          After
                        Mean   StdDev   Mean   StdDev  Confidence  Change
t-poppler               11.8   0.1      8.8    0.1     100.0%      +34.6%
---
 pixman/pixman-arm-simd-asm.S |   78 ++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-simd.c     |    6 +++
 2 files changed, 84 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index 20ad05a..4f9a015 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -979,6 +979,84 @@ generate_composite_function \
 
 /******************************************************************************/
 
+.macro over_reverse_n_8888_init
+        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
+        ldr     MASK, =0x00800080
+        /* Split source pixel into RB/AG parts */
+        uxtb16  STRIDE_S, SRC
+        uxtb16  STRIDE_M, SRC, ror #8
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        uadd8   SCRATCH, MASK, MASK
+        line_saved_regs  STRIDE_D, ORIG_W
+.endm
+
+.macro over_reverse_n_8888_newline
+        mov     STRIDE_D, #0xFF
+.endm
+
+.macro over_reverse_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+        pixld   , numbytes, firstreg, DST, 0
+.endm
+
+.macro over_reverse_n_8888_1pixel  d, is_only
+        teq     WK&d, #0
+        beq     8f       /* replace with source */
+        bics    ORIG_W, STRIDE_D, WK&d, lsr #24
+ .if is_only == 1
+        beq     49f      /* skip store */
+ .else
+        beq     9f       /* write same value back */
+ .endif
+        mla     SCRATCH, STRIDE_S, ORIG_W, MASK /* red/blue */
+        mla     ORIG_W, STRIDE_M, ORIG_W, MASK  /* alpha/green */
+        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
+        uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8
+        mov     SCRATCH, SCRATCH, ror #8
+        sel     ORIG_W, SCRATCH, ORIG_W
+        uqadd8  WK&d, WK&d, ORIG_W
+        b       9f
+8:      mov     WK&d, SRC
+9:
+.endm
+
+.macro over_reverse_n_8888_tail  numbytes, reg1, reg2, reg3, reg4
+ .if numbytes == 4
+        over_reverse_n_8888_1pixel  reg1, 1
+ .else
+        and     SCRATCH, WK&reg1, WK&reg2
+  .if numbytes == 16
+        and     SCRATCH, SCRATCH, WK&reg3
+        and     SCRATCH, SCRATCH, WK&reg4
+  .endif
+        mvns    SCRATCH, SCRATCH, asr #24
+        beq     49f /* skip store if all opaque */
+        over_reverse_n_8888_1pixel  reg1, 0
+        over_reverse_n_8888_1pixel  reg2, 0
+  .if numbytes == 16
+        over_reverse_n_8888_1pixel  reg3, 0
+        over_reverse_n_8888_1pixel  reg4, 0
+  .endif
+ .endif
+        pixst   , numbytes, reg1, DST
+49:
+.endm
+
+.macro over_reverse_n_8888_process_tail  cond, numbytes, firstreg
+        over_reverse_n_8888_tail  numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
+.endm
+
+generate_composite_function \
+    pixman_composite_over_reverse_n_8888_asm_armv6, 0, 0, 32 \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
+    3, /* prefetch distance */ \
+    over_reverse_n_8888_init, \
+    over_reverse_n_8888_newline, \
+    nop_macro, /* cleanup */ \
+    over_reverse_n_8888_process_head, \
+    over_reverse_n_8888_process_tail
+
+/******************************************************************************/
+
 #ifdef PROFILING
 .p2align 9
 #endif
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 5a50098..855b703 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -50,6 +50,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888,
                                    uint32_t, 1, uint32_t, 1)
 
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
+                                 uint32_t, 1)
+
 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
                                      uint32_t, 1, uint32_t, 1)
 
@@ -231,6 +234,9 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
 
+    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888),
+
     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
 
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
-- 
1.7.5.4



More information about the Pixman mailing list