[Pixman] [PATCH 09/14] ARMv6: add platform-specific fast path for over_n_8888

Siarhei Siamashka siarhei.siamashka at gmail.com
Sun Oct 13 19:13:36 PDT 2013


On Wed,  2 Oct 2013 00:00:29 +0100
Ben Avison <bavison at riscosopen.org> wrote:

> lowlevel-blt-bench results - this patch:
>     Before          After
>     Mean   StdDev   Mean   StdDev  Confidence  Change
> L1  21.3   0.1      45.8   0.2     100.0%      +114.6%
> L2  17.5   0.5      43.3   1.1     100.0%      +148.4%
> M   14.1   0.0      44.4   0.1     100.0%      +215.8%
> HT  12.7   0.1      26.9   0.2     100.0%      +111.2%
> VT  12.4   0.1      23.5   0.2     100.0%      +89.4%
> R   12.0   0.1      24.6   0.2     100.0%      +104.4%
> RT  8.2    0.1      12.1   0.3     100.0%      +48.8%
> 
> or cumulative with preceding patch:
>     Before          After
>     Mean   StdDev   Mean   StdDev  Confidence  Change
> L1  11.3   0.1      45.8   0.2     100.0%      +305.6%
> L2  10.0   0.1      43.3   1.1     100.0%      +332.0%
> M   8.6    0.0      44.4   0.1     100.0%      +414.7%
> HT  5.1    0.0      26.9   0.2     100.0%      +425.5%
> VT  4.9    0.0      23.5   0.2     100.0%      +376.1%
> R   4.8    0.0      24.6   0.2     100.0%      +408.1%
> RT  2.1    0.0      12.1   0.3     100.0%      +480.4%
> 
> Trimmed cairo-perf-trace does not show any significant change for this patch,
> reflecting the fact that over_n_8888 is barely used in the traces.
> ---
>  pixman/pixman-arm-simd-asm.S |   48 ++++++++++++++++++++++++++++++++++++++++++
>  pixman/pixman-arm-simd.c     |    8 +++++++
>  2 files changed, 56 insertions(+)
> 
> diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
> index 259fb88..e85d036 100644
> --- a/pixman/pixman-arm-simd-asm.S
> +++ b/pixman/pixman-arm-simd-asm.S
> @@ -611,6 +611,54 @@ generate_composite_function \
>  
>  /******************************************************************************/
>  
> +.macro over_n_8888_init
> +        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
> +        /* Hold loop invariant in MASK */
> +        ldr     MASK, =0x00800080
> +        /* Hold multiplier for destination in STRIDE_M */
> +        mov     STRIDE_M, #255
> +        sub     STRIDE_M, STRIDE_M, SRC, lsr #24
> +        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
> +        uadd8   SCRATCH, MASK, MASK
> +.endm
> +
> +.macro over_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
> +        pixld   , numbytes, firstreg, DST, 0
> +.endm
> +
> +.macro over_n_8888_1pixel dst
> +        mul_8888_8  WK&dst, STRIDE_M, SCRATCH, MASK
> +        uqadd8  WK&dst, WK&dst, SRC
> +.endm
> +
> +.macro over_n_8888_process_tail  cond, numbytes, firstreg
> + .set PROCESS_REG, firstreg
> + .rept numbytes / 4
> +        over_n_8888_1pixel %(PROCESS_REG)
> +  .set PROCESS_REG, PROCESS_REG+1
> + .endr
> +        pixst   , numbytes, firstreg, DST
> +.endm
> +
> +startfunc pixman_composite_over_n_8888_asm_armv6
> +        ldr     ip, [sp]
> +        /* Zero source is already filtered out in armv6_composite_over_n_8888() */
> +        mvns    ip, ip, asr #24 /* Source alpha = 0xff? */
> +        beq     pixman_composite_src_n_8888_asm_armv6
> +        /* else drop through... */

My understanding is that reducing OVER to SRC for an opaque source is
normally expected to be handled by the "optimize_operator" function,
not by each individual fast path.

Maybe a tweak is needed here:

    http://cgit.freedesktop.org/pixman/tree/pixman/pixman-image.c?id=pixman-0.30.2#n440

to additionally set "flags |= FAST_PATH_IS_OPAQUE" if the solid color
is opaque.

-- 
Best regards,
Siarhei Siamashka


More information about the Pixman mailing list