[Pixman] [PATCH v2 1/2] armv6: Add over_n_8888 fast path (disabled)

Siarhei Siamashka siarhei.siamashka at gmail.com
Thu Sep 17 07:20:22 PDT 2015


On Mon,  7 Sep 2015 14:40:48 +0300
Pekka Paalanen <ppaalanen at gmail.com> wrote:

> From: Ben Avison <bavison at riscosopen.org>
> 
> This new fast path is initially disabled by putting the entries in the
> lookup table after the sentinel. The compiler cannot tell that the new
> code is unused, so it cannot eliminate it. The lookup table size also
> already includes the new fast path entries. When the follow-up patch
> then enables the new fast path, the binary layout (alignments, size,
> etc.) will stay the same compared to the disabled case.
> 
> Keeping the binary layout identical is important for benchmarking on
> Raspberry Pi 1. The addresses at which functions are loaded will have a
> significant impact on benchmark results, causing unexpected performance
> changes. Keeping all function addresses the same across the patch
> enabling a new fast path improves the reliability of benchmarks.

Don't we already have the PIXMAN_DISABLE environment variable exactly
for this purpose (testing different implementations without recompiling
the library)?

> Benchmark results are included in the patch enabling this fast path.
> 
> [Pekka: disabled the fast path, commit message]
> Signed-off-by: Pekka Paalanen <pekka.paalanen at collabora.co.uk>
>
> ---
>  pixman/pixman-arm-simd-asm.S | 41 +++++++++++++++++++++++++++++++++++++++++
>  pixman/pixman-arm-simd.c     |  7 +++++++
>  2 files changed, 48 insertions(+)
> 
> diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
> index 7b0727b..a74a0a8 100644
> --- a/pixman/pixman-arm-simd-asm.S
> +++ b/pixman/pixman-arm-simd-asm.S
> @@ -1136,3 +1136,44 @@ generate_composite_function \
>      in_reverse_8888_8888_process_tail
>  
>  /******************************************************************************/
> +
> +.macro over_n_8888_init
> +        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
> +        /* Hold loop invariant in MASK */
> +        ldr     MASK, =0x00800080
> +        /* Hold multiplier for destination in STRIDE_M */
> +        mov     STRIDE_M, #255
> +        sub     STRIDE_M, STRIDE_M, SRC, lsr #24
> +        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
> +        uadd8   SCRATCH, MASK, MASK
> +.endm
> +
> +.macro over_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
> +        pixld   , numbytes, firstreg, DST, 0
> +.endm
> +
> +.macro over_n_8888_1pixel dst
> +        mul_8888_8  WK&dst, STRIDE_M, SCRATCH, MASK
> +        uqadd8  WK&dst, WK&dst, SRC
> +.endm
> +
> +.macro over_n_8888_process_tail  cond, numbytes, firstreg
> + .set PROCESS_REG, firstreg
> + .rept numbytes / 4
> +        over_n_8888_1pixel %(PROCESS_REG)
> +  .set PROCESS_REG, PROCESS_REG+1
> + .endr
> +        pixst   , numbytes, firstreg, DST
> +.endm
> +
> +generate_composite_function \
> +    pixman_composite_over_n_8888_asm_armv6, 0, 0, 32 \
> +    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE \
> +    2, /* prefetch distance */ \
> +    over_n_8888_init, \
> +    nop_macro, /* newline */ \
> +    nop_macro, /* cleanup */ \
> +    over_n_8888_process_head, \
> +    over_n_8888_process_tail
> +
> +/******************************************************************************/
> diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
> index f40ff36..62c0f41 100644
> --- a/pixman/pixman-arm-simd.c
> +++ b/pixman/pixman-arm-simd.c
> @@ -51,6 +51,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
>  PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888,
>                                     uint32_t, 1, uint32_t, 1)
>  
> +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888,
> +                                 uint32_t, 1)
>  PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
>                                   uint32_t, 1)
>  
> @@ -271,6 +273,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
>      SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
>  
>      { PIXMAN_OP_NONE },
> +
> +    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
> +    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
> +    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
> +    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
>  };
>  
>  pixman_implementation_t *



-- 
Best regards,
Siarhei Siamashka


More information about the Pixman mailing list