[Pixman] [PATCH 2/5] ARMv6: New source file for scaled operations
Siarhei Siamashka
siarhei.siamashka at gmail.com
Thu Dec 27 04:17:32 PST 2012
On Fri, 21 Dec 2012 18:49:53 -0000
"Ben Avison" <bavison at riscosopen.org> wrote:
> Citing precedent of pixman-arm-neon-asm-bilinear.S, I'm moving the scaled
> operations into their own source file, intending it to include both
> nearest-neighbour and bilinear-interpolation scaled operations. At the
> moment, these two operations are merely cut-and-pasted from the previous
> revision of pixman-arm-simd-asm.S.
>
> diff --git a/pixman/pixman-arm-simd-asm-scaled.S b/pixman/pixman-arm-simd-asm-scaled.S
> new file mode 100644
> index 0000000..7110995
> --- /dev/null
> +++ b/pixman/pixman-arm-simd-asm-scaled.S
> @@ -0,0 +1,165 @@
> +/*
> + * Copyright © 2008 Mozilla Corporation
> + * Copyright © 2010 Nokia Corporation
> + *
> + * Permission to use, copy, modify, distribute, and sell this software and its
> + * documentation for any purpose is hereby granted without fee, provided that
> + * the above copyright notice appear in all copies and that both that
> + * copyright notice and this permission notice appear in supporting
> + * documentation, and that the name of Mozilla Corporation not be used in
> + * advertising or publicity pertaining to distribution of the software without
> + * specific, written prior permission. Mozilla Corporation makes no
> + * representations about the suitability of this software for any purpose. It
> + * is provided "as is" without express or implied warranty.
> + *
> + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
> + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
> + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
> + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
> + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
> + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
> + * SOFTWARE.
> + *
> + * Author: Jeff Muizelaar (jeff at infidigm.net)
> + *
> + */
> +
> +/* Prevent the stack from becoming executable */
> +#if defined(__linux__) && defined(__ELF__)
> +.section .note.GNU-stack,"",%progbits
> +#endif
> +
> + .text
> + .arch armv6 /* accept ARMv6 instructions while assembling */
> + .object_arch armv4 /* per GAS docs: overrides the architecture recorded in the object file; NOTE(review): presumably so the object links into generic builds - confirm */
> + .arm /* ARM (not Thumb) instruction encoding */
> + .altmacro /* alternate macro syntax: the macros below reference parameters by bare name and join them with the & separator */
> + .p2align 2 /* align to 1 << 2 = 4 bytes */
> +
> +/* Supplementary macro for setting function attributes: emits the symbol directives and the entry label for fname */
> +.macro pixman_asm_function fname
> + .func fname /* opens a .func scope; generate_nearest_scanline_func closes it with .endfunc */
> + .global fname /* make the symbol visible to the linker */
> +#ifdef __ELF__
> + .hidden fname /* ELF only: hidden visibility */
> + .type fname, %function /* ELF only: mark the symbol as a function */
> +#endif
> +fname: /* entry point; the function body is whatever follows the macro invocation */
> +.endm
> +
> +/*
> + * Note: This code is only using armv5te instructions (not even armv6),
> + * but is scheduled for ARM Cortex-A8 pipeline. So it might need to
> + * be split into a few variants, tuned for each microarchitecture.
> + *
> + * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
> + * have efficient write combining), it needs to be changed to use 16-byte
> + * aligned writes using STM instruction.
> + *
> + * Nearest scanline scaler macro template uses the following arguments:
> + * fname - name of the function to generate
> + * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes
> + * t - type suffix for LDR/STR instructions
> + * prefetch_distance - prefetch in the source image by that many
> + * pixels ahead
> + * prefetch_braking_distance - stop prefetching when that many pixels are
> + * remaining before the end of scanline
> + */
> +
> +.macro generate_nearest_scanline_func fname, bpp_shift, t, \
> + prefetch_distance, \
> + prefetch_braking_distance
> +
> +pixman_asm_function fname /* emit symbol directives and the entry label for the generated function */
> + W .req r0 /* pixels still to be written; decremented per group of pixels processed */
> + DST .req r1 /* destination pointer, post-incremented by the pixel size on each store */
> + SRC .req r2 /* base address for the indexed source loads */
> + VX .req r3 /* source x position; loads use (VX asr 16) scaled by the pixel size as byte offset */
> + UNIT_X .req ip /* per-pixel increment of VX, loaded from the first stack argument */
> + TMP1 .req r4 /* alternately a source byte offset and the pixel loaded from it */
> + TMP2 .req r5 /* same role as TMP1, one pixel out of phase */
> + VXMASK .req r6 /* mask that clears the low bpp_shift bits of a byte offset */
> + PF_OFFS .req r7 /* prefetch position, same format as VX */
> + SRC_WIDTH_FIXED .req r8 /* second stack argument; subtracted from VX whenever VX is non-negative */
> +
> + ldr UNIT_X, [sp] /* first stack argument, read before the push moves sp */
> + push {r4, r5, r6, r7, r8, r10} /* 6 registers = 24 bytes; r10 is not used below - NOTE(review): presumably pushed to keep the stack 8-byte aligned, confirm */
> + mvn VXMASK, #((1 << bpp_shift) - 1) /* VXMASK = ~(pixel size in bytes - 1) */
> + ldr SRC_WIDTH_FIXED, [sp, #28] /* 24 bytes pushed + 4 = second stack argument */
> +
> + /* define helper macro: expects TMP1 = byte offset of the next source pixel, copies two pixels, leaves TMP1 = offset of the pixel after them */
> + .macro scale_2_pixels
> + ldr&t TMP1, [SRC, TMP1] /* load first pixel (TMP1 was an offset, becomes the pixel) */
> + and TMP2, VXMASK, VX, asr #(16 - bpp_shift) /* byte offset of the second pixel from the current VX */
> + adds VX, VX, UNIT_X /* step VX; N flag feeds the wrap loop below (str does not touch flags) */
> + str&t TMP1, [DST], #(1 << bpp_shift) /* store first pixel, advance DST by one pixel */
> +9: subpls VX, VX, SRC_WIDTH_FIXED /* while VX is non-negative, subtract SRC_WIDTH_FIXED */
> + bpl 9b /* repeat until VX is negative again */
> +
> + ldr&t TMP2, [SRC, TMP2] /* load second pixel */
> + and TMP1, VXMASK, VX, asr #(16 - bpp_shift) /* byte offset of the pixel after this pair */
> + adds VX, VX, UNIT_X /* step VX again */
> + str&t TMP2, [DST], #(1 << bpp_shift) /* store second pixel, advance DST */
> +9: subpls VX, VX, SRC_WIDTH_FIXED /* same wrap loop as above */
> + bpl 9b
> + .endm
> +
> + /* now do the scaling; NOTE(review): the wrap loops keep VX negative, which suggests the caller passes a negative VX with SRC biased to match - confirm against the C caller */
> + and TMP1, VXMASK, VX, asr #(16 - bpp_shift) /* prime TMP1 with the byte offset of the first pixel */
> + adds VX, VX, UNIT_X /* step VX past the first pixel */
> +9: subpls VX, VX, SRC_WIDTH_FIXED /* wrap VX back below zero if the step made it non-negative */
> + bpl 9b
> + subs W, W, #(8 + prefetch_braking_distance) /* bias W so the prefetching loop stops prefetch_braking_distance pixels early */
> + blt 2f /* too few pixels for the prefetching loop */
> + /* calculate prefetch offset */
> + mov PF_OFFS, #prefetch_distance
> + mla PF_OFFS, UNIT_X, PF_OFFS, VX /* PF_OFFS = UNIT_X * prefetch_distance + VX */
> +1: /* main loop, process 8 pixels per iteration with prefetch */
> + pld [SRC, PF_OFFS, asr #(16 - bpp_shift)] /* prefetch hint for the source data prefetch_distance pixels ahead */
> + add PF_OFFS, UNIT_X, lsl #3 /* PF_OFFS += 8 * UNIT_X, matching the 8 pixels handled below */
> + scale_2_pixels
> + scale_2_pixels
> + scale_2_pixels
> + scale_2_pixels
> + subs W, W, #8 /* 8 pixels done */
> + bge 1b /* loop while the biased count is still non-negative */
> +2:
> + subs W, W, #(4 - 8 - prefetch_braking_distance) /* undo the bias and subtract 4: W = remaining pixels - 4 */
> + blt 2f /* fewer than 4 pixels remain */
> +1: /* process the remaining pixels, 4 per iteration and without prefetch */
> + scale_2_pixels
> + scale_2_pixels
> + subs W, W, #4 /* 4 pixels done */
> + bge 1b
> +2:
> + tst W, #2 /* W is the remaining count minus 4 here, so bits 0-1 still equal those of the remaining count */
> + beq 2f
> + scale_2_pixels /* copy a trailing pair */
> +2:
> + tst W, #1 /* one last odd pixel? */
> + ldrne&t TMP1, [SRC, TMP1] /* TMP1 still holds its byte offset */
> + strne&t TMP1, [DST] /* final store, no post-increment */
> + /* cleanup helper macro so that the next instantiation of this template can define it again */
> + .purgem scale_2_pixels
> + .unreq DST /* release the register aliases before the template is instantiated again */
> + .unreq SRC
> + .unreq W
> + .unreq VX
> + .unreq UNIT_X
> + .unreq TMP1
> + .unreq TMP2
> + .unreq VXMASK
> + .unreq PF_OFFS
> + .unreq SRC_WIDTH_FIXED
> + /* return */
> + pop {r4, r5, r6, r7, r8, r10} /* restore exactly the registers pushed on entry */
> + bx lr
> +.endfunc /* closes the .func opened by pixman_asm_function */
> +.endm
> +
> +generate_nearest_scanline_func \
> + pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 /* 2-byte pixels via ldrh/strh, prefetch 80 pixels ahead, braking distance 32 */
> +
> +generate_nearest_scanline_func \
> + pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32 /* 4-byte pixels via plain ldr/str (empty type suffix), prefetch 48 pixels ahead, braking distance 32 */
If you are moving the code, just do the creation of the new file,
removal of this code from the old file and the makefile tweaks as one
commit.
--
Best regards,
Siarhei Siamashka
More information about the Pixman
mailing list