[Pixman] [PATCH 2/5] ARMv6: New source file for scaled operations

Siarhei Siamashka siarhei.siamashka at gmail.com
Thu Dec 27 04:17:32 PST 2012


On Fri, 21 Dec 2012 18:49:53 -0000
"Ben Avison" <bavison at riscosopen.org> wrote:

> Citing precedent of pixman-arm-neon-asm-bilinear.S, I'm moving the scaled
> operations into their own source file, intending it to include both
> nearest-neighbour and bilinear-interpolation scaled operations. At the
> moment, these two operations are merely cut-and-pasted from the previous
> revision of pixman-arm-simd-asm.S.
> 
> diff --git a/pixman/pixman-arm-simd-asm-scaled.S b/pixman/pixman-arm-simd-asm-scaled.S
> new file mode 100644
> index 0000000..7110995
> --- /dev/null
> +++ b/pixman/pixman-arm-simd-asm-scaled.S
> @@ -0,0 +1,165 @@
> +/*
> + * Copyright © 2008 Mozilla Corporation
> + * Copyright © 2010 Nokia Corporation
> + *
> + * Permission to use, copy, modify, distribute, and sell this software and its
> + * documentation for any purpose is hereby granted without fee, provided that
> + * the above copyright notice appear in all copies and that both that
> + * copyright notice and this permission notice appear in supporting
> + * documentation, and that the name of Mozilla Corporation not be used in
> + * advertising or publicity pertaining to distribution of the software without
> + * specific, written prior permission.  Mozilla Corporation makes no
> + * representations about the suitability of this software for any purpose.  It
> + * is provided "as is" without express or implied warranty.
> + *
> + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
> + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
> + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
> + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
> + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
> + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
> + * SOFTWARE.
> + *
> + * Author:  Jeff Muizelaar (jeff at infidigm.net)
> + *
> + */
> +
> +/* Prevent the stack from becoming executable */
> +#if defined(__linux__) && defined(__ELF__)
> +.section .note.GNU-stack,"",%progbits
> +#endif
> +
> +	.text	/* everything below is assembled into the code section */
> +	.arch armv6	/* instruction set the assembler accepts */
> +	.object_arch armv4	/* architecture recorded in the object file, lower than .arch */
> +	.arm	/* ARM encoding, not Thumb */
> +	.altmacro	/* macro bodies below name their parameters without a backslash */
> +	.p2align 2	/* align to 4 bytes */
> +
> +/* Supplementary macro for setting function attributes */
> +.macro pixman_asm_function fname
> +	.func fname	/* pairs with the .endfunc written after the function body */
> +	.global fname	/* export the symbol */
> +#ifdef __ELF__
> +	.hidden fname	/* ELF only: hidden visibility */
> +	.type fname, %function	/* ELF only: mark the symbol as a function */
> +#endif
> +fname:	/* entry label; the body follows the macro invocation */
> +.endm
> +
> +/*
> + * Note: This code is only using armv5te instructions (not even armv6),
> + *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
> + *       be split into a few variants, tuned for each microarchitecture.
> + *
> + * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
> + * have efficient write combining), it needs to be changed to use 16-byte
> + * aligned writes using STM instruction.
> + *
> + * Nearest scanline scaler macro template uses the following arguments:
> + *  fname                     - name of the function to generate
> + *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
> + *  t                         - type suffix for LDR/STR instructions
> + *  prefetch_distance         - prefetch in the source image by that many
> + *                              pixels ahead
> + *  prefetch_braking_distance - stop prefetching when that many pixels are
> + *                              remaining before the end of scanline
> + */
> +
> +.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
> +                                      prefetch_distance,        \
> +                                      prefetch_braking_distance
> +
> +pixman_asm_function fname	/* emits the symbol directives and entry label for fname */
> +	W		.req	r0	/* pixels left to write; counted down */
> +	DST		.req	r1	/* store pointer, post-incremented per pixel */
> +	SRC		.req	r2	/* base address for the pixel loads */
> +	VX		.req	r3	/* source position; bits 16 and up select the pixel */
> +	UNIT_X		.req	ip	/* added to VX after each pixel */
> +	TMP1		.req	r4	/* byte offset, then pixel value (first pixel of a pair) */
> +	TMP2		.req	r5	/* byte offset, then pixel value (second pixel of a pair) */
> +	VXMASK		.req	r6	/* clears the low bpp_shift bits of a byte offset */
> +	PF_OFFS		.req	r7	/* VX-style position of the prefetch target */
> +	SRC_WIDTH_FIXED	.req	r8	/* subtracted from VX while VX is non-negative */
> +
> +	ldr	UNIT_X, [sp]	/* first stack word at entry, read before the push moves sp */
> +	push	{r4, r5, r6, r7, r8, r10}	/* 24 bytes; r10 is not used below, presumably alignment padding */
> +	mvn	VXMASK, #((1 << bpp_shift) - 1)	/* VXMASK = NOT (pixel size - 1) */
> +	ldr	SRC_WIDTH_FIXED, [sp, #28]	/* 24 pushed bytes + 4: the stack word after UNIT_X */
> +
> +	/* define helper macro */
> +	.macro	scale_2_pixels
> +		ldr&t	TMP1, [SRC, TMP1]	/* fetch the pixel at the offset computed one step earlier */
> +		and	TMP2, VXMASK, VX, asr #(16 - bpp_shift)	/* next offset = (VX asr 16) times pixel size */
> +		adds	VX, VX, UNIT_X	/* step VX; the flags feed the wrap loop below */
> +		str&t	TMP1, [DST], #(1 << bpp_shift)	/* store, then advance DST by one pixel */
> +9:		subpls	VX, VX, SRC_WIDTH_FIXED	/* while VX is non-negative: VX -= SRC_WIDTH_FIXED */
> +		bpl	9b
> +
> +		ldr&t	TMP2, [SRC, TMP2]	/* second pixel: same steps with TMP1 and TMP2 swapped */
> +		and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)	/* leaves the following offset in TMP1 on exit */
> +		adds	VX, VX, UNIT_X
> +		str&t	TMP2, [DST], #(1 << bpp_shift)
> +9:		subpls	VX, VX, SRC_WIDTH_FIXED
> +		bpl	9b
> +	.endm
> +
> +	/* now do the scaling */
> +	and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)	/* offset of the first pixel, as scale_2_pixels expects */
> +	adds	VX, VX, UNIT_X
> +9:	subpls	VX, VX, SRC_WIDTH_FIXED	/* same wrap loop as inside scale_2_pixels */
> +	bpl	9b
> +	subs	W, W, #(8 + prefetch_braking_distance)	/* bias W so its sign ends the prefetching loop early */
> +	blt	2f	/* too few pixels for the prefetching loop */
> +	/* calculate prefetch offset */
> +	mov	PF_OFFS, #prefetch_distance
> +	mla	PF_OFFS, UNIT_X, PF_OFFS, VX	/* PF_OFFS = UNIT_X * prefetch_distance + VX */
> +1:	/* main loop, process 8 pixels per iteration with prefetch */
> +	pld	[SRC, PF_OFFS, asr #(16 - bpp_shift)]
> +	add	PF_OFFS, UNIT_X, lsl #3	/* advance by 8 steps; no SRC_WIDTH_FIXED wrap is applied to PF_OFFS */
> +	scale_2_pixels
> +	scale_2_pixels
> +	scale_2_pixels
> +	scale_2_pixels
> +	subs	W, W, #8
> +	bge	1b
> +2:
> +	subs	W, W, #(4 - 8 - prefetch_braking_distance)	/* undo the bias: W = pixels left - 4 */
> +	blt	2f
> +1:	/* process the remaining pixels */
> +	scale_2_pixels
> +	scale_2_pixels
> +	subs	W, W, #4
> +	bge	1b
> +2:
> +	tst	W, #2	/* W is pixels left - 4 here; bits 0-1 still equal pixels left */
> +	beq	2f
> +	scale_2_pixels
> +2:
> +	tst	W, #1
> +	ldrne&t	TMP1, [SRC, TMP1]	/* final odd pixel; its offset is already in TMP1 */
> +	strne&t	TMP1, [DST]
> +	/* cleanup helper macro */
> +	.purgem	scale_2_pixels	/* lets the next expansion of this macro define it again */
> +	.unreq	DST
> +	.unreq	SRC
> +	.unreq	W
> +	.unreq	VX
> +	.unreq	UNIT_X
> +	.unreq	TMP1
> +	.unreq	TMP2
> +	.unreq	VXMASK
> +	.unreq	PF_OFFS
> +	.unreq  SRC_WIDTH_FIXED
> +	/* return */
> +	pop	{r4, r5, r6, r7, r8, r10}	/* mirrors the push list at entry */
> +	bx	lr
> +.endfunc	/* closes the .func opened by pixman_asm_function */
> +.endm
> +
> +generate_nearest_scanline_func \
> +    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32	/* 2-byte pixels, ldrh/strh */
> +
> +generate_nearest_scanline_func \
> +    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32	/* 4-byte pixels, plain ldr/str */

If you are moving the code, please do it all in a single commit: create
the new file, remove this code from the old file, and make the makefile
changes.

-- 
Best regards,
Siarhei Siamashka


More information about the Pixman mailing list