[Pixman] [PATCH 3/5] ARMv6: New blit routines
Ben Avison
bavison at riscosopen.org
Sat Jan 19 08:16:51 PST 2013
These are usable either as various composite operations, or via the
top-level function pixman_blt() which now does some blitting for the
first time on an ARMv6 platform (previously it just returned FALSE).
src_8888_8888
Before After
Mean StdDev Mean StdDev Confidence Change
L1 414.5 9.4 445.8 3.6 100.0% +7.6%
L2 93.3 20.7 114.5 12.9 100.0% +22.7%
M 57.0 0.2 89.2 0.5 100.0% +56.4%
HT 28.7 0.3 39.6 0.4 100.0% +37.9%
VT 25.5 0.2 35.3 0.4 100.0% +38.4%
R 20.1 0.1 33.8 0.3 100.0% +67.8%
RT 7.8 0.2 12.7 0.4 100.0% +62.7%
src_0565_0565
Before After
Mean StdDev Mean StdDev Confidence Change
L1 397.4 6.1 412.5 5.2 100.0% +3.8%
L2 143.2 10.9 141.9 6.5 68.9% -0.9% (insignificant)
M 90.7 0.4 133.5 0.7 100.0% +47.1%
HT 38.6 0.3 53.7 0.7 100.0% +39.0%
VT 33.0 0.3 47.3 0.6 100.0% +43.3%
R 25.7 0.2 42.1 0.5 100.0% +64.1%
RT 8.0 0.2 13.3 0.3 100.0% +65.6%
src_8_8
Before After
Mean StdDev Mean StdDev Confidence Change
L1 716.5 9.8 768.2 20.4 100.0% +7.2%
L2 246.2 12.7 260.5 8.8 100.0% +5.8%
M 146.8 0.7 227.9 0.7 100.0% +55.2%
HT 44.9 0.6 62.1 1.0 100.0% +38.2%
VT 35.6 0.4 53.4 0.7 100.0% +50.0%
R 29.7 0.3 48.2 0.6 100.0% +62.2%
RT 8.6 0.2 12.9 0.4 100.0% +49.3%
---
pixman/pixman-arm-simd-asm.S | 61 ++++++++++++++++++++++++++++
pixman/pixman-arm-simd.c | 90 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 151 insertions(+), 0 deletions(-)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index 64588a1..c6d8263 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -51,6 +51,67 @@
* preload If outputting 16 bytes causes 64 bytes to be read, whether an extra preload should be output
*/
+.macro blit_init
+ line_saved_regs STRIDE_D, STRIDE_S
+.endm
+
+.macro blit_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+ pixld cond, numbytes, firstreg, SRC, unaligned_src
+.endm
+
+.macro blit_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment
+ WK4 .req STRIDE_D
+ WK5 .req STRIDE_S
+ WK6 .req MASK
+ WK7 .req STRIDE_M
+110: pixld , 16, 0, SRC, unaligned_src
+ pixld , 16, 4, SRC, unaligned_src
+ pld [SRC, SCRATCH]
+ pixst , 16, 0, DST
+ pixst , 16, 4, DST
+ subs X, X, #32*8/src_bpp
+ bhs 110b
+ .unreq WK4
+ .unreq WK5
+ .unreq WK6
+ .unreq WK7
+.endm
+
+generate_composite_function \
+ pixman_composite_src_8888_8888_asm_armv6, 32, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
+ 4, /* prefetch distance */ \
+ blit_init, \
+ nop_macro, /* newline */ \
+ nop_macro, /* cleanup */ \
+ blit_process_head, \
+ nop_macro, /* process tail */ \
+ blit_inner_loop
+
+generate_composite_function \
+ pixman_composite_src_0565_0565_asm_armv6, 16, 0, 16, \
+ FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
+ 4, /* prefetch distance */ \
+ blit_init, \
+ nop_macro, /* newline */ \
+ nop_macro, /* cleanup */ \
+ blit_process_head, \
+ nop_macro, /* process tail */ \
+ blit_inner_loop
+
+generate_composite_function \
+ pixman_composite_src_8_8_asm_armv6, 8, 0, 8, \
+ FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
+ 3, /* prefetch distance */ \
+ blit_init, \
+ nop_macro, /* newline */ \
+ nop_macro, /* cleanup */ \
+ blit_process_head, \
+ nop_macro, /* process tail */ \
+ blit_inner_loop
+
+/******************************************************************************/
+
.macro src_n_8888_init
ldr SRC, [sp, #ARGS_STACK_OFFSET]
mov STRIDE_S, SRC
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index f313df3..de66e57 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -376,6 +376,13 @@ pixman_composite_over_n_8_8888_asm_armv6 (int32_t width,
#endif
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_0565,
+ uint16_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8_8,
+ uint8_t, 1, uint8_t, 1)
+
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
uint8_t, 1, uint8_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
@@ -458,8 +465,90 @@ arm_simd_fill (pixman_implementation_t *imp,
}
}
+static pixman_bool_t
+arm_simd_blt (pixman_implementation_t *imp,
+ uint32_t * src_bits,
+ uint32_t * dst_bits,
+ int src_stride, /* in 32-bit words */
+ int dst_stride, /* in 32-bit words */
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dest_x,
+ int dest_y,
+ int width,
+ int height)
+{
+ if (src_bpp != dst_bpp)
+ return FALSE;
+
+ switch (src_bpp)
+ {
+ case 8:
+ pixman_composite_src_8_8_asm_armv6 (
+ width, height,
+ (uint8_t *)(((char *) dst_bits) +
+ dest_y * dst_stride * 4 + dest_x * 1), dst_stride * 4,
+ (uint8_t *)(((char *) src_bits) +
+ src_y * src_stride * 4 + src_x * 1), src_stride * 4);
+ return TRUE;
+ case 16:
+ pixman_composite_src_0565_0565_asm_armv6 (
+ width, height,
+ (uint16_t *)(((char *) dst_bits) +
+ dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2,
+ (uint16_t *)(((char *) src_bits) +
+ src_y * src_stride * 4 + src_x * 2), src_stride * 2);
+ return TRUE;
+ case 32:
+ pixman_composite_src_8888_8888_asm_armv6 (
+ width, height,
+ (uint32_t *)(((char *) dst_bits) +
+ dest_y * dst_stride * 4 + dest_x * 4), dst_stride,
+ (uint32_t *)(((char *) src_bits) +
+ src_y * src_stride * 4 + src_x * 4), src_stride);
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
static const pixman_fast_path_t arm_simd_fast_paths[] =
{
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, armv6_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, armv6_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, armv6_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, armv6_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, armv6_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, armv6_composite_src_8888_8888),
+
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, a1r5g5b5, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a1b5g5r5, null, a1b5g5r5, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a1b5g5r5, null, x1b5g5r5, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x1b5g5r5, null, x1b5g5r5, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a4r4g4b4, null, a4r4g4b4, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a4b4g4r4, null, a4b4g4r4, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a4r4g4b4, null, x4r4g4b4, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a4b4g4r4, null, x4b4g4r4, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x4r4g4b4, null, x4r4g4b4, armv6_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x4b4g4r4, null, x4b4g4r4, armv6_composite_src_0565_0565),
+
+ PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, armv6_composite_src_8_8),
+ PIXMAN_STD_FAST_PATH (SRC, r3g3b2, null, r3g3b2, armv6_composite_src_8_8),
+ PIXMAN_STD_FAST_PATH (SRC, b2g3r3, null, b2g3r3, armv6_composite_src_8_8),
+ PIXMAN_STD_FAST_PATH (SRC, a2r2g2b2, null, a2r2g2b2, armv6_composite_src_8_8),
+ PIXMAN_STD_FAST_PATH (SRC, a2b2g2r2, null, a2b2g2r2, armv6_composite_src_8_8),
+ PIXMAN_STD_FAST_PATH (SRC, c8, null, c8, armv6_composite_src_8_8),
+ PIXMAN_STD_FAST_PATH (SRC, g8, null, g8, armv6_composite_src_8_8),
+ PIXMAN_STD_FAST_PATH (SRC, x4a4, null, x4a4, armv6_composite_src_8_8),
+ PIXMAN_STD_FAST_PATH (SRC, x4c4, null, x4c4, armv6_composite_src_8_8),
+ PIXMAN_STD_FAST_PATH (SRC, x4g4, null, x4g4, armv6_composite_src_8_8),
+
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
@@ -494,6 +583,7 @@ _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp = _pixman_implementation_create (fallback, arm_simd_fast_paths);
+ imp->blt = arm_simd_blt;
imp->fill = arm_simd_fill;
return imp;
--
1.7.5.4
More information about the Pixman
mailing list