[Pixman] [PATCH 24/32] armv6: Add optimised scanline fetcher for a1r5g5b5
Ben Avison
bavison at riscosopen.org
Thu Aug 7 09:50:20 PDT 2014
This supports a1r5g5b5 source images.
lowlevel-blt-bench results for src_1555_8888, which does not yet have a
dedicated fast path:
        Before            After
        Mean    StdDev    Mean    StdDev    Confidence    Change
L1      24.5    0.2       57.0    1.1       100.0%        +132.2%
L2      19.3    0.4       41.4    1.0       100.0%        +114.3%
M       20.4    0.0       49.8    0.1       100.0%        +144.7%
HT      12.8    0.1       21.4    0.3       100.0%        +67.0%
VT      12.7    0.1       21.0    0.3       100.0%        +65.4%
R       12.1    0.1       19.7    0.2       100.0%        +63.1%
RT      5.6     0.1       7.0     0.2       100.0%        +24.8%
---
pixman/pixman-arm-simd-asm.S | 70 ++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-arm-simd.c | 13 ++++++++
2 files changed, 83 insertions(+), 0 deletions(-)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index b251187..6674a9d 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -2744,3 +2744,73 @@ generate_composite_function_single_scanline \
inout_reverse_process_tail
/******************************************************************************/
+
+.macro src_1555_8888_init /* Setup passed as the init hook of the scanline generator: loads the bit mask and the SEL byte-select flags that the pixel-conversion macros rely on */
+ /* Hold loop invariant in MASK: 0x83E0 in each halfword = bit 15 (alpha) plus bits 9:5 (green) of an a1r5g5b5 pixel */
+ ldr MASK, =0x83E083E0
+ /* Set GE[3:0] to 0101 so SEL instructions do what we want: bytes 0 and 2 are taken from SEL's first operand, bytes 1 and 3 from its second */
+ msr CPSR_s, #0x50000
+.endm
+
+.macro src_1555_8888_2pixels reg1, reg2, tmp1, tmp2, mask /* Expand the two a1r5g5b5 pixels packed in WK<reg1> to a8r8g8b8: low-halfword pixel ends up in WK<reg1>, high-halfword pixel in WK<reg2>. tmp1/tmp2 are scratch registers, mask holds the alpha+green bit mask for both halfwords, and GE[3:0] must be 0101 for the final SELs */
+ bic WK&reg2, WK&reg1, mask @ 0RRRRR00000BBBBB0rrrrr00000bbbbb
+ and tmp1, WK&reg1, mask @ A00000GGGGG00000a00000ggggg00000
+ mov tmp2, WK&reg2, lsr #16 @ 00000000000000000RRRRR00000BBBBB
+ orr tmp1, tmp1, tmp1, lsr #5 @ A0000-GGGGGGGGGGa0000-gggggggggg
+ uxth WK&reg2, WK&reg2 @ 00000000000000000rrrrr00000bbbbb
+ mov WK&reg1, tmp1, lsl #16 @ a0000-gggggggggg0000000000000000
+ orr tmp2, tmp2, tmp2, lsl #5 @ 000000000000RRRRRRRRRRBBBBBBBBBB
+ orr WK&reg2, WK&reg2, WK&reg2, lsl #5 @ 000000000000rrrrrrrrrrbbbbbbbbbb
+ mov tmp1, tmp1, asr #10 @ AAAAAAAAAAA0000-GGGGGGGGGG------
+ pkhbt tmp2, tmp2, tmp2, lsl #4 @ 00000000RRRRRRRR------BBBBBBBBBB
+ pkhbt WK&reg2, WK&reg2, WK&reg2, lsl #4 @ 00000000rrrrrrrr------bbbbbbbbbb
+ mov WK&reg1, WK&reg1, asr #10 @ aaaaaaaaaaa0000-gggggggggg000000
+ pkhtb tmp2, tmp2, tmp2, asr #2 @ 00000000RRRRRRRR--------BBBBBBBB
+ pkhtb WK&reg2, WK&reg2, WK&reg2, asr #2 @ 00000000rrrrrrrr--------bbbbbbbb
+ sel WK&reg1, WK&reg2, WK&reg1 @ aaaaaaaarrrrrrrrggggggggbbbbbbbb
+ sel WK&reg2, tmp2, tmp1 @ AAAAAAAARRRRRRRRGGGGGGGGBBBBBBBB
+.endm
+
+.macro src_1555_8888_1pixel reg, tmp, mask /* Expand the single a1r5g5b5 pixel in the low halfword of WK<reg> to a8r8g8b8, in place. tmp is a scratch register, mask holds the alpha+green bit mask, and GE[3:0] must be 0101 for the final SEL */
+ bic tmp, WK&reg, mask @ 00000000000000000rrrrr00000bbbbb
+ and WK&reg, mask, WK&reg, lsl #16 @ a00000ggggg000000000000000000000
+ orr tmp, tmp, tmp, lsl #5 @ 000000000000rrrrrrrrrrbbbbbbbbbb
+ orr WK&reg, WK&reg, lsr #5 @ a0000-gggggggggg0000000000000000
+ pkhbt tmp, tmp, tmp, lsl #4 @ 00000000rrrrrrrr------bbbbbbbbbb
+ mov WK&reg, WK&reg, asr #10 @ aaaaaaaaaaa0000-gggggggggg000000
+ pkhtb tmp, tmp, tmp, asr #2 @ 00000000rrrrrrrr--------bbbbbbbb
+ sel WK&reg, tmp, WK&reg @ aaaaaaaarrrrrrrrggggggggbbbbbbbb
+.endm
+
+.macro src_1555_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* Head stage: only issues the source loads; the format conversion is done by the matching process_tail. numbytes presumably counts destination bytes (half as many source bytes are loaded) - confirm against the generator */
+ .if numbytes == 16 /* 4 pixels: 8 source bytes, apparently placed in WK<firstreg> and WK<firstreg+2>, which the tail uses as its two 2-pixel inputs */
+ pixldst ld,, 8, firstreg, %(firstreg+2),,, SRC, unaligned_src
+ .elseif numbytes == 8 /* 2 pixels: 4 source bytes */
+ pixld , 4, firstreg, SRC, unaligned_src
+ .elseif numbytes == 4 /* 1 pixel: 2 source bytes */
+ pixld , 2, firstreg, SRC, unaligned_src
+ .endif
+.endm
+
+.macro src_1555_8888_process_tail cond, numbytes, firstreg /* Tail stage: converts the a1r5g5b5 pixels loaded by the head to 8888 in the WK registers, using STRIDE_M and SCRATCH as temporaries and MASK as the bit mask */
+ .if numbytes == 16 /* 4 pixels: two 2-pixel conversions, results in WK<firstreg> .. WK<firstreg+3> */
+ src_1555_8888_2pixels firstreg, %(firstreg+1), STRIDE_M, SCRATCH, MASK
+ src_1555_8888_2pixels %(firstreg+2), %(firstreg+3), STRIDE_M, SCRATCH, MASK
+ .elseif numbytes == 8 /* 2 pixels: results in WK<firstreg> and WK<firstreg+1> */
+ src_1555_8888_2pixels firstreg, %(firstreg+1), STRIDE_M, SCRATCH, MASK
+ .else /* any other size is handled as a single pixel, converted in place in WK<firstreg> */
+ src_1555_8888_1pixel firstreg, SCRATCH, MASK
+ .endif
+.endm
+
+generate_composite_function_single_scanline \
+ pixman_get_scanline_a1r5g5b5_asm_armv6, 16, 0, 32, /* presumably source, mask and destination bits per pixel - confirm against the generator macro */ \
+ FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER, \
+ 3, /* prefetch distance */ \
+ src_1555_8888_init, /* init: loads MASK and sets the GE flags */ \
+ nop_macro, /* newline */ \
+ nop_macro, /* cleanup */ \
+ src_1555_8888_process_head, /* head: loads the source pixels */ \
+ src_1555_8888_process_tail /* tail: converts the loaded pixels to 8888 */
+
+/******************************************************************************/
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 514231a..e6c5d81 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -111,6 +111,7 @@ PIXMAN_ARM_BIND_COMBINE_U (armv6, add)
PIXMAN_ARM_BIND_GET_SCANLINE (armv6, r5g6b5)
PIXMAN_ARM_BIND_WRITE_BACK (armv6, r5g6b5)
+PIXMAN_ARM_BIND_GET_SCANLINE (armv6, a1r5g5b5) /* presumably defines armv6_get_scanline_a1r5g5b5 (referenced in arm_simd_iters) on top of the assembly fetcher - confirm against the macro definition */
PIXMAN_ARM_BIND_GET_SCANLINE (armv6, a8)
void
@@ -357,6 +358,18 @@ static const pixman_iter_info_t arm_simd_iters[] =
armv6_write_back_r5g6b5
},
+ { PIXMAN_a1r5g5b5, /* iterator entry for a1r5g5b5 images; same layout as the neighbouring entries */
+ (FAST_PATH_STANDARD_FLAGS |
+ FAST_PATH_ID_TRANSFORM |
+ FAST_PATH_NEAREST_FILTER |
+ FAST_PATH_SAMPLES_COVER_CLIP_NEAREST |
+ FAST_PATH_BITS_IMAGE),
+ ITER_NARROW | ITER_SRC, /* source-side iterator only */
+ _pixman_iter_init_bits_stride,
+ armv6_get_scanline_a1r5g5b5, /* scanline fetcher for this format */
+ NULL /* last slot left empty; the r5g6b5 entry above holds its write-back function in this position */
+ },
+
{ PIXMAN_a8,
(FAST_PATH_STANDARD_FLAGS |
FAST_PATH_ID_TRANSFORM |
--
1.7.5.4
More information about the Pixman
mailing list