[Pixman] [PATCH 2/3] armv7: Faster fill operations
Ben Avison
bavison at riscosopen.org
Wed Mar 4 17:56:00 PST 2015
This eliminates a number of branches over blocks of code that are either
empty or can be trivially combined with a separate code block at the start
and end of each scanline. This has a surprisingly big effect, at least on
Cortex-A7, for src_n_8:
          Before           After
        Mean  StdDev     Mean  StdDev  Confidence  Change
L1    1570.4   133.1   1639.6   110.7      100.0%   +4.4%
L2    1042.6    19.9   1086.6    23.4      100.0%   +4.2%
M     1030.8     7.2   1036.8     3.2      100.0%   +0.6%
HT     287.4     3.5    303.3     2.9      100.0%   +5.5%
VT     262.0     2.6    263.3     2.6       99.9%   +0.5%
R      206.5     2.4    209.9     2.4      100.0%   +1.7%
RT      56.5     1.0     59.2     0.5      100.0%   +4.7%
---
pixman/pixman-arm-neon-asm.h | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
index 76b3985..03257cc 100644
--- a/pixman/pixman-arm-neon-asm.h
+++ b/pixman/pixman-arm-neon-asm.h
@@ -468,6 +468,7 @@
tst DST_R, #0xF
beq 2f
+.if src_bpp > 0 || mask_bpp > 0 || dst_r_bpp > 0
.irp lowbit, 1, 2, 4, 8, 16
local skip1
.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
@@ -487,6 +488,7 @@ local skip1
1:
.endif
.endr
+.endif
pixdeinterleave src_bpp, src_basereg
pixdeinterleave mask_bpp, mask_basereg
pixdeinterleave dst_r_bpp, dst_r_basereg
@@ -503,6 +505,9 @@ local skip1
tst DST_W, #lowbit
beq 1f
.endif
+.if src_bpp == 0 && mask_bpp == 0 && dst_r_bpp == 0
+ sub W, W, #(lowbit * 8 / dst_w_bpp)
+.endif
pixst_a (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W
1:
.endif
@@ -533,6 +538,7 @@ local skip1
process_pixblock_tail_head
tst W, #(pixblock_size - 1)
beq 2f
+.if src_bpp > 0 || mask_bpp > 0 || dst_r_bpp > 0
.irp chunk_size, 16, 8, 4, 2, 1
.if pixblock_size > chunk_size
tst W, #chunk_size
@@ -550,6 +556,7 @@ local skip1
1:
.endif
.endr
+.endif
pixdeinterleave src_bpp, src_basereg
pixdeinterleave mask_bpp, mask_basereg
pixdeinterleave dst_r_bpp, dst_r_basereg
--
1.7.5.4
More information about the Pixman mailing list