[Pixman] [PATCH 32/32] armv6: Add four more nearest-scaled-cover fast paths
Ben Avison
bavison at riscosopen.org
Thu Aug 7 09:50:28 PDT 2014
These complete the set of fast paths for which pixman-fast-path.c currently
provides versions that get selected in preference to the armv6-optimised
scanline fetcher/combiner/writeback routines.
Because generation of these fast paths is macroised, the patch required
to add them is fairly simple.
lowlevel-blt-bench -n over_8888_8888:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 13.8 0.0 26.5 0.2 100.0% +91.7%
L2 9.4 0.2 22.9 0.4 100.0% +142.6%
M 8.6 0.0 23.8 0.0 100.0% +176.1%
HT 7.4 0.0 14.1 0.1 100.0% +91.2%
VT 7.3 0.0 13.4 0.1 100.0% +84.1%
R 7.0 0.0 13.0 0.1 100.0% +85.9%
RT 4.5 0.1 6.2 0.1 100.0% +36.6%
affine-bench * 0 0 1 over a8r8g8b8 a8r8g8b8:
Before After
Mean StdDev Mean StdDev Confidence Change
0.5 9.4 0.0 28.0 0.0 100.0% +197.4%
0.75 9.0 0.0 26.1 0.0 100.0% +190.2%
1.0 8.6 0.0 24.4 0.0 100.0% +184.6%
1.5 7.9 0.0 21.7 0.0 100.0% +173.4%
2.0 7.3 0.0 19.6 0.0 100.0% +166.6%
lowlevel-blt-bench -n src_x888_8888:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 108.6 2.0 66.3 0.9 100.0% -39.0%
L2 32.4 1.5 44.3 2.1 100.0% +36.8%
M 27.5 0.1 62.0 0.1 100.0% +125.6%
HT 20.3 0.1 28.7 0.2 100.0% +41.2%
VT 19.9 0.1 26.7 0.1 100.0% +34.4%
R 18.6 0.1 25.3 0.2 100.0% +36.3%
RT 8.7 0.1 9.8 0.2 100.0% +12.6%
affine-bench * 0 0 1 src x8r8g8b8 a8r8g8b8:
Before After
Mean StdDev Mean StdDev Confidence Change
0.5 45.2 0.0 97.2 0.1 100.0% +115.1%
0.75 35.9 0.1 76.7 0.1 100.0% +113.9%
1.0 29.6 0.1 61.1 0.1 100.0% +106.4%
1.5 21.4 0.0 52.7 0.1 100.0% +145.9%
2.0 16.7 0.0 43.0 0.1 100.0% +156.9%
lowlevel-blt-bench -n src_8888_0565:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 57.2 0.7 43.1 0.4 100.0% -24.7%
L2 23.0 1.0 32.8 1.0 100.0% +42.5%
M 24.8 0.0 42.2 0.0 100.0% +70.0%
HT 18.0 0.1 22.1 0.1 100.0% +22.5%
VT 17.1 0.1 21.0 0.1 100.0% +22.5%
R 16.5 0.1 20.0 0.1 100.0% +21.4%
RT 8.3 0.2 8.4 0.1 95.0% +1.0% (insignificant)
affine-bench * 0 0 1 src a8r8g8b8 r5g6b5:
Before After
Mean StdDev Mean StdDev Confidence Change
0.5 34.9 0.0 55.3 0.0 100.0% +58.7%
0.75 29.3 0.0 49.1 0.0 100.0% +67.4%
1.0 24.8 0.0 42.6 0.1 100.0% +71.6%
1.5 19.0 0.0 38.2 0.1 100.0% +100.7%
2.0 15.4 0.0 31.8 0.0 100.0% +107.1%
lowlevel-blt-bench -n over_8888_0565:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 9.8 0.0 15.3 0.1 100.0% +56.6%
L2 7.4 0.0 14.3 0.2 100.0% +91.7%
M 7.5 0.0 15.4 0.0 100.0% +106.0%
HT 6.5 0.0 10.1 0.0 100.0% +54.5%
VT 6.4 0.0 9.9 0.0 100.0% +54.6%
R 6.2 0.0 9.5 0.0 100.0% +52.1%
RT 4.2 0.0 4.6 0.1 100.0% +9.8%
affine-bench * 0 0 1 over a8r8g8b8 r5g6b5:
Before After
Mean StdDev Mean StdDev Confidence Change
0.5 8.0 0.0 17.3 0.0 100.0% +116.1%
0.75 7.8 0.0 16.5 0.0 100.0% +112.9%
1.0 7.5 0.0 15.7 0.0 100.0% +110.5%
1.5 7.0 0.0 14.8 0.0 100.0% +112.8%
2.0 6.5 0.0 13.7 0.0 100.0% +111.4%
---
pixman/pixman-arm-simd.c | 17 +++++++++++++++++
1 files changed, 17 insertions(+), 0 deletions(-)
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index fedb92d..e69216a 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -276,7 +276,11 @@ pixman_get_scanline_r5g6b5_nearest_scaled_cover_r5g6b5_asm_armv6(uint32_t
const uint16_t *source);
BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, src_8888_8888, SRC, src, uint32_t, uint32_t, a8r8g8b8, a8r8g8b8)
+BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, src_x888_8888, SRC, src, uint32_t, uint32_t, x8r8g8b8, a8r8g8b8)
+BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, src_8888_0565, SRC, src, uint32_t, uint16_t, a8r8g8b8, r5g6b5)
BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, src_0565_0565, SRC, src, uint16_t, uint16_t, r5g6b5, r5g6b5)
+BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, over_8888_8888, OVER, over, uint32_t, uint32_t, a8r8g8b8, a8r8g8b8)
+BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, over_8888_0565, OVER, over, uint32_t, uint16_t, a8r8g8b8, r5g6b5)
void
pixman_composite_src_n_8888_asm_armv6 (int32_t w,
@@ -490,12 +494,25 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, r5g6b5, r5g6b5, src_0565_0565),
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, b5g6r5, b5g6r5, src_0565_0565),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8r8g8b8, r5g6b5, src_8888_0565),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8r8g8b8, r5g6b5, src_8888_0565),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8b8g8r8, b5g6r5, src_8888_0565),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8b8g8r8, b5g6r5, src_8888_0565),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8r8g8b8, r5g6b5, over_8888_0565),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8b8g8r8, b5g6r5, over_8888_0565),
+
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8r8g8b8, a8r8g8b8, src_8888_8888),
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8r8g8b8, x8r8g8b8, src_8888_8888),
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8r8g8b8, x8r8g8b8, src_8888_8888),
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8b8g8r8, a8b8g8r8, src_8888_8888),
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8b8g8r8, x8b8g8r8, src_8888_8888),
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8b8g8r8, x8b8g8r8, src_8888_8888),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8r8g8b8, a8r8g8b8, src_x888_8888),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8b8g8r8, a8b8g8r8, src_x888_8888),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8r8g8b8, a8r8g8b8, over_8888_8888),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8r8g8b8, x8r8g8b8, over_8888_8888),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8b8g8r8, a8b8g8r8, over_8888_8888),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8b8g8r8, x8b8g8r8, over_8888_8888),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
--
1.7.5.4
More information about the Pixman mailing list