[Pixman] [PATCH 3/4] armv6: Add four more nearest-scaled-cover fast paths

Ben Avison bavison at riscosopen.org
Tue Aug 25 16:23:25 PDT 2015


This patch implements shortcuts so that the nearest scaled cover fetcher
iterator is used in preference to the fast paths in pixman-fast-path.c.
This is because the fetcher performs better than those fast paths.

This is presented as an alternative to the patch I posted last year, which
used macroised C wrappers around the fetcher rather than reusing
general_composite_rect as this one does. Judging by the following benchmarks:

lowlevel-blt-bench -n over_8888_8888

       Before          Old patch      New patch       Change
      Mean StdDev     Mean StdDev    Mean StdDev    Old     New
L1    13.8   0.10     25.0   0.19    24.7   0.19   +81.2%  +79.2%
L2     8.8   0.16     21.1   0.40    20.8   0.36  +140.2% +136.6%
M      8.1   0.01     22.5   0.04    21.8   0.05  +175.7% +167.6%
HT     7.0   0.02     13.1   0.05    11.4   0.04   +87.4%  +63.5%
VT     6.9   0.03     12.4   0.06    10.9   0.04   +79.9%  +59.4%
R      6.6   0.02     12.1   0.07    10.7   0.05   +83.2%  +61.6%
RT     4.2   0.06      5.7   0.09     4.3   0.08   +36.6%   +1.6%

lowlevel-blt-bench -n src_x888_8888

       Before          Old patch      New patch       Change
      Mean StdDev     Mean StdDev    Mean StdDev    Old     New
L1   108.6   3.02     64.4   0.99    47.8   1.06   -40.7%  -56.0%
L2    29.4   2.08     42.0   1.72    36.2   1.21   +43.0%  +23.3%
M     25.4   0.09     57.8   0.09    44.8   0.13  +127.9%  +76.5%
HT    18.8   0.11     26.7   0.20    18.1   0.16   +41.7%   -3.8%
VT    18.6   0.12     24.7   0.19    17.4   0.26   +32.8%   -6.2%
R     17.1   0.07     23.5   0.16    16.8   0.23   +37.2%   -1.9%
RT     8.2   0.14      9.1   0.18     5.9   0.14   +11.8%  -28.2%

lowlevel-blt-bench -n src_8888_0565

       Before          Old patch      New patch       Change
      Mean StdDev     Mean StdDev    Mean StdDev    Old     New
L1    58.0   0.93     40.0   0.42    20.9   0.17   -31.1%  -63.9%
L2    21.9   0.78     32.9   0.75    18.8   0.32   +50.1%  -14.3%
M     23.1   0.03     39.8   0.08    20.2   0.10   +72.4%  -12.5%
HT    16.7   0.11     20.5   0.15    10.5   0.30   +22.9%  -37.1%
VT    15.8   0.15     19.4   0.13    10.3   0.34   +23.0%  -34.9%
R     15.2   0.13     18.8   0.14     9.9   0.24   +24.0%  -34.8%
RT     7.6   0.07      8.1   0.18     3.9   0.15    +6.6%  -48.1%

lowlevel-blt-bench -n over_8888_0565

       Before          Old patch      New patch       Change
      Mean StdDev     Mean StdDev    Mean StdDev    Old     New
L1     9.8   0.02     14.9   0.09    14.5   0.08   +51.9%  +48.1%
L2     7.1   0.05     14.1   0.23    13.6   0.19   +98.8%  +91.8%
M      7.2   0.00     14.8   0.04    14.5   0.04  +106.3% +101.9%
HT     6.2   0.02      9.7   0.04     8.7   0.03   +55.9%  +39.3%
VT     6.1   0.01      9.5   0.05     8.5   0.04   +55.5%  +39.5%
R      5.9   0.02      9.1   0.03     8.2   0.04   +54.5%  +39.7%
RT     3.9   0.02      4.4   0.06     3.5   0.04   +12.7%  -10.1%

I find it hard to advocate this patch, even though it is somewhat simpler.
---
 pixman/pixman-arm-simd.c |   13 +++++++++++++
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index a72f9da..1c6c1e9 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -399,12 +399,25 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, armv6_composite_over_n_8888_8888_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, armv6_composite_over_n_8888_8888_ca),
 
+    PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (SRC, a8r8g8b8, r5g6b5, src_8888_0565),
+    PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (SRC, x8r8g8b8, r5g6b5, src_8888_0565),
+    PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (SRC, a8b8g8r8, b5g6r5, src_8888_0565),
+    PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (SRC, x8b8g8r8, b5g6r5, src_8888_0565),
+    PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (OVER, a8r8g8b8, r5g6b5, over_8888_0565),
+    PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (OVER, a8b8g8r8, b5g6r5, over_8888_0565),
+
     PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (SRC, a8r8g8b8, a8r8g8b8, src_8888_8888),
     PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (SRC, a8r8g8b8, x8r8g8b8, src_8888_8888),
     PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (SRC, x8r8g8b8, x8r8g8b8, src_8888_8888),
     PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (SRC, a8b8g8r8, a8b8g8r8, src_8888_8888),
     PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (SRC, a8b8g8r8, x8b8g8r8, src_8888_8888),
     PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (SRC, x8b8g8r8, x8b8g8r8, src_8888_8888),
+    PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (SRC, x8r8g8b8, a8r8g8b8, src_x888_8888),
+    PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (SRC, x8b8g8r8, a8b8g8r8, src_x888_8888),
+    PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (OVER, a8r8g8b8, a8r8g8b8, over_8888_8888),
+    PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (OVER, a8r8g8b8, x8r8g8b8, over_8888_8888),
+    PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (OVER, a8b8g8r8, a8b8g8r8, over_8888_8888),
+    PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH_VIA_ITER (OVER, a8b8g8r8, x8b8g8r8, over_8888_8888),
 
     SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
     SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
-- 
1.7.5.4



More information about the Pixman mailing list