[Pixman] [PATCH 08/12] vmx: implement fast path vmx_composite_src_x888_8888
Oded Gabbay
oded.gabbay at gmail.com
Thu Jul 2 03:04:13 PDT 2015
POWER8, 8 cores, 3.4GHz, RHEL 7.1 ppc64le.
reference memcpy speed = 24764.8MB/s (6191.2MP/s for 32bpp fills)
Before After Change
---------------------------------------------
L1 1115.4 5006.49 +348.85%
L2 1112.26 4338.01 +290.02%
M 1110.54 2524.15 +127.29%
HT 745.41 1140.03 +52.94%
VT 749.03 1287.13 +71.84%
R 423.91 547.6 +29.18%
RT 205.79 194.98 -5.25%
Kops/s 1414 1361 -3.75%
cairo trimmed benchmarks :
Speedups
========
t-gnome-system-monitor 1402.62 -> 1212.75 : 1.16x
t-firefox-asteroids 533.92 -> 474.50 : 1.13x
Signed-off-by: Oded Gabbay <oded.gabbay at gmail.com>
---
pixman/pixman-vmx.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 58 insertions(+)
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 5c74a47..d5ddf4b 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -2967,6 +2967,62 @@ vmx_composite_copy_area (pixman_implementation_t *imp,
}
static void
+vmx_composite_src_x888_8888 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{ /* SRC fast path: copies a width x height block of 32-bit pixels from the source lines to the destination lines, OR-ing 0xff000000 into every pixel (top byte forced to 0xff). */
+ PIXMAN_COMPOSITE_ARGS (info); /* macro defined elsewhere; presumably declares src_image, dest_image, src_x/src_y, dest_x/dest_y, width and height from info -- confirm */
+ uint32_t *dst_line, *dst; /* dst_line: start of the current destination row; dst: write cursor inside that row */
+ uint32_t *src_line, *src; /* src_line: start of the current source row; src: read cursor inside that row */
+ int32_t w; /* pixels still to be written in the current row */
+ int dst_stride, src_stride; /* row-to-row distance, added to uint32_t pointers, i.e. counted in 32-bit pixels */
+ /* NOTE(review): PIXMAN_IMAGE_GET_LINE is defined elsewhere; it looks like it fills in the stride and the pointer to the first pixel at (x, y) -- confirm. */
+ PIXMAN_IMAGE_GET_LINE (
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+ /* One iteration of the outer loop handles one row. */
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride; /* advance to the next row now; dst keeps walking the current one */
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+ /* Head: scalar copy until dst is 16-byte aligned or the row runs out. */
+ while (w && (uintptr_t)dst & 15) /* '&' binds tighter than '&&', so this tests w != 0 and (dst & 15) != 0 */
+ {
+ *dst++ = *src++ | 0xff000000;
+ w--;
+ }
+ /* Body: 16 pixels per iteration as four vectors of four 32-bit pixels each. */
+ while (w >= 16)
+ {
+ vector unsigned int vmx_src1, vmx_src2, vmx_src3, vmx_src4;
+ /* Only dst was aligned by the head loop; src may still be misaligned, hence the *_unaligned load helper (defined elsewhere). */
+ vmx_src1 = load_128_unaligned (src);
+ vmx_src2 = load_128_unaligned (src + 4);
+ vmx_src3 = load_128_unaligned (src + 8);
+ vmx_src4 = load_128_unaligned (src + 12);
+ /* mask_ff000000 is defined elsewhere; presumably 0xff000000 replicated in each 32-bit lane, mirroring the scalar loops -- confirm. */
+ save_128_aligned (dst, vec_or (vmx_src1, mask_ff000000));
+ save_128_aligned (dst + 4, vec_or (vmx_src2, mask_ff000000));
+ save_128_aligned (dst + 8, vec_or (vmx_src3, mask_ff000000));
+ save_128_aligned (dst + 12, vec_or (vmx_src4, mask_ff000000));
+ /* Step both cursors past the 16 pixels just written. */
+ dst += 16;
+ src += 16;
+ w -= 16;
+ }
+ /* Tail: fewer than 16 pixels remain; finish the row one pixel at a time. */
+ while (w)
+ {
+ *dst++ = *src++ | 0xff000000;
+ w--;
+ }
+ }
+}
+
+static void
vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
@@ -3200,6 +3256,8 @@ static const pixman_fast_path_t vmx_fast_paths[] =
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, vmx_composite_add_8888_8888),
/* PIXMAN_OP_SRC */
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, vmx_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, vmx_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, vmx_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, vmx_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, vmx_composite_copy_area),
--
2.4.3
More information about the Pixman
mailing list