[Pixman] [PATCH 2/3] vmx: implement fast path composite_add_8_8

Oded Gabbay oded.gabbay at gmail.com
Tue Jun 23 02:54:40 PDT 2015


Copied the implementation from the sse2 file and edited it to use vmx functions.

POWER8, 16 cores, 3.4GHz, ppc64le :

reference memcpy speed = 27036.4MB/s (6759.1MP/s for 32bpp fills)

                Before           After           Change
              ---------------------------------------------
L1              687.63          9140.84         +1229.33%
L2              715             7495.78         +948.36%
M               717.39          8460.14         +1079.29%
HT              569.56          1020.12         +79.11%
VT              520.3           1215.56         +133.63%
R               514.81          874.35          +69.84%
RT              341.28          305.42          -10.51%
Kops/s          1621            1579            -2.59%

Signed-off-by: Oded Gabbay <oded.gabbay at gmail.com>
---
 pixman/pixman-vmx.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index b3e06c2..39843de 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -2098,12 +2098,67 @@ vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
     }
 }
 
+/* ADD fast path for an a8 source and a8 destination.  The unaligned head
+ * and the leftover tail are added byte by byte with saturation at 0xff;
+ * the 32-bit words in between are handed to vmx_combine_add_u. */
+static void
+vmx_composite_add_8_8 (pixman_implementation_t *imp,
+            pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint8_t *dst_line, *dst, *src_line, *src;
+    int dst_stride, src_stride;
+    int32_t w;
+    uint16_t t; /* wider than a byte so bit 8 holds the carry */
+
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+    while (height--)
+    {
+	dst = dst_line;
+	src = src_line;
+	dst_line += dst_stride;
+	src_line += src_stride;
+	w = width;
+
+	/* Small head: byte-wise until dst is 4-byte aligned */
+	while (w && ((uintptr_t)dst & 3))
+	{
+	    t = (*dst) + (*src++);
+	    *dst++ = t | (0 - (t >> 8)); /* carry => all ones => 0xff */
+	    w--;
+	}
+
+	vmx_combine_add_u (imp, op,
+		    (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
+
+	/* Step over the w >> 2 words passed above.  Masking with ~3 instead
+	 * of 0xfffc keeps the high bits of w, so widths >= 65536 still work. */
+	dst += w & ~3;
+	src += w & ~3;
+	w &= 3;
+
+	/* Small tail: the remaining 0-3 bytes */
+	while (w)
+	{
+	    t = (*dst) + (*src++);
+	    *dst++ = t | (0 - (t >> 8));
+	    w--;
+	}
+    }
+}
+
 static const pixman_fast_path_t vmx_fast_paths[] =
 {
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, vmx_composite_over_8888_8888),
+
+    /* PIXMAN_OP_ADD: a8 onto a8 (mask slot is null) */
+    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8),
+
     {   PIXMAN_OP_NONE	},
 };
 
-- 
2.4.3



More information about the Pixman mailing list