[Pixman] [PATCH 04/12] vmx: implement fast path vmx_blt

Oded Gabbay oded.gabbay at gmail.com
Thu Jul 2 03:04:09 PDT 2015


No changes were observed when running cairo trimmed benchmarks.

Signed-off-by: Oded Gabbay <oded.gabbay at gmail.com>
---
 pixman/pixman-vmx.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)

diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index b9acd6c..b42288b 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -2708,6 +2708,128 @@ vmx_fill (pixman_implementation_t *imp,
     return TRUE;
 }
 
+static pixman_bool_t
+vmx_blt (pixman_implementation_t *imp,         /* not referenced in this body */
+          uint32_t *               src_bits,   /* origin for the src_x/src_y offset */
+          uint32_t *               dst_bits,   /* origin for the dest_x/dest_y offset */
+          int                      src_stride, /* row pitch, in uint32_t units */
+          int                      dst_stride, /* row pitch, in uint32_t units */
+          int                      src_bpp,    /* must equal dst_bpp and be 16 or 32 */
+          int                      dst_bpp,
+          int                      src_x,      /* in pixels */
+          int                      src_y,      /* in rows */
+          int                      dest_x,     /* in pixels */
+          int                      dest_y,     /* in rows */
+          int                      width,      /* in pixels */
+          int                      height)     /* in rows */
+{
+    uint8_t *   src_bytes;
+    uint8_t *   dst_bytes;
+    int byte_width;
+    /* Copies a width x height pixel rectangle from src to dst, row by row. */
+    if (src_bpp != dst_bpp)
+	return FALSE; /* no depth conversion is done here */
+    /* Each branch rescales the strides to pixel units for indexing, then to bytes. */
+    if (src_bpp == 16)
+    {
+	src_stride = src_stride * (int) sizeof (uint32_t) / 2; /* uint16_t units */
+	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; /* uint16_t units */
+	src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
+	byte_width = 2 * width;
+	src_stride *= 2; /* now in bytes */
+	dst_stride *= 2; /* now in bytes */
+    }
+    else if (src_bpp == 32)
+    {
+	src_stride = src_stride * (int) sizeof (uint32_t) / 4; /* uint32_t units */
+	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; /* uint32_t units */
+	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
+	byte_width = 4 * width;
+	src_stride *= 4; /* now in bytes */
+	dst_stride *= 4; /* now in bytes */
+    }
+    else
+    {
+	return FALSE; /* any other depth is rejected */
+    }
+    /* One pass of the loop body per row; w counts the bytes left in that row. */
+    while (height--)
+    {
+	int w;
+	uint8_t *s = src_bytes;
+	uint8_t *d = dst_bytes;
+	src_bytes += src_stride; /* row pointers are advanced up front */
+	dst_bytes += dst_stride;
+	w = byte_width;
+	/* 16-bit copies until d is 4-byte aligned (or the row runs out). */
+	while (w >= 2 && ((uintptr_t)d & 3))
+	{
+	    *(uint16_t *)d = *(uint16_t *)s;
+	    w -= 2;
+	    s += 2;
+	    d += 2;
+	}
+	/* 32-bit copies until d is 16-byte aligned (or fewer than 4 bytes remain). */
+	while (w >= 4 && ((uintptr_t)d & 15))
+	{
+	    *(uint32_t *)d = *(uint32_t *)s;
+
+	    w -= 4;
+	    s += 4;
+	    d += 4;
+	}
+	/* Main loop: 64 bytes per pass, four 128-bit loads followed by four stores. */
+	while (w >= 64)
+	{
+	    vector unsigned int vmx0, vmx1, vmx2, vmx3;
+	    /* Only d was aligned above; s may sit at any offset, hence the unaligned load helper. */
+	    vmx0 = load_128_unaligned ((uint32_t*) s);
+	    vmx1 = load_128_unaligned ((uint32_t*)(s + 16));
+	    vmx2 = load_128_unaligned ((uint32_t*)(s + 32));
+	    vmx3 = load_128_unaligned ((uint32_t*)(s + 48));
+	    /* With w >= 64 the loop above can only have exited with d 16-byte aligned. */
+	    save_128_aligned ((uint32_t*)(d), vmx0);
+	    save_128_aligned ((uint32_t*)(d + 16), vmx1);
+	    save_128_aligned ((uint32_t*)(d + 32), vmx2);
+	    save_128_aligned ((uint32_t*)(d + 48), vmx3);
+
+	    s += 64;
+	    d += 64;
+	    w -= 64;
+	}
+	/* Remaining whole 16-byte chunks, one vector per pass. */
+	while (w >= 16)
+	{
+	    save_128_aligned ((uint32_t*) d, load_128_unaligned ((uint32_t*) s));
+
+	    w -= 16;
+	    d += 16;
+	    s += 16;
+	}
+	/* Remaining 32-bit words. */
+	while (w >= 4)
+	{
+	    *(uint32_t *)d = *(uint32_t *)s;
+
+	    w -= 4;
+	    s += 4;
+	    d += 4;
+	}
+	/* Fewer than 4 bytes are left here, so at most one 16-bit unit remains. */
+	if (w >= 2)
+	{
+	    *(uint16_t *)d = *(uint16_t *)s;
+	    w -= 2; /* w, s and d are re-initialised at the top of the next row */
+	    s += 2;
+	    d += 2;
+	}
+    }
+    /* Every row has been copied. */
+    return TRUE;
+}
+
 static void
 vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
                                pixman_composite_info_t *info)
@@ -2812,6 +2934,7 @@ vmx_composite_add_8888_8888 (pixman_implementation_t *imp,
 
 static const pixman_fast_path_t vmx_fast_paths[] =
 {
+    /* PIXMAN_OP_OVER */
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
@@ -2865,6 +2988,7 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
     imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
     imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;
 
+    imp->blt = vmx_blt;
     imp->fill = vmx_fill;
 
     return imp;
-- 
2.4.3



More information about the Pixman mailing list