[Pixman] [PATCH 04/12] vmx: implement fast path vmx_blt
Oded Gabbay
oded.gabbay at gmail.com
Thu Jul 2 03:04:09 PDT 2015
Implement the blt fast path for the VMX backend. Only blits between
images of the same depth (16 bpp or 32 bpp) are handled: each scanline
is copied by first aligning the destination with scalar 2- and 4-byte
copies, then streaming the bulk 64 bytes at a time with unaligned
128-bit VMX loads and aligned 128-bit stores, and finally draining the
tail with scalar copies.

No performance changes were observed when running the cairo trimmed
benchmarks.
Signed-off-by: Oded Gabbay <oded.gabbay at gmail.com>
---
pixman/pixman-vmx.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 124 insertions(+)
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index b9acd6c..b42288b 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -2708,6 +2708,128 @@ vmx_fill (pixman_implementation_t *imp,
     return TRUE;
 }
+static pixman_bool_t
+vmx_blt (pixman_implementation_t *imp,
+         uint32_t *               src_bits,
+         uint32_t *               dst_bits,
+         int                      src_stride,
+         int                      dst_stride,
+         int                      src_bpp,
+         int                      dst_bpp,
+         int                      src_x,
+         int                      src_y,
+         int                      dest_x,
+         int                      dest_y,
+         int                      width,
+         int                      height)
+{
+    uint8_t *src_bytes;
+    uint8_t *dst_bytes;
+    int byte_width;
+
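+    /* Blitting is only supported between images of the same depth.  */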
+    if (src_bpp != dst_bpp)
+        return FALSE;
+
+    if (src_bpp == 16)
+    {
+        /* The strides are passed in uint32_t units; convert them to
+         * uint16_t units for the pixel arithmetic, then to bytes.  */
+        src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+        dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+        src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+        dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
+        byte_width = 2 * width;
+        src_stride *= 2;
+        dst_stride *= 2;
+    }
+    else if (src_bpp == 32)
+    {
+        /* Same conversion for 32 bpp; here the stride is already in
+         * uint32_t units.  */
+        src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+        dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+        src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+        dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
+        byte_width = 4 * width;
+        src_stride *= 4;
+        dst_stride *= 4;
+    }
+    else
+    {
+        return FALSE;
+    }
+
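+    /* Copy the rectangle one scanline at a time.  */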
+    while (height--)
+    {
+        int w;
+        uint8_t *s = src_bytes;
+        uint8_t *d = dst_bytes;
+        src_bytes += src_stride;
+        dst_bytes += dst_stride;
+        w = byte_width;
+
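+        /* Leading 2-byte copies until the destination is 4-byte aligned.  */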
+        while (w >= 2 && ((uintptr_t)d & 3))
+        {
+            *(uint16_t *)d = *(uint16_t *)s;
+            w -= 2;
+            s += 2;
+            d += 2;
+        }
+
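+        /* Leading 4-byte copies until the destination is 16-byte aligned.  */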
+        while (w >= 4 && ((uintptr_t)d & 15))
+        {
+            *(uint32_t *)d = *(uint32_t *)s;
+
+            w -= 4;
+            s += 4;
+            d += 4;
+        }
+
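+        /* Bulk of the scanline: 64 bytes per iteration, read with
+         * unaligned 128-bit loads and written with aligned stores.  */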
+        while (w >= 64)
+        {
+            vector unsigned int vmx0, vmx1, vmx2, vmx3;
+
+            vmx0 = load_128_unaligned ((uint32_t*) s);
+            vmx1 = load_128_unaligned ((uint32_t*)(s + 16));
+            vmx2 = load_128_unaligned ((uint32_t*)(s + 32));
+            vmx3 = load_128_unaligned ((uint32_t*)(s + 48));
+
+            save_128_aligned ((uint32_t*)(d),      vmx0);
+            save_128_aligned ((uint32_t*)(d + 16), vmx1);
+            save_128_aligned ((uint32_t*)(d + 32), vmx2);
+            save_128_aligned ((uint32_t*)(d + 48), vmx3);
+
+            s += 64;
+            d += 64;
+            w -= 64;
+        }
+
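+        /* Remaining whole 16-byte chunks.  */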
+        while (w >= 16)
+        {
+            save_128_aligned ((uint32_t*) d, load_128_unaligned ((uint32_t*) s));
+
+            w -= 16;
+            d += 16;
+            s += 16;
+        }
+
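+        /* Trailing 4-byte copies.  */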
+        while (w >= 4)
+        {
+            *(uint32_t *)d = *(uint32_t *)s;
+
+            w -= 4;
+            s += 4;
+            d += 4;
+        }
+
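+        /* At most one trailing 2-byte copy can remain.  */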
+        if (w >= 2)
+        {
+            *(uint16_t *)d = *(uint16_t *)s;
+            w -= 2;
+            s += 2;
+            d += 2;
+        }
+    }
+
+    return TRUE;
+}
+
 static void
 vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
@@ -2812,6 +2934,7 @@ vmx_composite_add_8888_8888 (pixman_implementation_t *imp,
 static const pixman_fast_path_t vmx_fast_paths[] =
 {
+    /* PIXMAN_OP_OVER */
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
@@ -2865,6 +2988,7 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
     imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
     imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;
+    imp->blt = vmx_blt;
     imp->fill = vmx_fill;
 
     return imp;
--
2.4.3