[Pixman] [PATCH 3/3] Use more unrolling for scaled src_0565_0565 with nearest filter

Siarhei Siamashka siarhei.siamashka at gmail.com
Wed Sep 8 00:45:08 PDT 2010


Benchmarks from Intel Core i7 860:

== before ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=1335.29 MPix/s

== after ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=1550.96 MPix/s

== performance of nonscaled operation as a reference ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=2401.31 MPix/s
---
 pixman/pixman-fast-path.c |   56 ++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 55 insertions(+), 1 deletions(-)

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 12036a9..a26fc77 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1393,11 +1393,65 @@ FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE);
 FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL);
 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE);
 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL);
-FAST_NEAREST (565_565_none, 0565, 0565, uint16_t, uint16_t, SRC, NONE);
 FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL);
 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE);
 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL);
 
+/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
+
+static inline void /* Unrolled nearest-neighbour scanline copy: each output pixel is src[vx >> 16], then vx advances by unit_x. */
+scaled_nearest_scanline_565_565_none_SRC (uint16_t *      dst,    /* destination pixels, written sequentially */
+					  uint16_t *      src,    /* source pixels, indexed by vx >> 16 */
+					  int32_t         w,      /* number of pixels to write (code assumes w >= 0) */
+					  pixman_fixed_t  vx,     /* starting source position; the source index used is vx >> 16 */
+					  pixman_fixed_t  unit_x, /* step added to vx after each pixel */
+					  pixman_fixed_t  max_vx) /* not used by this function */
+{
+    while ((w -= 8) >= 0) /* main loop: 8 pixels per iteration; on exit w is negative (remaining count minus 8) */
+    {
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+    }
+    if (w & 4) /* w is off from the remaining count by exactly 8, so bits 0..2 still match it (two's complement assumed) */
+    {
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+    }
+    if (w & 2) /* two more pixels if bit 1 of the remaining count is set */
+    {
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+	*dst++ = src[vx >> 16];
+	vx += unit_x;
+    }
+    if (w & 1) /* final odd pixel; vx is not advanced afterwards because it is no longer needed */
+	*dst++ = src[vx >> 16];
+}
+
+FAST_NEAREST_MAINLOOP (565_565_none_SRC,
+		       scaled_nearest_scanline_565_565_none_SRC,
+		       uint16_t, uint16_t, NONE); /* NOTE(review): macro is defined outside this patch; presumably generates the 565_565_none_SRC fast path around the scanline function above -- confirm */
+
 static force_inline uint32_t
 fetch_nearest (pixman_repeat_t src_repeat,
 	       pixman_format_code_t format,
-- 
1.7.2.2



More information about the Pixman mailing list