[Pixman] [PATCH] sse2: Implement simple bilinear scaling for x8r8g8b8 to a8r8g8b8

Chris Wilson chris at chris-wilson.co.uk
Wed Jan 23 06:37:41 PST 2013


Improves firefox-tron on an IVB i7-3720qm: 68.6s to 45.2s.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 pixman/pixman-sse2.c |   63 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index fc873cc..bc3e2f1 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5679,6 +5679,67 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
 			       NORMAL, FLAG_NONE)
 
 static force_inline void
+scaled_bilinear_scanline_sse2_0888_8888_SRC (uint32_t *       dst,
+					     const uint32_t * mask,
+					     const uint32_t * src_top,
+					     const uint32_t * src_bottom,
+					     int32_t          w,
+					     int              wt,
+					     int              wb,
+					     pixman_fixed_t   vx,
+					     pixman_fixed_t   unit_x,
+					     pixman_fixed_t   max_vx,
+					     pixman_bool_t    zero_src)
+{
+    BILINEAR_DECLARE_VARIABLES;
+    uint32_t pix1, pix2, pix3, pix4;
+
+    while ((w -= 4) >= 0)
+    {
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+	*dst++ = pix1 | 0xff000000;
+	*dst++ = pix2 | 0xff000000;
+	*dst++ = pix3 | 0xff000000;
+	*dst++ = pix4 | 0xff000000;
+    }
+
+    if (w & 2)
+    {
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+	*dst++ = pix1 | 0xff000000;
+	*dst++ = pix2 | 0xff000000;
+    }
+
+    if (w & 1)
+    {
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+	*dst = pix1 | 0xff000000;
+    }
+
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_0888_8888_cover_SRC,
+			       scaled_bilinear_scanline_sse2_0888_8888_SRC,
+			       uint32_t, uint32_t, uint32_t,
+			       COVER, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_0888_8888_pad_SRC,
+			       scaled_bilinear_scanline_sse2_0888_8888_SRC,
+			       uint32_t, uint32_t, uint32_t,
+			       PAD, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_0888_8888_none_SRC,
+			       scaled_bilinear_scanline_sse2_0888_8888_SRC,
+			       uint32_t, uint32_t, uint32_t,
+			       NONE, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_0888_8888_normal_SRC,
+			       scaled_bilinear_scanline_sse2_0888_8888_SRC,
+			       uint32_t, uint32_t, uint32_t,
+			       NORMAL, FLAG_NONE)
+
+static force_inline void
 scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t *       dst,
 					      const uint32_t * mask,
 					      const uint32_t * src_top,
@@ -6185,6 +6246,8 @@ static const pixman_fast_path_t sse2_fast_paths[] =
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8, sse2_0888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8, sse2_0888_8888),
 
     SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
-- 
1.7.10.4



More information about the Pixman mailing list