[Pixman] [PATCH 3/3] sse2: Implement a LERP_SRC combiner

Chris Wilson chris at chris-wilson.co.uk
Wed Sep 14 08:06:46 PDT 2011


Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 pixman/pixman-sse2.c |   80 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 80 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 6689c53..fee7457 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -1478,6 +1478,85 @@ sse2_combine_saturate_u (pixman_implementation_t *imp,
     }
 }
 
+/* Blend one pixel: src scaled by the mask's alpha, laid over dst with it. */
+static force_inline uint32_t
+core_combine_lerp_u_pixel_sse2 (uint32_t src, uint32_t mask, uint32_t dst)
+{
+    __m128i alpha = expand_alpha_1x128 (unpack_32_1x128 (mask));
+    __m128i scaled_src = pix_multiply_1x128 (unpack_32_1x128 (src), alpha);
+    __m128i dest = unpack_32_1x128 (dst);
+
+    /* The same expanded alpha weights the source and drives the OVER step. */
+    return pack_1x128_32 (over_1x128 (scaled_src, alpha, dest));
+}
+
+/* Unified-alpha LERP_SRC combiner: each pd[i] is blended towards ps[i] by
+ * the alpha channel of pm[i], via over (ps[i] * a, a, pd[i]).  When pm is
+ * NULL there is no mask and the w source pixels are copied as-is. */
+static void
+sse2_combine_lerp_u (pixman_implementation_t *imp,
+		     pixman_op_t              op,
+		     uint32_t *               pd,
+		     const uint32_t *         ps,
+		     const uint32_t *         pm,
+		     int                      w)
+{
+    __m128i xmm_src_lo, xmm_src_hi;
+    __m128i xmm_dst_lo, xmm_dst_hi;
+    __m128i xmm_mask_lo, xmm_mask_hi;
+
+    if (pm == NULL)
+    {
+	memcpy (pd, ps, w * sizeof (uint32_t));
+	return;
+    }
+
+    /* One pixel at a time until pd reaches a 16-byte boundary. */
+    for (; w && ((unsigned long)pd & 15); w--, pd++)
+    {
+	*pd = core_combine_lerp_u_pixel_sse2 (*ps++, *pm++, *pd);
+    }
+
+    /* Four pixels per pass; only pd is aligned, ps and pm may not be. */
+    while (w >= 4)
+    {
+	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+	/* Use only the mask's alpha, exactly as the scalar helper does;
+	 * otherwise head/tail pixels and these pixels blend differently. */
+	expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi,
+			    &xmm_mask_lo, &xmm_mask_hi);
+
+	pix_multiply_2x128 (&xmm_src_lo,  &xmm_src_hi,
+			    &xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_src_lo,  &xmm_src_hi);
+
+	over_2x128 (&xmm_src_lo, &xmm_src_hi,
+		    &xmm_mask_lo, &xmm_mask_hi,
+		    &xmm_dst_lo, &xmm_dst_hi);
+
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+	ps += 4;
+	pd += 4;
+	pm += 4;
+	w -= 4;
+    }
+
+    /* Whatever is left (fewer than four pixels) goes through the helper. */
+    for (; w > 0; w--, pd++)
+    {
+	*pd = core_combine_lerp_u_pixel_sse2 (*ps++, *pm++, *pd);
+    }
+}
+
 static void
 sse2_combine_src_ca (pixman_implementation_t *imp,
                      pixman_op_t              op,
@@ -5779,6 +5858,7 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
     imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u;
 
     imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u;
+    imp->combine_32[PIXMAN_OP_LERP_SRC] = sse2_combine_lerp_u;
 
     imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca;
     imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca;
-- 
1.7.5.4



More information about the Pixman mailing list