[Pixman] [PATCH] sse2: Skip src pixels that are zero in sse2_composite_over_8888_n_8888()

Søren Sandmann sandmann at daimi.au.dk
Mon Dec 20 14:50:03 PST 2010


From: Søren Sandmann Pedersen <ssp at redhat.com>

This is a big speed-up in the SVG helicopter game:

   http://ie.microsoft.com/testdrive/Performance/Helicopter/Default.xhtml

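when rendered by Firefox 4, since it is compositing big images
consisting almost entirely of zeros. When a source pixel (or an aligned
group of four source pixels) is zero, the destination is left untouched,
so the destination load, the blend, and the store are all skipped.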
---
 pixman/pixman-sse2.c |   75 +++++++++++++++++++++++++++++--------------------
 1 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 5907de0..032f13b 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -3051,37 +3051,45 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
 	while (w && (unsigned long)dst & 15)
 	{
 	    uint32_t s = *src++;
-	    uint32_t d = *dst;
-
-	    __m64 ms = unpack_32_1x64 (s);
-	    __m64 alpha    = expand_alpha_1x64 (ms);
-	    __m64 dest     = _mm_movepi64_pi64 (xmm_mask);
-	    __m64 alpha_dst = unpack_32_1x64 (d);
-
-	    *dst++ = pack_1x64_32 (
-		in_over_1x64 (&ms, &alpha, &dest, &alpha_dst));
 
+	    if (s)
+	    {
+		uint32_t d = *dst;
+		
+		__m64 ms = unpack_32_1x64 (s);
+		__m64 alpha    = expand_alpha_1x64 (ms);
+		__m64 dest     = _mm_movepi64_pi64 (xmm_mask);
+		__m64 alpha_dst = unpack_32_1x64 (d);
+		
+		*dst = pack_1x64_32 (
+		    in_over_1x64 (&ms, &alpha, &dest, &alpha_dst));
+	    }
+	    dst++;
 	    w--;
 	}
 
 	while (w >= 4)
 	{
 	    xmm_src = load_128_unaligned ((__m128i*)src);
-	    xmm_dst = load_128_aligned ((__m128i*)dst);
-
-	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-	    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-				&xmm_alpha_lo, &xmm_alpha_hi);
-
-	    in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
-			   &xmm_alpha_lo, &xmm_alpha_hi,
-			   &xmm_mask, &xmm_mask,
-			   &xmm_dst_lo, &xmm_dst_hi);
-
-	    save_128_aligned (
-		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
+	    if (!is_zero (xmm_src))
+	    {
+		xmm_dst = load_128_aligned ((__m128i*)dst);
+		
+		unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+		unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+		expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+				    &xmm_alpha_lo, &xmm_alpha_hi);
+		
+		in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+			       &xmm_alpha_lo, &xmm_alpha_hi,
+			       &xmm_mask, &xmm_mask,
+			       &xmm_dst_lo, &xmm_dst_hi);
+		
+		save_128_aligned (
+		    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	    }
+		
 	    dst += 4;
 	    src += 4;
 	    w -= 4;
@@ -3090,16 +3098,21 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
 	while (w)
 	{
 	    uint32_t s = *src++;
-	    uint32_t d = *dst;
 
-	    __m64 ms = unpack_32_1x64 (s);
-	    __m64 alpha = expand_alpha_1x64 (ms);
-	    __m64 mask  = _mm_movepi64_pi64 (xmm_mask);
-	    __m64 dest  = unpack_32_1x64 (d);
-
-	    *dst++ = pack_1x64_32 (
-		in_over_1x64 (&ms, &alpha, &mask, &dest));
+	    if (s)
+	    {
+		uint32_t d = *dst;
+		
+		__m64 ms = unpack_32_1x64 (s);
+		__m64 alpha = expand_alpha_1x64 (ms);
+		__m64 mask  = _mm_movepi64_pi64 (xmm_mask);
+		__m64 dest  = unpack_32_1x64 (d);
+		
+		*dst = pack_1x64_32 (
+		    in_over_1x64 (&ms, &alpha, &mask, &dest));
+	    }
 
+	    dst++;
 	    w--;
 	}
     }
-- 
1.7.3.1



More information about the Pixman mailing list