This patch doesn&#39;t special-case pixels where alpha is zero but the color components are not.<div>I&#39;m not sure whether that is possible or, if so, how frequent it is.</div><div><br></div><div> <br><br><div class="gmail_quote">On Mon, Dec 20, 2010 at 8:50 PM, Søren Sandmann <span dir="ltr">&lt;<a href="mailto:sandmann@daimi.au.dk">sandmann@daimi.au.dk</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex;">From: Søren Sandmann Pedersen &lt;<a href="mailto:ssp@redhat.com">ssp@redhat.com</a>&gt;<br>
<br>
This is a big speed-up in the SVG helicopter game:<br>
<br>
   <a href="http://ie.microsoft.com/testdrive/Performance/Helicopter/Default.xhtml" target="_blank">http://ie.microsoft.com/testdrive/Performance/Helicopter/Default.xhtml</a><br>
<br>
when rendered by Firefox 4 since it is compositing big images<br>
consisting almost entirely of zeros.<br>
---<br>
 pixman/pixman-sse2.c |   75 +++++++++++++++++++++++++++++--------------------<br>
 1 files changed, 44 insertions(+), 31 deletions(-)<br>
<br>
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c<br>
index 5907de0..032f13b 100644<br>
--- a/pixman/pixman-sse2.c<br>
+++ b/pixman/pixman-sse2.c<br>
@@ -3051,37 +3051,45 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,<br>
        while (w &amp;&amp; (unsigned long)dst &amp; 15)<br>
        {<br>
            uint32_t s = *src++;<br>
-           uint32_t d = *dst;<br>
-<br>
-           __m64 ms = unpack_32_1x64 (s);<br>
-           __m64 alpha    = expand_alpha_1x64 (ms);<br>
-           __m64 dest     = _mm_movepi64_pi64 (xmm_mask);<br>
-           __m64 alpha_dst = unpack_32_1x64 (d);<br>
-<br>
-           *dst++ = pack_1x64_32 (<br>
-               in_over_1x64 (&amp;ms, &amp;alpha, &amp;dest, &amp;alpha_dst));<br>
<br>
+           if (s)<br>
+           {<br>
+               uint32_t d = *dst;<br>
+<br>
+               __m64 ms = unpack_32_1x64 (s);<br>
+               __m64 alpha    = expand_alpha_1x64 (ms);<br>
+               __m64 dest     = _mm_movepi64_pi64 (xmm_mask);<br>
+               __m64 alpha_dst = unpack_32_1x64 (d);<br>
+<br>
+               *dst = pack_1x64_32 (<br>
+                   in_over_1x64 (&amp;ms, &amp;alpha, &amp;dest, &amp;alpha_dst));<br>
+           }<br>
+           dst++;<br>
            w--;<br>
        }<br>
<br>
        while (w &gt;= 4)<br>
        {<br>
            xmm_src = load_128_unaligned ((__m128i*)src);<br>
-           xmm_dst = load_128_aligned ((__m128i*)dst);<br>
-<br>
-           unpack_128_2x128 (xmm_src, &amp;xmm_src_lo, &amp;xmm_src_hi);<br>
-           unpack_128_2x128 (xmm_dst, &amp;xmm_dst_lo, &amp;xmm_dst_hi);<br>
-           expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,<br>
-                               &amp;xmm_alpha_lo, &amp;xmm_alpha_hi);<br>
-<br>
-           in_over_2x128 (&amp;xmm_src_lo, &amp;xmm_src_hi,<br>
-                          &amp;xmm_alpha_lo, &amp;xmm_alpha_hi,<br>
-                          &amp;xmm_mask, &amp;xmm_mask,<br>
-                          &amp;xmm_dst_lo, &amp;xmm_dst_hi);<br>
-<br>
-           save_128_aligned (<br>
-               (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));<br>
<br>
+           if (!is_zero (xmm_src))<br>
+           {<br>
+               xmm_dst = load_128_aligned ((__m128i*)dst);<br>
+<br>
+               unpack_128_2x128 (xmm_src, &amp;xmm_src_lo, &amp;xmm_src_hi);<br>
+               unpack_128_2x128 (xmm_dst, &amp;xmm_dst_lo, &amp;xmm_dst_hi);<br>
+               expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,<br>
+                                   &amp;xmm_alpha_lo, &amp;xmm_alpha_hi);<br>
+<br>
+               in_over_2x128 (&amp;xmm_src_lo, &amp;xmm_src_hi,<br>
+                              &amp;xmm_alpha_lo, &amp;xmm_alpha_hi,<br>
+                              &amp;xmm_mask, &amp;xmm_mask,<br>
+                              &amp;xmm_dst_lo, &amp;xmm_dst_hi);<br>
+<br>
+               save_128_aligned (<br>
+                   (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));<br>
+           }<br>
+<br>
            dst += 4;<br>
            src += 4;<br>
            w -= 4;<br>
@@ -3090,16 +3098,21 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,<br>
        while (w)<br>
        {<br>
            uint32_t s = *src++;<br>
-           uint32_t d = *dst;<br>
<br>
-           __m64 ms = unpack_32_1x64 (s);<br>
-           __m64 alpha = expand_alpha_1x64 (ms);<br>
-           __m64 mask  = _mm_movepi64_pi64 (xmm_mask);<br>
-           __m64 dest  = unpack_32_1x64 (d);<br>
-<br>
-           *dst++ = pack_1x64_32 (<br>
-               in_over_1x64 (&amp;ms, &amp;alpha, &amp;mask, &amp;dest));<br>
+           if (s)<br>
+           {<br>
+               uint32_t d = *dst;<br>
+<br>
+               __m64 ms = unpack_32_1x64 (s);<br>
+               __m64 alpha = expand_alpha_1x64 (ms);<br>
+               __m64 mask  = _mm_movepi64_pi64 (xmm_mask);<br>
+               __m64 dest  = unpack_32_1x64 (d);<br>
+<br>
+               *dst = pack_1x64_32 (<br>
+                   in_over_1x64 (&amp;ms, &amp;alpha, &amp;mask, &amp;dest));<br>
+           }<br>
<br>
+           dst++;<br>
            w--;<br>
        }<br>
     }<br>
<font color="#888888">--<br>
1.7.3.1<br>
<br>
_______________________________________________<br>
Pixman mailing list<br>
<a href="mailto:Pixman@lists.freedesktop.org">Pixman@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/pixman" target="_blank">http://lists.freedesktop.org/mailman/listinfo/pixman</a><br>
</font></blockquote></div><br></div>