[Pixman] [PATCH 5/7] sse2: Add sse2_composite_over_reverse_n_8888

Søren Sandmann sandmann at daimi.au.dk
Sat Apr 24 13:48:56 PDT 2010


From: Søren Sandmann Pedersen <ssp at redhat.com>

This is a small speed-up for the poppler benchmark:

Before:
[ # ]  backend                         test   min(s) median(s) stddev. count
[  0]    image                      poppler    4.443    4.474   0.31%    6/6

After:
[ # ]  backend                         test   min(s) median(s) stddev. count
[  0]    image                      poppler    4.224    4.248   0.42%    6/6
---
 pixman/pixman-sse2.c |  104 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 104 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 09da14d..3f474d7 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5766,6 +5766,106 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
 }
 
 static void
+sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp, /* unused */
+				    pixman_op_t              op, /* unused */
+				    pixman_image_t *         src_image,
+				    pixman_image_t *         mask_image, /* unused */
+				    pixman_image_t *         dst_image,
+				    int32_t                  src_x, /* unused */
+				    int32_t                  src_y, /* unused */
+				    int32_t                  mask_x, /* unused */
+				    int32_t                  mask_y, /* unused */
+				    int32_t                  dest_x,
+				    int32_t                  dest_y,
+				    int32_t                  width,
+				    int32_t                  height)
+{
+    uint32_t src;
+    uint32_t    *dst_line, *dst;
+    __m128i xmm_src;
+    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+    __m128i xmm_dsta_hi, xmm_dsta_lo;
+    int dst_stride;
+    int32_t w;
+    /* OVER_REVERSE with a solid source: updates a width x height block of 32-bit dest pixels in place. */
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+
+    if (src == 0)
+	return; /* all-zero source: leave the destination untouched */
+
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+    xmm_src = expand_pixel_32_1x128 (src); /* presumably widens the channels for the arithmetic below -- confirm */
+
+    while (height--)
+    {
+	dst = dst_line;
+
+	/* prefetch hint for the start of this destination row */
+	cache_prefetch ((__m128i*)dst);
+
+	dst_line += dst_stride; /* step to the next row for the following iteration */
+	w = width;
+
+	while (w && (unsigned long)dst & 15) /* head: one pixel at a time until dst is 16-byte aligned */
+	{
+	    __m64 vd; /* current destination pixel, unpacked */
+
+	    vd = unpack_32_1x64 (*dst);
+	    /* dest pixel and its alpha are passed first, the solid colour last (the reverse of plain OVER) */
+	    *dst = pack_1x64_32 (over_1x64 (vd, expand_alpha_1x64 (vd),
+					    _mm_movepi64_pi64 (xmm_src)));
+	    w--;
+	    dst++;
+	}
+
+	cache_prefetch ((__m128i*)dst);
+
+	while (w >= 4) /* body: four pixels per iteration using aligned 128-bit load/store */
+	{
+	    __m128i tmp_lo, tmp_hi;
+
+	    /* prefetch hint for the next group of four pixels */
+	    cache_prefetch_next ((__m128i*)(dst + 4));
+
+	    xmm_dst = load_128_aligned ((__m128i*)dst);
+
+	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+	    expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi);
+	    /* tmp_* start as the solid colour; the stored result is packed from them after over_2x128 */
+	    tmp_lo = xmm_src;
+	    tmp_hi = xmm_src;
+
+	    over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+			&xmm_dsta_lo, &xmm_dsta_hi,
+			&tmp_lo, &tmp_hi);
+
+	    save_128_aligned (
+		(__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi));
+
+	    w -= 4;
+	    dst += 4;
+	}
+
+	while (w) /* tail: the remaining (fewer than four) pixels, one at a time */
+	{
+	    __m64 vd; /* current destination pixel, unpacked */
+
+	    vd = unpack_32_1x64 (*dst);
+	    /* same per-pixel operation as in the unaligned head loop */
+	    *dst = pack_1x64_32 (over_1x64 (vd, expand_alpha_1x64 (vd),
+					    _mm_movepi64_pi64 (xmm_src)));
+	    w--;
+	    dst++;
+	}
+
+    }
+
+    _mm_empty (); /* clear the MMX state after the __m64 code paths above */
+}
+
+static void
 sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
 				    pixman_op_t              op,
 				    pixman_image_t *         src_image,
@@ -5982,6 +6082,10 @@ static const pixman_fast_path_t sse2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
 
+    /* PIXMAN_OP_OVER_REVERSE */
+    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888),
+
     /* PIXMAN_OP_ADD */
     PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca),
     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8000_8000),
-- 
1.7.0.1



More information about the Pixman mailing list