[Pixman] [PATCH 3/4] sse2: affine bilinear fetcher

Chris Wilson chris at chris-wilson.co.uk
Sun Jan 27 06:10:27 PST 2013


On an SNB i5-2500 using cairo-image:

firefox-canvas        17.8 -> 10.3:  1.72x speedup
firefox-tron          46.3 -> 28.4:  1.63x speedup
swfdec-youtube         1.7 ->  1.4:  1.22x speedup
firefox-fishbowl      64.6 -> 53.7:  1.20x speedup
firefox-paintball     40.8 -> 36.8:  1.11x speedup
firefox-canvas-alpha  27.3 -> 25.4:  1.07x speedup
---
 pixman/pixman-sse2.c |  719 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 719 insertions(+)

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index fc873cc..9558e9c 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -6346,6 +6346,709 @@ static const fetcher_info_t fetchers[] =
     { PIXMAN_null }
 };
 
+typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); /* callback: reads pixel number x of row and returns it as a 32-bit value */
+
+static force_inline uint32_t
+linear_interpolation (const uint32_t a, const uint32_t b, int w)
+{ /* Blend two packed 4x8-bit pixels per channel as (w * b + (256 - w) * a) / 256, with w first rescaled to 8 bits. */
+    uint32_t l, r, t;
+
+    w <<= (8 - BILINEAR_INTERPOLATION_BITS); /* rescale the weight to 8 bits; its complement is then 256 - w */
+
+    /* red and blue: two channels per multiply, each with 8 spare bits above it */
+    l = a & 0x00ff00ff;
+    r = b & 0x00ff00ff;
+    t = w*r + (256-w)*l;
+
+    /* alpha and green: shifted down first so they use the same layout */
+    l = (a & 0xff00ff00) >> 8;
+    r = (b & 0xff00ff00) >> 8;
+    return ((t & 0xff00ff00) >> 8) | ((w*r + (256-w)*l) & 0xff00ff00); /* every sum carries a factor of 256: keep only the high byte of each channel */
+}
+
+static force_inline uint32_t
+sse2_bilinear_interpolation (const uint32_t *src_top,
+			     const uint32_t *src_bottom,
+			     int dx, int dy)
+{ /* Bilinear blend of a 2x2 block: src_top and src_bottom each point at a left/right pixel pair; dx weights the right column, dy the bottom row. */
+#if 0 /* disabled alternative built on the BILINEAR_INTERPOLATE_ONE_PIXEL macro */
+    int wb = dy, wt = BILINEAR_INTERPOLATION_RANGE - dy;
+    pixman_fixed_t   vx = dx << (16 - BILINEAR_INTERPOLATION_BITS);
+    pixman_fixed_t   unit_x = 0;
+    BILINEAR_DECLARE_VARIABLES;
+    uint32_t pix1;
+    BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+    return pix1;
+#else
+    int wb = dy, wt = BILINEAR_INTERPOLATION_RANGE - dy; /* weights of the bottom and top rows */
+
+    __m128i xmm_wh, xmm_lo, xmm_hi, a;
+
+    /* fetch 2x2 pixel block into sse2 registers (64 bits, i.e. two pixels, per row) */
+    __m128i tltr = _mm_loadl_epi64 ((__m128i *)src_top);
+    __m128i blbr = _mm_loadl_epi64 ((__m128i *)src_bottom);
+
+    /* vertical interpolation: widen each byte to 16 bits, then top * wt + bottom * wb */
+    a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, _mm_setzero_si128 ()),
+					_mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt)),
+		       _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, _mm_setzero_si128 ()),
+					_mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb)));
+    if (BILINEAR_INTERPOLATION_BITS < 8)
+    { /* NOTE(review): presumably chosen because the 16-bit sums above only fit the signed inputs of _mm_madd_epi16 below 8 bits - confirm */
+	const __m128i xmm_xorc7 = _mm_set_epi16 (0, BMSK, 0, BMSK, 0, BMSK, 0, BMSK);
+	const __m128i xmm_addc7 = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1);
+	const __m128i xmm_x = _mm_set_epi16 (dx, dx, dx, dx, dx, dx, dx, dx);
+
+	/* calculate horizontal weights: (dx ^ BMSK) + 1 in the even lanes, dx in the odd lanes */
+	xmm_wh = _mm_add_epi16 (xmm_addc7, _mm_xor_si128 (xmm_xorc7, xmm_x));
+	/* horizontal interpolation: interleave left/right channels, then multiply-add each pair */
+	a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 (
+		a, _MM_SHUFFLE (1, 0, 3, 2)), a), xmm_wh);
+    }
+    else
+    {
+	const __m128i xmm_xorc8 = _mm_set_epi16 (0, 0, 0, 0, BMSK, BMSK, BMSK, BMSK);
+	const __m128i xmm_addc8 = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1);
+	const __m128i xmm_x = _mm_set_epi16 (dx, dx, dx, dx, dx, dx, dx, dx);
+
+	/* calculate horizontal weights: (dx ^ BMSK) + 1 in the low four lanes, dx in the high four */
+	xmm_wh = _mm_add_epi16 (xmm_addc8, _mm_xor_si128 (xmm_xorc8, xmm_x));
+	/* horizontal interpolation: unsigned 32-bit products rebuilt from low/high halves, left + right summed */
+	xmm_lo = _mm_mullo_epi16 (a, xmm_wh);
+	xmm_hi = _mm_mulhi_epu16 (a, xmm_wh);
+	a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi),
+			   _mm_unpackhi_epi16 (xmm_lo, xmm_hi));
+    }
+    /* shift and pack the result: drop both weight scale factors, then narrow 32 -> 16 -> 8 bits */
+    a = _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2);
+    a = _mm_packs_epi32 (a, a);
+    a = _mm_packus_epi16 (a, a);
+    return _mm_cvtsi128_si32 (a); /* the low 32 bits hold the packed pixel */
+#endif
+}
+
+static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; /* dummy row of zero bytes that stands in for source rows outside the image */
+
+static force_inline uint32_t *
+sse2_fetch_bilinear (pixman_iter_t *iter,
+		     const uint32_t *mask,
+		     convert_pixel_t	convert_pixel,
+		     pixman_format_code_t	format,
+		     pixman_repeat_t repeat)
+{ /* Fetch one bilinear-filtered scanline into iter->buffer; the source y is computed once and only x is stepped. Returns the buffer. */
+    pixman_image_t * ima = iter->image;
+    int              offset = iter->x;
+    int              line = iter->y++; /* note: advances iter->y for the next call */
+    int              width = iter->width;
+    uint32_t *       buffer = iter->buffer;
+    uint32_t * const end = buffer + width;
+
+    bits_image_t *bits = &ima->bits;
+    pixman_fixed_t x_top, x_bottom, x;
+    pixman_fixed_t ux_top, ux_bottom, ux;
+    pixman_vector_t v;
+    const uint8_t *top_row;
+    const uint8_t *bottom_row;
+    uint32_t one = 1; /* stand-in mask word used when no mask is given */
+    int y, y1, y2;
+    int disty;
+    int mask_inc;
+    int w;
+
+    /* reference point is the center of the pixel */
+    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+    v.vector[2] = pixman_fixed_1;
+
+    if (!pixman_transform_point_3d (bits->common.transform, &v))
+	return iter->buffer; /* transform failed: the buffer is returned unmodified */
+
+    ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0]; /* source-space x step per destination pixel */
+    x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2; /* integer part = left sample column, fraction = horizontal weight */
+
+    y = v.vector[1] - pixman_fixed_1/2;
+    disty = pixman_fixed_to_bilinear_weight (y); /* weight of the bottom row */
+
+    /* Work out which two source rows (y1 and y2 = y1 + 1) the bilinear
+     * filter has to read for this scanline.
+     *
+     * The main trick below is the handling of rows that fall outside
+     * of the image:
+     *
+     * For PIXMAN_REPEAT_NONE such a row is replaced by a pointer to a
+     * dummy area of zeros. Since that area holds only two pixels, the
+     * row then gets its own x position and increment (x_top/ux_top,
+     * x_bottom/ux_bottom), both zero, so the read offset never moves.
+     */
+    y1 = pixman_fixed_to_int (y);
+    y2 = y1 + 1;
+
+    /* Instead of checking whether the operation uses the mask in
+     * each loop iteration, verify this only once and prepare the
+     * variables to make the code smaller inside the loop.
+     */
+    if (!mask)
+    {
+	mask_inc = 0;
+	mask = &one; /* with mask_inc == 0 this single nonzero word is re-read for every pixel */
+    }
+    else
+    {
+	/* If have a mask, prepare the variables to check it */
+	mask_inc = 1;
+    }
+
+    if (repeat == PIXMAN_REPEAT_NONE)
+    {
+	uint32_t top_mask, bottom_mask; /* bits OR-ed into every pixel fetched from the respective row */
+
+	if (y1 < 0 || y1 >= bits->height)
+	{
+	    top_row = zero;
+	    x_top = 0;
+	    ux_top = 0;
+	}
+	else
+	{
+	    top_row = (uint8_t *)(bits->bits + y1 * bits->rowstride);
+	    x_top = x;
+	    ux_top = ux;
+	}
+
+	if (y2 < 0 || y2 >= bits->height)
+	{
+	    bottom_row = zero;
+	    x_bottom = 0;
+	    ux_bottom = 0;
+	}
+	else
+	{
+	    bottom_row = (uint8_t *)(bits->bits + y2 * bits->rowstride);
+	    x_bottom = x;
+	    ux_bottom = ux;
+	}
+
+	/* If both are zero, then the whole thing is zero */
+	if (top_row == zero && bottom_row == zero)
+	{
+	    return memset (buffer, 0, width * sizeof (uint32_t));
+	}
+	else if (PIXMAN_FORMAT_A(format) == 0) /* no alpha channel: force opaque alpha, but only for rows inside the image */
+	{
+	    if (top_row == zero)
+	    {
+		top_mask = 0;
+		bottom_mask = 0xff000000;
+	    }
+	    else if (bottom_row == zero)
+	    {
+		top_mask = 0xff000000;
+		bottom_mask = 0;
+	    }
+	    else
+	    {
+		top_mask = 0xff000000;
+		bottom_mask = 0xff000000;
+	    }
+	}
+	else
+	{
+	    top_mask = 0; /* the format has alpha bits of its own; nothing is forced */
+	    bottom_mask = 0;
+	}
+
+	/* Zero fill to the left of the image (x < -1: both samples are left of column 0) */
+	while (buffer < end && x < pixman_fixed_minus_1)
+	{
+	    *buffer++ = 0;
+	    x += ux;
+	    x_top += ux_top;
+	    x_bottom += ux_bottom;
+	    mask += mask_inc;
+	}
+
+	/* Left edge: -1 <= x < 0, so the left sample lies outside the image
+	 * (taken as zero) and the right sample is column 0 */
+	while (buffer < end && x < 0)
+	{
+	    uint32_t top[2] = {0, convert_pixel (top_row, 0) | top_mask};
+	    uint32_t bot[2] = {0, convert_pixel (bottom_row, 0) | bottom_mask};
+	    int32_t distx = pixman_fixed_to_bilinear_weight (x);
+
+	    *buffer++ = sse2_bilinear_interpolation (top, bot, distx, disty);
+
+	    x += ux;
+	    x_top += ux_top;
+	    x_bottom += ux_bottom;
+	    mask += mask_inc;
+	}
+
+	/* Main part: x < width - 1, so both sample columns are inside the image */
+	w = pixman_int_to_fixed (bits->width - 1);
+	if (format == PIXMAN_a8r8g8b8 && ux_top == ux && ux_bottom == ux && x < w)
+	{ /* a8r8g8b8 with both row steps still equal to ux: hand the whole run to the SSE2 scanline routine */
+	    int width; /* length of the run; shadows the outer width */
+
+	    width = end - buffer;
+	    if (width * ux > w - x) /* NOTE(review): plain int multiply - confirm callers keep width * ux within range */
+		width = (w - x + ux - 1) / ux; /* ceil ((w - x) / ux): number of pixels with x < w; needs ux != 0 */
+
+	    scaled_bilinear_scanline_sse2_8888_8888_SRC (buffer, NULL,
+							 (uint32_t *)top_row,
+							 (uint32_t *)bottom_row,
+							 width,
+							 BILINEAR_INTERPOLATION_RANGE - disty, disty,
+							 x, ux,
+							 0, 0); /* note: *mask is not tested on this path */
+
+	    buffer += width;
+	    x_bottom = x_top = x += ux * width; /* move all three positions past the run */
+	    mask += mask_inc * width;
+	}
+	else
+	{
+	    while (buffer < end && x < w)
+	    {
+		if (*mask) /* pixels whose mask word is zero are skipped and left unwritten */
+		{
+		    int32_t distx = pixman_fixed_to_bilinear_weight (x);
+		    uint32_t top[2] = {
+			convert_pixel (top_row, pixman_fixed_to_int (x_top)) | top_mask,
+			convert_pixel (top_row, pixman_fixed_to_int (x_top) + 1) | top_mask,
+		    };
+		    uint32_t bot[2] = {
+			convert_pixel (bottom_row, pixman_fixed_to_int (x_bottom)) | bottom_mask,
+			convert_pixel (bottom_row, pixman_fixed_to_int (x_bottom) + 1) | bottom_mask,
+		    };
+
+		    *buffer = sse2_bilinear_interpolation (top, bot, distx, disty);
+		}
+
+		buffer++;
+		x += ux;
+		x_top += ux_top;
+		x_bottom += ux_bottom;
+		mask += mask_inc;
+	    }
+	}
+
+	/* Right edge: x < width, the right sample is outside the image (taken as zero) */
+	w = pixman_int_to_fixed (bits->width);
+	while (buffer < end && x < w)
+	{
+	    if (*mask)
+	    {
+		uint32_t top[2] = { convert_pixel (top_row, pixman_fixed_to_int (x_top)) | top_mask, 0};
+		uint32_t bot[2] = { convert_pixel (bottom_row, pixman_fixed_to_int (x_bottom)) | bottom_mask, 0};
+		int32_t distx = pixman_fixed_to_bilinear_weight (x);
+
+		*buffer = sse2_bilinear_interpolation (top, bot, distx, disty);
+	    }
+
+	    buffer++;
+	    x += ux;
+	    x_top += ux_top;
+	    x_bottom += ux_bottom;
+	    mask += mask_inc;
+	}
+
+	/* Zero fill to the right of the image */
+	while (buffer < end)
+	    *buffer++ = 0;
+    }
+    else
+    { /* every other repeat mode: rows and columns outside the image are clamped to the nearest edge */
+	uint32_t alpha = PIXMAN_FORMAT_A (format) ? 0 : 0xff000000; /* opaque alpha for formats without an alpha channel */
+
+	if (y1 <= 0)
+	{
+	    top_row = (uint8_t *)(bits->bits);
+	}
+	else if (y1 >= bits->height)
+	{
+	    top_row = (uint8_t *)(bits->bits + (bits->height-1) * bits->rowstride);
+	}
+	else
+	{
+	    top_row = (uint8_t *)(bits->bits + y1 * bits->rowstride);
+	}
+
+	if (y2 <= 0)
+	{
+	    bottom_row = (uint8_t *)(bits->bits);
+	}
+	else if (y2 >= bits->height)
+	{
+	    bottom_row = (uint8_t *)(bits->bits + (bits->height-1) * bits->rowstride);
+	}
+	else
+	{
+	    bottom_row = (uint8_t *)(bits->bits + y2 * bits->rowstride);
+	}
+
+	/* Left edge: x <= 0 uses column 0 only, so one vertical blend serves all these pixels */
+	if (x <= 0)
+	{
+	    uint32_t top = convert_pixel (top_row, 0) | alpha;
+	    uint32_t bot = convert_pixel (bottom_row, 0) | alpha;
+	    uint32_t p = linear_interpolation (top, bot, disty);
+	    while (buffer < end && x <= 0)
+	    {
+		*buffer++ = p;
+		x += ux;
+		mask += mask_inc;
+	    }
+	}
+
+	/* Main part: x < width - 1, so both sample columns are inside the image */
+	w = pixman_int_to_fixed (bits->width - 1);
+	if (format == PIXMAN_a8r8g8b8 && x < w)
+	{
+	    int width; /* length of the run; shadows the outer width */
+
+	    width = end - buffer;
+	    if (width * ux > w - x) /* NOTE(review): plain int multiply - confirm callers keep width * ux within range */
+		width = (w - x + ux - 1) / ux; /* ceil ((w - x) / ux): number of pixels with x < w; needs ux != 0 */
+
+	    scaled_bilinear_scanline_sse2_8888_8888_SRC (buffer, NULL,
+							 (uint32_t *)top_row,
+							 (uint32_t *)bottom_row,
+							 width,
+							 BILINEAR_INTERPOLATION_RANGE - disty, disty,
+							 x, ux,
+							 0, 0); /* note: *mask is not tested on this path */
+
+	    buffer += width;
+	    x += ux * width;
+	    mask += mask_inc * width;
+	}
+	else
+	{
+	    while (buffer < end && x < w)
+	    {
+		if (*mask) /* pixels whose mask word is zero are skipped and left unwritten */
+		{
+		    int32_t distx = pixman_fixed_to_bilinear_weight (x);
+		    uint32_t top[2] = {
+			convert_pixel (top_row, pixman_fixed_to_int (x)) | alpha,
+			convert_pixel (top_row, pixman_fixed_to_int (x) + 1) | alpha,
+		    };
+		    uint32_t bot[2] = {
+			convert_pixel (bottom_row, pixman_fixed_to_int (x)) | alpha,
+			convert_pixel (bottom_row, pixman_fixed_to_int (x) + 1) | alpha,
+		    };
+
+		    *buffer = sse2_bilinear_interpolation (top, bot, distx, disty);
+		}
+
+		buffer++;
+		x += ux;
+		mask += mask_inc;
+	    }
+	}
+
+	/* Right edge: all remaining pixels use the last column, so one vertical blend serves them all */
+	if (buffer < end)
+	{
+	    uint32_t top = convert_pixel (top_row, bits->width-1) | alpha;
+	    uint32_t bot = convert_pixel (bottom_row, bits->width-1) | alpha;
+	    uint32_t p = linear_interpolation (top, bot, disty);
+	    while (buffer < end)
+	    {
+		*buffer++ = p;
+	    }
+	}
+    }
+
+    return iter->buffer;
+}
+
+static force_inline uint32_t *
+sse2_fetch_bilinear_affine (pixman_iter_t *iter,
+			    const uint32_t * mask,
+
+			    convert_pixel_t	convert_pixel,
+			    pixman_format_code_t	format,
+			    pixman_repeat_t	repeat_mode)
+{ /* Fetch one bilinear-filtered scanline into iter->buffer, stepping both source x and y per pixel. Returns the buffer. */
+    pixman_image_t *image = iter->image;
+    int offset = iter->x;
+    int line = iter->y++; /* note: advances iter->y for the next call */
+    int width = iter->width;
+    uint32_t *buffer = iter->buffer;
+    pixman_fixed_t x, y;
+    pixman_fixed_t ux, uy;
+    pixman_vector_t v;
+    bits_image_t *bits = &image->bits;
+    int i;
+
+    /* reference point is the center of the pixel */
+    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+    v.vector[2] = pixman_fixed_1;
+
+    if (!pixman_transform_point_3d (image->common.transform, &v))
+	return iter->buffer; /* transform failed: the buffer is returned unmodified */
+
+    ux = image->common.transform->matrix[0][0]; /* source-space x step per destination pixel */
+    uy = image->common.transform->matrix[1][0]; /* source-space y step per destination pixel */
+
+    x = v.vector[0];
+    y = v.vector[1];
+
+    for (i = 0; i < width; ++i)
+    {
+	int x1, y1, x2, y2;
+	uint32_t top[2], bot[2];
+	int32_t distx, disty;
+	int width = image->bits.width; /* source image width; shadows the scanline width inside the loop body */
+	int height = image->bits.height;
+	const uint8_t *top_row;
+	const uint8_t *bot_row;
+
+	if (mask && !mask[i])
+	    goto next; /* masked-out pixel: buffer[i] is left unwritten, x and y still advance */
+
+	x1 = x - pixman_fixed_1 / 2; /* integer part = top-left sample, fraction = weight */
+	y1 = y - pixman_fixed_1 / 2;
+
+	distx = pixman_fixed_to_bilinear_weight (x1);
+	disty = pixman_fixed_to_bilinear_weight (y1);
+
+	y1 = pixman_fixed_to_int (y1);
+	y2 = y1 + 1;
+	x1 = pixman_fixed_to_int (x1);
+	x2 = x1 + 1;
+
+	if (repeat_mode != PIXMAN_REPEAT_NONE)
+	{
+	    uint32_t alpha = PIXMAN_FORMAT_A (format) ? 0 : 0xff000000; /* opaque alpha for formats without an alpha channel */
+
+	    repeat (repeat_mode, &x1, width); /* repeat () is defined elsewhere; presumably maps the coordinate into the image - confirm */
+	    repeat (repeat_mode, &y1, height);
+	    repeat (repeat_mode, &x2, width);
+	    repeat (repeat_mode, &y2, height);
+
+	    top_row = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; /* rowstride is multiplied by 4 to form a byte offset */
+	    bot_row = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
+
+	    top[0] = convert_pixel (top_row, x1) | alpha;
+	    top[1] = convert_pixel (top_row, x2) | alpha;
+	    bot[0] = convert_pixel (bot_row, x1) | alpha;
+	    bot[1] = convert_pixel (bot_row, x2) | alpha;
+	}
+	else
+	{
+	    uint32_t top_alpha, bot_alpha;
+	    int bpp;
+
+	    /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value,
+	     * which means if you use it in expressions, those
+	     * expressions become unsigned themselves. Since
+	     * the variables below can be negative in some cases,
+	     * that will lead to crashes on 64 bit architectures.
+	     *
+	     * So this line makes sure bpp is signed
+	     */
+	    bpp = PIXMAN_FORMAT_BPP (format);
+
+	    if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0)
+	    { /* all four samples lie outside the image */
+		buffer[i] = 0;
+		goto next;
+	    }
+
+	    if (y2 == 0) /* y1 == -1: the top row is outside the image */
+	    {
+		top_row = zero;
+		top_alpha = 0;
+	    }
+	    else
+	    {
+		top_row = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
+		top_row += bpp / 8 * x1; /* pre-offset to pixel x1, so indices 0 and 1 below mean x1 and x2 */
+		top_alpha = PIXMAN_FORMAT_A (format) ? 0 : 0xff000000;
+	    }
+
+	    if (y1 == height - 1) /* y2 == height: the bottom row is outside the image */
+	    {
+		bot_row = zero;
+		bot_alpha = 0;
+	    }
+	    else
+	    {
+		bot_row = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
+		bot_row += bpp / 8 * x1; /* pre-offset to pixel x1, as for top_row */
+		bot_alpha = PIXMAN_FORMAT_A (format) ? 0 : 0xff000000;
+	    }
+
+	    if (x2 == 0) /* x1 == -1: the left column is outside the image, index 0 is not read */
+	    {
+		top[0] = 0;
+		bot[0] = 0;
+	    }
+	    else
+	    {
+		top[0] = convert_pixel (top_row, 0) | top_alpha;
+		bot[0] = convert_pixel (bot_row, 0) | bot_alpha;
+	    }
+
+	    if (x1 == width - 1) /* x2 == width: the right column is outside the image, index 1 is not read */
+	    {
+		top[1] = 0;
+		bot[1] = 0;
+	    }
+	    else
+	    {
+		top[1] = convert_pixel (top_row, 1) | top_alpha;
+		bot[1] = convert_pixel (bot_row, 1) | bot_alpha;
+	    }
+	}
+
+	buffer[i] = sse2_bilinear_interpolation (top, bot, distx, disty);
+
+    next: /* common exit of the loop body: step to the next source position */
+	x += ux;
+	y += uy;
+    }
+
+    return iter->buffer;
+}
+
+static force_inline uint32_t
+convert_a8r8g8b8 (const uint8_t *row, int x)
+{
+    return *(((uint32_t *)row) + x); /* 32-bit pixel number x of the row, returned exactly as stored */
+}
+
+static force_inline uint32_t
+convert_x8r8g8b8 (const uint8_t *row, int x)
+{
+    return *(((uint32_t *)row) + x); /* returned exactly as stored, top byte included; nothing is masked here */
+}
+
+static force_inline uint32_t
+convert_a8 (const uint8_t *row, int x)
+{
+    return (uint32_t) *(row + x) << 24; /* 8-bit value moved to the top byte; cast first, since the promoted int would overflow (undefined) for values >= 0x80 */
+}
+
+static force_inline uint32_t
+convert_r5g6b5 (const uint8_t *row, int x)
+{
+    return convert_0565_to_0888 (*((uint16_t *)row + x)); /* 16-bit pixel number x, expanded by convert_0565_to_0888 () */
+}
+
+#define MAKE_SIMPLE_BILINEAR_FETCHER(format, repeat)			\
+    static uint32_t *							\
+    sse2_fetch_bilinear_ ## format ## _ ## repeat (pixman_iter_t   *iter,\
+						   const uint32_t * mask)\
+    {									\
+	return sse2_fetch_bilinear (iter, mask,				\
+				    convert_ ## format,			\
+				    PIXMAN_ ## format,			\
+				    PIXMAN_REPEAT_ ## repeat);				\
+    } /* wrapper that calls sse2_fetch_bilinear () with a fixed converter, format and repeat mode */
+
+MAKE_SIMPLE_BILINEAR_FETCHER(a8r8g8b8, NONE) /* defines sse2_fetch_bilinear_a8r8g8b8_NONE; the others follow the same naming */
+MAKE_SIMPLE_BILINEAR_FETCHER(a8r8g8b8, PAD)
+MAKE_SIMPLE_BILINEAR_FETCHER(x8r8g8b8, NONE)
+MAKE_SIMPLE_BILINEAR_FETCHER(x8r8g8b8, PAD)
+MAKE_SIMPLE_BILINEAR_FETCHER(a8, NONE)
+MAKE_SIMPLE_BILINEAR_FETCHER(a8, PAD)
+MAKE_SIMPLE_BILINEAR_FETCHER(r5g6b5, NONE)
+MAKE_SIMPLE_BILINEAR_FETCHER(r5g6b5, PAD)
+
+#define MAKE_BILINEAR_FETCHER(name, format, repeat)			\
+    static uint32_t *							\
+    sse2_fetch_bilinear_affine_ ## name (pixman_iter_t   *iter,		\
+					 const uint32_t * mask)		\
+    {									\
+	return sse2_fetch_bilinear_affine (iter, mask,			\
+					   convert_ ## format,		\
+					   PIXMAN_ ## format,		\
+					   PIXMAN_REPEAT_ ## repeat);	\
+    }									\
+
+MAKE_BILINEAR_FETCHER(pad_a8r8g8b8,	a8r8g8b8, PAD) /* defines sse2_fetch_bilinear_affine_pad_a8r8g8b8, a wrapper with fixed converter, format and repeat mode */
+MAKE_BILINEAR_FETCHER(none_a8r8g8b8,	a8r8g8b8, NONE)
+MAKE_BILINEAR_FETCHER(reflect_a8r8g8b8,	a8r8g8b8, REFLECT)
+MAKE_BILINEAR_FETCHER(normal_a8r8g8b8,	a8r8g8b8, NORMAL)
+MAKE_BILINEAR_FETCHER(pad_x8r8g8b8,	x8r8g8b8, PAD)
+MAKE_BILINEAR_FETCHER(none_x8r8g8b8,	x8r8g8b8, NONE)
+MAKE_BILINEAR_FETCHER(reflect_x8r8g8b8,	x8r8g8b8, REFLECT)
+MAKE_BILINEAR_FETCHER(normal_x8r8g8b8,	x8r8g8b8, NORMAL)
+MAKE_BILINEAR_FETCHER(pad_a8,		a8,       PAD)
+MAKE_BILINEAR_FETCHER(none_a8,		a8,       NONE)
+MAKE_BILINEAR_FETCHER(reflect_a8,	a8,       REFLECT)
+MAKE_BILINEAR_FETCHER(normal_a8,	a8,       NORMAL)
+MAKE_BILINEAR_FETCHER(pad_r5g6b5,	r5g6b5,   PAD)
+MAKE_BILINEAR_FETCHER(none_r5g6b5,	r5g6b5,   NONE)
+MAKE_BILINEAR_FETCHER(reflect_r5g6b5,	r5g6b5,   REFLECT)
+MAKE_BILINEAR_FETCHER(normal_r5g6b5,	r5g6b5,   NORMAL)
+
+typedef struct
+{
+    pixman_format_code_t	format; /* source format the fetcher handles */
+    uint32_t			flags; /* image flags that must all be set for the fetcher to apply */
+    pixman_iter_get_scanline_t	get_scanline; /* scanline function installed into the iterator */
+} bilinear_fetcher_info_t;
+
+static const bilinear_fetcher_info_t bilinear_fetcher_info[] =
+{
+
+#define GENERAL_BILINEAR_FLAGS						\
+    (FAST_PATH_STANDARD_FLAGS		|				\
+     FAST_PATH_HAS_TRANSFORM		|				\
+     FAST_PATH_AFFINE_TRANSFORM		|				\
+     FAST_PATH_BILINEAR_FILTER)
+
+#define FAST_BILINEAR_FLAGS						\
+    (GENERAL_BILINEAR_FLAGS		|				\
+     FAST_PATH_X_UNIT_POSITIVE		|				\
+     FAST_PATH_Y_UNIT_ZERO)
+
+#define BILINEAR_SIMPLE_FAST_PATH(format, repeat)			\
+    { PIXMAN_ ## format,						\
+      FAST_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
+      sse2_fetch_bilinear_ ## format ## _ ## repeat,			\
+    }
+
+    BILINEAR_SIMPLE_FAST_PATH (a8r8g8b8, NONE), /* these entries need the extra X_UNIT_POSITIVE / Y_UNIT_ZERO flags and are listed first; the lookup takes the first match */
+    BILINEAR_SIMPLE_FAST_PATH (a8r8g8b8, PAD),
+    BILINEAR_SIMPLE_FAST_PATH (x8r8g8b8, NONE),
+    BILINEAR_SIMPLE_FAST_PATH (x8r8g8b8, PAD),
+    BILINEAR_SIMPLE_FAST_PATH (a8, NONE),
+    BILINEAR_SIMPLE_FAST_PATH (a8, PAD),
+    BILINEAR_SIMPLE_FAST_PATH (r5g6b5, NONE),
+    BILINEAR_SIMPLE_FAST_PATH (r5g6b5, PAD),
+
+#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat)			\
+    { PIXMAN_ ## format,						\
+      GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
+      sse2_fetch_bilinear_affine_ ## name,				\
+    }
+
+    BILINEAR_AFFINE_FAST_PATH (pad_a8r8g8b8, a8r8g8b8, PAD), /* entries requiring only GENERAL_BILINEAR_FLAGS plus the repeat flag */
+    BILINEAR_AFFINE_FAST_PATH (none_a8r8g8b8, a8r8g8b8, NONE),
+    BILINEAR_AFFINE_FAST_PATH (reflect_a8r8g8b8, a8r8g8b8, REFLECT),
+    BILINEAR_AFFINE_FAST_PATH (normal_a8r8g8b8, a8r8g8b8, NORMAL),
+    BILINEAR_AFFINE_FAST_PATH (pad_x8r8g8b8, x8r8g8b8, PAD),
+    BILINEAR_AFFINE_FAST_PATH (none_x8r8g8b8, x8r8g8b8, NONE),
+    BILINEAR_AFFINE_FAST_PATH (reflect_x8r8g8b8, x8r8g8b8, REFLECT),
+    BILINEAR_AFFINE_FAST_PATH (normal_x8r8g8b8, x8r8g8b8, NORMAL),
+    BILINEAR_AFFINE_FAST_PATH (pad_a8, a8, PAD),
+    BILINEAR_AFFINE_FAST_PATH (none_a8, a8, NONE),
+    BILINEAR_AFFINE_FAST_PATH (reflect_a8, a8, REFLECT),
+    BILINEAR_AFFINE_FAST_PATH (normal_a8, a8, NORMAL),
+    BILINEAR_AFFINE_FAST_PATH (pad_r5g6b5, r5g6b5, PAD),
+    BILINEAR_AFFINE_FAST_PATH (none_r5g6b5, r5g6b5, NONE),
+    BILINEAR_AFFINE_FAST_PATH (reflect_r5g6b5, r5g6b5, REFLECT),
+    BILINEAR_AFFINE_FAST_PATH (normal_r5g6b5, r5g6b5, NORMAL),
+
+    { PIXMAN_null }, /* terminator: the lookup loop stops at format == PIXMAN_null */
+};
+
 static pixman_bool_t
 sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
 {
@@ -6376,6 +7079,22 @@ sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
 	}
     }
 
+    if ((iter->iter_flags & ITER_NARROW) &&
+	(iter->image_flags & GENERAL_BILINEAR_FLAGS) == GENERAL_BILINEAR_FLAGS)
+    {
+	const bilinear_fetcher_info_t *f;
+
+	for (f = bilinear_fetcher_info; f->format != PIXMAN_null; ++f)
+	{
+	    if ((f->flags & iter->image_flags) == f->flags &&
+		f->format == image->common.extended_format_code)
+	    {
+		iter->get_scanline = f->get_scanline;
+		return TRUE;
+	    }
+	}
+    }
+
     return FALSE;
 }
 
-- 
1.7.10.4



More information about the Pixman mailing list