[Pixman] [PATCH 3/4] sse2: affine bilinear fetcher
Chris Wilson
chris at chris-wilson.co.uk
Sun Jan 27 06:10:27 PST 2013
On an SNB i5-2500 using cairo-image:
firefox-canvas 17.8 -> 10.3: 1.72x speedup
firefox-tron 46.3 -> 28.4: 1.63x speedup
swfdec-youtube 1.7 -> 1.4: 1.22x speedup
firefox-fishbowl 64.6 -> 53.7: 1.20x speedup
firefox-paintball 40.8 -> 36.8: 1.11x speedup
firefox-canvas-alpha 27.3 -> 25.4: 1.07x speedup
---
pixman/pixman-sse2.c | 719 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 719 insertions(+)
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index fc873cc..9558e9c 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -6346,6 +6346,709 @@ static const fetcher_info_t fetchers[] =
{ PIXMAN_null }
};
+typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
+
+static force_inline uint32_t
+linear_interpolation (const uint32_t a, const uint32_t b, int w)
+{
+ uint32_t l, r, t;
+
+ w <<= (8 - BILINEAR_INTERPOLATION_BITS);
+
+ /* red and blue */
+ l = a & 0x00ff00ff;
+ r = b & 0x00ff00ff;
+ t = w*r + (256-w)*l;
+
+ /* alpha and green */
+ l = (a & 0xff00ff00) >> 8;
+ r = (b & 0xff00ff00) >> 8;
+ return ((t & 0xff00ff00) >> 8) | ((w*r + (256-w)*l) & 0xff00ff00);
+}
+
+static force_inline uint32_t
+sse2_bilinear_interpolation (const uint32_t *src_top,
+ const uint32_t *src_bottom,
+ int dx, int dy)
+{
+#if 0
+ int wb = dy, wt = BILINEAR_INTERPOLATION_RANGE - dy;
+ pixman_fixed_t vx = dx << (16 - BILINEAR_INTERPOLATION_BITS);
+ pixman_fixed_t unit_x = 0;
+ BILINEAR_DECLARE_VARIABLES;
+ uint32_t pix1;
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ return pix1;
+#else
+ int wb = dy, wt = BILINEAR_INTERPOLATION_RANGE - dy;
+
+ __m128i xmm_wh, xmm_lo, xmm_hi, a;
+
+ /* fetch 2x2 pixel block into sse2 registers */
+ __m128i tltr = _mm_loadl_epi64 ((__m128i *)src_top);
+ __m128i blbr = _mm_loadl_epi64 ((__m128i *)src_bottom);
+
+ /* vertical interpolation */
+ a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, _mm_setzero_si128 ()),
+ _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt)),
+ _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, _mm_setzero_si128 ()),
+ _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb)));
+ if (BILINEAR_INTERPOLATION_BITS < 8)
+ {
+ const __m128i xmm_xorc7 = _mm_set_epi16 (0, BMSK, 0, BMSK, 0, BMSK, 0, BMSK);
+ const __m128i xmm_addc7 = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1);
+ const __m128i xmm_x = _mm_set_epi16 (dx, dx, dx, dx, dx, dx, dx, dx);
+
+ /* calculate horizontal weights */
+ xmm_wh = _mm_add_epi16 (xmm_addc7, _mm_xor_si128 (xmm_xorc7, xmm_x));
+ /* horizontal interpolation */
+ a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 (
+ a, _MM_SHUFFLE (1, 0, 3, 2)), a), xmm_wh);
+ }
+ else
+ {
+ const __m128i xmm_xorc8 = _mm_set_epi16 (0, 0, 0, 0, BMSK, BMSK, BMSK, BMSK);
+ const __m128i xmm_addc8 = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1);
+ const __m128i xmm_x = _mm_set_epi16 (dx, dx, dx, dx, dx, dx, dx, dx);
+
+ /* calculate horizontal weights */
+ xmm_wh = _mm_add_epi16 (xmm_addc8, _mm_xor_si128 (xmm_xorc8, xmm_x));
+ /* horizontal interpolation */
+ xmm_lo = _mm_mullo_epi16 (a, xmm_wh);
+ xmm_hi = _mm_mulhi_epu16 (a, xmm_wh);
+ a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi),
+ _mm_unpackhi_epi16 (xmm_lo, xmm_hi));
+ }
+ /* shift and pack the result */
+ a = _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2);
+ a = _mm_packs_epi32 (a, a);
+ a = _mm_packus_epi16 (a, a);
+ return _mm_cvtsi128_si32 (a);
+#endif
+}
+
+static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+static force_inline uint32_t *
+sse2_fetch_bilinear (pixman_iter_t *iter,
+ const uint32_t *mask,
+ convert_pixel_t convert_pixel,
+ pixman_format_code_t format,
+ pixman_repeat_t repeat)
+{
+ pixman_image_t * ima = iter->image;
+ int offset = iter->x;
+ int line = iter->y++;
+ int width = iter->width;
+ uint32_t * buffer = iter->buffer;
+ uint32_t * const end = buffer + width;
+
+ bits_image_t *bits = &ima->bits;
+ pixman_fixed_t x_top, x_bottom, x;
+ pixman_fixed_t ux_top, ux_bottom, ux;
+ pixman_vector_t v;
+ const uint8_t *top_row;
+ const uint8_t *bottom_row;
+ uint32_t one = 1;
+ int y, y1, y2;
+ int disty;
+ int mask_inc;
+ int w;
+
+ /* reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (bits->common.transform, &v))
+ return iter->buffer;
+
+ ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0];
+ x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2;
+
+ y = v.vector[1] - pixman_fixed_1/2;
+ disty = pixman_fixed_to_bilinear_weight (y);
+
+ /* Load the pointers to the first and second lines from the source
+ * image that the bilinear code must read.
+ *
+ * The main trick in this code is the check for whether either line
+ * lies outside of the image.
+ *
+ * When a line (either one) is found to be outside, its pointer is
+ * redirected to a dummy area of zeros. Once that is done the pointer
+ * must not advance, so each row keeps its own x position and x
+ * increment, which are set to zero for a row that was redirected.
+ */
+ y1 = pixman_fixed_to_int (y);
+ y2 = y1 + 1;
+
+ /* Instead of checking whether the operation uses the mask in
+ * each loop iteration, verify this only once and prepare the
+ * variables to make the code smaller inside the loop.
+ */
+ if (!mask)
+ {
+ mask_inc = 0;
+ mask = &one;
+ }
+ else
+ {
+ /* If we have a mask, prepare the variables to check it */
+ mask_inc = 1;
+ }
+
+ if (repeat == PIXMAN_REPEAT_NONE)
+ {
+ uint32_t top_mask, bottom_mask;
+
+ if (y1 < 0 || y1 >= bits->height)
+ {
+ top_row = zero;
+ x_top = 0;
+ ux_top = 0;
+ }
+ else
+ {
+ top_row = (uint8_t *)(bits->bits + y1 * bits->rowstride);
+ x_top = x;
+ ux_top = ux;
+ }
+
+ if (y2 < 0 || y2 >= bits->height)
+ {
+ bottom_row = zero;
+ x_bottom = 0;
+ ux_bottom = 0;
+ }
+ else
+ {
+ bottom_row = (uint8_t *)(bits->bits + y2 * bits->rowstride);
+ x_bottom = x;
+ ux_bottom = ux;
+ }
+
+ /* If both are zero, then the whole thing is zero */
+ if (top_row == zero && bottom_row == zero)
+ {
+ return memset (buffer, 0, width * sizeof (uint32_t));
+ }
+ else if (PIXMAN_FORMAT_A(format) == 0)
+ {
+ if (top_row == zero)
+ {
+ top_mask = 0;
+ bottom_mask = 0xff000000;
+ }
+ else if (bottom_row == zero)
+ {
+ top_mask = 0xff000000;
+ bottom_mask = 0;
+ }
+ else
+ {
+ top_mask = 0xff000000;
+ bottom_mask = 0xff000000;
+ }
+ }
+ else
+ {
+ top_mask = 0;
+ bottom_mask = 0;
+ }
+
+ /* Zero fill to the left of the image */
+ while (buffer < end && x < pixman_fixed_minus_1)
+ {
+ *buffer++ = 0;
+ x += ux;
+ x_top += ux_top;
+ x_bottom += ux_bottom;
+ mask += mask_inc;
+ }
+
+ /* Left edge
+ */
+ while (buffer < end && x < 0)
+ {
+ uint32_t top[2] = {0, convert_pixel (top_row, 0) | top_mask};
+ uint32_t bot[2] = {0, convert_pixel (bottom_row, 0) | bottom_mask};
+ int32_t distx = pixman_fixed_to_bilinear_weight (x);
+
+ *buffer++ = sse2_bilinear_interpolation (top, bot, distx, disty);
+
+ x += ux;
+ x_top += ux_top;
+ x_bottom += ux_bottom;
+ mask += mask_inc;
+ }
+
+ /* Main part */
+ w = pixman_int_to_fixed (bits->width - 1);
+ if (format == PIXMAN_a8r8g8b8 && ux_top == ux && ux_bottom == ux && x < w)
+ {
+ int width;
+
+ width = end - buffer;
+ if (width * ux > w - x)
+ width = (w - x + ux - 1) / ux;
+
+ scaled_bilinear_scanline_sse2_8888_8888_SRC (buffer, NULL,
+ (uint32_t *)top_row,
+ (uint32_t *)bottom_row,
+ width,
+ BILINEAR_INTERPOLATION_RANGE - disty, disty,
+ x, ux,
+ 0, 0);
+
+ buffer += width;
+ x_bottom = x_top = x += ux * width;
+ mask += mask_inc * width;
+ }
+ else
+ {
+ while (buffer < end && x < w)
+ {
+ if (*mask)
+ {
+ int32_t distx = pixman_fixed_to_bilinear_weight (x);
+ uint32_t top[2] = {
+ convert_pixel (top_row, pixman_fixed_to_int (x_top)) | top_mask,
+ convert_pixel (top_row, pixman_fixed_to_int (x_top) + 1) | top_mask,
+ };
+ uint32_t bot[2] = {
+ convert_pixel (bottom_row, pixman_fixed_to_int (x_bottom)) | bottom_mask,
+ convert_pixel (bottom_row, pixman_fixed_to_int (x_bottom) + 1) | bottom_mask,
+ };
+
+ *buffer = sse2_bilinear_interpolation (top, bot, distx, disty);
+ }
+
+ buffer++;
+ x += ux;
+ x_top += ux_top;
+ x_bottom += ux_bottom;
+ mask += mask_inc;
+ }
+ }
+
+ /* Right Edge */
+ w = pixman_int_to_fixed (bits->width);
+ while (buffer < end && x < w)
+ {
+ if (*mask)
+ {
+ uint32_t top[2] = { convert_pixel (top_row, pixman_fixed_to_int (x_top)) | top_mask, 0};
+ uint32_t bot[2] = { convert_pixel (bottom_row, pixman_fixed_to_int (x_bottom)) | bottom_mask, 0};
+ int32_t distx = pixman_fixed_to_bilinear_weight (x);
+
+ *buffer = sse2_bilinear_interpolation (top, bot, distx, disty);
+ }
+
+ buffer++;
+ x += ux;
+ x_top += ux_top;
+ x_bottom += ux_bottom;
+ mask += mask_inc;
+ }
+
+ /* Zero fill to the right of the image */
+ while (buffer < end)
+ *buffer++ = 0;
+ }
+ else
+ {
+ uint32_t alpha = PIXMAN_FORMAT_A (format) ? 0 : 0xff000000;
+
+ if (y1 <= 0)
+ {
+ top_row = (uint8_t *)(bits->bits);
+ }
+ else if (y1 >= bits->height)
+ {
+ top_row = (uint8_t *)(bits->bits + (bits->height-1) * bits->rowstride);
+ }
+ else
+ {
+ top_row = (uint8_t *)(bits->bits + y1 * bits->rowstride);
+ }
+
+ if (y2 <= 0)
+ {
+ bottom_row = (uint8_t *)(bits->bits);
+ }
+ else if (y2 >= bits->height)
+ {
+ bottom_row = (uint8_t *)(bits->bits + (bits->height-1) * bits->rowstride);
+ }
+ else
+ {
+ bottom_row = (uint8_t *)(bits->bits + y2 * bits->rowstride);
+ }
+
+ /* Left edge */
+ if (x <= 0)
+ {
+ uint32_t top = convert_pixel (top_row, 0) | alpha;
+ uint32_t bot = convert_pixel (bottom_row, 0) | alpha;
+ uint32_t p = linear_interpolation (top, bot, disty);
+ while (buffer < end && x <= 0)
+ {
+ *buffer++ = p;
+ x += ux;
+ mask += mask_inc;
+ }
+ }
+
+ /* Main part */
+ w = pixman_int_to_fixed (bits->width - 1);
+ if (format == PIXMAN_a8r8g8b8 && x < w)
+ {
+ int width;
+
+ width = end - buffer;
+ if (width * ux > w - x)
+ width = (w - x + ux - 1) / ux;
+
+ scaled_bilinear_scanline_sse2_8888_8888_SRC (buffer, NULL,
+ (uint32_t *)top_row,
+ (uint32_t *)bottom_row,
+ width,
+ BILINEAR_INTERPOLATION_RANGE - disty, disty,
+ x, ux,
+ 0, 0);
+
+ buffer += width;
+ x += ux * width;
+ mask += mask_inc * width;
+ }
+ else
+ {
+ while (buffer < end && x < w)
+ {
+ if (*mask)
+ {
+ int32_t distx = pixman_fixed_to_bilinear_weight (x);
+ uint32_t top[2] = {
+ convert_pixel (top_row, pixman_fixed_to_int (x)) | alpha,
+ convert_pixel (top_row, pixman_fixed_to_int (x) + 1) | alpha,
+ };
+ uint32_t bot[2] = {
+ convert_pixel (bottom_row, pixman_fixed_to_int (x)) | alpha,
+ convert_pixel (bottom_row, pixman_fixed_to_int (x) + 1) | alpha,
+ };
+
+ *buffer = sse2_bilinear_interpolation (top, bot, distx, disty);
+ }
+
+ buffer++;
+ x += ux;
+ mask += mask_inc;
+ }
+ }
+
+ /* Right Edge */
+ if (buffer < end)
+ {
+ uint32_t top = convert_pixel (top_row, bits->width-1) | alpha;
+ uint32_t bot = convert_pixel (bottom_row, bits->width-1) | alpha;
+ uint32_t p = linear_interpolation (top, bot, disty);
+ while (buffer < end)
+ {
+ *buffer++ = p;
+ }
+ }
+ }
+
+ return iter->buffer;
+}
+
+static force_inline uint32_t *
+sse2_fetch_bilinear_affine (pixman_iter_t *iter,
+ const uint32_t * mask,
+
+ convert_pixel_t convert_pixel,
+ pixman_format_code_t format,
+ pixman_repeat_t repeat_mode)
+{
+ pixman_image_t *image = iter->image;
+ int offset = iter->x;
+ int line = iter->y++;
+ int width = iter->width;
+ uint32_t *buffer = iter->buffer;
+ pixman_fixed_t x, y;
+ pixman_fixed_t ux, uy;
+ pixman_vector_t v;
+ bits_image_t *bits = &image->bits;
+ int i;
+
+ /* reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (image->common.transform, &v))
+ return iter->buffer;
+
+ ux = image->common.transform->matrix[0][0];
+ uy = image->common.transform->matrix[1][0];
+
+ x = v.vector[0];
+ y = v.vector[1];
+
+ for (i = 0; i < width; ++i)
+ {
+ int x1, y1, x2, y2;
+ uint32_t top[2], bot[2];
+ int32_t distx, disty;
+ int width = image->bits.width;
+ int height = image->bits.height;
+ const uint8_t *top_row;
+ const uint8_t *bot_row;
+
+ if (mask && !mask[i])
+ goto next;
+
+ x1 = x - pixman_fixed_1 / 2;
+ y1 = y - pixman_fixed_1 / 2;
+
+ distx = pixman_fixed_to_bilinear_weight (x1);
+ disty = pixman_fixed_to_bilinear_weight (y1);
+
+ y1 = pixman_fixed_to_int (y1);
+ y2 = y1 + 1;
+ x1 = pixman_fixed_to_int (x1);
+ x2 = x1 + 1;
+
+ if (repeat_mode != PIXMAN_REPEAT_NONE)
+ {
+ uint32_t alpha = PIXMAN_FORMAT_A (format) ? 0 : 0xff000000;
+
+ repeat (repeat_mode, &x1, width);
+ repeat (repeat_mode, &y1, height);
+ repeat (repeat_mode, &x2, width);
+ repeat (repeat_mode, &y2, height);
+
+ top_row = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
+ bot_row = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
+
+ top[0] = convert_pixel (top_row, x1) | alpha;
+ top[1] = convert_pixel (top_row, x2) | alpha;
+ bot[0] = convert_pixel (bot_row, x1) | alpha;
+ bot[1] = convert_pixel (bot_row, x2) | alpha;
+ }
+ else
+ {
+ uint32_t top_alpha, bot_alpha;
+ int bpp;
+
+ /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value,
+ * which means if you use it in expressions, those
+ * expressions become unsigned themselves. Since
+ * the variables below can be negative in some cases,
+ * that will lead to crashes on 64 bit architectures.
+ *
+ * So this line makes sure bpp is signed
+ */
+ bpp = PIXMAN_FORMAT_BPP (format);
+
+ if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0)
+ {
+ buffer[i] = 0;
+ goto next;
+ }
+
+ if (y2 == 0)
+ {
+ top_row = zero;
+ top_alpha = 0;
+ }
+ else
+ {
+ top_row = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
+ top_row += bpp / 8 * x1;
+ top_alpha = PIXMAN_FORMAT_A (format) ? 0 : 0xff000000;
+ }
+
+ if (y1 == height - 1)
+ {
+ bot_row = zero;
+ bot_alpha = 0;
+ }
+ else
+ {
+ bot_row = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
+ bot_row += bpp / 8 * x1;
+ bot_alpha = PIXMAN_FORMAT_A (format) ? 0 : 0xff000000;
+ }
+
+ if (x2 == 0)
+ {
+ top[0] = 0;
+ bot[0] = 0;
+ }
+ else
+ {
+ top[0] = convert_pixel (top_row, 0) | top_alpha;
+ bot[0] = convert_pixel (bot_row, 0) | bot_alpha;
+ }
+
+ if (x1 == width - 1)
+ {
+ top[1] = 0;
+ bot[1] = 0;
+ }
+ else
+ {
+ top[1] = convert_pixel (top_row, 1) | top_alpha;
+ bot[1] = convert_pixel (bot_row, 1) | bot_alpha;
+ }
+ }
+
+ buffer[i] = sse2_bilinear_interpolation (top, bot, distx, disty);
+
+ next:
+ x += ux;
+ y += uy;
+ }
+
+ return iter->buffer;
+}
+
+static force_inline uint32_t
+convert_a8r8g8b8 (const uint8_t *row, int x)
+{
+ return *(((uint32_t *)row) + x);
+}
+
+static force_inline uint32_t
+convert_x8r8g8b8 (const uint8_t *row, int x)
+{
+ return *(((uint32_t *)row) + x);
+}
+
+static force_inline uint32_t
+convert_a8 (const uint8_t *row, int x)
+{
+ return *(row + x) << 24;
+}
+
+static force_inline uint32_t
+convert_r5g6b5 (const uint8_t *row, int x)
+{
+ return convert_0565_to_0888 (*((uint16_t *)row + x));
+}
+
+#define MAKE_SIMPLE_BILINEAR_FETCHER(format, repeat) \
+ static uint32_t * \
+ sse2_fetch_bilinear_ ## format ## _ ## repeat (pixman_iter_t *iter,\
+ const uint32_t * mask)\
+ { \
+ return sse2_fetch_bilinear (iter, mask, \
+ convert_ ## format, \
+ PIXMAN_ ## format, \
+ PIXMAN_REPEAT_ ## repeat); \
+ }
+
+MAKE_SIMPLE_BILINEAR_FETCHER(a8r8g8b8, NONE)
+MAKE_SIMPLE_BILINEAR_FETCHER(a8r8g8b8, PAD)
+MAKE_SIMPLE_BILINEAR_FETCHER(x8r8g8b8, NONE)
+MAKE_SIMPLE_BILINEAR_FETCHER(x8r8g8b8, PAD)
+MAKE_SIMPLE_BILINEAR_FETCHER(a8, NONE)
+MAKE_SIMPLE_BILINEAR_FETCHER(a8, PAD)
+MAKE_SIMPLE_BILINEAR_FETCHER(r5g6b5, NONE)
+MAKE_SIMPLE_BILINEAR_FETCHER(r5g6b5, PAD)
+
+#define MAKE_BILINEAR_FETCHER(name, format, repeat) \
+ static uint32_t * \
+ sse2_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \
+ const uint32_t * mask) \
+ { \
+ return sse2_fetch_bilinear_affine (iter, mask, \
+ convert_ ## format, \
+ PIXMAN_ ## format, \
+ PIXMAN_REPEAT_ ## repeat); \
+ } \
+
+MAKE_BILINEAR_FETCHER(pad_a8r8g8b8, a8r8g8b8, PAD)
+MAKE_BILINEAR_FETCHER(none_a8r8g8b8, a8r8g8b8, NONE)
+MAKE_BILINEAR_FETCHER(reflect_a8r8g8b8, a8r8g8b8, REFLECT)
+MAKE_BILINEAR_FETCHER(normal_a8r8g8b8, a8r8g8b8, NORMAL)
+MAKE_BILINEAR_FETCHER(pad_x8r8g8b8, x8r8g8b8, PAD)
+MAKE_BILINEAR_FETCHER(none_x8r8g8b8, x8r8g8b8, NONE)
+MAKE_BILINEAR_FETCHER(reflect_x8r8g8b8, x8r8g8b8, REFLECT)
+MAKE_BILINEAR_FETCHER(normal_x8r8g8b8, x8r8g8b8, NORMAL)
+MAKE_BILINEAR_FETCHER(pad_a8, a8, PAD)
+MAKE_BILINEAR_FETCHER(none_a8, a8, NONE)
+MAKE_BILINEAR_FETCHER(reflect_a8, a8, REFLECT)
+MAKE_BILINEAR_FETCHER(normal_a8, a8, NORMAL)
+MAKE_BILINEAR_FETCHER(pad_r5g6b5, r5g6b5, PAD)
+MAKE_BILINEAR_FETCHER(none_r5g6b5, r5g6b5, NONE)
+MAKE_BILINEAR_FETCHER(reflect_r5g6b5, r5g6b5, REFLECT)
+MAKE_BILINEAR_FETCHER(normal_r5g6b5, r5g6b5, NORMAL)
+
+typedef struct
+{
+ pixman_format_code_t format;
+ uint32_t flags;
+ pixman_iter_get_scanline_t get_scanline;
+} bilinear_fetcher_info_t;
+
+static const bilinear_fetcher_info_t bilinear_fetcher_info[] =
+{
+
+#define GENERAL_BILINEAR_FLAGS \
+ (FAST_PATH_STANDARD_FLAGS | \
+ FAST_PATH_HAS_TRANSFORM | \
+ FAST_PATH_AFFINE_TRANSFORM | \
+ FAST_PATH_BILINEAR_FILTER)
+
+#define FAST_BILINEAR_FLAGS \
+ (GENERAL_BILINEAR_FLAGS | \
+ FAST_PATH_X_UNIT_POSITIVE | \
+ FAST_PATH_Y_UNIT_ZERO)
+
+#define BILINEAR_SIMPLE_FAST_PATH(format, repeat) \
+ { PIXMAN_ ## format, \
+ FAST_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
+ sse2_fetch_bilinear_ ## format ## _ ## repeat, \
+ }
+
+ BILINEAR_SIMPLE_FAST_PATH (a8r8g8b8, NONE),
+ BILINEAR_SIMPLE_FAST_PATH (a8r8g8b8, PAD),
+ BILINEAR_SIMPLE_FAST_PATH (x8r8g8b8, NONE),
+ BILINEAR_SIMPLE_FAST_PATH (x8r8g8b8, PAD),
+ BILINEAR_SIMPLE_FAST_PATH (a8, NONE),
+ BILINEAR_SIMPLE_FAST_PATH (a8, PAD),
+ BILINEAR_SIMPLE_FAST_PATH (r5g6b5, NONE),
+ BILINEAR_SIMPLE_FAST_PATH (r5g6b5, PAD),
+
+#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \
+ { PIXMAN_ ## format, \
+ GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
+ sse2_fetch_bilinear_affine_ ## name, \
+ }
+
+ BILINEAR_AFFINE_FAST_PATH (pad_a8r8g8b8, a8r8g8b8, PAD),
+ BILINEAR_AFFINE_FAST_PATH (none_a8r8g8b8, a8r8g8b8, NONE),
+ BILINEAR_AFFINE_FAST_PATH (reflect_a8r8g8b8, a8r8g8b8, REFLECT),
+ BILINEAR_AFFINE_FAST_PATH (normal_a8r8g8b8, a8r8g8b8, NORMAL),
+ BILINEAR_AFFINE_FAST_PATH (pad_x8r8g8b8, x8r8g8b8, PAD),
+ BILINEAR_AFFINE_FAST_PATH (none_x8r8g8b8, x8r8g8b8, NONE),
+ BILINEAR_AFFINE_FAST_PATH (reflect_x8r8g8b8, x8r8g8b8, REFLECT),
+ BILINEAR_AFFINE_FAST_PATH (normal_x8r8g8b8, x8r8g8b8, NORMAL),
+ BILINEAR_AFFINE_FAST_PATH (pad_a8, a8, PAD),
+ BILINEAR_AFFINE_FAST_PATH (none_a8, a8, NONE),
+ BILINEAR_AFFINE_FAST_PATH (reflect_a8, a8, REFLECT),
+ BILINEAR_AFFINE_FAST_PATH (normal_a8, a8, NORMAL),
+ BILINEAR_AFFINE_FAST_PATH (pad_r5g6b5, r5g6b5, PAD),
+ BILINEAR_AFFINE_FAST_PATH (none_r5g6b5, r5g6b5, NONE),
+ BILINEAR_AFFINE_FAST_PATH (reflect_r5g6b5, r5g6b5, REFLECT),
+ BILINEAR_AFFINE_FAST_PATH (normal_r5g6b5, r5g6b5, NORMAL),
+
+ { PIXMAN_null },
+};
+
static pixman_bool_t
sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
@@ -6376,6 +7079,22 @@ sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
}
}
+ if ((iter->iter_flags & ITER_NARROW) &&
+ (iter->image_flags & GENERAL_BILINEAR_FLAGS) == GENERAL_BILINEAR_FLAGS)
+ {
+ const bilinear_fetcher_info_t *f;
+
+ for (f = bilinear_fetcher_info; f->format != PIXMAN_null; ++f)
+ {
+ if ((f->flags & iter->image_flags) == f->flags &&
+ f->format == image->common.extended_format_code)
+ {
+ iter->get_scanline = f->get_scanline;
+ return TRUE;
+ }
+ }
+ }
+
return FALSE;
}
--
1.7.10.4
More information about the Pixman
mailing list