pixman: Branch 'wide-composite' - 17 commits

Aaron Plattner aplattner at kemper.freedesktop.org
Fri Oct 26 11:17:29 PDT 2007


 configure.ac                    |    2 
 pixman/formats.pl               |    2 
 pixman/gen.pl                   |   21 
 pixman/pixman-access-handcode.c | 1019 ++++++++++++++++++++++------------------
 pixman/pixman-compose.c         |  172 +++---
 pixman/pixman-edge-imp.h        |   36 -
 pixman/pixman-edge.c            |   52 +-
 pixman/pixman-image.c           |   74 +-
 pixman/pixman-mmx.c             |  534 ++++++++++----------
 pixman/pixman-pict.c            |  168 +++---
 pixman/pixman-private.h         |   80 +--
 pixman/pixman-utils.c           |   36 -
 pixman/pixman.h                 |    8 
 test/Makefile.am                |    6 
 test/fetch-test.c               |  163 ++++++
 15 files changed, 1344 insertions(+), 1029 deletions(-)

New commits:
commit 68c9b8c97ccfdc61171414da64916ccfea6ad2ef
Merge: a77cfd1... d4d78c8...
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Fri Oct 26 11:10:45 2007 -0700

    Merge branch 'master' into wide-composite-merge
    
    Conflicts:
    
    	pixman/pixman-compose.c
    	pixman/pixman.h

diff --cc pixman/pixman-access-handcode.c
index bba413f,0000000..494c8b2
mode 100644,000000..100644
--- a/pixman/pixman-access-handcode.c
+++ b/pixman/pixman-access-handcode.c
@@@ -1,2927 -1,0 +1,2929 @@@
 +/*
 + *
 + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
 + *             2005 Lars Knoll & Zack Rusin, Trolltech
 + *
 + * Permission to use, copy, modify, distribute, and sell this software and its
 + * documentation for any purpose is hereby granted without fee, provided that
 + * the above copyright notice appear in all copies and that both that
 + * copyright notice and this permission notice appear in supporting
 + * documentation, and that the name of Keith Packard not be used in
 + * advertising or publicity pertaining to distribution of the software without
 + * specific, written prior permission.  Keith Packard makes no
 + * representations about the suitability of this software for any purpose.  It
 + * is provided "as is" without express or implied warranty.
 + *
 + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 + * SOFTWARE.
 + */
 +
++#ifdef HAVE_CONFIG_H
 +#include <config.h>
++#endif
 +
 +#include <stdlib.h>
 +#include <string.h>
 +#include <math.h>
 +#include <assert.h>
 +#include <limits.h>
 +
 +#include "pixman-private.h"
 +
 +
 +/*
 + *    FIXME:
 + *		The stuff here is added just to get it to compile. Something sensible needs to
 + *              be done before this can be used.
 + *
 + *   We should go through this code and clean up some of the weird stuff that has
 + *   resulted from unmacro-ifying it.
 + *
 + */
 +#define INLINE inline
 +
 +/*   End of stuff added to get it to compile
 + */
 +
 +static unsigned int
 +SourcePictureClassify (source_image_t *pict,
 +		       int	       x,
 +		       int	       y,
 +		       int	       width,
 +		       int	       height)
 +{   /* Classify how pict varies over the rectangle (x, y, width, height); may update pict->class and returns it. */
 +    if (pict->common.type == SOLID)
 +    {
 +	pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +    }
 +    else if (pict->common.type == LINEAR)
 +    {
 +	linear_gradient_t *linear = (linear_gradient_t *)pict;
 +	pixman_vector_t   v;
 +	pixman_fixed_32_32_t l;
 +	pixman_fixed_48_16_t dx, dy, a, b, off;
 +	pixman_fixed_48_16_t factors[4];	/* only entries 0..2 are written and read */
 +	int	     i;
 +
 +	dx = linear->p2.x - linear->p1.x;
 +	dy = linear->p2.y - linear->p1.y;
 +	l = dx * dx + dy * dy;	/* squared length of the p1 -> p2 vector */
 +	if (l)
 +	{
 +	    a = (dx << 32) / l;
 +	    b = (dy << 32) / l;
 +	}
 +	else
 +	{
 +	    a = b = 0;	/* p1 == p2: avoid dividing by zero */
 +	}
 +
 +	off = (-a * linear->p1.x
 +	       -b * linear->p1.y) >> 16;
 +
 +	for (i = 0; i < 3; i++)	/* i = 0, 1, 2 -> (x, y), (x + width - 1, y), (x, y + height - 1) */
 +	{
 +	    v.vector[0] = pixman_int_to_fixed ((i % 2) * (width  - 1) + x);
 +	    v.vector[1] = pixman_int_to_fixed ((i / 2) * (height - 1) + y);
 +	    v.vector[2] = pixman_fixed_1;
 +
 +	    if (pict->common.transform)
 +	    {
 +		if (!pixman_transform_point_3d (pict->common.transform, &v))
 +		    return SOURCE_IMAGE_CLASS_UNKNOWN;	/* pict->class itself is not modified on this path */
 +	    }
 +
 +	    factors[i] = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off;
 +	}
 +
 +	if (factors[2] == factors[0])	/* same factor at (x, y) and (x, y + height - 1) */
 +	    pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	else if (factors[1] == factors[0])	/* same factor at (x, y) and (x + width - 1, y) */
 +	    pict->class = SOURCE_IMAGE_CLASS_VERTICAL;
 +    }
 +
 +    return pict->class;	/* unchanged from its previous value when no branch above assigned it */
 +}
 +
 +#define SCANLINE_BUFFER_LENGTH 2048
 +
 +/*
 + * YV12 setup and access macros
 + */
 +
 +#define YV12_SETUP(pict) \
 +	uint32_t *bits = pict->bits; \
 +	int stride = pict->rowstride; \
 +	int offset0 = stride < 0 ? \
 +		((-stride) >> 1) * ((pict->height - 1) >> 1) - stride : \
 +		stride * pict->height; \
 +	int offset1 = stride < 0 ? \
 +		offset0 + ((-stride) >> 1) * ((pict->height) >> 1) : \
 +		offset0 + (offset0 >> 2);
 +
 +#define YV12_Y(line)		\
 +    ((uint8_t *) ((bits) + (stride) * (line)))
 +
 +#define YV12_U(line)	      \
 +    ((uint8_t *) ((bits) + offset1 + \
 +		((stride) >> 1) * ((line) >> 1)))
 +
 +#define YV12_V(line)	      \
 +    ((uint8_t *) ((bits) + offset0 + \
 +		((stride) >> 1) * ((line) >> 1)))
 +
 +typedef FASTCALL void (*fetchProc_32)(bits_image_t *pict, int x, int y, int width, uint32_t *buffer);
 +
 +static fetchProc_32 fetchProcForPicture_32 (bits_image_t * pict);
 +
 +
 +typedef FASTCALL uint32_t (*fetchPixelProc_32)(bits_image_t *pict, int offset, int line);
 +
 +static fetchPixelProc_32 fetchPixelProcForPicture_32 (bits_image_t * pict);
 +
 +typedef FASTCALL void (*storeProc_32) (pixman_image_t *image,
 +				       uint32_t *bits, const uint32_t *values,
 +				       int x, int width,
 +				       const pixman_indexed_t * indexed);
 +
 +static storeProc_32 storeProcForPicture_32 (bits_image_t * pict);
 +
 +typedef FASTCALL void (*fetchProc_64)(bits_image_t *pict, int x, int y, int width, uint64_t *buffer);
 +
 +static fetchProc_64 fetchProcForPicture_64 (bits_image_t * pict);
 +
 +
 +typedef FASTCALL uint64_t (*fetchPixelProc_64)(bits_image_t *pict, int offset, int line);
 +
 +static fetchPixelProc_64 fetchPixelProcForPicture_64 (bits_image_t * pict);
 +
 +typedef FASTCALL void (*storeProc_64) (pixman_image_t *image,
 +				       uint32_t *bits, const uint64_t *values,
 +				       int x, int width,
 +				       const pixman_indexed_t * indexed);
 +
 +static storeProc_64 storeProcForPicture_64 (bits_image_t * pict);
 +
 +/* handcoded fetch/store functions. */
 +#define fbFetch_g8_32 fbFetch_c8_32
 +#define fbFetchPixel_g8_32 fbFetchPixel_c8_32
 +#define fbStore_g8_32 fbStore_c8_32
 +
 +#define fbFetch_g4_32 fbFetch_c4_32
 +#define fbFetchPixel_g4_32 fbFetchPixel_c4_32
 +#define fbStore_g4_32 fbStore_c4_32
 +
 +#define fbFetch_g8_64 fbFetch_c8_64
 +#define fbFetchPixel_g8_64 fbFetchPixel_c8_64
 +#define fbStore_g8_64 fbStore_c8_64
 +
 +#define fbFetch_g4_64 fbFetch_c4_64
 +#define fbFetchPixel_g4_64 fbFetchPixel_c4_64
 +#define fbStore_g4_64 fbStore_c4_64
 +
 +static FASTCALL void
 +fbFetch_r8g8b8_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{   /* Fetch `width` 3-byte pixels of row y, starting at pixel x, into buffer with 0xff in the top byte. */
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;	/* rowstride is applied to a uint32_t pointer */
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint32_t b = Fetch24(pict, pixel) | 0xff000000;	/* the format stores no alpha: force opaque */
 +	pixel += 3;
 +	*buffer++ = b;
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_r8g8b8_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{   /* Fetch `width` r8g8b8 pixels of row y, starting at pixel x, widened to 16 bits per channel, opaque alpha. */
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint64_t c = Fetch24(pict, pixel);	/* same 24-bit read as fbFetch_r8g8b8_32 */
 +	uint64_t p = 0xffff000000000000LL |	/* no alpha in the format: force opaque, as fbFetchPixel_r8g8b8_64 does */
 +	    ((c & 0xff0000) << 16) | ((c & 0xff00) << 8) | (c & 0xff);
 +	/* p holds each 8-bit channel in the low byte of its 16-bit lane (bits 32, 16 and 0) */
 +
 +	pixel += 3;
 +	*buffer++ = p | (p << 8);	/* was (p << 16) | p, which moved red into the alpha lane and green into red */
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_b8g8r8_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{   /* Fetch `width` 3-byte pixels of row y, starting at pixel x, into buffer with 0xff in the top byte. */
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint32_t b = 0xff000000;	/* opaque alpha; the three bytes are OR-ed in below */
 +#if IMAGE_BYTE_ORDER == MSBFirst	/* first byte read goes to bits 0-7 */
 +	b |= (READ(pict, pixel++));
 +	b |= (READ(pict, pixel++) << 8);
 +	b |= (READ(pict, pixel++) << 16);
 +#else	/* first byte read goes to bits 16-23 */
 +	b |= (READ(pict, pixel++) << 16);
 +	b |= (READ(pict, pixel++) << 8);
 +	b |= (READ(pict, pixel++));
 +#endif
 +	*buffer++ = b;
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_b8g8r8_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{   /* 64-bit counterpart of fbFetch_b8g8r8_32: each byte is widened to a 16-bit lane, alpha lane is 0xffff. */
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint64_t b = 0xffff000000000000LL;	/* opaque 16-bit alpha */
 +#if IMAGE_BYTE_ORDER == MSBFirst	/* first byte read goes to bits 0-7 */
 +	b |= ((uint64_t)READ(pict, pixel++));
 +	b |= ((uint64_t)READ(pict, pixel++) << 16);
 +	b |= ((uint64_t)READ(pict, pixel++) << 32);
 +#else	/* first byte read goes to bits 32-39 */
 +	b |= ((uint64_t)READ(pict, pixel++) << 32);
 +	b |= ((uint64_t)READ(pict, pixel++) << 16);
 +	b |= ((uint64_t)READ(pict, pixel++));
 +#endif
 +	*buffer++ = b | (b << 8);	/* copy each byte into the upper half of its 16-bit lane */
 +    }
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_r8g8b8_32 (bits_image_t *pict, int offset, int line)
 +{   /* Return the 3-byte pixel at column `offset` of row `line` as a 32-bit value with 0xff in the top byte. */
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst	/* byte 0 goes to bits 16-23 */
 +    return (0xff000000 |
 +	    (READ(pict, pixel + 0) << 16) |
 +	    (READ(pict, pixel + 1) << 8) |
 +	    (READ(pict, pixel + 2)));
 +#else	/* byte 2 goes to bits 16-23 */
 +    return (0xff000000 |
 +	    (READ(pict, pixel + 2) << 16) |
 +	    (READ(pict, pixel + 1) << 8) |
 +	    (READ(pict, pixel + 0)));
 +#endif
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_r8g8b8_64 (bits_image_t *pict, int offset, int line)
 +{   /* Return the 3-byte pixel at column `offset` of row `line` widened to four 16-bit lanes, alpha = 0xffff. */
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst	/* byte 0 goes to bits 32-39 */
 +    uint64_t p = (0xffff000000000000LL |
 +		  ((uint64_t)READ(pict, pixel + 0) << 32) |
 +		  ((uint64_t)READ(pict, pixel + 1) << 16) |
 +		  ((uint64_t)READ(pict, pixel + 2)));
 +#else	/* byte 2 goes to bits 32-39 */
 +    uint64_t p = (0xffff000000000000LL |
 +		  ((uint64_t)READ(pict, pixel + 2) << 32) |
 +		  ((uint64_t)READ(pict, pixel + 1) << 16) |
 +		  ((uint64_t)READ(pict, pixel + 0)));
 +#endif
 +    return p | (p << 8);	/* copy each byte into the upper half of its 16-bit lane */
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_b8g8r8_32 (bits_image_t *pict, int offset, int line)
 +{   /* Return the 3-byte pixel at column `offset` of row `line` as a 32-bit value with 0xff in the top byte. */
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst	/* byte 2 goes to bits 16-23 (reverse of the r8g8b8 fetcher) */
 +    return (0xff000000 |
 +	    (READ(pict, pixel + 2) << 16) |
 +	    (READ(pict, pixel + 1) << 8) |
 +	    (READ(pict, pixel + 0)));
 +#else	/* byte 0 goes to bits 16-23 */
 +    return (0xff000000 |
 +	    (READ(pict, pixel + 0) << 16) |
 +	    (READ(pict, pixel + 1) << 8) |
 +	    (READ(pict, pixel + 2)));
 +#endif
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_b8g8r8_64 (bits_image_t *pict, int offset, int line)
 +{   /* Return the 3-byte pixel at column `offset` of row `line` widened to four 16-bit lanes, alpha = 0xffff. */
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst	/* byte 2 goes to bits 32-39 */
 +    uint64_t p = (0xffff000000000000LL |
 +		  ((uint64_t)READ(pict, pixel + 2) << 32) |
 +		  ((uint64_t)READ(pict, pixel + 1) << 16) |
 +		  ((uint64_t)READ(pict, pixel + 0)));
 +#else	/* byte 0 goes to bits 32-39 */
 +    uint64_t p = (0xffff000000000000LL |
 +		  ((uint64_t)READ(pict, pixel + 0) << 32) |
 +		  ((uint64_t)READ(pict, pixel + 1) << 16) |
 +		  ((uint64_t)READ(pict, pixel + 2)));
 +#endif
 +    return p | (p << 8);	/* copy each byte into the upper half of its 16-bit lane */
 +}
 +
 +static FASTCALL void
 +fbStore_r8g8b8_32 (pixman_image_t *image,
 +		   uint32_t *bits, const uint32_t *values, int x, int width,
 +		   const pixman_indexed_t * indexed)
 +{   /* Store `width` values as 3-byte pixels via Store24, starting 3*x bytes into `bits`; `indexed` is unused. */
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	Store24(image, pixel, values[i]);
 +	pixel += 3;
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_r8g8b8_64 (pixman_image_t *image,
 +		   uint32_t *bits, const uint64_t *values, int x, int width,
 +		   const pixman_indexed_t * indexed)
 +{   /* Store `width` 64-bit values as 3-byte pixels, starting 3*x bytes into `bits`; `indexed` is unused. */
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t r = (values[i] >> 40) & 0xff;	/* high byte of the 16-bit lane at bits 32-47 */
 +	uint32_t g = (values[i] >> 24) & 0xff;	/* high byte of the 16-bit lane at bits 16-31 */
 +	uint32_t b = (values[i] >> 8) & 0xff;	/* high byte of the 16-bit lane at bits 0-15 */
 +	Store24(image, pixel, (r << 16) | (g << 8) | b);	/* was (r|g|b): the bytes were OR-ed together unshifted */
 +	pixel += 3;
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_b8g8r8_32 (pixman_image_t *image,
 +		   uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{   /* Store `width` values as three bytes each, starting 3*x bytes into `bits`; `indexed` is unused. */
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t val = values[i];
 +#if IMAGE_BYTE_ORDER == MSBFirst	/* bytes written in Blue, Green, Red order */
 +	WRITE(image, pixel++, Blue(val));
 +	WRITE(image, pixel++, Green(val));
 +	WRITE(image, pixel++, Red(val));
 +#else	/* bytes written in Red, Green, Blue order */
 +	WRITE(image, pixel++, Red(val));
 +	WRITE(image, pixel++, Green(val));
 +	WRITE(image, pixel++, Blue(val));
 +#endif
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_b8g8r8_64 (pixman_image_t *image,
 +		   uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{   /* Store `width` 64-bit values as three bytes each, starting 3*x bytes into `bits`; `indexed` is unused. */
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t r = (values[i] >> 40) & 0xff;	/* high byte of the 16-bit lane at bits 32-47 */
 +	uint32_t g = (values[i] >> 24) & 0xff;	/* high byte of the 16-bit lane at bits 16-31 */
 +	uint32_t b = (values[i] >> 8) & 0xff;	/* high byte of the 16-bit lane at bits 0-15 */
 +#if IMAGE_BYTE_ORDER == MSBFirst	/* bytes written in b, g, r order */
 +	WRITE(image, pixel++, b);
 +	WRITE(image, pixel++, g);
 +	WRITE(image, pixel++, r);
 +#else	/* bytes written in r, g, b order */
 +	WRITE(image, pixel++, r);
 +	WRITE(image, pixel++, g);
 +	WRITE(image, pixel++, b);
 +#endif
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_c8_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{   /* Fetch `width` 1-byte palette indices of row y, starting at pixel x, and write their palette colors. */
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    const uint8_t *pixel = (const uint8_t *)bits + x;
 +    const uint8_t *end = pixel + width;
 +    while (pixel < end) {
 +	uint32_t  p = READ(pict, pixel++);	/* palette index */
 +	*buffer++ = indexed->rgba[p];
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_c8_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{   /* Fetch `width` 1-byte palette indices of row y, from pixel x, as palette colors widened to 16 bits/channel. */
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    const uint8_t *pixel = (const uint8_t *)bits + x;
 +    const uint8_t *end = pixel + width;
 +    while (pixel < end) {
 +	uint64_t  p = indexed->rgba[READ(pict, pixel++)];	/* palette lookup was missing: the raw index was widened */
 +	uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +	*buffer++ = px | (px << 8);	/* copy each byte into the upper half of its 16-bit lane */
 +    }
 +}
 +
 +#define Fetch8(img,l,o)    (READ(img, (uint8_t *)(l) + ((o) >> 2)))	/* reads byte (o) >> 2 of row l */
 +#if IMAGE_BYTE_ORDER == MSBFirst	/* Fetch4: o is a 4-bit pixel index (two pixels per byte), as every caller passes */
 +#define Fetch4(img,l,o)    ((o) & 1 ? Fetch8(img,l,2 * (o)) & 0xf : Fetch8(img,l,2 * (o)) >> 4)
 +#else	/* was (o) & 2 with Fetch8(img,l,o): pixels 0 and 1 both read the same nibble of byte 0 */
 +#define Fetch4(img,l,o)    ((o) & 1 ? Fetch8(img,l,2 * (o)) >> 4 : Fetch8(img,l,2 * (o)) & 0xf)
 +#endif
 +
 +static FASTCALL void
 +fbFetch_c4_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{   /* Fetch `width` 4-bit palette indices of row y, starting at pixel x, and write their palette colors. */
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  p = Fetch4(pict, bits, i + x);	/* Fetch4 is handed the pixel index within the row */
 +
 +	*buffer++ = indexed->rgba[p];
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_c4_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{   /* 64-bit counterpart of fbFetch_c4_32: palette color widened to 16 bits per channel. */
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint64_t  p = indexed->rgba[Fetch4(pict, bits, i + x)];
 +	uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);	/* one byte per 16-bit lane */
 +
 +	*buffer++ = px | (px << 8);	/* copy each byte into the upper half of its 16-bit lane */
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_a1_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{   /* Fetch `width` 1-bit pixels of row y, starting at pixel x; each becomes 0x00000000 or 0xff000000. */
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  p = READ(pict, bits + ((i + x) >> 5));	/* 32-bit word holding bit i + x */
 +	uint32_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = p >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = p >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +	a |= a << 1;	/* this and the next two lines replicate the bit to 8 bits */
 +	a |= a << 2;
 +	a |= a << 4;
 +	*buffer++ = a << 24;
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_a1_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{   /* Fetch `width` 1-bit pixels of row y, starting at pixel x; each becomes 0 or 0xffff in bits 48-63. */
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  p = READ(pict, bits + ((i + x) >> 5));	/* 32-bit word holding bit i + x */
 +	uint64_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = p >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = p >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +	a |= a << 1;	/* this and the next three lines replicate the bit to 16 bits */
 +	a |= a << 2;
 +	a |= a << 4;
 +	a |= a << 8;
 +	*buffer++ = a << 48;
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_g1_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{   /* Fetch `width` 1-bit pixels of row y, starting at pixel x; each bit selects palette entry 0 or 1. */
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t p = READ(pict, bits + ((i+x) >> 5));	/* 32-bit word holding bit i + x */
 +	uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = p >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = p >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +	*buffer++ = indexed->rgba[a];
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_g1_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{   /* Fetch `width` 1-bit pixels of row y, from pixel x; each bit selects palette entry 0 or 1, widened to 64 bits. */
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t t = READ(pict, bits + ((i+x) >> 5));	/* 32-bit word holding bit i + x */
 +	uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = t >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = t >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +
 +	uint64_t  p = indexed->rgba[a];	/* was rgba[Fetch4(...)]: the extracted bit was computed but never used */
 +	uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +	*buffer++ = px | (px << 8);	/* copy each byte into the upper half of its 16-bit lane */
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_yuy2_32 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
 +{   /* Convert `width` pixels of row `line`, starting at pixel x, from packed YUY2 to 32-bit values with 0xff on top. */
 +    int16_t y, u, v;
 +    int32_t r, g, b;
 +    int   i;
 +
 +    const uint32_t *bits = pict->bits + pict->rowstride * line;
 +
 +    for (i = 0; i < width; i++)
 +    {
 +	y = ((uint8_t *) bits)[(x + i) << 1] - 16;	/* bytes are read directly here, not through READ() */
 +	u = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 1] - 128;	/* byte 1 of the 4-byte group containing the pixel */
 +	v = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 3] - 128;	/* byte 3 of the same group */
 +
 +	/* R = 1.164(Y - 16) + 1.596(V - 128) */
 +	r = 0x012b27 * y + 0x019a2e * v;
 +	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
 +	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
 +	/* B = 1.164(Y - 16) + 2.018(U - 128) */
 +	b = 0x012b27 * y + 0x0206a2 * u;
 +
 +    WRITE(pict, buffer++, 0xff000000 |	/* NOTE(review): other fetchers assign *buffer++ directly; confirm WRITE() is intended here */
 +	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
 +	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
 +	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));	/* each channel clamped to [0, 0xff] */
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_yuy2_64 (bits_image_t *pict, int x, int line, int width, uint64_t *buffer)
 +{
 +    /* [AGP] Unimplemented: nothing is written, so buffer keeps whatever it held before the call */
 +}
 +
 +static FASTCALL void
 +fbFetch_yv12_32 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
 +{   /* Convert `width` pixels of row `line`, starting at pixel x, from YV12 planes to 32-bit values with 0xff on top. */
 +    YV12_SETUP(pict);	/* declares bits, stride, offset0 and offset1, used by the YV12_Y/U/V macros */
 +    uint8_t *pY = YV12_Y (line);
 +    uint8_t *pU = YV12_U (line);
 +    uint8_t *pV = YV12_V (line);
 +    int16_t y, u, v;
 +    int32_t r, g, b;
 +    int   i;
 +
 +    for (i = 0; i < width; i++)
 +    {
 +	y = pY[x + i] - 16;
 +	u = pU[(x + i) >> 1] - 128;	/* one U and one V sample per two horizontal pixels */
 +	v = pV[(x + i) >> 1] - 128;
 +
 +	/* R = 1.164(Y - 16) + 1.596(V - 128) */
 +	r = 0x012b27 * y + 0x019a2e * v;
 +	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
 +	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
 +	/* B = 1.164(Y - 16) + 2.018(U - 128) */
 +	b = 0x012b27 * y + 0x0206a2 * u;
 +
 +	WRITE(pict, buffer++, 0xff000000 |	/* NOTE(review): other fetchers assign *buffer++ directly; confirm WRITE() is intended here */
 +	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
 +	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
 +	    (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));	/* each channel clamped to [0, 0xff] */
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_yv12_64 (bits_image_t *pict, int x, int line, int width, uint64_t *buffer)
 +{
 +    /* [AGP] Unimplemented: nothing is written, so buffer keeps whatever it held before the call */
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_c8_32 (bits_image_t *pict, int offset, int line)
 +{   /* Return the palette color for the 1-byte index at column `offset` of row `line`. */
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);	/* palette index */
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    return indexed->rgba[pixel];
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_c8_64 (bits_image_t *pict, int offset, int line)
 +{   /* 64-bit counterpart of fbFetchPixel_c8_32: palette color widened to 16 bits per channel. */
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    uint64_t p = indexed->rgba[READ(pict, (uint8_t *) bits + offset)];
 +    uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);	/* one byte per 16-bit lane */
 +
 +    return px | (px << 8);	/* copy each byte into the upper half of its 16-bit lane */
 +}
 +
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_c4_32 (bits_image_t *pict, int offset, int line)
 +{   /* Return the palette color for the 4-bit index at column `offset` of row `line`. */
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t  pixel = Fetch4(pict, bits, offset);	/* Fetch4 is handed the pixel index within the row */
 +    const pixman_indexed_t * indexed = pict->indexed;
 +
 +    return indexed->rgba[pixel];
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_c4_64 (bits_image_t *pict, int offset, int line)
 +{   /* 64-bit counterpart of fbFetchPixel_c4_32: palette color widened to 16 bits per channel. */
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    uint64_t p = indexed->rgba[Fetch4(pict, bits, offset)];
 +    uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);	/* one byte per 16-bit lane */
 +
 +    return px | (px << 8);	/* copy each byte into the upper half of its 16-bit lane */
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_a1_32 (bits_image_t *pict, int offset, int line)
 +{   /* Return 0xff000000 if bit `offset` of row `line` is set, otherwise 0. */
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t  pixel = READ(pict, bits + (offset >> 5));	/* 32-bit word holding the bit */
 +    uint32_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    a |= a << 1;	/* this and the next two lines replicate the bit to 8 bits */
 +    a |= a << 2;
 +    a |= a << 4;
 +    return a << 24;
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_a1_64 (bits_image_t *pict, int offset, int line)
 +{   /* Return 0xffff in bits 48-63 if bit `offset` of row `line` is set, otherwise 0. */
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t  pixel = READ(pict, bits + (offset >> 5));	/* 32-bit word holding the bit */
 +    uint64_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    a |= a << 1;	/* this and the next three lines replicate the bit to 16 bits */
 +    a |= a << 2;
 +    a |= a << 4;
 +    a |= a << 8;
 +    return a << 48;
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_g1_32 (bits_image_t *pict, int offset, int line)
 +{   /* Return palette entry 0 or 1, selected by bit `offset` of row `line`. */
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t pixel = READ(pict, bits + (offset >> 5));	/* 32-bit word holding the bit */
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    return indexed->rgba[a];
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_g1_64 (bits_image_t *pict, int offset, int line)
 +{   /* 64-bit counterpart of fbFetchPixel_g1_32: palette entry 0 or 1 widened to 16 bits per channel. */
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t pixel = READ(pict, bits + (offset >> 5));	/* 32-bit word holding the bit */
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    uint64_t p = indexed->rgba[a];
 +    uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);	/* one byte per 16-bit lane */
 +    return px | (px << 8);	/* copy each byte into the upper half of its 16-bit lane */
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_yuy2_32 (bits_image_t *pict, int offset, int line)
 +{   /* Convert the packed YUY2 pixel at column `offset` of row `line` to a 32-bit value with 0xff in the top byte. */
 +    int16_t y, u, v;
 +    int32_t r, g, b;
 +
 +    const uint32_t *bits = pict->bits + pict->rowstride * line;
 +
 +    y = ((uint8_t *) bits)[offset << 1] - 16;	/* bytes are read directly here, not through READ() */
 +    u = ((uint8_t *) bits)[((offset << 1) & -4) + 1] - 128;	/* byte 1 of the 4-byte group containing the pixel */
 +    v = ((uint8_t *) bits)[((offset << 1) & -4) + 3] - 128;	/* byte 3 of the same group */
 +
 +    /* R = 1.164(Y - 16) + 1.596(V - 128) */
 +    r = 0x012b27 * y + 0x019a2e * v;
 +    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
 +    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
 +    /* B = 1.164(Y - 16) + 2.018(U - 128) */
 +    b = 0x012b27 * y + 0x0206a2 * u;
 +
 +    return 0xff000000 |
 +	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
 +	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
 +	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);	/* each channel clamped to [0, 0xff] */
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_yuy2_64 (bits_image_t *pict, int offset, int line)
 +{
 +    /* [AGP] Unimplemented: the image is never read and every pixel is reported as 0 */
 +    return 0;
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_yv12_32 (bits_image_t *pict, int offset, int line)
 +{   /* Convert the YV12 pixel at column `offset` of row `line` to a 32-bit value with 0xff in the top byte. */
 +    YV12_SETUP(pict);	/* declares bits, stride, offset0 and offset1, used by the YV12_Y/U/V macros */
 +    int16_t y = YV12_Y (line)[offset] - 16;
 +    int16_t u = YV12_U (line)[offset >> 1] - 128;	/* one U and one V sample per two horizontal pixels */
 +    int16_t v = YV12_V (line)[offset >> 1] - 128;
 +    int32_t r, g, b;
 +
 +    /* R = 1.164(Y - 16) + 1.596(V - 128) */
 +    r = 0x012b27 * y + 0x019a2e * v;
 +    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
 +    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
 +    /* B = 1.164(Y - 16) + 2.018(U - 128) */
 +    b = 0x012b27 * y + 0x0206a2 * u;
 +
 +    return 0xff000000 |
 +	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
 +	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
 +	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);	/* each channel clamped to [0, 0xff] */
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_yv12_64 (bits_image_t *pict, int offset, int line)
 +{
 +    /* [AGP] Unimplemented: the image is never read and every pixel is reported as 0 */
 +    return 0;
 +}
 +
 +static FASTCALL void
 +fbStore_c8_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{   /* Store `width` values as 1-byte pixels starting x bytes into `bits`, each mapped by miIndexToEnt24. */
 +    int i;
 +    uint8_t   *pixel = ((uint8_t *) bits) + x;
 +    for (i = 0; i < width; ++i) {
 +	WRITE(image, pixel++, miIndexToEnt24(indexed,values[i]));
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_c8_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{   /* Store `width` 64-bit values as 1-byte pixels starting x bytes into `bits`, each mapped by miIndexToEnt48. */
 +    int i;
 +    uint8_t   *pixel = ((uint8_t *) bits) + x;
 +    for (i = 0; i < width; ++i) {
 +	WRITE(image, pixel++, miIndexToEnt48(indexed,values[i]));
 +    }
 +}
 +
 +#define Store8(img,l,o,v)  (WRITE(img, (uint8_t *)(l) + ((o) >> 3), (v)))	/* writes byte (o) >> 3 of row l */
 +#if IMAGE_BYTE_ORDER == MSBFirst	/* Store4: o is a 4-bit pixel index; byte o/2 is read via Fetch8(2*o), written via Store8(4*o) */
 +#define Store4(img,l,o,v)  Store8(img,l,4 * (o),((o) & 1 ?				\
 +				   (Fetch8(img,l,2 * (o)) & 0xf0) | (v) :		\
 +				   (Fetch8(img,l,2 * (o)) & 0x0f) | ((v) << 4)))
 +#else	/* was (o) & 4 with byte o >> 2 read and byte o >> 3 written: the read and the write hit different bytes */
 +#define Store4(img,l,o,v)  Store8(img,l,4 * (o),((o) & 1 ?			       \
 +				   (Fetch8(img,l,2 * (o)) & 0x0f) | ((v) << 4) : \
 +				   (Fetch8(img,l,2 * (o)) & 0xf0) | (v)))
 +#endif
 +
 +
 +static FASTCALL void
 +fbStore_c4_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{   /* Store `width` values as 4-bit pixels starting at pixel x of `bits`, each mapped by miIndexToEnt24. */
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  pixel;
 +
 +	pixel = miIndexToEnt24(indexed, values[i]);
 +	Store4(image, bits, i + x, pixel);	/* Store4 is handed the pixel index within the row */
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_c4_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{   /* Store `width` 64-bit values as 4-bit pixels starting at pixel x of `bits`, each mapped by miIndexToEnt48. */
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  pixel;
 +
 +	pixel = miIndexToEnt48(indexed, values[i]);
 +	Store4(image, bits, i + x, pixel);	/* Store4 is handed the pixel index within the row */
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_a1_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{   /* Store `width` 1-bit pixels starting at bit x of `bits`; a bit is set when bit 31 of the value is set. */
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);	/* 32-bit word holding bit i + x */
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +
 +	uint32_t v = values[i] & 0x80000000 ? mask : 0;
 +	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);	/* read-modify-write: only the masked bits change */
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_a1_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{   /* Store `width` 1-bit pixels starting at bit x of `bits`; a bit is set when bit 63 of the value is set. */
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);	/* 32-bit word holding bit i + x */
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +
 +	uint64_t v = values[i] & 0x8000000000000000LL ? mask : 0;
 +	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);	/* read-modify-write: only the masked bits change */
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_g1_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{   /* Store `width` 1-bit pixels starting at bit x of `bits`; a bit is set when miIndexToEntY24 is non-zero. */
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);	/* 32-bit word holding bit i + x */
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +
 +	uint32_t v = miIndexToEntY24(indexed,values[i]) ? mask : 0;
 +	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);	/* read-modify-write: only the masked bits change */
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_g1_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{   /* Store `width` 1-bit pixels starting at bit x of `bits`; a bit is set when miIndexToEntY48 is non-zero. */
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);	/* 32-bit word holding bit i + x */
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +
 +	uint32_t v = miIndexToEntY48(indexed,values[i]) ? mask : 0;
 +	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);	/* read-modify-write: only the masked bits change */
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_yuy2_32 (pixman_image_t *image,
 +	         uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    /* [AGP] Unimplemented: the call is a no-op and `bits` is left unmodified */
 +}
 +
 +static FASTCALL void
 +fbStore_yuy2_64 (pixman_image_t *image,
 +	         uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    /* [AGP] Unimplemented: the call is a no-op and `bits` is left unmodified */
 +}
 +
 +static FASTCALL void
 +fbStore_yv12_32 (pixman_image_t *image,
 +	         uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    /* [AGP] Unimplemented: the call is a no-op and `bits` is left unmodified */
 +}
 +
 +static FASTCALL void
 +fbStore_yv12_64 (pixman_image_t *image,
 +	         uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    /* [AGP] Unimplemented: the call is a no-op and `bits` is left unmodified */
 +}
 +
 +/* end of handcoded fetch/store functions */
 +
 +#ifdef PIXMAN_FB_ACCESSORS	/* the function is file-local only in the accessor build */
 +static
 +#endif
 +void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{   /* Fill buffer with `width` copies of the image's pixel at (0, 0); x, y, mask and maskBits are not used. */
 +    uint32_t color;
 +    uint32_t *end;
 +    fetchPixelProc_32 fetch = fetchPixelProcForPicture_32(pict);
 +
 +    color = fetch(pict, 0, 0);	/* NOTE(review): fetch is called without a NULL check - confirm the lookup cannot fail */
 +
 +    end = buffer + width;
 +    while (buffer < end)
 +	*(buffer++) = color;
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +#ifdef PIXMAN_FB_ACCESSORS	/* the function is file-local only in the accessor build */
 +static
 +#endif
 +void fbFetchSolid64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{   /* 64-bit counterpart of fbFetchSolid: fill buffer with `width` copies of the pixel at (0, 0). */
 +    uint64_t color;
 +    uint64_t *end;
 +    fetchPixelProc_64 fetch = fetchPixelProcForPicture_64(pict);
 +
 +    color = fetch(pict, 0, 0);	/* NOTE(review): fetch is called without a NULL check - confirm the lookup cannot fail */
 +
 +    end = buffer + width;
 +    while (buffer < end)
 +	*(buffer++) = color;
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +static void fbFetch(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{   /* Fetch `width` pixels of row y, starting at x, with the 32-bit fetcher chosen for pict; mask and maskBits are unused. */
 +    fetchProc_32 fetch = fetchProcForPicture_32(pict);
 +
 +    fetch(pict, x, y, width, buffer);
 +}
 +
 +static void fbFetch64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{   /* Fetch `width` pixels of row y, starting at x, with the 64-bit fetcher chosen for pict; mask and maskBits are unused. */
 +    fetchProc_64 fetch = fetchProcForPicture_64(pict);
 +
 +    fetch(pict, x, y, width, buffer);
 +}
 +
 +#ifdef PIXMAN_FB_ACCESSORS /* select the _accessors or _no_accessors spelling of the two composite entry-point names */
 +#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_accessors
 +#define PIXMAN_COMPOSITE_RECT_GENERAL_WIDE pixman_composite_rect_general_wide_accessors
 +#else
 +#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
 +#define PIXMAN_COMPOSITE_RECT_GENERAL_WIDE pixman_composite_rect_general_wide_no_accessors
 +#endif
 +
 +typedef struct
 +{   /* Cached state for sampling a gradient's colour stops: set up by
 +     * _gradient_walker_init, refreshed by _gradient_walker_reset and read
 +     * by _gradient_walker_pixel. */
 +    uint32_t        left_ag;	/* left stop: alpha high byte in bits 16-23, green high byte in bits 0-7 */
 +    uint32_t        left_rb;	/* left stop: red high byte in bits 16-23, blue high byte in bits 0-7 */
 +    uint32_t        right_ag;	/* right stop, packed like left_ag */
 +    uint32_t        right_rb;	/* right stop, packed like left_rb */
 +    int32_t       left_x;	/* start of the cached segment (inclusive) */
 +    int32_t       right_x;	/* end of the cached segment (exclusive) */
 +    int32_t       stepper;	/* (1 << 24) / segment width, rounded; 0 when no interpolation is needed */
 +
 +    pixman_gradient_stop_t	*stops;	/* the gradient's stop array (pointer is shared, not copied) */
 +    int                      num_stops;	/* number of entries in stops */
 +    unsigned int             spread;	/* repeat mode, compared against PIXMAN_REPEAT_* */
 +
 +    int		  need_reset;	/* TRUE until _gradient_walker_reset has run */
 +} GradientWalker;
 +
 +static void
 +_gradient_walker_init (GradientWalker  *walker,
 +		       gradient_t      *gradient,
 +		       unsigned int     spread)
 +{   /* Bind walker to gradient's stop array and to the given spread (repeat)
 +     * mode, zero the cached colours, and set need_reset so that the first
 +     * _gradient_walker_pixel call runs _gradient_walker_reset. */
 +    walker->num_stops = gradient->n_stops;
 +    walker->stops     = gradient->stops;
 +    walker->left_x    = 0;
 +    walker->right_x   = 0x10000;
 +    walker->stepper   = 0;
 +    walker->left_ag   = 0;
 +    walker->left_rb   = 0;
 +    walker->right_ag  = 0;
 +    walker->right_rb  = 0;
 +    walker->spread    = spread;
 +
 +    walker->need_reset = TRUE;
 +}
 +
 +static void
 +_gradient_walker_reset (GradientWalker  *walker,
 +                        pixman_fixed_32_32_t     pos)
 +{   /* Find the pair of colour stops that bracket position pos under the
 +     * walker's spread mode and cache that segment in the walker: its bounds
 +     * (left_x, right_x), both endpoint colours packed as 8-bit ag/rb pairs,
 +     * and the interpolation step.  Clears need_reset.
 +     *
 +     * NOTE(review): every branch indexes stops[] as if num_stops >= 1; that
 +     * precondition is not checked here. */
 +    int32_t                  x, left_x, right_x;
 +    pixman_color_t          *left_c, *right_c;
 +    int                      n, count = walker->num_stops;
 +    pixman_gradient_stop_t *      stops = walker->stops;
 +
 +    static const pixman_color_t   transparent_black = { 0, 0, 0, 0 };
 +
 +    switch (walker->spread)
 +    {
 +    case PIXMAN_REPEAT_NORMAL:
 +	x = (int32_t)pos & 0xFFFF;	/* position within one 0x10000-wide period */
 +	for (n = 0; n < count; n++)	/* n = index of the first stop beyond x */
 +	    if (x < stops[n].x)
 +		break;
 +	if (n == 0) {	/* before the first stop: left neighbour is the last stop of the previous period */
 +	    left_x =  stops[count-1].x - 0x10000;
 +	    left_c = &stops[count-1].color;
 +	} else {
 +	    left_x =  stops[n-1].x;
 +	    left_c = &stops[n-1].color;
 +	}
 +
 +	if (n == count) {	/* past the last stop: right neighbour is the first stop of the next period */
 +	    right_x =  stops[0].x + 0x10000;
 +	    right_c = &stops[0].color;
 +	} else {
 +	    right_x =  stops[n].x;
 +	    right_c = &stops[n].color;
 +	}
 +	left_x  += (pos - x);	/* shift the segment from the base period to the period containing pos */
 +	right_x += (pos - x);
 +	break;
 +
 +    case PIXMAN_REPEAT_PAD:
 +	for (n = 0; n < count; n++)
 +	    if (pos < stops[n].x)
 +		break;
 +
 +	if (n == 0) {	/* before the first stop: first colour extends down to INT32_MIN */
 +	    left_x =  INT32_MIN;
 +	    left_c = &stops[0].color;
 +	} else {
 +	    left_x =  stops[n-1].x;
 +	    left_c = &stops[n-1].color;
 +	}
 +
 +	if (n == count) {	/* past the last stop: last colour extends up to INT32_MAX */
 +	    right_x =  INT32_MAX;
 +	    right_c = &stops[n-1].color;
 +	} else {
 +	    right_x =  stops[n].x;
 +	    right_c = &stops[n].color;
 +	}
 +	break;
 +
 +    case PIXMAN_REPEAT_REFLECT:
 +	x = (int32_t)pos & 0xFFFF;
 +	if ((int32_t)pos & 0x10000)	/* bit 16 set: this period runs mirrored */
 +	    x = 0x10000 - x;
 +	for (n = 0; n < count; n++)
 +	    if (x < stops[n].x)
 +		break;
 +
 +	if (n == 0) {	/* before the first stop: mirror the first stop across 0 */
 +	    left_x =  -stops[0].x;
 +	    left_c = &stops[0].color;
 +	} else {
 +	    left_x =  stops[n-1].x;
 +	    left_c = &stops[n-1].color;
 +	}
 +
 +	if (n == count) {	/* past the last stop: mirror the last stop across 0x10000 */
 +	    right_x = 0x20000 - stops[n-1].x;
 +	    right_c = &stops[n-1].color;
 +	} else {
 +	    right_x =  stops[n].x;
 +	    right_c = &stops[n].color;
 +	}
 +
 +	if ((int32_t)pos & 0x10000) {	/* mirrored period: flip the segment back and swap its endpoints */
 +	    pixman_color_t  *tmp_c;
 +	    int32_t          tmp_x;
 +
 +	    tmp_x   = 0x10000 - right_x;
 +	    right_x = 0x10000 - left_x;
 +	    left_x  = tmp_x;
 +
 +	    tmp_c   = right_c;
 +	    right_c = left_c;
 +	    left_c  = tmp_c;
 +
 +	    x = 0x10000 - x;
 +	}
 +	left_x  += (pos - x);	/* shift the segment to the period containing pos */
 +	right_x += (pos - x);
 +	break;
 +
 +    default:  /* RepeatNone */
 +	for (n = 0; n < count; n++)
 +	    if (pos < stops[n].x)
 +		break;
 +
 +	if (n == 0)	/* before the first stop: transparent black */
 +	{
 +	    left_x  =  INT32_MIN;
 +	    right_x =  stops[0].x;
 +	    left_c  = right_c = (pixman_color_t*) &transparent_black;
 +	}
 +	else if (n == count)	/* past the last stop: transparent black */
 +	{
 +	    left_x  = stops[n-1].x;
 +	    right_x = INT32_MAX;
 +	    left_c  = right_c = (pixman_color_t*) &transparent_black;
 +	}
 +	else
 +	{
 +	    left_x  =  stops[n-1].x;
 +	    right_x =  stops[n].x;
 +	    left_c  = &stops[n-1].color;
 +	    right_c = &stops[n].color;
 +	}
 +    }
 +
 +    walker->left_x   = left_x;
 +    walker->right_x  = right_x;
 +    walker->left_ag  = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8);	/* keep only the high byte of each channel */
 +    walker->left_rb  = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
 +    walker->right_ag = ((right_c->alpha >> 8) << 16)  | (right_c->green >> 8);
 +    walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
 +
 +    if ( walker->left_x == walker->right_x                ||	/* empty segment, or both ends the same colour: nothing to interpolate */
 +	 ( walker->left_ag == walker->right_ag &&
 +	   walker->left_rb == walker->right_rb )   )
 +    {
 +	walker->stepper = 0;
 +    }
 +    else
 +    {
 +	int32_t width = right_x - left_x;
 +	walker->stepper = ((1 << 24) + width/2)/width;	/* rounded (1 << 24) / width */
 +    }
 +
 +    walker->need_reset = FALSE;
 +}
 +
 +#define  GRADIENT_WALKER_NEED_RESET(w,x)				\
 +    ( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x)
 +
 +/* GRADIENT_WALKER_NEED_RESET(w,x) is true when walker w has not been reset
 + * yet or x lies outside its cached segment [left_x, right_x).
 + *
 + * _gradient_walker_pixel returns the gradient colour at position x as a
 + * 32-bit pixel: alpha in bits 24-31, then red, green and blue, with the
 + * colour channels multiplied by alpha.  It performs the NEED_RESET check
 + * itself, so callers do not have to reset the walker beforehand. */
 +static uint32_t
 +_gradient_walker_pixel (GradientWalker  *walker,
 +                        pixman_fixed_32_32_t     x)
 +{
 +    int  dist, idist;
 +    uint32_t  t1, t2, a, color;
 +
 +    if (GRADIENT_WALKER_NEED_RESET (walker, x))
 +        _gradient_walker_reset (walker, x);
 +
 +    dist  = ((int)(x - walker->left_x)*walker->stepper) >> 16;	/* weight of the right stop in 1/256ths; 0 when stepper is 0 */
 +    idist = 256 - dist;	/* weight of the left stop */
 +
 +    /* combined INTERPOLATE and premultiply */
 +    t1 = walker->left_rb*idist + walker->right_rb*dist;
 +    t1 = (t1 >> 8) & 0xff00ff;	/* interpolated red (bits 16-23) and blue (bits 0-7) */
 +
 +    t2  = walker->left_ag*idist + walker->right_ag*dist;
 +    t2 &= 0xff00ff00;	/* interpolated alpha (bits 24-31) and green (bits 8-15) */
 +
 +    color = t2 & 0xff000000;	/* alpha goes into the result unscaled */
 +    a     = t2 >> 24;
 +
 +    t1  = t1*a + 0x800080;	/* red/blue times alpha, plus rounding bias */
 +    t1  = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
 +
 +    t2  = (t2 >> 8)*a + 0x800080;	/* green times alpha, plus rounding bias */
 +    t2  = (t2 + ((t2 >> 8) & 0xff00ff));
 +
 +    return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
 +}
 +
 +static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{   /* Fill buffer[0..width) with 32-bit pixels of a source picture (solid
 +     * fill, or linear / radial / conical gradient) for the scanline that
 +     * starts at pixel (x, y).  When mask is non-NULL, the per-pixel paths
 +     * leave buffer entries unwritten where (mask word & maskBits) is zero;
 +     * the solid path and the SOURCE_IMAGE_CLASS_VERTICAL paths ignore the
 +     * mask.  Returns early, leaving buffer untouched, if
 +     * pixman_transform_point_3d fails on the starting point. */
 +#if 0
 +    SourcePictPtr   pGradient = pict->pSourcePict;
 +#endif
 +    GradientWalker  walker;
 +    uint32_t       *end = buffer + width;
 +    gradient_t	    *gradient;
 +
 +    if (pict->common.type == SOLID)	/* solid fill: replicate one colour */
 +    {
 +	register uint32_t color = ((solid_fill_t *)pict)->color;
 +
 +	while (buffer < end)
 +	    *(buffer++) = color;
 +
 +	return;
 +    }
 +
 +    gradient = (gradient_t *)pict;
 +
 +    _gradient_walker_init (&walker, gradient, pict->common.repeat);
 +
 +    if (pict->common.type == LINEAR) {
 +	pixman_vector_t v, unit;
 +	pixman_fixed_32_32_t l;
 +	pixman_fixed_48_16_t dx, dy, a, b, off;
 +	linear_gradient_t *linear = (linear_gradient_t *)pict;
 +
 +        /* reference point is the center of the pixel */
 +        v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
 +        v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
 +        v.vector[2] = pixman_fixed_1;
 +        if (pict->common.transform) {
 +            if (!pixman_transform_point_3d (pict->common.transform, &v))
 +                return;
 +            unit.vector[0] = pict->common.transform->matrix[0][0];	/* unit = first matrix column, used as the per-pixel step */
 +            unit.vector[1] = pict->common.transform->matrix[1][0];
 +            unit.vector[2] = pict->common.transform->matrix[2][0];
 +        } else {
 +            unit.vector[0] = pixman_fixed_1;
 +            unit.vector[1] = 0;
 +            unit.vector[2] = 0;
 +        }
 +
 +        dx = linear->p2.x - linear->p1.x;
 +        dy = linear->p2.y - linear->p1.y;
 +        l = dx*dx + dy*dy;
 +        if (l != 0) {	/* a, b and off stay uninitialised when l == 0; they are only read on l != 0 paths */
 +            a = (dx << 32) / l;
 +            b = (dy << 32) / l;
 +            off = (-a*linear->p1.x - b*linear->p1.y)>>16;
 +        }
 +        if (l == 0  || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)) {
 +            pixman_fixed_48_16_t inc, t;
 +            /* affine transformation only */
 +            if (l == 0) {
 +                t = 0;
 +                inc = 0;
 +            } else {
 +                t = ((a*v.vector[0] + b*v.vector[1]) >> 16) + off;
 +                inc = (a * unit.vector[0] + b * unit.vector[1]) >> 16;
 +            }
 +
 +	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)	/* one colour for the whole scanline; mask is not consulted */
 +	    {
 +		register uint32_t color;
 +
 +		color = _gradient_walker_pixel( &walker, t );
 +		while (buffer < end)
 +		    *(buffer++) = color;
 +	    }
 +	    else
 +	    {
 +                if (!mask) {
 +                    while (buffer < end)
 +                    {
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +                        buffer += 1;
 +                        t      += inc;
 +                    }
 +                } else {
 +                    while (buffer < end) {
 +                        if (*mask++ & maskBits)
 +                        {
 +			    *(buffer) = _gradient_walker_pixel (&walker, t);
 +                        }
 +                        buffer += 1;
 +                        t      += inc;
 +                    }
 +                }
 +	    }
 +	}
 +	else /* projective transformation */
 +	{
 +	    pixman_fixed_48_16_t t;
 +
 +	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)	/* one colour for the whole scanline; mask is not consulted */
 +	    {
 +		register uint32_t color;
 +
 +		if (v.vector[2] == 0)
 +		{
 +		    t = 0;
 +		}
 +		else
 +		{
 +		    pixman_fixed_48_16_t x, y;
 +
 +		    x = ((pixman_fixed_48_16_t) v.vector[0] << 16) / v.vector[2];
 +		    y = ((pixman_fixed_48_16_t) v.vector[1] << 16) / v.vector[2];
 +		    t = ((a * x + b * y) >> 16) + off;
 +		}
 +
 + 		color = _gradient_walker_pixel( &walker, t );
 +		while (buffer < end)
 +		    *(buffer++) = color;
 +	    }
 +	    else
 +	    {
 +		while (buffer < end)
 +		{
 +		    if (!mask || *mask++ & maskBits)
 +		    {
 +			if (v.vector[2] == 0) {
 +			    t = 0;
 +			} else {
 +			    pixman_fixed_48_16_t x, y;
 +			    x = ((pixman_fixed_48_16_t)v.vector[0] << 16) / v.vector[2];
 +			    y = ((pixman_fixed_48_16_t)v.vector[1] << 16) / v.vector[2];
 +			    t = ((a*x + b*y) >> 16) + off;
 +			}
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    ++buffer;
 +		    v.vector[0] += unit.vector[0];
 +		    v.vector[1] += unit.vector[1];
 +		    v.vector[2] += unit.vector[2];
 +		}
 +            }
 +        }
 +    } else {
 +
 +/*
 + * In the radial gradient problem we are given two circles (c₁,r₁) and
 + * (c₂,r₂) that define the gradient itself. Then, for any point p, we
 + * must compute the value(s) of t within [0.0, 1.0] representing the
 + * circle(s) that would color the point.
 + *
 + * There are potentially two values of t since the point p can be
 + * colored by both sides of the circle, (which happens whenever one
 + * circle is not entirely contained within the other).
 + *
 + * If we solve for a value of t that is outside of [0.0, 1.0] then we
 + * use the extend mode (NONE, REPEAT, REFLECT, or PAD) to map to a
 + * value within [0.0, 1.0].
 + *
 + * Here is an illustration of the problem:
 + *
 + *              p₂
 + *           p  •
 + *           •   ╲
 + *        ·       ╲r₂
 + *  p₁ ·           ╲
 + *  •              θ╲
 + *   ╲             ╌╌•
 + *    ╲r₁        ·   c₂
 + *    θ╲    ·
 + *    ╌╌•
 + *      c₁
 + *
 + * Given (c₁,r₁), (c₂,r₂) and p, we must find an angle θ such that two
 + * points p₁ and p₂ on the two circles are collinear with p. Then, the
 + * desired value of t is the ratio of the length of p₁p to the length
 + * of p₁p₂.
 + *
 + * So, we have six unknown values: (p₁x, p₁y), (p₂x, p₂y), θ and t.
 + * We can also write six equations that constrain the problem:
 + *
 + * Point p₁ is a distance r₁ from c₁ at an angle of θ:
 + *
 + *	1. p₁x = c₁x + r₁·cos θ
 + *	2. p₁y = c₁y + r₁·sin θ
 + *
 + * Point p₂ is a distance r₂ from c₂ at an angle of θ:
 + *
 + *	3. p₂x = c₂x + r₂·cos θ
 + *	4. p₂y = c₂y + r₂·sin θ
 + *
 + * Point p lies at a fraction t along the line segment p₁p₂:
 + *
 + *	5. px = t·p₂x + (1-t)·p₁x
 + *	6. py = t·p₂y + (1-t)·p₁y
 + *
 + * To solve, first substitute 1-4 into 5 and 6:
 + *
 + * px = t·(c₂x + r₂·cos θ) + (1-t)·(c₁x + r₁·cos θ)
 + * py = t·(c₂y + r₂·sin θ) + (1-t)·(c₁y + r₁·sin θ)
 + *
 + * Then solve each for cos θ and sin θ expressed as a function of t:
 + *
 + * cos θ = (-(c₂x - c₁x)·t + (px - c₁x)) / ((r₂-r₁)·t + r₁)
 + * sin θ = (-(c₂y - c₁y)·t + (py - c₁y)) / ((r₂-r₁)·t + r₁)
 + *
 + * To simplify this a bit, we define new variables for several of the
 + * common terms as shown below:
 + *
 + *              p₂
 + *           p  •
 + *           •   ╲
 + *        ·  ┆    ╲r₂
 + *  p₁ ·     ┆     ╲
 + *  •     pdy┆      ╲
 + *   ╲       ┆       •c₂
 + *    ╲r₁    ┆   ·   ┆
 + *     ╲    ·┆       ┆cdy
 + *      •╌╌╌╌┴╌╌╌╌╌╌╌┘
 + *    c₁  pdx   cdx
 + *
 + * cdx = (c₂x - c₁x)
 + * cdy = (c₂y - c₁y)
 + *  dr =  r₂-r₁
 + * pdx =  px - c₁x
 + * pdy =  py - c₁y
 + *
 + * Note that cdx, cdy, and dr do not depend on point p at all, so can
 + * be pre-computed for the entire gradient. The simplified equations
 + * are now:
 + *
 + * cos θ = (-cdx·t + pdx) / (dr·t + r₁)
 + * sin θ = (-cdy·t + pdy) / (dr·t + r₁)
 + *
 + * Finally, to get a single function of t and eliminate the last
 + * unknown θ, we use the identity sin²θ + cos²θ = 1. First, square
 + * each equation, (we knew a quadratic was coming since it must be
 + * possible to obtain two solutions in some cases):
 + *
 + * cos²θ = (cdx²t² - 2·cdx·pdx·t + pdx²) / (dr²·t² + 2·r₁·dr·t + r₁²)
 + * sin²θ = (cdy²t² - 2·cdy·pdy·t + pdy²) / (dr²·t² + 2·r₁·dr·t + r₁²)
 + *
 + * Then add both together, set the result equal to 1, and express as a
 + * standard quadratic equation in t of the form At² + Bt + C = 0
 + *
 + * (cdx² + cdy² - dr²)·t² - 2·(cdx·pdx + cdy·pdy + r₁·dr)·t + (pdx² + pdy² - r₁²) = 0
 + *
 + * In other words:
 + *
 + * A = cdx² + cdy² - dr²
 + * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
 + * C = pdx² + pdy² - r₁²
 + *
 + * And again, notice that A does not depend on p, so can be
 + * precomputed. From here we just use the quadratic formula to solve
 + * for t:
 + *
 + * t = (-B ± ⎷(B² - 4·A·C)) / (2·A)
 + *
 + * NOTE(review): the code below picks one root by the sign of A, clamps a
 + * negative discriminant to zero, and divides by 2·A without checking for
 + * A == 0.
 + */
 +        /* radial or conical */
 +        pixman_bool_t affine = TRUE;
 +        double cx = 1.;
 +        double cy = 0.;
 +        double cz = 0.;
 +	double rx = x + 0.5;
 +	double ry = y + 0.5;
 +        double rz = 1.;
 +
 +        if (pict->common.transform) {
 +            pixman_vector_t v;
 +            /* reference point is the center of the pixel */
 +            v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
 +            v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
 +            v.vector[2] = pixman_fixed_1;
 +            if (!pixman_transform_point_3d (pict->common.transform, &v))
 +                return;
 +
 +            cx = pict->common.transform->matrix[0][0]/65536.;	/* (cx, cy, cz): per-pixel step, converted from 16.16 fixed point */
 +            cy = pict->common.transform->matrix[1][0]/65536.;
 +            cz = pict->common.transform->matrix[2][0]/65536.;
 +            rx = v.vector[0]/65536.;
 +            ry = v.vector[1]/65536.;
 +            rz = v.vector[2]/65536.;
 +            affine = pict->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1;
 +        }
 +
 +        if (pict->common.type == RADIAL) {
 +	    radial_gradient_t *radial = (radial_gradient_t *)pict;
 +            if (affine) {
 +                while (buffer < end) {
 +		    if (!mask || *mask++ & maskBits)
 +		    {
 +			double pdx, pdy;
 +			double B, C;
 +			double det;
 +			double c1x = radial->c1.x / 65536.0;
 +			double c1y = radial->c1.y / 65536.0;
 +			double r1  = radial->c1.radius / 65536.0;
 +                        pixman_fixed_48_16_t t;
 +
 +			pdx = rx - c1x;
 +			pdy = ry - c1y;
 +
 +			B = -2 * (  pdx * radial->cdx
 +				    + pdy * radial->cdy
 +				    + r1 * radial->dr);
 +			C = (pdx * pdx + pdy * pdy - r1 * r1);
 +
 +                        det = (B * B) - (4 * radial->A * C);
 +			if (det < 0.0)	/* clamp a negative discriminant to zero */
 +			    det = 0.0;
 +
 +			if (radial->A < 0)
 +			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
 +			else
 +			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
 +
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    ++buffer;
 +
 +                    rx += cx;
 +                    ry += cy;
 +                }
 +            } else {
 +		/* projective */
 +                while (buffer < end) {
 +		    if (!mask || *mask++ & maskBits)
 +		    {
 +			double pdx, pdy;
 +			double B, C;
 +			double det;
 +			double c1x = radial->c1.x / 65536.0;
 +			double c1y = radial->c1.y / 65536.0;
 +			double r1  = radial->c1.radius / 65536.0;
 +                        pixman_fixed_48_16_t t;
 +			double x, y;
 +
 +			if (rz != 0) {
 +			    x = rx/rz;
 +			    y = ry/rz;
 +			} else {
 +			    x = y = 0.;
 +			}
 +
 +			pdx = x - c1x;
 +			pdy = y - c1y;
 +
 +			B = -2 * (  pdx * radial->cdx
 +				    + pdy * radial->cdy
 +				    + r1 * radial->dr);
 +			C = (pdx * pdx + pdy * pdy - r1 * r1);
 +
 +                        det = (B * B) - (4 * radial->A * C);
 +			if (det < 0.0)	/* clamp a negative discriminant to zero */
 +			    det = 0.0;
 +
 +			if (radial->A < 0)
 +			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
 +			else
 +			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
 +
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    ++buffer;
 +
 +                    rx += cx;
 +                    ry += cy;
 +		    rz += cz;
 +                }
 +            }
 +        } else /* SourcePictTypeConical */ {
 +	    conical_gradient_t *conical = (conical_gradient_t *)pict;
 +            double a = conical->angle/(180.*65536);	/* NOTE(review): this is angle/180 with no factor of pi, yet it is added to atan2()'s radians below -- confirm intended units */
 +            if (affine) {
 +                rx -= conical->center.x/65536.;
 +                ry -= conical->center.y/65536.;
 +
 +                while (buffer < end) {
 +		    double angle;
 +
 +                    if (!mask || *mask++ & maskBits)
 +		    {
 +                        pixman_fixed_48_16_t   t;
 +
 +                        angle = atan2(ry, rx) + a;
 +			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));	/* one full turn (2*pi) maps to 0x10000 */
 +
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +
 +                    ++buffer;
 +                    rx += cx;
 +                    ry += cy;
 +                }
 +            } else {
 +                while (buffer < end) {
 +                    double x, y;
 +                    double angle;
 +
 +                    if (!mask || *mask++ & maskBits)
 +                    {
 +			pixman_fixed_48_16_t  t;
 +
 +			if (rz != 0) {
 +			    x = rx/rz;
 +			    y = ry/rz;
 +			} else {
 +			    x = y = 0.;
 +			}
 +			x -= conical->center.x/65536.;
 +			y -= conical->center.y/65536.;
 +			angle = atan2(y, x) + a;
 +			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));	/* one full turn (2*pi) maps to 0x10000 */
 +
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +
 +                    ++buffer;
 +                    rx += cx;
 +                    ry += cy;
 +                    rz += cz;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +    uint32_t     *bits;
 +    int32_t    stride;
 +    fetchPixelProc_32   fetch;
 +    pixman_vector_t	v;
 +    pixman_vector_t  unit;
 +    int         i;
 +    pixman_box16_t box;
 +    pixman_bool_t affine = TRUE;
 +
 +    fetch = fetchPixelProcForPicture_32(pict);
 +
 +    bits = pict->bits;
 +    stride = pict->rowstride;
 +
 +    /* reference point is the center of the pixel */
 +    v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1 / 2;
 +    v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1 / 2;
 +    v.vector[2] = pixman_fixed_1;
 +
 +    /* when using convolution filters one might get here without a transform */
 +    if (pict->common.transform)
 +    {
 +        if (!pixman_transform_point_3d (pict->common.transform, &v))
 +	{
 +            fbFinishAccess (pict->pDrawable);
 +            return;
 +        }
 +        unit.vector[0] = pict->common.transform->matrix[0][0];
 +        unit.vector[1] = pict->common.transform->matrix[1][0];
 +        unit.vector[2] = pict->common.transform->matrix[2][0];
 +        affine = v.vector[2] == pixman_fixed_1 && unit.vector[2] == 0;
 +    }
 +    else
 +    {
 +        unit.vector[0] = pixman_fixed_1;
 +        unit.vector[1] = 0;
 +        unit.vector[2] = 0;
 +    }
 +
 +    if (pict->common.filter == PIXMAN_FILTER_NEAREST || pict->common.filter == PIXMAN_FILTER_FAST)
 +    {
 +        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL) {
 +            if (pixman_region_n_rects (pict->common.src_clip) == 1) {
 +		for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = MOD(DIV(v.vector[1],v.vector[2]), pict->height);
 +				x = MOD(DIV(v.vector[0],v.vector[2]), pict->width);
 +			    } else {
 +				y = MOD(v.vector[1]>>16, pict->height);
 +				x = MOD(v.vector[0]>>16, pict->width);
 +			    }
 +			    *(buffer + i) = fetch(pict, x, y);
 +			}
 +		    }
 +
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = MOD(DIV(v.vector[1],v.vector[2]), pict->height);
 +				x = MOD(DIV(v.vector[0],v.vector[2]), pict->width);
 +			    } else {
 +				y = MOD(v.vector[1]>>16, pict->height);
 +				x = MOD(v.vector[0]>>16, pict->width);
 +			    }
 +			    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box))
 +				*(buffer + i) = fetch (pict, x, y);
 +			    else
 +				*(buffer + i) = 0;
 +			}
 +		    }
 +
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        } else {
 +            if (pixman_region_n_rects(pict->common.src_clip) == 1) {
 +                box = pict->common.src_clip->extents;
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = DIV(v.vector[1],v.vector[2]);
 +				x = DIV(v.vector[0],v.vector[2]);
 +			    } else {
 +				y = v.vector[1]>>16;
 +				x = v.vector[0]>>16;
 +			    }
 +			    *(buffer + i) = ((x < box.x1) | (x >= box.x2) | (y < box.y1) | (y >= box.y2)) ?
 +				0 : fetch(pict, x, y);
 +			}
 +		    }
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +                    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = DIV(v.vector[1],v.vector[2]);
 +				x = DIV(v.vector[0],v.vector[2]);
 +			    } else {
 +				y = v.vector[1]>>16;
 +				x = v.vector[0]>>16;
 +			    }
 +			    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box))
 +				*(buffer + i) = fetch(pict, x, y);
 +			    else
 +				*(buffer + i) = 0;
 +			}
 +		    }
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        }
 +    } else if (pict->common.filter == PIXMAN_FILTER_BILINEAR	||
 +	       pict->common.filter == PIXMAN_FILTER_GOOD	||
 +	       pict->common.filter == PIXMAN_FILTER_BEST)
 +    {
 +        /* adjust vector for maximum contribution at 0.5, 0.5 of each texel. */
 +        v.vector[0] -= v.vector[2] / 2;
 +        v.vector[1] -= v.vector[2] / 2;
 +        unit.vector[0] -= unit.vector[2] / 2;
 +        unit.vector[1] -= unit.vector[2] / 2;
 +
 +        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL) {
 +            if (pixman_region_n_rects(pict->common.src_clip) == 1) {
 +                for (i = 0; i < width; ++i) {
 +                    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    uint32_t ft, fb;
 +
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
 +
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
 +
 +			    x1 = MOD (x1, pict->width);
 +			    x2 = MOD (x2, pict->width);
 +			    y1 = MOD (y1, pict->height);
 +			    y2 = MOD (y2, pict->height);
 +
 +			    tl = fetch(pict, x1, y1);
 +			    tr = fetch(pict, x2, y1);
 +			    bl = fetch(pict, x1, y2);
 +			    br = fetch(pict, x2, y2);
 +
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    uint32_t ft, fb;
 +
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
 +
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
 +
 +			    x1 = MOD (x1, pict->width);
 +			    x2 = MOD (x2, pict->width);
 +			    y1 = MOD (y1, pict->height);
 +			    y2 = MOD (y2, pict->height);
 +
 +			    tl = pixman_region_contains_point(pict->common.src_clip, x1, y1, &box)
 +				? fetch(pict, x1, y1) : 0;
 +			    tr = pixman_region_contains_point(pict->common.src_clip, x2, y1, &box)
 +				? fetch(pict, x2, y1) : 0;
 +			    bl = pixman_region_contains_point(pict->common.src_clip, x1, y2, &box)
 +				? fetch(pict, x1, y2) : 0;
 +			    br = pixman_region_contains_point(pict->common.src_clip, x2, y2, &box)
 +				? fetch(pict, x2, y2) : 0;
 +
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        } else {
 +            if (pixman_region_n_rects(pict->common.src_clip) == 1) {
 +                box = pict->common.src_clip->extents;
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    pixman_bool_t x1_out, x2_out, y1_out, y2_out;
 +			    uint32_t ft, fb;
 +
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
 +
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
 +
 +			    x1_out = (x1 < box.x1) | (x1 >= box.x2);
 +			    x2_out = (x2 < box.x1) | (x2 >= box.x2);
 +			    y1_out = (y1 < box.y1) | (y1 >= box.y2);
 +			    y2_out = (y2 < box.y1) | (y2 >= box.y2);
 +
 +			    tl = x1_out|y1_out ? 0 : fetch(pict, x1, y1);
 +			    tr = x2_out|y1_out ? 0 : fetch(pict, x2, y1);
 +			    bl = x1_out|y2_out ? 0 : fetch(pict, x1, y2);
 +			    br = x2_out|y2_out ? 0 : fetch(pict, x2, y2);
 +
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +                    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    uint32_t ft, fb;
 +
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
 +
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
 +
 +			    tl = pixman_region_contains_point(pict->common.src_clip, x1, y1, &box)
 +				? fetch(pict, x1, y1) : 0;
 +			    tr = pixman_region_contains_point(pict->common.src_clip, x2, y1, &box)
 +				? fetch(pict, x2, y1) : 0;
 +			    bl = pixman_region_contains_point(pict->common.src_clip, x1, y2, &box)
 +				? fetch(pict, x1, y2) : 0;
 +			    br = pixman_region_contains_point(pict->common.src_clip, x2, y2, &box)
 +				? fetch(pict, x2, y2) : 0;
 +
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        }
 +    } else if (pict->common.filter == PIXMAN_FILTER_CONVOLUTION) {
 +        pixman_fixed_t *params = pict->common.filter_params;
 +        int32_t cwidth = pixman_fixed_to_int(params[0]);
 +        int32_t cheight = pixman_fixed_to_int(params[1]);
 +        int xoff = (params[0] - pixman_fixed_1) >> 1;
 +	int yoff = (params[1] - pixman_fixed_1) >> 1;
 +        params += 2;
 +        for (i = 0; i < width; ++i) {
 +	    if (!mask || mask[i] & maskBits)
 +	    {
 +		if (!v.vector[2]) {
 +		    *(buffer + i) = 0;
 +		} else {
 +		    int x1, x2, y1, y2, x, y;
 +		    int32_t srtot, sgtot, sbtot, satot;
 +		    pixman_fixed_t *p = params;
 +
 +		    if (!affine) {
 +			pixman_fixed_48_16_t tmp;
 +			tmp = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2] - xoff;
 +			x1 = pixman_fixed_to_int(tmp);
 +			tmp = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2] - yoff;
 +			y1 = pixman_fixed_to_int(tmp);
 +		    } else {
 +			x1 = pixman_fixed_to_int(v.vector[0] - xoff);
 +			y1 = pixman_fixed_to_int(v.vector[1] - yoff);
 +		    }
 +		    x2 = x1 + cwidth;
 +		    y2 = y1 + cheight;
 +
 +		    srtot = sgtot = sbtot = satot = 0;
 +
 +		    for (y = y1; y < y2; y++) {
 +			int ty = (pict->common.repeat == PIXMAN_REPEAT_NORMAL) ? MOD (y, pict->height) : y;
 +			for (x = x1; x < x2; x++) {
 +			    if (*p) {
 +				int tx = (pict->common.repeat == PIXMAN_REPEAT_NORMAL) ? MOD (x, pict->width) : x;
 +				if (pixman_region_contains_point (pict->common.src_clip, tx, ty, &box)) {
 +				    uint32_t c = fetch(pict, tx, ty);
 +
 +				    srtot += Red(c) * *p;
 +				    sgtot += Green(c) * *p;
 +				    sbtot += Blue(c) * *p;
 +				    satot += Alpha(c) * *p;
 +				}
 +			    }
 +			    p++;
 +			}
 +		    }
 +
 +		    satot >>= 16;
 +		    srtot >>= 16;
 +		    sgtot >>= 16;
 +		    sbtot >>= 16;
 +
 +		    if (satot < 0) satot = 0; else if (satot > 0xff) satot = 0xff;
 +		    if (srtot < 0) srtot = 0; else if (srtot > 0xff) srtot = 0xff;
 +		    if (sgtot < 0) sgtot = 0; else if (sgtot > 0xff) sgtot = 0xff;
 +		    if (sbtot < 0) sbtot = 0; else if (sbtot > 0xff) sbtot = 0xff;
 +
 +		    *(buffer + i) = ((satot << 24) |
 +				     (srtot << 16) |
 +				     (sgtot <<  8) |
 +				     (sbtot       ));
 +		}
 +	    }
 +            v.vector[0] += unit.vector[0];
 +            v.vector[1] += unit.vector[1];
 +            v.vector[2] += unit.vector[2];
 +        }
 +    }
 +
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +
 +/*
 + * Fetch one scanline of 'pict' and combine it with its external alpha map.
 + *
 + * pict      image to read; when it has no alpha map this is just
 + *           fbFetchTransformed().
 + * x, y      start of the scanline, passed through to fbFetchTransformed().
 + * width     number of pixels to produce.
 + * buffer    receives 'width' 32-bit pixels.
 + * mask      optional per-pixel mask; when non-NULL only pixels with
 + *           (mask[i] & maskBits) != 0 are modified here.
 + *
 + * For every selected pixel the alpha byte is taken from the alpha map
 + * (fetched at the position shifted by alpha_origin) and the three colour
 + * channels are multiplied by that alpha.
 + *
 + * If the temporary alpha scanline cannot be allocated the function returns
 + * without touching 'buffer'.
 + */
 +static void fbFetchExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +    int i;
 +    uint32_t _alpha_buffer[SCANLINE_BUFFER_LENGTH];
 +    uint32_t *alpha_buffer = _alpha_buffer;
 +
 +    if (!pict->common.alpha_map) {
 +        fbFetchTransformed (pict, x, y, width, buffer, mask, maskBits);
 +        return;
 +    }
 +
 +    /* The on-stack scratch line only covers SCANLINE_BUFFER_LENGTH pixels;
 +     * wider scanlines need a heap buffer, which may fail to allocate. */
 +    if (width > SCANLINE_BUFFER_LENGTH) {
 +        alpha_buffer = (uint32_t *) pixman_malloc_ab (width, sizeof(uint32_t));
 +        if (!alpha_buffer)
 +            return;
 +    }
 +
 +    fbFetchTransformed(pict, x, y, width, buffer, mask, maskBits);
 +    fbFetchTransformed((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x,
 +                       y - pict->common.alpha_origin.y, width, alpha_buffer,
 +                       mask, maskBits);
 +    for (i = 0; i < width; ++i) {
 +        if (!mask || mask[i] & maskBits)
 +        {
 +            /* Keep alpha unsigned: shifting a signed int holding a value
 +             * >= 0x80 left by 24 would overflow. */
 +            uint32_t a = alpha_buffer[i] >> 24;
 +            uint32_t p = *(buffer + i);
 +
 +            *(buffer + i) = (a << 24)
 +                | (div_255(Red(p) * a) << 16)
 +                | (div_255(Green(p) * a) << 8)
 +                | (div_255(Blue(p) * a));
 +        }
 +    }
 +
 +    if (alpha_buffer != _alpha_buffer)
 +        free(alpha_buffer);
 +}
 +
 +/*
 + * Write 'width' 32-bit pixels from 'buffer' into row 'y' of 'pict',
 + * starting at column 'x', using the store procedure selected for the
 + * picture by storeProcForPicture_32().
 + */
 +static void fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    storeProc_32 write_row = storeProcForPicture_32(pict);
 +    const pixman_indexed_t *palette = pict->indexed;
 +    int32_t row_stride = pict->rowstride;
 +    uint32_t *row = pict->bits;
 +
 +    /* The stride is applied to a uint32_t pointer, i.e. it counts words. */
 +    row += y * row_stride;
 +
 +    write_row((pixman_image_t *)pict, row, buffer, x, width, palette);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * 64-bit counterpart of fbStore(): write 'width' wide pixels from 'buffer'
 + * into row 'y' of 'pict', starting at column 'x', using the store
 + * procedure selected by storeProcForPicture_64().
 + */
 +static void fbStore64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    storeProc_64 write_row = storeProcForPicture_64(pict);
 +    const pixman_indexed_t *palette = pict->indexed;
 +    int32_t row_stride = pict->rowstride;
 +    uint32_t *row = pict->bits;
 +
 +    /* The stride is applied to a uint32_t pointer, i.e. it counts words. */
 +    row += y * row_stride;
 +
 +    write_row((pixman_image_t *)pict, row, buffer, x, width, palette);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * Store a scanline into 'pict' and into its external alpha map.
 + *
 + * The same 32-bit 'buffer' is handed to both store procedures: the one for
 + * the picture itself (row y, column x) and the one for the alpha map (at
 + * the position shifted by alpha_origin).  Without an alpha map this is
 + * just fbStore().
 + */
 +static void fbStoreExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    storeProc_32 write_color;
 +    storeProc_32 write_alpha;
 +    uint32_t *color_row;
 +    uint32_t *alpha_row;
 +    int32_t color_stride;
 +    int32_t alpha_stride;
 +    const pixman_indexed_t *color_palette = pict->indexed;
 +    const pixman_indexed_t *alpha_palette;
 +
 +    if (!pict->common.alpha_map) {
 +        fbStore(pict, x, y, width, buffer);
 +        return;
 +    }
 +
 +    write_color = storeProcForPicture_32(pict);
 +    write_alpha = storeProcForPicture_32(pict->common.alpha_map);
 +    alpha_palette = pict->common.alpha_map->indexed;
 +
 +    /* Locate the destination row in the colour image ... */
 +    color_row = pict->bits;
 +    color_stride = pict->rowstride;
 +    color_row += y * color_stride;
 +
 +    /* ... and the matching row in the alpha map. */
 +    alpha_row = pict->common.alpha_map->bits;
 +    alpha_stride = pict->common.alpha_map->rowstride;
 +    alpha_row += (y - pict->common.alpha_origin.y) * alpha_stride;
 +
 +    write_color((pixman_image_t *)pict, color_row, buffer, x, width, color_palette);
 +    write_alpha((pixman_image_t *)pict->common.alpha_map,
 +                alpha_row, buffer, x - pict->common.alpha_origin.x, width, alpha_palette);
 +
 +    fbFinishAccess (pict->alpha_map->pDrawable);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * 64-bit counterpart of fbStoreExternalAlpha(): store a wide scanline into
 + * 'pict' and into its external alpha map.
 + *
 + * The same 'buffer' is handed to both store procedures: the one for the
 + * picture itself (row y, column x) and the one for the alpha map (at the
 + * position shifted by alpha_origin).  Without an alpha map this is just
 + * fbStore64().
 + */
 +static void fbStoreExternalAlpha64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    storeProc_64 write_color;
 +    storeProc_64 write_alpha;
 +    uint32_t *color_row;
 +    uint32_t *alpha_row;
 +    int32_t color_stride;
 +    int32_t alpha_stride;
 +    const pixman_indexed_t *color_palette = pict->indexed;
 +    const pixman_indexed_t *alpha_palette;
 +
 +    if (!pict->common.alpha_map) {
 +        fbStore64(pict, x, y, width, buffer);
 +        return;
 +    }
 +
 +    write_color = storeProcForPicture_64(pict);
 +    write_alpha = storeProcForPicture_64(pict->common.alpha_map);
 +    alpha_palette = pict->common.alpha_map->indexed;
 +
 +    /* Locate the destination row in the colour image ... */
 +    color_row = pict->bits;
 +    color_stride = pict->rowstride;
 +    color_row += y * color_stride;
 +
 +    /* ... and the matching row in the alpha map. */
 +    alpha_row = pict->common.alpha_map->bits;
 +    alpha_stride = pict->common.alpha_map->rowstride;
 +    alpha_row += (y - pict->common.alpha_origin.y) * alpha_stride;
 +
 +    write_color((pixman_image_t *)pict, color_row, buffer, x, width, color_palette);
 +    write_alpha((pixman_image_t *)pict->common.alpha_map,
 +                alpha_row, buffer, x - pict->common.alpha_origin.x, width, alpha_palette);
 +
 +    fbFinishAccess (pict->alpha_map->pDrawable);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * Expand 'width' 32-bit pixels (four 8-bit channels) from 'source' into
 + * 64-bit pixels (four 16-bit channels) in 'dest'.
 + *
 + * Each 8-bit channel is widened by replicating the byte into both halves
 + * of the 16-bit channel, so 0x00 maps to 0x0000 and 0xff maps to 0xffff.
 + * Merely shifting the byte into the high half would turn a fully opaque
 + * or fully saturated 0xff into 0xff00, which is not the 16-bit maximum.
 + * fbContract() keeps the high byte of every channel, so a round trip
 + * through fbExpand()/fbContract() still returns the original pixel.
 + *
 + * The 64-bit fetch wrappers in this file call this with 'source' pointing
 + * into the second half of 'dest'.  That is safe only because pixels are
 + * processed in increasing order and each source pixel is read before the
 + * corresponding destination pixel is written; keep it that way.
 + */
 +static void
 +fbExpand(uint32_t * source, uint64_t * dest, int width)
 +{
 +    int i;
 +    for(i = 0; i < width; i++) {
 +        uint32_t p = source[i];
 +        /* Multiplying an 8-bit value by 0x0101 yields the byte repeated twice. */
 +        uint64_t a = (uint64_t)((p >> 24) & 0xff) * 0x0101;
 +        uint64_t r = (uint64_t)((p >> 16) & 0xff) * 0x0101;
 +        uint64_t g = (uint64_t)((p >> 8) & 0xff) * 0x0101;
 +        uint64_t b = (uint64_t)(p & 0xff) * 0x0101;
 +
 +        dest[i] = (a << 48) | (r << 32) | (g << 16) | b;
 +    }
 +}
 +
 +/*
 + * Narrow 'width' 64-bit pixels (four 16-bit channels) from 'source' into
 + * 32-bit pixels (four 8-bit channels) in 'dest' by keeping the high byte
 + * of every 16-bit channel.
 + */
 +static void
 +fbContract(uint64_t * source, uint32_t * dest, int width)
 +{
 +    int n;
 +
 +    for (n = 0; n < width; n++) {
 +        uint64_t wide = source[n];
 +
 +        dest[n] = ((wide >> 32) & 0xff000000)    /* alpha */
 +                | ((wide >> 24) & 0x00ff0000)    /* red   */
 +                | ((wide >> 16) & 0x0000ff00)    /* green */
 +                | ((wide >> 8)  & 0x000000ff);   /* blue  */
 +    }
 +}
 +
 +/*
 + * Narrow a single 64-bit pixel (four 16-bit channels) to a 32-bit pixel
 + * (four 8-bit channels) by keeping the high byte of every channel.
 + */
 +static uint32_t
 +fbContractPixel(uint64_t p)
 +{
 +    return ((p >> 32) & 0xff000000)    /* alpha */
 +         | ((p >> 24) & 0x00ff0000)    /* red   */
 +         | ((p >> 16) & 0x0000ff00)    /* green */
 +         | ((p >> 8)  & 0x000000ff);   /* blue  */
 +}
 +
 +/*
 + * 64-bit wrapper around pixmanFetchSourcePict().
 + *
 + * No extra memory is allocated: the 'width' 64-bit slots of 'buffer' are
 + * viewed as 2 * width 32-bit slots.  The first half holds the contracted
 + * mask (if any), the second half receives the 32-bit fetch result, which
 + * is then expanded in place over the whole buffer.
 + */
 +static void
 +pixmanFetchSourcePict64(source_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    uint32_t *narrow_base = (uint32_t *) buffer;
 +    uint32_t *narrow_pixels = narrow_base + width;
 +    uint32_t *narrow_mask = NULL;
 +    uint32_t narrow_mask_bits = fbContractPixel(maskBits);
 +
 +    if (mask) {
 +        narrow_mask = narrow_base;
 +        fbContract(mask, narrow_mask, width);
 +    }
 +
 +    pixmanFetchSourcePict(pict, x, y, width, narrow_pixels, narrow_mask, narrow_mask_bits);
 +    fbExpand(narrow_pixels, buffer, width);
 +}
 +
 +/*
 + * 64-bit wrapper around fbFetchExternalAlpha().
 + *
 + * The 'width' 64-bit slots of 'buffer' double as scratch space: the first
 + * half (as 32-bit words) holds the contracted mask, the second half
 + * receives the 32-bit result, which is then expanded in place.
 + */
 +static void
 +fbFetchExternalAlpha64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    uint32_t *narrow_base = (uint32_t *) buffer;
 +    uint32_t *narrow_pixels = narrow_base + width;
 +    uint32_t *narrow_mask = NULL;
 +    uint32_t narrow_mask_bits = fbContractPixel(maskBits);
 +
 +    if (mask) {
 +        narrow_mask = narrow_base;
 +        fbContract(mask, narrow_mask, width);
 +    }
 +
 +    fbFetchExternalAlpha(pict, x, y, width, narrow_pixels, narrow_mask, narrow_mask_bits);
 +    fbExpand(narrow_pixels, buffer, width);
 +}
 +
 +/*
 + * 64-bit wrapper around fbFetchTransformed().
 + *
 + * The 'width' 64-bit slots of 'buffer' double as scratch space: the first
 + * half (as 32-bit words) holds the contracted mask, the second half
 + * receives the 32-bit result, which is then expanded in place.
 + */
 +static void
 +fbFetchTransformed64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    uint32_t *narrow_base = (uint32_t *) buffer;
 +    uint32_t *narrow_pixels = narrow_base + width;
 +    uint32_t *narrow_mask = NULL;
 +    uint32_t narrow_mask_bits = fbContractPixel(maskBits);
 +
 +    if (mask) {
 +        narrow_mask = narrow_base;
 +        fbContract(mask, narrow_mask, width);
 +    }
 +
 +    fbFetchTransformed(pict, x, y, width, narrow_pixels, narrow_mask, narrow_mask_bits);
 +    fbExpand(narrow_pixels, buffer, width);
 +}
 +
 +/*
 + * Uniform signatures for the per-scanline store and fetch routines used by
 + * the general compositors below.  The concrete fb* / pixman* routines take
 + * more specific image pointer types and are cast to these when selected.
 + *
 + * Store: (image, x, y, width, pixels)
 + * Fetch: (image, x, y, width, pixels, mask, maskBits)
 + */
 +typedef void (*scanStoreProc)(pixman_image_t *, int, int, int, uint32_t *);
 +typedef void (*scanFetchProc)(pixman_image_t *, int, int, int, uint32_t *,
 +			      uint32_t *, uint32_t);
 +
 +/* Same as above for the 64-bit (wide) intermediate pixel format. */
 +typedef void (*scanStoreProc64)(pixman_image_t *, int, int, int, uint64_t *);
 +typedef void (*scanFetchProc64)(pixman_image_t *, int, int, int, uint64_t *,
 +				uint64_t *, uint64_t);
 +
 +//#ifndef PIXMAN_FB_ACCESSORS
 +//static
 +//#endif
 +/*
 + * General compositing of a data->width x data->height rectangle, one
 + * scanline at a time, through 32-bit intermediate pixels.
 + *
 + * data             operator, source / mask / destination images and the
 + *                  origin of the rectangle in each of them.
 + * scanline_buffer  caller-provided scratch space.  It is carved up as
 + *                  [source | destination | mask], each data->width pixels
 + *                  long, so it must hold at least 3 * data->width pixels
 + *                  whenever a mask is fetched.
 + *
 + * The function first picks a fetch routine for source and mask and a
 + * fetch/store pair for the destination, then runs one of two loops:
 + * a component-alpha loop (combineC) or a unified-alpha loop (combineU).
 + * It returns without doing anything if no combine function is registered
 + * for data->op.
 + */
 +void
 +PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 +			       uint32_t *scanline_buffer)
 +{
 +    uint32_t *src_buffer = scanline_buffer;
 +    uint32_t *dest_buffer = src_buffer + data->width;
 +    int i;
 +    scanStoreProc store;
 +    scanFetchProc fetchSrc = NULL, fetchMask = NULL, fetchDest = NULL;
 +    unsigned int srcClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    unsigned int maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    uint32_t *bits;
 +    int32_t stride;
 +    int xoff, yoff;
 +
 +    /* --- choose how to fetch the source (CLEAR never reads it) --- */
 +    if (data->op == PIXMAN_OP_CLEAR)
 +        fetchSrc = NULL;
 +    else if (IS_SOURCE_IMAGE (data->src))
 +    {
 +	fetchSrc = (scanFetchProc)pixmanFetchSourcePict;
 +	srcClass = SourcePictureClassify ((source_image_t *)data->src,
 +					  data->xSrc, data->ySrc,
 +					  data->width, data->height);
 +    }
 +    else
 +    {
 +	bits_image_t *bits = (bits_image_t *)data->src;
 +
 +	if (bits->common.alpha_map)
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetchExternalAlpha;
 +	}
 +	else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		 bits->width == 1 &&
 +		 bits->height == 1)
 +	{
 +	    /* a repeating 1x1 image is a solid colour: every row is the same */
 +	    fetchSrc = (scanFetchProc)fbFetchSolid;
 +	    srcClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	}
 +	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetch;
 +	}
 +	else
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetchTransformed;
 +	}
 +    }
 +
 +    /* --- choose how to fetch the mask, using the same rules --- */
 +    if (!data->mask || data->op == PIXMAN_OP_CLEAR)
 +    {
 +	fetchMask = NULL;
 +    }
 +    else
 +    {
 +	if (IS_SOURCE_IMAGE (data->mask))
 +	{
 +	    fetchMask = (scanFetchProc)pixmanFetchSourcePict;
 +	    maskClass = SourcePictureClassify ((source_image_t *)data->mask,
 +					       data->xMask, data->yMask,
 +					       data->width, data->height);
 +	}
 +	else
 +	{
 +	    bits_image_t *bits = (bits_image_t *)data->mask;
 +
 +	    if (bits->common.alpha_map)
 +	    {
 +		fetchMask = (scanFetchProc)fbFetchExternalAlpha;
 +	    }
 +	    else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		     bits->width == 1 && bits->height == 1)
 +	    {
 +		fetchMask = (scanFetchProc)fbFetchSolid;
 +		maskClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	    }
 +	    else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +		fetchMask = (scanFetchProc)fbFetch;
 +	    else
 +		fetchMask = (scanFetchProc)fbFetchTransformed;
 +	}
 +    }
 +
 +    /* --- choose how to fetch and store the destination --- */
 +    if (data->dest->common.alpha_map)
 +    {
 +	fetchDest = (scanFetchProc)fbFetchExternalAlpha;
 +	store = (scanStoreProc)fbStoreExternalAlpha;
 +
 +	/* CLEAR and SRC overwrite the destination, so it need not be read */
 +	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 +	    fetchDest = NULL;
 +    }
 +    else
 +    {
 +	fetchDest = (scanFetchProc)fbFetch;
 +	store = (scanStoreProc)fbStore;
 +
 +	switch (data->op)
 +	{
 +	case PIXMAN_OP_CLEAR:
 +	case PIXMAN_OP_SRC:
 +	    fetchDest = NULL;
 +#ifndef PIXMAN_FB_ACCESSORS
 +	    /* fall-through */
 +	case PIXMAN_OP_ADD:
 +	case PIXMAN_OP_OVER:
 +	    /* For these operators and 32-bit destination formats the
 +	     * store step is dropped (store = NULL): the combine function
 +	     * then operates directly on the destination bits below. */
 +	    switch (data->dest->bits.format) {
 +	    case PIXMAN_a8r8g8b8:
 +	    case PIXMAN_x8r8g8b8:
 +		store = NULL;
 +		break;
 +	    default:
 +		break;
 +	    }
 +#endif
 +	    break;
 +	}
 +    }
 +
 +    /* bits/stride are only needed for the direct (store == NULL) path */
 +    if (!store)
 +    {
 +	bits = data->dest->bits.bits;
 +	stride = data->dest->bits.rowstride;
 +	xoff = yoff = 0;
 +    }
 +    else
 +    {
 +	bits = NULL;
 +	stride = 0;
 +	xoff = yoff = 0;
 +    }
 +
 +    /* Component-alpha path: the mask is a BITS image flagged as
 +     * component_alpha with an RGB format, so it is applied per channel
 +     * via the combineC functions. */
 +    if (fetchSrc		   &&
 +	fetchMask		   &&
 +	data->mask		   &&
 +	data->mask->common.type == BITS &&
 +	data->mask->common.component_alpha &&
 +	PIXMAN_FORMAT_RGB (data->mask->bits.format))
 +    {
 +	uint32_t *mask_buffer = dest_buffer + data->width;
 +	CombineFuncC compose = pixman_composeFunctions.combineC[data->op];
 +	if (!compose)
 +	    return;
 +
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
 +
 +		    /* a HORIZONTAL mask is fetched once; clearing the
 +		       fetcher makes later rows reuse mask_buffer */
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
 +
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    /* fetched once, unmasked, and reused for every row */
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    /* pass the mask so the fetcher may skip pixels whose
 +		       mask value has no bit of 0xffffffff set */
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xffffffff);
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		/* source already cached (HORIZONTAL); only the mask varies */
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +	    }
 +
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
 +
 +		/* blend */
 +		compose (dest_buffer, src_buffer, mask_buffer, data->width);
 +
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		/* blend */
 +		compose (bits + (data->yDest + i+ yoff) * stride +
 +			 data->xDest + xoff,
 +			 src_buffer, mask_buffer, data->width);
 +	    }
 +	}
 +    }
 +    else
 +    {
 +	/* Unified-alpha path: source and mask are merged into a single
 +	 * scanline (src_mask_buffer) that is then combined with the
 +	 * destination via the combineU functions. */
 +	uint32_t *src_mask_buffer = 0, *mask_buffer = 0;
 +	CombineFuncU compose = pixman_composeFunctions.combineU[data->op];
 +	if (!compose)
 +	    return;
 +
 +	if (fetchMask)
 +	    mask_buffer = dest_buffer + data->width;
 +
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
 +
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
 +
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +
 +		    if (mask_buffer)
 +		    {
 +			/* src_buffer must stay pristine for later rows, so
 +			   the IN result is written into mask_buffer */
 +			pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +			src_mask_buffer = mask_buffer;
 +		    }
 +		    else
 +			src_mask_buffer = src_buffer;
 +
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    /* only the top byte (0xff000000) of each mask pixel is
 +		       tested when deciding whether to fetch the source */
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xff000000);
 +
 +		    if (mask_buffer)
 +			pixman_composeFunctions.combineMaskU (src_buffer,
 +							      mask_buffer,
 +							      data->width);
 +
 +		    src_mask_buffer = src_buffer;
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		/* cached HORIZONTAL source with a mask that changes per row */
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +
 +		pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +
 +		src_mask_buffer = mask_buffer;
 +	    }
 +
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
 +
 +		/* blend */
 +		compose (dest_buffer, src_mask_buffer, data->width);
 +
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		/* blend */
 +		compose (bits + (data->yDest + i+ yoff) * stride +
 +			 data->xDest + xoff,
 +			 src_mask_buffer, data->width);
 +	    }
 +	}
 +    }
 +
 +    if (!store)
 +	fbFinishAccess (data->dest->pDrawable);
 +}
 +
 +
 +/*
 + * Wide variant of PIXMAN_COMPOSITE_RECT_GENERAL: composites a
 + * data->width x data->height rectangle one scanline at a time through
 + * 64-bit intermediate pixels.
 + *
 + * data             operator, source / mask / destination images and the
 + *                  origin of the rectangle in each of them.
 + * scanline_buffer  caller-provided scratch space, carved up as
 + *                  [source | destination | mask], each data->width 64-bit
 + *                  pixels long.
 + *
 + * Unlike the 32-bit version there is no direct-to-destination path: a
 + * store procedure is always selected, and the branches that would run
 + * without one only assert.
 + */
 +void
 +PIXMAN_COMPOSITE_RECT_GENERAL_WIDE (const FbComposeData *data,
 +			            uint64_t *scanline_buffer)
 +{
 +    uint64_t *src_buffer = scanline_buffer;
 +    uint64_t *dest_buffer = src_buffer + data->width;
 +    int i;
 +    scanStoreProc64 store;
 +    scanFetchProc64 fetchSrc = NULL, fetchMask = NULL, fetchDest = NULL;
 +    unsigned int srcClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    unsigned int maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    uint32_t *bits;
 +    int32_t stride;
 +    int xoff, yoff;
 +
 +    /* --- choose how to fetch the source (CLEAR never reads it) --- */
 +    if (data->op == PIXMAN_OP_CLEAR)
 +        fetchSrc = NULL;
 +    else if (IS_SOURCE_IMAGE (data->src))
 +    {
 +	fetchSrc = (scanFetchProc64)pixmanFetchSourcePict64;
 +	srcClass = SourcePictureClassify ((source_image_t *)data->src,
 +					  data->xSrc, data->ySrc,
 +					  data->width, data->height);
 +    }
 +    else
 +    {
 +	bits_image_t *bits = (bits_image_t *)data->src;
 +
 +	if (bits->common.alpha_map)
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetchExternalAlpha64;
 +	}
 +	else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		 bits->width == 1 &&
 +		 bits->height == 1)
 +	{
 +	    /* a repeating 1x1 image is a solid colour: every row is the same */
 +	    fetchSrc = (scanFetchProc64)fbFetchSolid64;
 +	    srcClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	}
 +	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetch64;
 +	}
 +	else
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetchTransformed64;
 +	}
 +    }
 +
 +    /* --- choose how to fetch the mask, using the same rules --- */
 +    if (!data->mask || data->op == PIXMAN_OP_CLEAR)
 +    {
 +	fetchMask = NULL;
 +    }
 +    else
 +    {
 +	if (IS_SOURCE_IMAGE (data->mask))
 +	{
 +	    fetchMask = (scanFetchProc64)pixmanFetchSourcePict64;
 +	    maskClass = SourcePictureClassify ((source_image_t *)data->mask,
 +					       data->xMask, data->yMask,
 +					       data->width, data->height);
 +	}
 +	else
 +	{
 +	    bits_image_t *bits = (bits_image_t *)data->mask;
 +
 +	    if (bits->common.alpha_map)
 +	    {
 +		fetchMask = (scanFetchProc64)fbFetchExternalAlpha64;
 +	    }
 +	    else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		     bits->width == 1 && bits->height == 1)
 +	    {
 +		fetchMask = (scanFetchProc64)fbFetchSolid64;
 +		maskClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	    }
 +	    else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +		fetchMask = (scanFetchProc64)fbFetch64;
 +	    else
 +		fetchMask = (scanFetchProc64)fbFetchTransformed64;
 +	}
 +    }
 +
 +    /* --- choose how to fetch and store the destination --- */
 +    if (data->dest->common.alpha_map)
 +    {
 +	fetchDest = (scanFetchProc64)fbFetchExternalAlpha64;
 +	store = (scanStoreProc64)fbStoreExternalAlpha64;
 +
 +	/* CLEAR and SRC overwrite the destination, so it need not be read */
 +	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 +	    fetchDest = NULL;
 +    }
 +    else
 +    {
 +	fetchDest = (scanFetchProc64)fbFetch64;
 +	store = (scanStoreProc64)fbStore64;
 +
 +	switch (data->op)
 +	{
 +	case PIXMAN_OP_CLEAR:
 +	case PIXMAN_OP_SRC:
 +	    fetchDest = NULL;
 +	    break;
 +	}
 +    }
 +
 +    /* store is assigned a non-NULL procedure on both branches above, so
 +       the !store case here mirrors the 32-bit function but is not taken */
 +    if (!store)
 +    {
 +	bits = data->dest->bits.bits;
 +	stride = data->dest->bits.rowstride;
 +	xoff = yoff = 0;
 +    }
 +    else
 +    {
 +	bits = NULL;
 +	stride = 0;
 +	xoff = yoff = 0;
 +    }
 +
 +    /* Component-alpha path: the mask is a BITS image flagged as
 +     * component_alpha with an RGB format, so it is applied per channel
 +     * via the wide combineC functions. */
 +    if (fetchSrc		   &&
 +	fetchMask		   &&
 +	data->mask		   &&
 +	data->mask->common.type == BITS &&
 +	data->mask->common.component_alpha &&
 +	PIXMAN_FORMAT_RGB (data->mask->bits.format))
 +    {
 +	uint64_t *mask_buffer = dest_buffer + data->width;
 +	CombineFuncC64 compose = pixman_composeFunctions_wide.combineC[data->op];
 +	if (!compose)
 +	    return;
 +
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
 +
 +		    /* a HORIZONTAL mask is fetched once; clearing the
 +		       fetcher makes later rows reuse mask_buffer */
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
 +
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    /* fetched once, unmasked, and reused for every row */
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    /* pass the mask so the fetcher may skip pixels whose
 +		       mask value is entirely zero */
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xffffffffffffffffLL);
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		/* source already cached (HORIZONTAL); only the mask varies */
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +	    }
 +
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
 +
 +		/* blend */
 +		compose (dest_buffer, src_buffer, mask_buffer, data->width);
 +
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		assert(!"need to have a storeproc with 64-bit internal format");
 +	    }
 +	}
 +    }
 +    else
 +    {
 +	/* Unified-alpha path: source and mask are merged into a single
 +	 * scanline (src_mask_buffer) that is then combined with the
 +	 * destination via the wide combineU functions. */
 +	uint64_t *src_mask_buffer = 0, *mask_buffer = 0;
 +	CombineFuncU64 compose = pixman_composeFunctions_wide.combineU[data->op];
 +	if (!compose)
 +	    return;
 +
 +	if (fetchMask)
 +	    mask_buffer = dest_buffer + data->width;
 +
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
 +
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
 +
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +
 +		    if (mask_buffer)
 +		    {
 +			/* src_buffer must stay pristine for later rows, so
 +			   the IN result is written into mask_buffer */
 +			pixman_composeFunctions_wide.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +			src_mask_buffer = mask_buffer;
 +		    }
 +		    else
 +			src_mask_buffer = src_buffer;
 +
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    /* only the top 16 bits of each mask pixel are tested
 +		       when deciding whether to fetch the source */
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xffff000000000000LL);
 +
 +		    if (mask_buffer)
 +			pixman_composeFunctions_wide.combineMaskU (src_buffer,
 +							      mask_buffer,
 +							      data->width);
 +
 +		    src_mask_buffer = src_buffer;
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		/* cached HORIZONTAL source with a mask that changes per row */
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +
 +		pixman_composeFunctions_wide.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +
 +		src_mask_buffer = mask_buffer;
 +	    }
 +
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
 +
 +		/* blend */
 +		compose (dest_buffer, src_mask_buffer, data->width);
 +
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		assert(!"need to have a storeproc with 64-bit internal format");
 +	    }
 +	}
 +    }
 +
 +    if (!store)
 +	fbFinishAccess (data->dest->pDrawable);
 +}
commit a77cfd17647b741647d77abaaa0da1645dc65d66
Merge: 74a680a... be4990f...
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Fri Oct 26 10:44:01 2007 -0700

    Merge commit 'be4990f4a0a8d278b99df7669c5162136120ccd5' into wide-composite-merge
    
    Conflicts:
    
    	pixman/pixman-compose.c
    	pixman/pixman-pict.c

diff --cc pixman/gen.pl
index c4dff00,0000000..d06ce1f
mode 100644,000000..100644
--- a/pixman/gen.pl
+++ b/pixman/gen.pl
@@@ -1,420 -1,0 +1,418 @@@
 +#!/usr/bin/perl
 +
 +# Copyright notice should go here.
 +
 +use strict;
 +use warnings;
 +
 +our @handcode_formats;
 +
 +sub max($$) { $_[0] > $_[1] ? $_[0] : $_[1]; }
 +sub min($$) { $_[0] < $_[1] ? $_[0] : $_[1]; }
 +# get_mask($offset, $size, $total)
 +# Build the C literal for a mask of $size one-bits starting $offset bits
 +# from the right of a $total-bit integer.  Returns a string such as
 +# "0x00ff0000"; an "LL" suffix is appended when $total exceeds 32.
 +sub get_mask($$$)
 +{
 +    my ($offset, $size, $total) = @_;
 +    my $left = $total - $offset - $size;
 +    my $bitstr = ("0" x $left) . ("1" x $size) . ("0" x $offset);
 +    my $suffix = $total > 32 ? "LL" : "";
++    my $hexstr = "";
 +    for(my $i = 0; $i < $total; $i += 8) {
++	# oct() parses a "0b..." string directly; no string eval is needed.
++	$hexstr .= sprintf("%02x", oct("0b" . substr($bitstr, $i, 8)));
 +    }
 +    return "0x$hexstr$suffix";
 +}
 +
 +# stub left for allowing more efficient code when we don't need  
 +# to split apart the components
 +sub unified_access($@) { 0 }
 +
 +sub access($$@)
 +{
 +    my ($comp, $dsize, @format) = @_;
 +    if($dsize != 32 && $dsize != 64) {
 +	die "only 32 and 64 bit intermediate formats are supported";
 +    }
 +    my $dtype = "uint${dsize}_t";
 +    my $dcs = $dsize / 4;
 +    my ($dst_off, $src_sz, $src_sh, $mask);
 +    if($comp eq "a") {
 +	if($format[4] == 0) {
 +	    $mask = get_mask($dcs * 3, $dcs, $dsize);
 +	    return "$dtype a = $mask;";
 +	}
 +	$dst_off = 3;
 +	$src_sz = $format[4];
 +	$src_sh = $format[8] + max($src_sz - $dcs, 0);
 +    } elsif ($comp eq "r") {
 +	$dst_off = 2;
 +	$src_sz = $format[1];
 +	$src_sh = $format[5] + max($src_sz - $dcs, 0);
 +    } elsif ($comp eq "g") {
 +	$dst_off = 1;
 +	$src_sz = $format[2];
 +	$src_sh = $format[6] + max($src_sz - $dcs, 0);
 +    } elsif ($comp eq "b") {
 +	$dst_off = 0;
 +	$src_sz = $format[3];
 +	$src_sh = $format[7] + max($src_sz - $dcs, 0);
 +    }
 +    return "" if $src_sz == 0;
 +    my $dst_sh = $dcs*($dst_off + 1) - min($dcs, $src_sz);
 +    
 +    # calculate shift based on src_sh, dst_sh
 +    my $sh;
 +    if ($src_sh < $dst_sh) {
 +	$sh = "<< " . ($dst_sh - $src_sh);
 +    } elsif ($src_sh > $dst_sh) {
 +	$sh = ">> " . ($src_sh - $dst_sh);
 +    } else {
 +	$sh = "";
 +    }
 +    # calculate mask based on format
 +    $mask = get_mask($src_sh, min($src_sz, $dcs), $format[0]);
 +    # put it all together
 +    "$dtype $comp = ($dtype)(p & $mask) $sh;";
 +}
 +
 +# stub left for more efficient code when we can
 +# expand after combining the components, rather than before
 +sub unified_expand($@)
 +{
 +    # conditions for unified expand: all the bits that need expanding
 +    # are the same size.
 +    0;
 +}
 +
 +sub expand($$@)
 +{
 +    my ($comp, $dsize, @format) = @_;
 +    my $acc = "";
 +    my $dcs = $dsize / 4;
 +    my ($dst_off, $src_sz);
 +
 +    if ($comp eq "a") {
 +	$dst_off = 3;
 +	$src_sz = $format[4];
 +    } elsif ($comp eq "r") {
 +	$dst_off = 2;
 +	$src_sz = $format[1];
 +    } elsif ($comp eq "g") {
 +	$dst_off = 1;
 +	$src_sz = $format[2];
 +    } elsif ($comp eq "b") {
 +	$dst_off = 0;
 +	$src_sz = $format[3];
 +    }
 +    my $dmask = get_mask($dst_off * $dcs, $dcs, $dsize);
 +    return "" if $src_sz == 0;
 +    my $expanded_bits = $src_sz;
 +    # do a loop to emit the necessary number of expand stages
 +    while ($expanded_bits < $dcs) {
 +	# we need a mask if the expanded bits overflow the dest field
 +	my $mask_str;
 +	if($expanded_bits * 2 > $dcs) {
 +	    $mask_str = "& $dmask"; 
 +	} else {
 +	    $mask_str = "";
 +	}
 +	$acc .= "$comp |= ($comp >> $expanded_bits)$mask_str; ";
 +	$expanded_bits *= 2;
 +    }
 +    return $acc;
 +}
 +
 +sub emit_accesses($@)
 +{
 +    my ($dsize, @format) = @_;
 +    my $acc = "";
 +    if(!unified_access($dsize, @format)) {
 +	for my $c ("r", "g", "b", "a") {
 +	    $acc .= "\t" . access($c, $dsize, @format) . "\n";
 +	}
 +    }
 +    $acc .= "\n";
 +    if(!unified_expand($dsize, @format)) {
 +	for my $c ("r", "g", "b", "a") {
 +	    my $t = expand($c, $dsize, @format);
 +	    $acc .= "\t$t\n" if $t;
 +	}
 +    }
 +    return $acc . "\n";
 +}
 +
 +sub combine(@)
 +{
 +    if ($_[9] eq "argb" || $_[9] eq "abgr") {
 +	"a | r | g | b";
 +    } elsif ($_[9] eq "alpha") {
 +	"a";
 +    }
 +}
 +
 +sub size_for_bpp($)
 +{
 +    my $bpp = shift;
 +    if    ($bpp > 32) { 64 }
 +    elsif ($bpp > 16) { 32 }
 +    elsif ($bpp > 8 ) { 16 }
 +    else { 8 }
 +}
 +
 +sub readpixel($)
 +{
 +    my $bpp = shift;
-     if ($bpp >= 8) { "READ(pixel + i)" }
-     elsif ($bpp == 4) { "Fetch4(pixel, i)" }
++    if ($bpp >= 8) { "READ(pict, pixel + i)" }
++    elsif ($bpp == 4) { "Fetch4(pict, pixel, i)" }
 +    else { die "unsupported bpp $bpp"; }
 +}
 +
 +sub gen_fetch($@)
 +{
 +    my ($dsize, @format) = @_;
 +    my $name = $format[10];
 +    my $dtype = "uint${dsize}_t";
 +    my $stype = "uint" . size_for_bpp($format[0]) . "_t";
 +
 +    my $out = <<EOS;
 +static FASTCALL void
 +fbFetch_${name}_$dsize (bits_image_t *pict, int x, int y, int width, $dtype
 +*buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const $stype *pixel = ($stype *)bits + x;
 +    int i;
 +
 +    for (i = 0; i < width; ++i) {
 +EOS
 +    $out .= "        $stype p = " . readpixel($format[0]) . ";\n";
 +    $out .= emit_accesses($dsize, @format);
 +
 +$out .= "        *buffer++ = " . combine(@format) . ";\n    }\n}\n";
 +
 +}
 +
 +sub readonepixel($)
 +{
 +    my $bpp = shift;
-     if ($bpp >= 8) { "READ(pixel)" }
-     elsif ($bpp == 4) { "Fetch4(pixel, 0)" }
++    if ($bpp >= 8) { "READ(pict, pixel)" }
++    elsif ($bpp == 4) { "Fetch4(pict, pixel, 0)" }
 +    else { die "unsupported bpp $bpp"; }
 +}
 +
 +sub gen_fetchpixel($@)
 +{
 +    my ($dsize, @format) = @_;
 +    my $name = $format[10];
 +    my $dtype = "uint${dsize}_t";
 +    my $stype = "uint" . size_for_bpp($format[0]) . "_t";
 +    
 +    my $out = <<EOS;
 +static FASTCALL $dtype
 +fbFetchPixel_${name}_$dsize (bits_image_t *pict, int offset, int line)
 +{
 +    const uint32_t *bits = pict->bits + line*pict->rowstride;
 +    const $stype *pixel = ($stype *)bits + offset;
 +EOS
 +    $out .= "    $stype p = " . readonepixel($format[0]) . ";\n";
 +    $out .= emit_accesses($dsize, @format);
 +    $out .= "    return (". combine(@format) . ");\n}\n";
 +}
 +
 +sub unified_stexpand($@)
 +{
 +    my ($ssize, @format) = @_;
 +    return 0;
 +}
 +
 +sub stexpand($$@)
 +{
 +    my ($comp, $ssize, @format) = @_;
 +    my $acc = "";
 +    my $scs = $ssize / 4;
 +    my ($src_off, $dst_sz, $dst_off);
 +
 +    if ($comp eq "a") {
 +	return "" if ($format[4] == 0);
 +	$src_off = 3;
 +	$dst_sz = $format[4];
 +	$dst_off = $format[8];
 +    } elsif ($comp eq "r") {
 +	$src_off = 2;
 +	$dst_sz = $format[1];
 +	$dst_off = $format[5];
 +    } elsif ($comp eq "g") {
 +	$src_off = 1;
 +	$dst_sz = $format[2];
 +	$dst_off = $format[6];
 +    } elsif($comp eq "b") {
 +	$src_off = 0;
 +	$dst_sz = $format[3];
 +	$dst_off = $format[7];
 +    }
 +
 +    my $dmask = get_mask($dst_off, $dst_sz, $format[0]);
 +    my $expanded_bits = $scs;
 +    # do a loop to emit the necessary number of expand stages
 +    while ($expanded_bits < $dst_sz) {
 +	# we need a mask if the expanded bits overflow the dest field
 +	my $mask_str = "";
 +	if ($expanded_bits * 2 > $dst_sz) {
 +	    $mask_str = "& $dmask";
 +	}
 +	$acc .= "$comp |= ($comp >> $expanded_bits)$mask_str; ";
 +	$expanded_bits *= 2;
 +    }
 +    return $acc;
 +}
 +
 +
 +sub unified_store($@)
 +{
 +    my ($ssize, @format) = @_;
 +    return 0;
 +}
 +
 +
 +sub store($$@)
 +{
 +    my ($comp, $ssize, @format) = @_;
 +    if ($ssize != 32 && $ssize != 64) { die }
 +    my $stype = "uint${ssize}_t";
 +    my $scs = $ssize / 4;
 +    my ($src_off, $dst_sz, $dst_sh);
 +    
 +    if ($comp eq "a") {
 +	$src_off = 3;
 +	$dst_sz = $format[4];
 +	$dst_sh = $format[8] + max($dst_sz - $scs, 0);
 +    } elsif ($comp eq "r") {
 +	$src_off = 2;
 +	$dst_sz = $format[1];
 +	$dst_sh = $format[5] + max($dst_sz - $scs, 0);
 +    } elsif ($comp eq "g") {
 +	$src_off = 1;
 +	$dst_sz = $format[2];
 +	$dst_sh = $format[6] + max($dst_sz - $scs, 0);
 +    } elsif ($comp eq "b") {
 +	$src_off = 0;
 +	$dst_sz = $format[3];
 +	$dst_sh = $format[7] + max($dst_sz - $scs, 0);
 +    }
 +
 +    my $src_sh = $scs*($src_off + 1) - min($scs, $dst_sz);
 +    # calculate mask based on format
 +    my $mask = get_mask($dst_sh, min($dst_sz, $scs), $format[0]);
 +    
 +    return "$stype $comp = 0;" if $dst_sz == 0;
 +    # calculate shift based on src_sh, dst_sh
 +    my $sh = "";
 +    if ($src_sh < $dst_sh) {
 +	$sh = "<< " . ($dst_sh - $src_sh);
 +    } elsif ($src_sh > $dst_sh) {
 +	$sh = ">> " . ($src_sh - $dst_sh);
 +    }
 +    # put it all together
 +    return "$stype $comp = (p $sh) & $mask;";
 +}
 +
 +sub emit_stores($@)
 +{
 +    my ($ssize, @format) = @_;
 +    my $acc = "";
 +    if (!unified_store($ssize, @format)) {
 +	for my $c ("r", "g", "b", "a") {
 +	    $acc .= "\t" . store($c, $ssize, @format) . "\n";
 +	}
 +    }
 +    $acc .= "\n";
 +    if (!unified_stexpand($ssize, @format)) {
 +	for my $c ("r", "g", "b", "a") {
 +	    my $t = stexpand($c, $ssize, @format);
 +	    $acc .= "\t$t\n" if $t;
 +	}
 +    }
 +    return $acc . "\n";
 +}
 +
 +sub storepixel($)
 +{
 +    my $bpp = shift;
-     if ($bpp >= 8) { "WRITE(pixel + i, (r | g | b | a))" }
-     elsif ($bpp == 4) { "Store4(pixel, i, (r | g | b | a))" }
++    if ($bpp >= 8) { "WRITE(image, pixel + i, (r | g | b | a))" }
++    elsif ($bpp == 4) { "Store4(image, pixel, i, (r | g | b | a))" }
 +    else { die "unsupported bpp $bpp"; }
 +}
 +
 +sub gen_store($@)
 +{
 +    my ($ssize, @format) = @_;
 +    my $name = $format[10];
 +    my $stype = "uint${ssize}_t";
 +    my $dtype = "uint" . size_for_bpp($format[0]) . "_t";
 +
 +    my $out = <<EOS;
 +static FASTCALL void
 +fbStore_${name}_$ssize (pixman_image_t *image,
 +                 uint32_t *bits, const $stype *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    $dtype * pixel = ($dtype *)bits + x;
 +    for(i = 0; i < width; ++i) {
 +       $stype p = values[i];
 +EOS
 +    $out .= emit_stores($ssize, @format);
 +       $out .= "       " . storepixel($format[0]) . ";\n    }\n}\n";
 +   }
 +
 +
 +# type is one of fetch, store, fetchpixel
 +sub gen_switch($$@)
 +{
 +    my ($type, $size, @formatnames) = @_;
 +    my ($proctype, $procname);
 +    if ($type eq "fetch") {
 +	$proctype = "fetchProc";
 +	$procname = "fbFetch";
 +    } elsif ($type eq "fetchpixel") {
 +	$proctype = "fetchPixelProc";
 +	$procname = "fbFetchPixel";
 +    } elsif ($type eq "store") {
 +	$proctype = "storeProc";
 +	$procname = "fbStore";
 +    }
 +
 +    my $acc = "static ${proctype}_$size ${proctype}ForPicture_$size (bits_image_t * pict)\n";
 +    $acc .= "{\n    switch(pict->format) {\n";
 +    for my $formatname (@formatnames) {
 +	$acc .= "    case PIXMAN_$formatname: return ${procname}_${formatname}_${size};\n";
 +    }
 +
 +    $acc . "    }\n    return NULL;\n}\n\n";
 +}
 +
 +# gen_functions generates all the accessor functions for a given dsize
 +# and then the switch statement with an entry for each format
 +sub gen_functions($@)
 +{
 +    my ($dsize, @formats) = @_;
 +    my $out = "";
 +    for my $format (@formats) {
- 	$out .= "#define image ((pixman_image_t *)pict)\n";
 +	$out .= gen_fetch($dsize, @{$format});
 +	$out .= gen_fetchpixel($dsize, @{$format});
- 	$out .= "#undef image\n";
 +	$out .= gen_store($dsize, @{$format});
 +    }
 +    my @formatnames = map { $_->[10] } @formats;
 +    push @formatnames, @handcode_formats;
 +    $out .= gen_switch("fetch", $dsize, @formatnames);
 +    $out .= gen_switch("fetchpixel", $dsize, @formatnames);
 +    $out .= gen_switch("store", $dsize, @formatnames);
 +}
 +
 +our @formats;
 +require "formats.pl";
 +
 +print "#include \"pixman-access-handcode.c\"\n\n";
 +print gen_functions(32, @formats);
 +print gen_functions(64, @formats);
diff --cc pixman/pixman-access-handcode.c
index 41f95df,0000000..bba413f
mode 100644,000000..100644
--- a/pixman/pixman-access-handcode.c
+++ b/pixman/pixman-access-handcode.c
@@@ -1,2941 -1,0 +1,2927 @@@
 +/*
 + *
 + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
 + *             2005 Lars Knoll & Zack Rusin, Trolltech
 + *
 + * Permission to use, copy, modify, distribute, and sell this software and its
 + * documentation for any purpose is hereby granted without fee, provided that
 + * the above copyright notice appear in all copies and that both that
 + * copyright notice and this permission notice appear in supporting
 + * documentation, and that the name of Keith Packard not be used in
 + * advertising or publicity pertaining to distribution of the software without
 + * specific, written prior permission.  Keith Packard makes no
 + * representations about the suitability of this software for any purpose.  It
 + * is provided "as is" without express or implied warranty.
 + *
 + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 + * SOFTWARE.
 + */
 +
 +#include <config.h>
 +
 +#include <stdlib.h>
 +#include <string.h>
 +#include <math.h>
 +#include <assert.h>
 +#include <limits.h>
 +
 +#include "pixman-private.h"
 +
 +
 +/*
 + *    FIXME:
 + *		The stuff here is added just to get it to compile. Something sensible needs to
 + *              be done before this can be used.
 + *
 + *   We should go through this code and clean up some of the weird stuff that has
 + *   resulted from unmacro-ifying it.
 + *
 + */
 +#define INLINE inline
 +
 +/*   End of stuff added to get it to compile
-  */ 
++ */
 +
 +static unsigned int
 +SourcePictureClassify (source_image_t *pict,
 +		       int	       x,
 +		       int	       y,
 +		       int	       width,
 +		       int	       height)
 +{
 +    if (pict->common.type == SOLID)
 +    {
 +	pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +    }
 +    else if (pict->common.type == LINEAR)
 +    {
 +	linear_gradient_t *linear = (linear_gradient_t *)pict;
 +	pixman_vector_t   v;
 +	pixman_fixed_32_32_t l;
 +	pixman_fixed_48_16_t dx, dy, a, b, off;
 +	pixman_fixed_48_16_t factors[4];
 +	int	     i;
- 	
++
 +	dx = linear->p2.x - linear->p1.x;
 +	dy = linear->p2.y - linear->p1.y;
 +	l = dx * dx + dy * dy;
 +	if (l)
 +	{
 +	    a = (dx << 32) / l;
 +	    b = (dy << 32) / l;
 +	}
 +	else
 +	{
 +	    a = b = 0;
 +	}
- 	
++
 +	off = (-a * linear->p1.x
 +	       -b * linear->p1.y) >> 16;
- 	
++
 +	for (i = 0; i < 3; i++)
 +	{
 +	    v.vector[0] = pixman_int_to_fixed ((i % 2) * (width  - 1) + x);
 +	    v.vector[1] = pixman_int_to_fixed ((i / 2) * (height - 1) + y);
 +	    v.vector[2] = pixman_fixed_1;
- 	    
++
 +	    if (pict->common.transform)
 +	    {
 +		if (!pixman_transform_point_3d (pict->common.transform, &v))
 +		    return SOURCE_IMAGE_CLASS_UNKNOWN;
 +	    }
- 	    
++
 +	    factors[i] = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off;
 +	}
- 	
++
 +	if (factors[2] == factors[0])
 +	    pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	else if (factors[1] == factors[0])
 +	    pict->class = SOURCE_IMAGE_CLASS_VERTICAL;
 +    }
-     
++
 +    return pict->class;
 +}
 +
 +#define SCANLINE_BUFFER_LENGTH 2048
 +
 +/*
 + * YV12 setup and access macros
 + */
 +
 +#define YV12_SETUP(pict) \
 +	uint32_t *bits = pict->bits; \
 +	int stride = pict->rowstride; \
 +	int offset0 = stride < 0 ? \
 +		((-stride) >> 1) * ((pict->height - 1) >> 1) - stride : \
 +		stride * pict->height; \
 +	int offset1 = stride < 0 ? \
 +		offset0 + ((-stride) >> 1) * ((pict->height) >> 1) : \
- 		offset0 + (offset0 >> 2); 
++		offset0 + (offset0 >> 2);
 +
 +#define YV12_Y(line)		\
 +    ((uint8_t *) ((bits) + (stride) * (line)))
 +
 +#define YV12_U(line)	      \
 +    ((uint8_t *) ((bits) + offset1 + \
 +		((stride) >> 1) * ((line) >> 1)))
 +
 +#define YV12_V(line)	      \
 +    ((uint8_t *) ((bits) + offset0 + \
 +		((stride) >> 1) * ((line) >> 1)))
 +
 +typedef FASTCALL void (*fetchProc_32)(bits_image_t *pict, int x, int y, int width, uint32_t *buffer);
 +
 +static fetchProc_32 fetchProcForPicture_32 (bits_image_t * pict);
 +
 +
 +typedef FASTCALL uint32_t (*fetchPixelProc_32)(bits_image_t *pict, int offset, int line);
 +
 +static fetchPixelProc_32 fetchPixelProcForPicture_32 (bits_image_t * pict);
 +
 +typedef FASTCALL void (*storeProc_32) (pixman_image_t *image,
 +				       uint32_t *bits, const uint32_t *values,
 +				       int x, int width,
 +				       const pixman_indexed_t * indexed);
 +
 +static storeProc_32 storeProcForPicture_32 (bits_image_t * pict);
 +
 +typedef FASTCALL void (*fetchProc_64)(bits_image_t *pict, int x, int y, int width, uint64_t *buffer);
 +
 +static fetchProc_64 fetchProcForPicture_64 (bits_image_t * pict);
 +
 +
 +typedef FASTCALL uint64_t (*fetchPixelProc_64)(bits_image_t *pict, int offset, int line);
 +
 +static fetchPixelProc_64 fetchPixelProcForPicture_64 (bits_image_t * pict);
 +
 +typedef FASTCALL void (*storeProc_64) (pixman_image_t *image,
 +				       uint32_t *bits, const uint64_t *values,
 +				       int x, int width,
 +				       const pixman_indexed_t * indexed);
 +
 +static storeProc_64 storeProcForPicture_64 (bits_image_t * pict);
 +
 +/* handcoded fetch/store functions. */
 +#define fbFetch_g8_32 fbFetch_c8_32
 +#define fbFetchPixel_g8_32 fbFetchPixel_c8_32
 +#define fbStore_g8_32 fbStore_c8_32
 +
 +#define fbFetch_g4_32 fbFetch_c4_32
 +#define fbFetchPixel_g4_32 fbFetchPixel_c4_32
 +#define fbStore_g4_32 fbStore_c4_32
 +
 +#define fbFetch_g8_64 fbFetch_c8_64
 +#define fbFetchPixel_g8_64 fbFetchPixel_c8_64
 +#define fbStore_g8_64 fbStore_c8_64
 +
 +#define fbFetch_g4_64 fbFetch_c4_64
 +#define fbFetchPixel_g4_64 fbFetchPixel_c4_64
 +#define fbStore_g4_64 fbStore_c4_64
 +
- /*
-  * Used by READ/WRITE macros
-  */
- #define image ((pixman_image_t *)pict)
- 
 +static FASTCALL void
 +fbFetch_r8g8b8_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
- 	uint32_t b = Fetch24(pixel) | 0xff000000;
++	uint32_t b = Fetch24(pict, pixel) | 0xff000000;
 +	pixel += 3;
 +	*buffer++ = b;
 +    }
 +}
 +
 +/*
 + * Fetch `width' packed 24-bit r8g8b8 pixels starting at column x of row y
 + * and write them to buffer as 64-bit pixels with 16 bits per channel.
 + * The source has no alpha, so alpha is set to 0xffff.
 + */
 +static FASTCALL void
 +fbFetch_r8g8b8_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
- 	uint64_t r = (uint64_t)READ(pixel) << 32;
- 	uint64_t g = (uint64_t)READ(pixel + 1) << 16;
- 	uint64_t b = (uint64_t)READ(pixel + 2);
++	/* Same byte order handling and alpha as fbFetchPixel_r8g8b8_64. */
++	uint64_t p = 0xffff000000000000LL;
++#if IMAGE_BYTE_ORDER == MSBFirst
++	p |= ((uint64_t)READ(pict, pixel + 0) << 32);
++	p |= ((uint64_t)READ(pict, pixel + 1) << 16);
++	p |= ((uint64_t)READ(pict, pixel + 2));
++#else
++	p |= ((uint64_t)READ(pict, pixel + 2) << 32);
++	p |= ((uint64_t)READ(pict, pixel + 1) << 16);
++	p |= ((uint64_t)READ(pict, pixel + 0));
++#endif
 +
 +	pixel += 3;
++	/* Widen each 8-bit channel to 16 bits by replication. */
++	*buffer++ = p | (p << 8);
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_b8g8r8_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint32_t b = 0xff000000;
 +#if IMAGE_BYTE_ORDER == MSBFirst
- 	b |= (READ(pixel++));
- 	b |= (READ(pixel++) << 8);
- 	b |= (READ(pixel++) << 16);
++	b |= (READ(pict, pixel++));
++	b |= (READ(pict, pixel++) << 8);
++	b |= (READ(pict, pixel++) << 16);
 +#else
- 	b |= (READ(pixel++) << 16);
- 	b |= (READ(pixel++) << 8);
- 	b |= (READ(pixel++));
++	b |= (READ(pict, pixel++) << 16);
++	b |= (READ(pict, pixel++) << 8);
++	b |= (READ(pict, pixel++));
 +#endif
 +	*buffer++ = b;
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_b8g8r8_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint64_t b = 0xffff000000000000LL;
 +#if IMAGE_BYTE_ORDER == MSBFirst
- 	b |= ((uint64_t)READ(pixel++));
- 	b |= ((uint64_t)READ(pixel++) << 16);
- 	b |= ((uint64_t)READ(pixel++) << 32);
++	b |= ((uint64_t)READ(pict, pixel++));
++	b |= ((uint64_t)READ(pict, pixel++) << 16);
++	b |= ((uint64_t)READ(pict, pixel++) << 32);
 +#else
- 	b |= ((uint64_t)READ(pixel++) << 32);
- 	b |= ((uint64_t)READ(pixel++) << 16);
- 	b |= ((uint64_t)READ(pixel++));
++	b |= ((uint64_t)READ(pict, pixel++) << 32);
++	b |= ((uint64_t)READ(pict, pixel++) << 16);
++	b |= ((uint64_t)READ(pict, pixel++));
 +#endif
 +	*buffer++ = b | (b << 8);
 +    }
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_r8g8b8_32 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +    return (0xff000000 |
- 	    (READ(pixel + 0) << 16) |
- 	    (READ(pixel + 1) << 8) |
- 	    (READ(pixel + 2)));
++	    (READ(pict, pixel + 0) << 16) |
++	    (READ(pict, pixel + 1) << 8) |
++	    (READ(pict, pixel + 2)));
 +#else
 +    return (0xff000000 |
- 	    (READ(pixel + 2) << 16) |
- 	    (READ(pixel + 1) << 8) |
- 	    (READ(pixel + 0)));
++	    (READ(pict, pixel + 2) << 16) |
++	    (READ(pict, pixel + 1) << 8) |
++	    (READ(pict, pixel + 0)));
 +#endif
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_r8g8b8_64 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +    uint64_t p = (0xffff000000000000LL |
- 		  ((uint64_t)READ(pixel + 0) << 32) |
- 		  ((uint64_t)READ(pixel + 1) << 16) |
- 		  ((uint64_t)READ(pixel + 2)));
++		  ((uint64_t)READ(pict, pixel + 0) << 32) |
++		  ((uint64_t)READ(pict, pixel + 1) << 16) |
++		  ((uint64_t)READ(pict, pixel + 2)));
 +#else
 +    uint64_t p = (0xffff000000000000LL |
- 		  ((uint64_t)READ(pixel + 2) << 32) |
- 		  ((uint64_t)READ(pixel + 1) << 16) |
- 		  ((uint64_t)READ(pixel + 0)));
++		  ((uint64_t)READ(pict, pixel + 2) << 32) |
++		  ((uint64_t)READ(pict, pixel + 1) << 16) |
++		  ((uint64_t)READ(pict, pixel + 0)));
 +#endif
 +    return p | (p << 8);
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_b8g8r8_32 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +    return (0xff000000 |
- 	    (READ(pixel + 2) << 16) |
- 	    (READ(pixel + 1) << 8) |
- 	    (READ(pixel + 0)));
++	    (READ(pict, pixel + 2) << 16) |
++	    (READ(pict, pixel + 1) << 8) |
++	    (READ(pict, pixel + 0)));
 +#else
 +    return (0xff000000 |
- 	    (READ(pixel + 0) << 16) |
- 	    (READ(pixel + 1) << 8) |
- 	    (READ(pixel + 2)));
++	    (READ(pict, pixel + 0) << 16) |
++	    (READ(pict, pixel + 1) << 8) |
++	    (READ(pict, pixel + 2)));
 +#endif
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_b8g8r8_64 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +    uint64_t p = (0xffff000000000000LL |
- 		  ((uint64_t)READ(pixel + 2) << 32) |
- 		  ((uint64_t)READ(pixel + 1) << 16) |
- 		  ((uint64_t)READ(pixel + 0)));
++		  ((uint64_t)READ(pict, pixel + 2) << 32) |
++		  ((uint64_t)READ(pict, pixel + 1) << 16) |
++		  ((uint64_t)READ(pict, pixel + 0)));
 +#else
 +    uint64_t p = (0xffff000000000000LL |
- 		  ((uint64_t)READ(pixel + 0) << 32) |
- 		  ((uint64_t)READ(pixel + 1) << 16) |
- 		  ((uint64_t)READ(pixel + 2)));
++		  ((uint64_t)READ(pict, pixel + 0) << 32) |
++		  ((uint64_t)READ(pict, pixel + 1) << 16) |
++		  ((uint64_t)READ(pict, pixel + 2)));
 +#endif
 +    return p | (p << 8);
 +}
 +
- #undef image
- 
 +static FASTCALL void
 +fbStore_r8g8b8_32 (pixman_image_t *image,
 +		   uint32_t *bits, const uint32_t *values, int x, int width,
 +		   const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
- 	Store24(pixel, values[i]);
++	Store24(image, pixel, values[i]);
 +	pixel += 3;
 +    }
 +}
 +
 +/*
 + * Store `width' 64-bit pixels from values as packed 24-bit r8g8b8,
 + * starting at column x of the row pointed to by bits.  Only the high
 + * 8 bits of each 16-bit colour channel are kept; alpha is dropped.
 + * The indexed argument is not used.
 + */
 +static FASTCALL void
 +fbStore_r8g8b8_64 (pixman_image_t *image,
 +		   uint32_t *bits, const uint64_t *values, int x, int width,
 +		   const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t r = (values[i] >> 40) & 0xff;
 +	uint32_t g = (values[i] >> 24) & 0xff;
 +	uint32_t b = (values[i] >> 8) & 0xff;
- 	Store24(pixel, (r|g|b));
++	/* r, g and b are each in bits 0-7; pack them as 0xRRGGBB, the
++	 * layout fbStore_r8g8b8_32 hands to Store24. */
++	Store24(image, pixel, ((r << 16) | (g << 8) | b));
 +	pixel += 3;
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_b8g8r8_32 (pixman_image_t *image,
 +		   uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t val = values[i];
 +#if IMAGE_BYTE_ORDER == MSBFirst
- 	WRITE(pixel++, Blue(val));
- 	WRITE(pixel++, Green(val));
- 	WRITE(pixel++, Red(val));
++	WRITE(image, pixel++, Blue(val));
++	WRITE(image, pixel++, Green(val));
++	WRITE(image, pixel++, Red(val));
 +#else
- 	WRITE(pixel++, Red(val));
- 	WRITE(pixel++, Green(val));
- 	WRITE(pixel++, Blue(val));
++	WRITE(image, pixel++, Red(val));
++	WRITE(image, pixel++, Green(val));
++	WRITE(image, pixel++, Blue(val));
 +#endif
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_b8g8r8_64 (pixman_image_t *image,
 +		   uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t r = (values[i] >> 40) & 0xff;
 +	uint32_t g = (values[i] >> 24) & 0xff;
 +	uint32_t b = (values[i] >> 8) & 0xff;
 +#if IMAGE_BYTE_ORDER == MSBFirst
- 	WRITE(pixel++, b);
- 	WRITE(pixel++, g);
- 	WRITE(pixel++, r);
++	WRITE(image, pixel++, b);
++	WRITE(image, pixel++, g);
++	WRITE(image, pixel++, r);
 +#else
- 	WRITE(pixel++, r);
- 	WRITE(pixel++, g);
- 	WRITE(pixel++, b);
++	WRITE(image, pixel++, r);
++	WRITE(image, pixel++, g);
++	WRITE(image, pixel++, b);
 +#endif
 +    }
 +}
 +
- #define image ((pixman_image_t *)pict)
- 
 +static FASTCALL void
 +fbFetch_c8_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    const uint8_t *pixel = (const uint8_t *)bits + x;
 +    const uint8_t *end = pixel + width;
 +    while (pixel < end) {
- 	uint32_t  p = READ(pixel++);
++	uint32_t  p = READ(pict, pixel++);
 +	*buffer++ = indexed->rgba[p];
 +    }
 +}
 +
 +/*
 + * Fetch `width' 8-bit indexed pixels starting at column x of row y,
 + * look each one up in pict->indexed, and write the 32-bit palette
 + * entry to buffer widened to 16 bits per channel.
 + */
 +static FASTCALL void
 +fbFetch_c8_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
++    const pixman_indexed_t * indexed = pict->indexed;
 +    const uint8_t *pixel = (const uint8_t *)bits + x;
 +    const uint8_t *end = pixel + width;
 +    while (pixel < end) {
- 	uint64_t  p = READ(pixel++);
++	/* Look the index up in the palette, as fbFetch_c8_32 does. */
++	uint32_t  idx = READ(pict, pixel++);
++	uint64_t  p = indexed->rgba[idx];
 +	uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +
 +	*buffer++ = px | (px << 8);
 +    }
 +}
 +
- #define Fetch8(l,o)    (READ((uint8_t *)(l) + ((o) >> 2)))
++#define Fetch8(img,l,o)    (READ(img, (uint8_t *)(l) + ((o) >> 2)))
 +#if IMAGE_BYTE_ORDER == MSBFirst
- #define Fetch4(l,o)    ((o) & 2 ? Fetch8(l,o) & 0xf : Fetch8(l,o) >> 4)
++#define Fetch4(img,l,o)    ((o) & 2 ? Fetch8(img,l,o) & 0xf : Fetch8(img,l,o) >> 4)
 +#else
- #define Fetch4(l,o)    ((o) & 2 ? Fetch8(l,o) >> 4 : Fetch8(l,o) & 0xf)
++#define Fetch4(img,l,o)    ((o) & 2 ? Fetch8(img,l,o) >> 4 : Fetch8(img,l,o) & 0xf)
 +#endif
 +
 +static FASTCALL void
 +fbFetch_c4_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
- 	uint32_t  p = Fetch4(bits, i + x);
- 	
++	uint32_t  p = Fetch4(pict, bits, i + x);
++
 +	*buffer++ = indexed->rgba[p];
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_c4_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
- 	uint64_t  p = indexed->rgba[Fetch4(bits, i + x)];
++	uint64_t  p = indexed->rgba[Fetch4(pict, bits, i + x)];
 +	uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +
 +	*buffer++ = px | (px << 8);
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_a1_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    int i;
 +    for (i = 0; i < width; ++i) {
- 	uint32_t  p = READ(bits + ((i + x) >> 5));
++	uint32_t  p = READ(pict, bits + ((i + x) >> 5));
 +	uint32_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = p >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = p >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +	a |= a << 1;
 +	a |= a << 2;
 +	a |= a << 4;
 +	*buffer++ = a << 24;
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_a1_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    int i;
 +    for (i = 0; i < width; ++i) {
- 	uint32_t  p = READ(bits + ((i + x) >> 5));
++	uint32_t  p = READ(pict, bits + ((i + x) >> 5));
 +	uint64_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = p >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = p >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +	a |= a << 1;
 +	a |= a << 2;
 +	a |= a << 4;
 +	a |= a << 8;
 +	*buffer++ = a << 48;
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_g1_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
- 	uint32_t p = READ(bits + ((i+x) >> 5));
++	uint32_t p = READ(pict, bits + ((i+x) >> 5));
 +	uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = p >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = p >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +	*buffer++ = indexed->rgba[a];
 +    }
 +}
 +
 +/*
 + * Fetch `width' 1-bit indexed pixels starting at column x of row y,
 + * look each bit up in pict->indexed, and write the 32-bit palette
 + * entry to buffer widened to 16 bits per channel.
 + */
 +static FASTCALL void
 +fbFetch_g1_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
- 	uint32_t t = READ(bits + ((i+x) >> 5));
++	uint32_t t = READ(pict, bits + ((i+x) >> 5));
 +	uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = t >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = t >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +
- 	uint64_t  p = indexed->rgba[Fetch4(bits, i + x)];
++	/* Index the palette with the extracted bit, as fbFetch_g1_32 does. */
++	uint64_t  p = indexed->rgba[a];
 +	uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +	*buffer++ = px | (px << 8);
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_yuy2_32 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
 +{
 +    int16_t y, u, v;
 +    int32_t r, g, b;
 +    int   i;
 +
 +    const uint32_t *bits = pict->bits + pict->rowstride * line;
 +
 +    for (i = 0; i < width; i++)
 +    {
 +	y = ((uint8_t *) bits)[(x + i) << 1] - 16;
 +	u = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 1] - 128;
 +	v = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 3] - 128;
 +
 +	/* R = 1.164(Y - 16) + 1.596(V - 128) */
 +	r = 0x012b27 * y + 0x019a2e * v;
 +	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
 +	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
 +	/* B = 1.164(Y - 16) + 2.018(U - 128) */
 +	b = 0x012b27 * y + 0x0206a2 * u;
 +
-     WRITE(buffer++, 0xff000000 |
++    WRITE(pict, buffer++, 0xff000000 |
 +	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
 +	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
 +	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_yuy2_64 (bits_image_t *pict, int x, int line, int width, uint64_t *buffer)
 +{
 +    /* [AGP] Unimplemented */
 +}
 +
 +static FASTCALL void
 +fbFetch_yv12_32 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
 +{
 +    YV12_SETUP(pict);
 +    uint8_t *pY = YV12_Y (line);
 +    uint8_t *pU = YV12_U (line);
 +    uint8_t *pV = YV12_V (line);
 +    int16_t y, u, v;
 +    int32_t r, g, b;
 +    int   i;
 +
 +    for (i = 0; i < width; i++)
 +    {
 +	y = pY[x + i] - 16;
 +	u = pU[(x + i) >> 1] - 128;
 +	v = pV[(x + i) >> 1] - 128;
 +
 +	/* R = 1.164(Y - 16) + 1.596(V - 128) */
 +	r = 0x012b27 * y + 0x019a2e * v;
 +	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
 +	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
 +	/* B = 1.164(Y - 16) + 2.018(U - 128) */
 +	b = 0x012b27 * y + 0x0206a2 * u;
 +
- 	WRITE(buffer++, 0xff000000 |
++	WRITE(pict, buffer++, 0xff000000 |
 +	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
 +	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
 +	    (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_yv12_64 (bits_image_t *pict, int x, int line, int width, uint64_t *buffer)
 +{
 +    /* [AGP] Unimplemented */
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_c8_32 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
-     uint32_t   pixel = READ((uint8_t *) bits + offset);
++    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    return indexed->rgba[pixel];
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_c8_64 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
-     uint64_t p = indexed->rgba[READ((uint8_t *) bits + offset)];
++    uint64_t p = indexed->rgba[READ(pict, (uint8_t *) bits + offset)];
 +    uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +
 +    return px | (px << 8);
 +}
 +
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_c4_32 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
-     uint32_t  pixel = Fetch4(bits, offset);
++    uint32_t  pixel = Fetch4(pict, bits, offset);
 +    const pixman_indexed_t * indexed = pict->indexed;
-     
++
 +    return indexed->rgba[pixel];
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_c4_64 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
-     uint64_t p = indexed->rgba[Fetch4(bits, offset)];
++    uint64_t p = indexed->rgba[Fetch4(pict, bits, offset)];
 +    uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +
 +    return px | (px << 8);
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_a1_32 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
-     uint32_t  pixel = READ(bits + (offset >> 5));
++    uint32_t  pixel = READ(pict, bits + (offset >> 5));
 +    uint32_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    a |= a << 1;
 +    a |= a << 2;
 +    a |= a << 4;
 +    return a << 24;
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_a1_64 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
-     uint32_t  pixel = READ(bits + (offset >> 5));
++    uint32_t  pixel = READ(pict, bits + (offset >> 5));
 +    uint64_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    a |= a << 1;
 +    a |= a << 2;
 +    a |= a << 4;
 +    a |= a << 8;
 +    return a << 48;
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_g1_32 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
-     uint32_t pixel = READ(bits + (offset >> 5));
++    uint32_t pixel = READ(pict, bits + (offset >> 5));
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    return indexed->rgba[a];
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_g1_64 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
-     uint32_t pixel = READ(bits + (offset >> 5));
++    uint32_t pixel = READ(pict, bits + (offset >> 5));
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    uint64_t p = indexed->rgba[a];
 +    uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +    return px | (px << 8);
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_yuy2_32 (bits_image_t *pict, int offset, int line)
 +{
 +    int16_t y, u, v;
 +    int32_t r, g, b;
 +
 +    const uint32_t *bits = pict->bits + pict->rowstride * line;
 +
 +    y = ((uint8_t *) bits)[offset << 1] - 16;
 +    u = ((uint8_t *) bits)[((offset << 1) & -4) + 1] - 128;
 +    v = ((uint8_t *) bits)[((offset << 1) & -4) + 3] - 128;
 +
 +    /* R = 1.164(Y - 16) + 1.596(V - 128) */
 +    r = 0x012b27 * y + 0x019a2e * v;
 +    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
 +    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
 +    /* B = 1.164(Y - 16) + 2.018(U - 128) */
 +    b = 0x012b27 * y + 0x0206a2 * u;
 +
 +    return 0xff000000 |
 +	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
 +	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
 +	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_yuy2_64 (bits_image_t *pict, int offset, int line)
 +{
 +    /* [AGP] Unimplemented */
 +    return 0;
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_yv12_32 (bits_image_t *pict, int offset, int line)
 +{
 +    YV12_SETUP(pict);
 +    int16_t y = YV12_Y (line)[offset] - 16;
 +    int16_t u = YV12_U (line)[offset >> 1] - 128;
 +    int16_t v = YV12_V (line)[offset >> 1] - 128;
 +    int32_t r, g, b;
 +
 +    /* R = 1.164(Y - 16) + 1.596(V - 128) */
 +    r = 0x012b27 * y + 0x019a2e * v;
 +    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
 +    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
 +    /* B = 1.164(Y - 16) + 2.018(U - 128) */
 +    b = 0x012b27 * y + 0x0206a2 * u;
 +
 +    return 0xff000000 |
 +	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
 +	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
 +	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_yv12_64 (bits_image_t *pict, int offset, int line)
 +{
 +    /* [AGP] Unimplemented */
 +    return 0;
 +}
 +
- #undef image
- 
 +static FASTCALL void
 +fbStore_c8_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t   *pixel = ((uint8_t *) bits) + x;
 +    for (i = 0; i < width; ++i) {
- 	WRITE(pixel++, miIndexToEnt24(indexed,values[i]));
++	WRITE(image, pixel++, miIndexToEnt24(indexed,values[i]));
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_c8_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t   *pixel = ((uint8_t *) bits) + x;
 +    for (i = 0; i < width; ++i) {
- 	WRITE(pixel++, miIndexToEnt48(indexed,values[i]));
++	WRITE(image, pixel++, miIndexToEnt48(indexed,values[i]));
 +    }
 +}
 +
- #define Store8(l,o,v)  (WRITE((uint8_t *)(l) + ((o) >> 3), (v)))
++#define Store8(img,l,o,v)  (WRITE(img, (uint8_t *)(l) + ((o) >> 3), (v)))
 +#if IMAGE_BYTE_ORDER == MSBFirst
- #define Store4(l,o,v)  Store8(l,o,((o) & 4 ?				\
- 				   (Fetch8(l,o) & 0xf0) | (v) :		\
- 				   (Fetch8(l,o) & 0x0f) | ((v) << 4)))
++#define Store4(img,l,o,v)  Store8(img,l,o,((o) & 4 ?				\
++				   (Fetch8(img,l,o) & 0xf0) | (v) :		\
++				   (Fetch8(img,l,o) & 0x0f) | ((v) << 4)))
 +#else
- #define Store4(l,o,v)  Store8(l,o,((o) & 4 ?			       \
- 				   (Fetch8(l,o) & 0x0f) | ((v) << 4) : \
- 				   (Fetch8(l,o) & 0xf0) | (v)))
++#define Store4(img,l,o,v)  Store8(img,l,o,((o) & 4 ?			       \
++				   (Fetch8(img,l,o) & 0x0f) | ((v) << 4) : \
++				   (Fetch8(img,l,o) & 0xf0) | (v)))
 +#endif
 +
++
 +static FASTCALL void
 +fbStore_c4_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  pixel;
- 	
++
 +	pixel = miIndexToEnt24(indexed, values[i]);
- 	Store4(bits, i + x, pixel);
++	Store4(image, bits, i + x, pixel);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_c4_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  pixel;
 +
 +	pixel = miIndexToEnt48(indexed, values[i]);
- 	Store4(bits, i + x, pixel);
++	Store4(image, bits, i + x, pixel);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_a1_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
- 	
++
 +	uint32_t v = values[i] & 0x80000000 ? mask : 0;
- 	WRITE(pixel, (READ(pixel) & ~mask) | v);
++	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_a1_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +
 +	uint64_t v = values[i] & 0x8000000000000000LL ? mask : 0;
- 	WRITE(pixel, (READ(pixel) & ~mask) | v);
++	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_g1_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
- 	
++
 +	uint32_t v = miIndexToEntY24(indexed,values[i]) ? mask : 0;
- 	WRITE(pixel, (READ(pixel) & ~mask) | v);
++	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_g1_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +
 +	uint32_t v = miIndexToEntY48(indexed,values[i]) ? mask : 0;
- 	WRITE(pixel, (READ(pixel) & ~mask) | v);
++	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_yuy2_32 (pixman_image_t *image,
 +	         uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    /* [AGP] Unimplemented */
 +}
 +
 +static FASTCALL void
 +fbStore_yuy2_64 (pixman_image_t *image,
 +	         uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    /* [AGP] Unimplemented */
 +}
 +
 +static FASTCALL void
 +fbStore_yv12_32 (pixman_image_t *image,
 +	         uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    /* [AGP] Unimplemented */
 +}
 +
 +static FASTCALL void
 +fbStore_yv12_64 (pixman_image_t *image,
 +	         uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    /* [AGP] Unimplemented */
 +}
 +
 +/* end of handcoded fetch/store functions */
 +
- #define image ((pixman_image_t *)pict)
- 
 +#ifdef PIXMAN_FB_ACCESSORS
 +static
 +#endif
 +void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +    uint32_t color;
 +    uint32_t *end;
 +    fetchPixelProc_32 fetch = fetchPixelProcForPicture_32(pict);
-     
++
 +    color = fetch(pict, 0, 0);
-     
++
 +    end = buffer + width;
 +    while (buffer < end)
 +	*(buffer++) = color;
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +#ifdef PIXMAN_FB_ACCESSORS
 +static
 +#endif
 +void fbFetchSolid64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    uint64_t color;
 +    uint64_t *end;
 +    fetchPixelProc_64 fetch = fetchPixelProcForPicture_64(pict);
-     
++
 +    color = fetch(pict, 0, 0);
-     
++
 +    end = buffer + width;
 +    while (buffer < end)
 +	*(buffer++) = color;
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +static void fbFetch(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +    fetchProc_32 fetch = fetchProcForPicture_32(pict);
-     
++
 +    fetch(pict, x, y, width, buffer);
 +}
 +
 +static void fbFetch64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    fetchProc_64 fetch = fetchProcForPicture_64(pict);
-     
++
 +    fetch(pict, x, y, width, buffer);
 +}
 +
 +#ifdef PIXMAN_FB_ACCESSORS
 +#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_accessors
 +#define PIXMAN_COMPOSITE_RECT_GENERAL_WIDE pixman_composite_rect_general_wide_accessors
 +#else
 +#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
 +#define PIXMAN_COMPOSITE_RECT_GENERAL_WIDE pixman_composite_rect_general_wide_no_accessors
 +#endif
 +
 +typedef struct
 +{
 +    uint32_t        left_ag;
 +    uint32_t        left_rb;
 +    uint32_t        right_ag;
 +    uint32_t        right_rb;
 +    int32_t       left_x;
 +    int32_t       right_x;
 +    int32_t       stepper;
-     
++
 +    pixman_gradient_stop_t	*stops;
 +    int                      num_stops;
 +    unsigned int             spread;
-     
++
 +    int		  need_reset;
 +} GradientWalker;
 +
 +static void
 +_gradient_walker_init (GradientWalker  *walker,
 +		       gradient_t      *gradient,
 +		       unsigned int     spread)
 +{
 +    walker->num_stops = gradient->n_stops;
 +    walker->stops     = gradient->stops;
 +    walker->left_x    = 0;
 +    walker->right_x   = 0x10000;
 +    walker->stepper   = 0;
 +    walker->left_ag   = 0;
 +    walker->left_rb   = 0;
 +    walker->right_ag  = 0;
 +    walker->right_rb  = 0;
 +    walker->spread    = spread;
-     
++
 +    walker->need_reset = TRUE;
 +}
 +
 +static void
 +_gradient_walker_reset (GradientWalker  *walker,
 +                        pixman_fixed_32_32_t     pos)
 +{
 +    int32_t                  x, left_x, right_x;
 +    pixman_color_t          *left_c, *right_c;
 +    int                      n, count = walker->num_stops;
 +    pixman_gradient_stop_t *      stops = walker->stops;
-     
++
 +    static const pixman_color_t   transparent_black = { 0, 0, 0, 0 };
-     
++
 +    switch (walker->spread)
 +    {
 +    case PIXMAN_REPEAT_NORMAL:
 +	x = (int32_t)pos & 0xFFFF;
 +	for (n = 0; n < count; n++)
 +	    if (x < stops[n].x)
 +		break;
 +	if (n == 0) {
 +	    left_x =  stops[count-1].x - 0x10000;
 +	    left_c = &stops[count-1].color;
 +	} else {
 +	    left_x =  stops[n-1].x;
 +	    left_c = &stops[n-1].color;
 +	}
- 	
++
 +	if (n == count) {
 +	    right_x =  stops[0].x + 0x10000;
 +	    right_c = &stops[0].color;
 +	} else {
 +	    right_x =  stops[n].x;
 +	    right_c = &stops[n].color;
 +	}
 +	left_x  += (pos - x);
 +	right_x += (pos - x);
 +	break;
- 	
++
 +    case PIXMAN_REPEAT_PAD:
 +	for (n = 0; n < count; n++)
 +	    if (pos < stops[n].x)
 +		break;
- 	
++
 +	if (n == 0) {
 +	    left_x =  INT32_MIN;
 +	    left_c = &stops[0].color;
 +	} else {
 +	    left_x =  stops[n-1].x;
 +	    left_c = &stops[n-1].color;
 +	}
- 	
++
 +	if (n == count) {
 +	    right_x =  INT32_MAX;
 +	    right_c = &stops[n-1].color;
 +	} else {
 +	    right_x =  stops[n].x;
 +	    right_c = &stops[n].color;
 +	}
 +	break;
- 	
++
 +    case PIXMAN_REPEAT_REFLECT:
 +	x = (int32_t)pos & 0xFFFF;
 +	if ((int32_t)pos & 0x10000)
 +	    x = 0x10000 - x;
 +	for (n = 0; n < count; n++)
 +	    if (x < stops[n].x)
 +		break;
- 	
++
 +	if (n == 0) {
 +	    left_x =  -stops[0].x;
 +	    left_c = &stops[0].color;
 +	} else {
 +	    left_x =  stops[n-1].x;
 +	    left_c = &stops[n-1].color;
 +	}
- 	
++
 +	if (n == count) {
 +	    right_x = 0x20000 - stops[n-1].x;
 +	    right_c = &stops[n-1].color;
 +	} else {
 +	    right_x =  stops[n].x;
 +	    right_c = &stops[n].color;
 +	}
- 	
++
 +	if ((int32_t)pos & 0x10000) {
 +	    pixman_color_t  *tmp_c;
 +	    int32_t          tmp_x;
- 	    
++
 +	    tmp_x   = 0x10000 - right_x;
 +	    right_x = 0x10000 - left_x;
 +	    left_x  = tmp_x;
- 	    
++
 +	    tmp_c   = right_c;
 +	    right_c = left_c;
 +	    left_c  = tmp_c;
- 	    
++
 +	    x = 0x10000 - x;
 +	}
 +	left_x  += (pos - x);
 +	right_x += (pos - x);
 +	break;
- 	
++
 +    default:  /* RepeatNone */
 +	for (n = 0; n < count; n++)
 +	    if (pos < stops[n].x)
 +		break;
- 	
++
 +	if (n == 0)
 +	{
 +	    left_x  =  INT32_MIN;
 +	    right_x =  stops[0].x;
 +	    left_c  = right_c = (pixman_color_t*) &transparent_black;
 +	}
 +	else if (n == count)
 +	{
 +	    left_x  = stops[n-1].x;
 +	    right_x = INT32_MAX;
 +	    left_c  = right_c = (pixman_color_t*) &transparent_black;
 +	}
 +	else
 +	{
 +	    left_x  =  stops[n-1].x;
 +	    right_x =  stops[n].x;
 +	    left_c  = &stops[n-1].color;
 +	    right_c = &stops[n].color;
 +	}
 +    }
-     
++
 +    walker->left_x   = left_x;
 +    walker->right_x  = right_x;
 +    walker->left_ag  = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8);
 +    walker->left_rb  = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
 +    walker->right_ag = ((right_c->alpha >> 8) << 16)  | (right_c->green >> 8);
 +    walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
-     
++
 +    if ( walker->left_x == walker->right_x                ||
 +	 ( walker->left_ag == walker->right_ag &&
 +	   walker->left_rb == walker->right_rb )   )
 +    {
 +	walker->stepper = 0;
 +    }
 +    else
 +    {
 +	int32_t width = right_x - left_x;
 +	walker->stepper = ((1 << 24) + width/2)/width;
 +    }
-     
++
 +    walker->need_reset = FALSE;
 +}
 +
 +#define  GRADIENT_WALKER_NEED_RESET(w,x)				\
 +    ( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x)
 +
- #undef image
- 
 +/* the following resets the walker itself when GRADIENT_WALKER_NEED_RESET(w,x) is TRUE */
 +static uint32_t
 +_gradient_walker_pixel (GradientWalker  *walker,
 +                        pixman_fixed_32_32_t     x)
 +{
 +    int  dist, idist;
 +    uint32_t  t1, t2, a, color;
-     
++
 +    if (GRADIENT_WALKER_NEED_RESET (walker, x))
 +        _gradient_walker_reset (walker, x);
-     
++
 +    dist  = ((int)(x - walker->left_x)*walker->stepper) >> 16;
 +    idist = 256 - dist;
-     
++
 +    /* combined INTERPOLATE and premultiply */
 +    t1 = walker->left_rb*idist + walker->right_rb*dist;
 +    t1 = (t1 >> 8) & 0xff00ff;
-     
++
 +    t2  = walker->left_ag*idist + walker->right_ag*dist;
 +    t2 &= 0xff00ff00;
-     
++
 +    color = t2 & 0xff000000;
 +    a     = t2 >> 24;
-     
++
 +    t1  = t1*a + 0x800080;
 +    t1  = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
-     
++
 +    t2  = (t2 >> 8)*a + 0x800080;
 +    t2  = (t2 + ((t2 >> 8) & 0xff00ff));
-     
++
 +    return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
 +}
 +
 +static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +#if 0
 +    SourcePictPtr   pGradient = pict->pSourcePict;
 +#endif
 +    GradientWalker  walker;
 +    uint32_t       *end = buffer + width;
 +    gradient_t	    *gradient;
-     
++
 +    if (pict->common.type == SOLID)
 +    {
 +	register uint32_t color = ((solid_fill_t *)pict)->color;
- 	
++
 +	while (buffer < end)
 +	    *(buffer++) = color;
- 	
++
 +	return;
 +    }
-     
++
 +    gradient = (gradient_t *)pict;
-     
++
 +    _gradient_walker_init (&walker, gradient, pict->common.repeat);
-     
++
 +    if (pict->common.type == LINEAR) {
 +	pixman_vector_t v, unit;
 +	pixman_fixed_32_32_t l;
 +	pixman_fixed_48_16_t dx, dy, a, b, off;
 +	linear_gradient_t *linear = (linear_gradient_t *)pict;
- 	
++
 +        /* reference point is the center of the pixel */
 +        v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
 +        v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
 +        v.vector[2] = pixman_fixed_1;
 +        if (pict->common.transform) {
 +            if (!pixman_transform_point_3d (pict->common.transform, &v))
 +                return;
 +            unit.vector[0] = pict->common.transform->matrix[0][0];
 +            unit.vector[1] = pict->common.transform->matrix[1][0];
 +            unit.vector[2] = pict->common.transform->matrix[2][0];
 +        } else {
 +            unit.vector[0] = pixman_fixed_1;
 +            unit.vector[1] = 0;
 +            unit.vector[2] = 0;
 +        }
- 	
++
 +        dx = linear->p2.x - linear->p1.x;
 +        dy = linear->p2.y - linear->p1.y;
 +        l = dx*dx + dy*dy;
 +        if (l != 0) {
 +            a = (dx << 32) / l;
 +            b = (dy << 32) / l;
 +            off = (-a*linear->p1.x - b*linear->p1.y)>>16;
 +        }
 +        if (l == 0  || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)) {
 +            pixman_fixed_48_16_t inc, t;
 +            /* affine transformation only */
 +            if (l == 0) {
 +                t = 0;
 +                inc = 0;
 +            } else {
 +                t = ((a*v.vector[0] + b*v.vector[1]) >> 16) + off;
 +                inc = (a * unit.vector[0] + b * unit.vector[1]) >> 16;
 +            }
- 	    
++
 +	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
 +	    {
 +		register uint32_t color;
- 		
++
 +		color = _gradient_walker_pixel( &walker, t );
 +		while (buffer < end)
 +		    *(buffer++) = color;
 +	    }
 +	    else
 +	    {
 +                if (!mask) {
 +                    while (buffer < end)
 +                    {
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +                        buffer += 1;
 +                        t      += inc;
 +                    }
 +                } else {
 +                    while (buffer < end) {
 +                        if (*mask++ & maskBits)
 +                        {
 +			    *(buffer) = _gradient_walker_pixel (&walker, t);
 +                        }
 +                        buffer += 1;
 +                        t      += inc;
 +                    }
 +                }
 +	    }
 +	}
 +	else /* projective transformation */
 +	{
 +	    pixman_fixed_48_16_t t;
- 	    
++
 +	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
 +	    {
 +		register uint32_t color;
- 		
++
 +		if (v.vector[2] == 0)
 +		{
 +		    t = 0;
 +		}
 +		else
 +		{
 +		    pixman_fixed_48_16_t x, y;
- 		    
++
 +		    x = ((pixman_fixed_48_16_t) v.vector[0] << 16) / v.vector[2];
 +		    y = ((pixman_fixed_48_16_t) v.vector[1] << 16) / v.vector[2];
 +		    t = ((a * x + b * y) >> 16) + off;
 +		}
- 		
++
 + 		color = _gradient_walker_pixel( &walker, t );
 +		while (buffer < end)
 +		    *(buffer++) = color;
 +	    }
 +	    else
 +	    {
 +		while (buffer < end)
 +		{
 +		    if (!mask || *mask++ & maskBits)
 +		    {
 +			if (v.vector[2] == 0) {
 +			    t = 0;
 +			} else {
 +			    pixman_fixed_48_16_t x, y;
 +			    x = ((pixman_fixed_48_16_t)v.vector[0] << 16) / v.vector[2];
 +			    y = ((pixman_fixed_48_16_t)v.vector[1] << 16) / v.vector[2];
 +			    t = ((a*x + b*y) >> 16) + off;
 +			}
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    ++buffer;
 +		    v.vector[0] += unit.vector[0];
 +		    v.vector[1] += unit.vector[1];
 +		    v.vector[2] += unit.vector[2];
 +		}
 +            }
 +        }
 +    } else {
- 	
++
 +/*
 + * In the radial gradient problem we are given two circles (c₁,r₁) and
 + * (c₂,r₂) that define the gradient itself. Then, for any point p, we
 + * must compute the value(s) of t within [0.0, 1.0] representing the
 + * circle(s) that would color the point.
 + *
 + * There are potentially two values of t since the point p can be
 + * colored by both sides of the circle, (which happens whenever one
 + * circle is not entirely contained within the other).
 + *
 + * If we solve for a value of t that is outside of [0.0, 1.0] then we
 + * use the extend mode (NONE, REPEAT, REFLECT, or PAD) to map to a
 + * value within [0.0, 1.0].
 + *
 + * Here is an illustration of the problem:
 + *
 + *              p₂
 + *           p  •
 + *           •   ╲
 + *        ·       ╲r₂
 + *  p₁ ·           ╲
 + *  •              θ╲
 + *   ╲             ╌╌•
 + *    ╲r₁        ·   c₂
 + *    θ╲    ·
 + *    ╌╌•
 + *      c₁
 + *
 + * Given (c₁,r₁), (c₂,r₂) and p, we must find an angle θ such that two
 + * points p₁ and p₂ on the two circles are collinear with p. Then, the
 + * desired value of t is the ratio of the length of p₁p to the length
 + * of p₁p₂.
 + *
 + * So, we have six unknown values: (p₁x, p₁y), (p₂x, p₂y), θ and t.
 + * We can also write six equations that constrain the problem:
 + *
 + * Point p₁ is a distance r₁ from c₁ at an angle of θ:
 + *
 + *	1. p₁x = c₁x + r₁·cos θ
 + *	2. p₁y = c₁y + r₁·sin θ
 + *
 + * Point p₂ is a distance r₂ from c₂ at an angle of θ:
 + *
 + *	3. p₂x = c₂x + r₂·cos θ
 + *	4. p₂y = c₂y + r₂·sin θ
 + *
 + * Point p lies at a fraction t along the line segment p₁p₂:
 + *
 + *	5. px = t·p₂x + (1-t)·p₁x
 + *	6. py = t·p₂y + (1-t)·p₁y
 + *
 + * To solve, first substitute 1-4 into 5 and 6:
 + *
 + * px = t·(c₂x + r₂·cos θ) + (1-t)·(c₁x + r₁·cos θ)
 + * py = t·(c₂y + r₂·sin θ) + (1-t)·(c₁y + r₁·sin θ)
 + *
 + * Then solve each for cos θ and sin θ expressed as a function of t:
 + *
 + * cos θ = (-(c₂x - c₁x)·t + (px - c₁x)) / ((r₂-r₁)·t + r₁)
 + * sin θ = (-(c₂y - c₁y)·t + (py - c₁y)) / ((r₂-r₁)·t + r₁)
 + *
 + * To simplify this a bit, we define new variables for several of the
 + * common terms as shown below:
 + *
 + *              p₂
 + *           p  •
 + *           •   ╲
 + *        ·  ┆    ╲r₂
 + *  p₁ ·     ┆     ╲
 + *  •     pdy┆      ╲
 + *   ╲       ┆       •c₂
 + *    ╲r₁    ┆   ·   ┆
 + *     ╲    ·┆       ┆cdy
 + *      •╌╌╌╌┴╌╌╌╌╌╌╌┘
 + *    c₁  pdx   cdx
 + *
 + * cdx = (c₂x - c₁x)
 + * cdy = (c₂y - c₁y)
 + *  dr =  r₂-r₁
 + * pdx =  px - c₁x
 + * pdy =  py - c₁y
 + *
 + * Note that cdx, cdy, and dr do not depend on point p at all, so can
 + * be pre-computed for the entire gradient. The simplified equations
 + * are now:
 + *
 + * cos θ = (-cdx·t + pdx) / (dr·t + r₁)
 + * sin θ = (-cdy·t + pdy) / (dr·t + r₁)
 + *
 + * Finally, to get a single function of t and eliminate the last
 + * unknown θ, we use the identity sin²θ + cos²θ = 1. First, square
 + * each equation, (we knew a quadratic was coming since it must be
 + * possible to obtain two solutions in some cases):
 + *
 + * cos²θ = (cdx²t² - 2·cdx·pdx·t + pdx²) / (dr²·t² + 2·r₁·dr·t + r₁²)
 + * sin²θ = (cdy²t² - 2·cdy·pdy·t + pdy²) / (dr²·t² + 2·r₁·dr·t + r₁²)
 + *
 + * Then add both together, set the result equal to 1, and express as a
 + * standard quadratic equation in t of the form At² + Bt + C = 0
 + *
 + * (cdx² + cdy² - dr²)·t² - 2·(cdx·pdx + cdy·pdy + r₁·dr)·t + (pdx² + pdy² - r₁²) = 0
 + *
 + * In other words:
 + *
 + * A = cdx² + cdy² - dr²
 + * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
 + * C = pdx² + pdy² - r₁²
 + *
 + * And again, notice that A does not depend on p, so can be
 + * precomputed. From here we just use the quadratic formula to solve
 + * for t:
 + *
 + * t = (-B ± ⎷(B² - 4·A·C)) / (2·A)
 + */
 +        /* radial or conical */
 +        pixman_bool_t affine = TRUE;
 +        double cx = 1.;
 +        double cy = 0.;
 +        double cz = 0.;
 +	double rx = x + 0.5;
 +	double ry = y + 0.5;
 +        double rz = 1.;
- 	
++
 +        if (pict->common.transform) {
 +            pixman_vector_t v;
 +            /* reference point is the center of the pixel */
 +            v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
 +            v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
 +            v.vector[2] = pixman_fixed_1;
 +            if (!pixman_transform_point_3d (pict->common.transform, &v))
 +                return;
- 	    
++
 +            cx = pict->common.transform->matrix[0][0]/65536.;
 +            cy = pict->common.transform->matrix[1][0]/65536.;
 +            cz = pict->common.transform->matrix[2][0]/65536.;
 +            rx = v.vector[0]/65536.;
 +            ry = v.vector[1]/65536.;
 +            rz = v.vector[2]/65536.;
 +            affine = pict->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1;
 +        }
- 	
++
 +        if (pict->common.type == RADIAL) {
 +	    radial_gradient_t *radial = (radial_gradient_t *)pict;
 +            if (affine) {
 +                while (buffer < end) {
 +		    if (!mask || *mask++ & maskBits)
 +		    {
 +			double pdx, pdy;
 +			double B, C;
 +			double det;
 +			double c1x = radial->c1.x / 65536.0;
 +			double c1y = radial->c1.y / 65536.0;
 +			double r1  = radial->c1.radius / 65536.0;
 +                        pixman_fixed_48_16_t t;
- 			
++
 +			pdx = rx - c1x;
 +			pdy = ry - c1y;
- 			
++
 +			B = -2 * (  pdx * radial->cdx
 +				    + pdy * radial->cdy
 +				    + r1 * radial->dr);
 +			C = (pdx * pdx + pdy * pdy - r1 * r1);
- 			
++
 +                        det = (B * B) - (4 * radial->A * C);
 +			if (det < 0.0)
 +			    det = 0.0;
- 			
++
 +			if (radial->A < 0)
 +			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
 +			else
 +			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
- 			
++
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    ++buffer;
- 		    
++
 +                    rx += cx;
 +                    ry += cy;
 +                }
 +            } else {
 +		/* projective */
 +                while (buffer < end) {
 +		    if (!mask || *mask++ & maskBits)
 +		    {
 +			double pdx, pdy;
 +			double B, C;
 +			double det;
 +			double c1x = radial->c1.x / 65536.0;
 +			double c1y = radial->c1.y / 65536.0;
 +			double r1  = radial->c1.radius / 65536.0;
 +                        pixman_fixed_48_16_t t;
 +			double x, y;
- 			
++
 +			if (rz != 0) {
 +			    x = rx/rz;
 +			    y = ry/rz;
 +			} else {
 +			    x = y = 0.;
 +			}
- 			
++
 +			pdx = x - c1x;
 +			pdy = y - c1y;
- 			
++
 +			B = -2 * (  pdx * radial->cdx
 +				    + pdy * radial->cdy
 +				    + r1 * radial->dr);
 +			C = (pdx * pdx + pdy * pdy - r1 * r1);
- 			
++
 +                        det = (B * B) - (4 * radial->A * C);
 +			if (det < 0.0)
 +			    det = 0.0;
- 			
++
 +			if (radial->A < 0)
 +			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
 +			else
 +			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
- 			
++
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    ++buffer;
- 		    
++
 +                    rx += cx;
 +                    ry += cy;
 +		    rz += cz;
 +                }
 +            }
 +        } else /* SourcePictTypeConical */ {
 +	    conical_gradient_t *conical = (conical_gradient_t *)pict;
 +            double a = conical->angle/(180.*65536);
 +            if (affine) {
 +                rx -= conical->center.x/65536.;
 +                ry -= conical->center.y/65536.;
- 		
++
 +                while (buffer < end) {
 +		    double angle;
- 		    
++
 +                    if (!mask || *mask++ & maskBits)
 +		    {
 +                        pixman_fixed_48_16_t   t;
- 			
++
 +                        angle = atan2(ry, rx) + a;
 +			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
- 			
++
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
- 		    
++
 +                    ++buffer;
 +                    rx += cx;
 +                    ry += cy;
 +                }
 +            } else {
 +                while (buffer < end) {
 +                    double x, y;
 +                    double angle;
- 		    
++
 +                    if (!mask || *mask++ & maskBits)
 +                    {
 +			pixman_fixed_48_16_t  t;
- 			
++
 +			if (rz != 0) {
 +			    x = rx/rz;
 +			    y = ry/rz;
 +			} else {
 +			    x = y = 0.;
 +			}
 +			x -= conical->center.x/65536.;
 +			y -= conical->center.y/65536.;
 +			angle = atan2(y, x) + a;
 +			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
- 			
++
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
- 		    
++
 +                    ++buffer;
 +                    rx += cx;
 +                    ry += cy;
 +                    rz += cz;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{   /* Fill buffer[0..width) with pixels of a bits image sampled along one
 +     * scanline that starts at pixel (x, y).
 +     *
 +     * The centre of the first pixel is pushed through pict->common.transform
 +     * (when one is set) and the resulting vector is then stepped by the
 +     * first matrix column for every following pixel.  Sampling depends on
 +     * pict->common.filter:
 +     *   NEAREST / FAST          - single texel lookup
 +     *   BILINEAR / GOOD / BEST  - 2x2 weighted average with 8-bit weights
 +     *   CONVOLUTION             - kernel read from common.filter_params
 +     * Any other filter value leaves buffer untouched.
 +     *
 +     * PIXMAN_REPEAT_NORMAL wraps coordinates with MOD().  Texels rejected by
 +     * the common.src_clip test contribute 0; the nearest and bilinear paths
 +     * for repeat + single clip rectangle skip that test.  When mask is
 +     * non-NULL, entries with (mask[i] & maskBits) == 0 are skipped and
 +     * buffer[i] is not written.  A zero third vector component yields 0.
 +     *
 +     * NOTE(review): bits and stride are assigned below but never read in
 +     * this function.
 +     */
 +    uint32_t     *bits;
 +    int32_t    stride;
 +    fetchPixelProc_32   fetch;
 +    pixman_vector_t	v;
 +    pixman_vector_t  unit;
 +    int         i;
 +    pixman_box16_t box;
 +    pixman_bool_t affine = TRUE;
-     
++
 +    fetch = fetchPixelProcForPicture_32(pict);
-     
++
 +    bits = pict->bits;
 +    stride = pict->rowstride;
-     
++
 +    /* reference point is the center of the pixel */
 +    v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1 / 2;
 +    v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1 / 2;
 +    v.vector[2] = pixman_fixed_1;
-     
++
 +    /* when using convolution filters one might get here without a transform */
 +    if (pict->common.transform)
 +    {
 +        if (!pixman_transform_point_3d (pict->common.transform, &v))  /* on failure buffer is left unwritten */
 +	{
 +            fbFinishAccess (pict->pDrawable);
 +            return;
 +        }
 +        unit.vector[0] = pict->common.transform->matrix[0][0];
 +        unit.vector[1] = pict->common.transform->matrix[1][0];
 +        unit.vector[2] = pict->common.transform->matrix[2][0];
 +        affine = v.vector[2] == pixman_fixed_1 && unit.vector[2] == 0;  /* third component stays at 1.0, so the per-pixel divide can be skipped */
 +    }
 +    else
 +    {
 +        unit.vector[0] = pixman_fixed_1;
 +        unit.vector[1] = 0;
 +        unit.vector[2] = 0;
 +    }
-     
++
 +    if (pict->common.filter == PIXMAN_FILTER_NEAREST || pict->common.filter == PIXMAN_FILTER_FAST)
 +    {
 +        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL) {
 +            if (pixman_region_n_rects (pict->common.src_clip) == 1) {
 +		for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = MOD(DIV(v.vector[1],v.vector[2]), pict->height);
 +				x = MOD(DIV(v.vector[0],v.vector[2]), pict->width);
 +			    } else {
 +				y = MOD(v.vector[1]>>16, pict->height);
 +				x = MOD(v.vector[0]>>16, pict->width);
 +			    }
 +			    *(buffer + i) = fetch(pict, x, y);
 +			}
 +		    }
- 		    
++
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = MOD(DIV(v.vector[1],v.vector[2]), pict->height);
 +				x = MOD(DIV(v.vector[0],v.vector[2]), pict->width);
 +			    } else {
 +				y = MOD(v.vector[1]>>16, pict->height);
 +				x = MOD(v.vector[0]>>16, pict->width);
 +			    }
 +			    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box))
 +				*(buffer + i) = fetch (pict, x, y);
 +			    else
 +				*(buffer + i) = 0;
 +			}
 +		    }
- 		    
++
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        } else {
 +            if (pixman_region_n_rects(pict->common.src_clip) == 1) {
 +                box = pict->common.src_clip->extents;  /* single rectangle: a plain bounds check replaces the region lookup */
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = DIV(v.vector[1],v.vector[2]);
 +				x = DIV(v.vector[0],v.vector[2]);
 +			    } else {
 +				y = v.vector[1]>>16;
 +				x = v.vector[0]>>16;
 +			    }
 +			    *(buffer + i) = ((x < box.x1) | (x >= box.x2) | (y < box.y1) | (y >= box.y2)) ?
 +				0 : fetch(pict, x, y);
 +			}
 +		    }
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +                    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = DIV(v.vector[1],v.vector[2]);
 +				x = DIV(v.vector[0],v.vector[2]);
 +			    } else {
 +				y = v.vector[1]>>16;
 +				x = v.vector[0]>>16;
 +			    }
 +			    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box))
 +				*(buffer + i) = fetch(pict, x, y);
 +			    else
 +				*(buffer + i) = 0;
 +			}
 +		    }
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        }
 +    } else if (pict->common.filter == PIXMAN_FILTER_BILINEAR	||
 +	       pict->common.filter == PIXMAN_FILTER_GOOD	||
 +	       pict->common.filter == PIXMAN_FILTER_BEST)
 +    {
 +        /* adjust vector for maximum contribution at 0.5, 0.5 of each texel. */
 +        v.vector[0] -= v.vector[2] / 2;
 +        v.vector[1] -= v.vector[2] / 2;
 +        unit.vector[0] -= unit.vector[2] / 2;
 +        unit.vector[1] -= unit.vector[2] / 2;
- 	
++
 +        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL) {
 +            if (pixman_region_n_rects(pict->common.src_clip) == 1) {
 +                for (i = 0; i < width; ++i) {
 +                    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    uint32_t ft, fb;
- 			    
++
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;  /* top 8 bits of the 16-bit fraction */
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
- 			    
++
 +			    idistx = 256 - distx;  /* each weight pair sums to 256 */
 +			    idisty = 256 - disty;
- 			    
++
 +			    x1 = MOD (x1, pict->width);
 +			    x2 = MOD (x2, pict->width);
 +			    y1 = MOD (y1, pict->height);
 +			    y2 = MOD (y2, pict->height);
- 			    
++
 +			    tl = fetch(pict, x1, y1);
 +			    tr = fetch(pict, x2, y1);
 +			    bl = fetch(pict, x1, y2);
 +			    br = fetch(pict, x2, y2);
- 			    
++
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);  /* sum is scaled by 256*256; the shift puts it back in its channel */
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    uint32_t ft, fb;
- 			    
++
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
- 			    
++
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
- 			    
++
 +			    x1 = MOD (x1, pict->width);
 +			    x2 = MOD (x2, pict->width);
 +			    y1 = MOD (y1, pict->height);
 +			    y2 = MOD (y2, pict->height);
- 			    
++
 +			    tl = pixman_region_contains_point(pict->common.src_clip, x1, y1, &box)
 +				? fetch(pict, x1, y1) : 0;
 +			    tr = pixman_region_contains_point(pict->common.src_clip, x2, y1, &box)
 +				? fetch(pict, x2, y1) : 0;
 +			    bl = pixman_region_contains_point(pict->common.src_clip, x1, y2, &box)
 +				? fetch(pict, x1, y2) : 0;
 +			    br = pixman_region_contains_point(pict->common.src_clip, x2, y2, &box)
 +				? fetch(pict, x2, y2) : 0;
- 			    
++
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
- 		    
++
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        } else {
 +            if (pixman_region_n_rects(pict->common.src_clip) == 1) {
 +                box = pict->common.src_clip->extents;
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    pixman_bool_t x1_out, x2_out, y1_out, y2_out;
 +			    uint32_t ft, fb;
- 			    
++
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
- 			    
++
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
- 			    
++
 +			    x1_out = (x1 < box.x1) | (x1 >= box.x2);
 +			    x2_out = (x2 < box.x1) | (x2 >= box.x2);
 +			    y1_out = (y1 < box.y1) | (y1 >= box.y2);
 +			    y2_out = (y2 < box.y1) | (y2 >= box.y2);
- 			    
++
 +			    tl = x1_out|y1_out ? 0 : fetch(pict, x1, y1);
 +			    tr = x2_out|y1_out ? 0 : fetch(pict, x2, y1);
 +			    bl = x1_out|y2_out ? 0 : fetch(pict, x1, y2);
 +			    br = x2_out|y2_out ? 0 : fetch(pict, x2, y2);
- 			    
++
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
- 		    
++
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +                    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    uint32_t ft, fb;
- 			    
++
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
- 			    
++
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
- 			    
++
 +			    tl = pixman_region_contains_point(pict->common.src_clip, x1, y1, &box)
 +				? fetch(pict, x1, y1) : 0;
 +			    tr = pixman_region_contains_point(pict->common.src_clip, x2, y1, &box)
 +				? fetch(pict, x2, y1) : 0;
 +			    bl = pixman_region_contains_point(pict->common.src_clip, x1, y2, &box)
 +				? fetch(pict, x1, y2) : 0;
 +			    br = pixman_region_contains_point(pict->common.src_clip, x2, y2, &box)
 +				? fetch(pict, x2, y2) : 0;
- 			    
++
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
- 		    
++
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        }
 +    } else if (pict->common.filter == PIXMAN_FILTER_CONVOLUTION) {
 +        pixman_fixed_t *params = pict->common.filter_params;
 +        int32_t cwidth = pixman_fixed_to_int(params[0]);
 +        int32_t cheight = pixman_fixed_to_int(params[1]);
 +        int xoff = (params[0] - pixman_fixed_1) >> 1;  /* (kernel width - 1) / 2, still in fixed point */
 +	int yoff = (params[1] - pixman_fixed_1) >> 1;
 +        params += 2;  /* kernel coefficients follow the two size entries */
 +        for (i = 0; i < width; ++i) {
 +	    if (!mask || mask[i] & maskBits)
 +	    {
 +		if (!v.vector[2]) {
 +		    *(buffer + i) = 0;
 +		} else {
 +		    int x1, x2, y1, y2, x, y;
 +		    int32_t srtot, sgtot, sbtot, satot;
 +		    pixman_fixed_t *p = params;
- 		    
++
 +		    if (!affine) {
 +			pixman_fixed_48_16_t tmp;
 +			tmp = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2] - xoff;
 +			x1 = pixman_fixed_to_int(tmp);
 +			tmp = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2] - yoff;
 +			y1 = pixman_fixed_to_int(tmp);
 +		    } else {
 +			x1 = pixman_fixed_to_int(v.vector[0] - xoff);
 +			y1 = pixman_fixed_to_int(v.vector[1] - yoff);
 +		    }
 +		    x2 = x1 + cwidth;
 +		    y2 = y1 + cheight;
- 		    
++
 +		    srtot = sgtot = sbtot = satot = 0;
- 		    
++
 +		    for (y = y1; y < y2; y++) {
 +			int ty = (pict->common.repeat == PIXMAN_REPEAT_NORMAL) ? MOD (y, pict->height) : y;
 +			for (x = x1; x < x2; x++) {
 +			    if (*p) {
 +				int tx = (pict->common.repeat == PIXMAN_REPEAT_NORMAL) ? MOD (x, pict->width) : x;
 +				if (pixman_region_contains_point (pict->common.src_clip, tx, ty, &box)) {
 +				    uint32_t c = fetch(pict, tx, ty);
- 				    
++
 +				    srtot += Red(c) * *p;
 +				    sgtot += Green(c) * *p;
 +				    sbtot += Blue(c) * *p;
 +				    satot += Alpha(c) * *p;
 +				}
 +			    }
 +			    p++;
 +			}
 +		    }
- 		    
++
 +		    satot >>= 16;  /* drop the 16 fractional bits carried by the fixed-point coefficients */
 +		    srtot >>= 16;
 +		    sgtot >>= 16;
 +		    sbtot >>= 16;
- 		    
++
 +		    if (satot < 0) satot = 0; else if (satot > 0xff) satot = 0xff;
 +		    if (srtot < 0) srtot = 0; else if (srtot > 0xff) srtot = 0xff;
 +		    if (sgtot < 0) sgtot = 0; else if (sgtot > 0xff) sgtot = 0xff;
 +		    if (sbtot < 0) sbtot = 0; else if (sbtot > 0xff) sbtot = 0xff;
- 		    
++
 +		    *(buffer + i) = ((satot << 24) |
 +				     (srtot << 16) |
 +				     (sgtot <<  8) |
 +				     (sbtot       ));
 +		}
 +	    }
 +            v.vector[0] += unit.vector[0];
 +            v.vector[1] += unit.vector[1];
 +            v.vector[2] += unit.vector[2];
 +        }
 +    }
-     
++
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +
 +static void fbFetchExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{   /* Fetch a transformed scanline and apply the image's separate alpha map.
 +     *
 +     * Without an alpha map this is just fbFetchTransformed().  Otherwise the
 +     * colour scanline goes to buffer and the alpha map's scanline, shifted
 +     * by common.alpha_origin, goes to a scratch buffer (a local array for up
 +     * to SCANLINE_BUFFER_LENGTH pixels, a heap allocation beyond that).
 +     * Every pixel that passes the mask test then takes the map's alpha and
 +     * has its R, G and B scaled by that alpha.
 +     */
 +    int i;
 +    uint32_t _alpha_buffer[SCANLINE_BUFFER_LENGTH];
 +    uint32_t *alpha_buffer = _alpha_buffer;
-     
++
 +    if (!pict->common.alpha_map) {
 +        fbFetchTransformed (pict, x, y, width, buffer, mask, maskBits);
 +	return;
 +    }
 +    if (width > SCANLINE_BUFFER_LENGTH)
 +        alpha_buffer = (uint32_t *) pixman_malloc_ab (width, sizeof(uint32_t));  /* NOTE(review): used below without a NULL check - confirm the allocation cannot fail */
-     
++
 +    fbFetchTransformed(pict, x, y, width, buffer, mask, maskBits);
 +    fbFetchTransformed((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x,
 +		       y - pict->common.alpha_origin.y, width, alpha_buffer,
 +		       mask, maskBits);
 +    for (i = 0; i < width; ++i) {
 +        if (!mask || mask[i] & maskBits)
 +	{
 +	    int a = alpha_buffer[i]>>24;  /* alpha is the top byte of the alpha-map pixel */
 +	    *(buffer + i) = (a << 24)
 +		| (div_255(Red(*(buffer + i)) * a) << 16)
 +		| (div_255(Green(*(buffer + i)) * a) << 8)
 +		| (div_255(Blue(*(buffer + i)) * a));
 +	}
 +    }
-     
++
 +    if (alpha_buffer != _alpha_buffer)
 +        free(alpha_buffer);
 +}
 +
 +static void fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 +{   /* Write width 32-bit pixels from buffer into row y of pict, starting at
 +     * column x, through the routine returned by storeProcForPicture_32().
 +     */
 +    uint32_t *bits;
 +    int32_t stride;
 +    storeProc_32 store = storeProcForPicture_32(pict);
 +    const pixman_indexed_t * indexed = pict->indexed;
-     
++
 +    bits = pict->bits;
 +    stride = pict->rowstride;
 +    bits += y*stride;  /* bits is uint32_t *, so rowstride is applied in uint32_t units */
 +    store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +static void fbStore64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer)
 +{   /* Write width 64-bit pixels from buffer into row y of pict, starting at
 +     * column x, through the routine returned by storeProcForPicture_64().
 +     */
 +    uint32_t *bits;
 +    int32_t stride;
 +    storeProc_64 store = storeProcForPicture_64(pict);
 +    const pixman_indexed_t * indexed = pict->indexed;
-     
++
 +    bits = pict->bits;
 +    stride = pict->rowstride;
 +    bits += y*stride;  /* bits is uint32_t *, so rowstride is applied in uint32_t units */
 +    store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +static void fbStoreExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 +{   /* Store one scanline to an image that may carry a separate alpha map.
 +     *
 +     * Without an alpha map this is just fbStore().  Otherwise the same
 +     * buffer is handed to two store routines: the image's own, at (x, y),
 +     * and the alpha map's, at (x, y) shifted back by common.alpha_origin.
 +     */
 +    uint32_t *bits, *alpha_bits;
 +    int32_t stride, astride;
 +    int ax, ay;
 +    storeProc_32 store;
 +    storeProc_32 astore;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    const pixman_indexed_t * aindexed;
-     
++
 +    if (!pict->common.alpha_map) {
 +        fbStore(pict, x, y, width, buffer);
 +	return;
 +    }
-     
++
 +    store = storeProcForPicture_32(pict);
 +    astore = storeProcForPicture_32(pict->common.alpha_map);
 +    aindexed = pict->common.alpha_map->indexed;
-     
++
 +    ax = x;
 +    ay = y;
-     
++
 +    bits = pict->bits;
 +    stride = pict->rowstride;
-     
++
 +    alpha_bits = pict->common.alpha_map->bits;
 +    astride = pict->common.alpha_map->rowstride;
-     
++
 +    bits       += y*stride;
 +    alpha_bits += (ay - pict->common.alpha_origin.y)*astride;
-     
-     
++
++
 +    store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
 +    astore((pixman_image_t *)pict->common.alpha_map,
 +	   alpha_bits, buffer, ax - pict->common.alpha_origin.x, width, aindexed);
-     
++
 +    fbFinishAccess (pict->alpha_map->pDrawable);  /* NOTE(review): every other access here goes through pict->common.alpha_map - presumably fbFinishAccess discards its argument in this build; confirm */
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +static void fbStoreExternalAlpha64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer)
 +{   /* 64-bit-pixel counterpart of the external-alpha store.
 +     *
 +     * Without an alpha map this is just fbStore64().  Otherwise the same
 +     * buffer is handed to two store routines: the image's own, at (x, y),
 +     * and the alpha map's, at (x, y) shifted back by common.alpha_origin.
 +     */
 +    uint32_t *bits, *alpha_bits;
 +    int32_t stride, astride;
 +    int ax, ay;
 +    storeProc_64 store;
 +    storeProc_64 astore;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    const pixman_indexed_t * aindexed;
-     
++
 +    if (!pict->common.alpha_map) {
 +        fbStore64(pict, x, y, width, buffer);
 +	return;
 +    }
-     
++
 +    store = storeProcForPicture_64(pict);
 +    astore = storeProcForPicture_64(pict->common.alpha_map);
 +    aindexed = pict->common.alpha_map->indexed;
-     
++
 +    ax = x;
 +    ay = y;
-     
++
 +    bits = pict->bits;
 +    stride = pict->rowstride;
-     
++
 +    alpha_bits = pict->common.alpha_map->bits;
 +    astride = pict->common.alpha_map->rowstride;
-     
++
 +    bits       += y*stride;
 +    alpha_bits += (ay - pict->common.alpha_origin.y)*astride;
-     
-     
++
++
 +    store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
 +    astore((pixman_image_t *)pict->common.alpha_map,
 +	   alpha_bits, buffer, ax - pict->common.alpha_origin.x, width, aindexed);
-     
++
 +    fbFinishAccess (pict->alpha_map->pDrawable);  /* NOTE(review): every other access here goes through pict->common.alpha_map - presumably fbFinishAccess discards its argument in this build; confirm */
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +static void
 +fbExpand(uint32_t * source, uint64_t * dest, int width)
 +{   /* Widen width 32-bit pixels (four 8-bit channels, alpha in the top
 +     * byte) into 64-bit pixels with four 16-bit channels in the same order.
 +     *
 +     * Each 8-bit value lands in the upper byte of its 16-bit channel and the
 +     * lower byte stays 0, so 0xff becomes 0xff00.  source[i] is read before
 +     * dest[i] is written.
 +     *
 +     * NOTE(review): the value is not replicated into the low byte, so full
 +     * intensity is 0xff00 rather than 0xffff - confirm that is intended.
 +     */
 +    int i;
 +    for(i = 0; i < width; i++) {
 +	uint32_t p = source[i];
 +	uint64_t r = (uint64_t)(p & 0x00ff0000) << 24;
 +	uint64_t g = (uint64_t)(p & 0x0000ff00) << 16;
 +	uint64_t b = (uint64_t)(p & 0x000000ff) << 8;
 +	uint64_t a = (uint64_t)(p & 0xff000000) << 32;
- 	    
++
 +	dest[i] = r | g | b | a;
 +    }
 +}
 +
 +static void
 +fbContract(uint64_t * source, uint32_t * dest, int width)
 +{   /* Narrow width 64-bit pixels (four 16-bit channels) into 32-bit pixels
 +     * by keeping only the upper byte of every 16-bit channel; the lower
 +     * byte of each channel is discarded.
 +     */
 +    int i;
 +    for(i = 0; i < width; i++) {
 +       uint64_t p = source[i];
 +       uint64_t r = (p >> 24) & 0x00ff0000;
 +       uint64_t g = (p >> 16) & 0x0000ff00;
 +       uint64_t b = (p >> 8) & 0x000000ff;
 +       uint64_t a = (p >> 32) & 0xff000000;
-        
++
 +       dest[i] = r | g | b | a;
 +    }
 +}
 +
 +static uint32_t
 +fbContractPixel(uint64_t p)
 +{   /* Narrow a single 64-bit pixel (four 16-bit channels) to 32 bits by
 +     * keeping the upper byte of every channel.  The uint64_t result of the
 +     * OR is converted to uint32_t on return; only the low 32 bits are set.
 +     */
 +    uint64_t r = (p >> 24) & 0x00ff0000;
 +    uint64_t g = (p >> 16) & 0x0000ff00;
 +    uint64_t b = (p >> 8) & 0x000000ff;
 +    uint64_t a = (p >> 32) & 0xff000000;
-        
++
 +    return r | g | b | a;
 +}
 +
 +static void
 +pixmanFetchSourcePict64(source_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{   /* 64-bit wrapper around pixmanFetchSourcePict(): the mask is narrowed
 +     * to 32 bits, the 32-bit fetch runs, and its result is widened back
 +     * into buffer.
 +     *
 +     * No extra storage is used.  Viewed as uint32_t slots, the first width
 +     * slots of buffer hold the narrowed mask and the next width slots hold
 +     * the 32-bit result; fbExpand() then rewrites buffer front to back.
 +     */
 +    // use the space in the existing buffer for the 32-bit mask and result
 +    uint32_t * mask32 = (uint32_t *) buffer;
 +    uint32_t * buffer32 = ((uint32_t *) buffer) + width;
 +    uint32_t maskBits32 = fbContractPixel(maskBits);
 +
 +    if(mask)
 +	fbContract(mask, mask32, width);
 +    else
 +	mask32 = NULL;
 +    pixmanFetchSourcePict(pict, x, y, width, buffer32, mask32, maskBits32);
 +    fbExpand(buffer32, buffer, width);  /* in place: pixel i is written only after 32-bit pixel i has been read */
 +}
 +
 +static void
 +fbFetchExternalAlpha64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{   /* 64-bit wrapper around fbFetchExternalAlpha(): the mask is narrowed to
 +     * 32 bits, the 32-bit fetch runs, and its result is widened back into
 +     * buffer.
 +     *
 +     * Viewed as uint32_t slots, the first width slots of buffer hold the
 +     * narrowed mask and the next width slots hold the 32-bit result.
 +     */
 +    uint32_t * mask32 = (uint32_t *) buffer;
 +    uint32_t * buffer32 = ((uint32_t *) buffer) + width;
 +    uint32_t maskBits32 = fbContractPixel(maskBits);
 +
 +    if (mask)
 +	fbContract(mask, mask32, width);
 +    else
 +	mask32 = NULL;
 +    fbFetchExternalAlpha(pict, x, y, width, buffer32, mask32, maskBits32);
 +    fbExpand(buffer32, buffer, width);  /* in place: pixel i is written only after 32-bit pixel i has been read */
 +}
 +
 +static void
 +fbFetchTransformed64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{   /* 64-bit wrapper around fbFetchTransformed(): the mask is narrowed to
 +     * 32 bits, the 32-bit fetch runs, and its result is widened back into
 +     * buffer.
 +     *
 +     * Viewed as uint32_t slots, the first width slots of buffer hold the
 +     * narrowed mask and the next width slots hold the 32-bit result.
 +     */
 +    uint32_t * mask32 = (uint32_t *) buffer;
 +    uint32_t * buffer32 = ((uint32_t *) buffer) + width;
 +    uint32_t maskBits32 = fbContractPixel(maskBits);
 +
 +    if (mask)
 +	fbContract(mask, mask32, width);
 +    else
 +	mask32 = NULL;
 +    fbFetchTransformed(pict, x, y, width, buffer32, mask32, maskBits32);
 +    fbExpand(buffer32, buffer, width);  /* in place: pixel i is written only after 32-bit pixel i has been read */
 +}
 +
 +typedef void (*scanStoreProc)(pixman_image_t *, int, int, int, uint32_t *);  /* 32-bit scanline store; the fbStore-style routines are cast to this */
 +typedef void (*scanFetchProc)(pixman_image_t *, int, int, int, uint32_t *,
 +			      uint32_t *, uint32_t);  /* 32-bit scanline fetch with a trailing mask pointer and mask-bits word */
 +
 +typedef void (*scanStoreProc64)(pixman_image_t *, int, int, int, uint64_t *);  /* same shape as scanStoreProc with a 64-bit pixel buffer */
 +typedef void (*scanFetchProc64)(pixman_image_t *, int, int, int, uint64_t *,
 +				uint64_t *, uint64_t);  /* same shape as scanFetchProc with 64-bit buffer, mask and mask bits */
 +
 +//#ifndef PIXMAN_FB_ACCESSORS
 +//static
 +//#endif
 +void
 +PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 +			       uint32_t *scanline_buffer)
 +{
 +    uint32_t *src_buffer = scanline_buffer;
 +    uint32_t *dest_buffer = src_buffer + data->width;
 +    int i;
 +    scanStoreProc store;
 +    scanFetchProc fetchSrc = NULL, fetchMask = NULL, fetchDest = NULL;
 +    unsigned int srcClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    unsigned int maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    uint32_t *bits;
 +    int32_t stride;
 +    int xoff, yoff;
-     
++
 +    if (data->op == PIXMAN_OP_CLEAR)
 +        fetchSrc = NULL;
 +    else if (IS_SOURCE_IMAGE (data->src))
 +    {
 +	fetchSrc = (scanFetchProc)pixmanFetchSourcePict;
 +	srcClass = SourcePictureClassify ((source_image_t *)data->src,
 +					  data->xSrc, data->ySrc,
 +					  data->width, data->height);
 +    }
 +    else
 +    {
 +	bits_image_t *bits = (bits_image_t *)data->src;
- 	
++
 +	if (bits->common.alpha_map)
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetchExternalAlpha;
 +	}
 +	else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		 bits->width == 1 &&
 +		 bits->height == 1)
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetchSolid;
 +	    srcClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	}
 +	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetch;
 +	}
 +	else
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetchTransformed;
 +	}
 +    }
-     
++
 +    if (!data->mask || data->op == PIXMAN_OP_CLEAR)
 +    {
 +	fetchMask = NULL;
 +    }
 +    else
 +    {
 +	if (IS_SOURCE_IMAGE (data->mask))
 +	{
 +	    fetchMask = (scanFetchProc)pixmanFetchSourcePict;
 +	    maskClass = SourcePictureClassify ((source_image_t *)data->mask,
 +					       data->xMask, data->yMask,
 +					       data->width, data->height);
 +	}
 +	else
 +	{
 +	    bits_image_t *bits = (bits_image_t *)data->mask;
- 	    
++
 +	    if (bits->common.alpha_map)
 +	    {
 +		fetchMask = (scanFetchProc)fbFetchExternalAlpha;
 +	    }
 +	    else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		     bits->width == 1 && bits->height == 1)
 +	    {
 +		fetchMask = (scanFetchProc)fbFetchSolid;
 +		maskClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	    }
 +	    else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +		fetchMask = (scanFetchProc)fbFetch;
 +	    else
 +		fetchMask = (scanFetchProc)fbFetchTransformed;
 +	}
 +    }
-     
++
 +    if (data->dest->common.alpha_map)
 +    {
 +	fetchDest = (scanFetchProc)fbFetchExternalAlpha;
 +	store = (scanStoreProc)fbStoreExternalAlpha;
- 	
++
 +	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 +	    fetchDest = NULL;
 +    }
 +    else
 +    {
 +	fetchDest = (scanFetchProc)fbFetch;
 +	store = (scanStoreProc)fbStore;
- 	
++
 +	switch (data->op)
 +	{
 +	case PIXMAN_OP_CLEAR:
 +	case PIXMAN_OP_SRC:
 +	    fetchDest = NULL;
 +#ifndef PIXMAN_FB_ACCESSORS
 +	    /* fall-through */
 +	case PIXMAN_OP_ADD:
 +	case PIXMAN_OP_OVER:
 +	    switch (data->dest->bits.format) {
 +	    case PIXMAN_a8r8g8b8:
 +	    case PIXMAN_x8r8g8b8:
 +		store = NULL;
 +		break;
 +	    default:
 +		break;
 +	    }
 +#endif
 +	    break;
 +	}
 +    }
-     
++
 +    if (!store)
 +    {
 +	bits = data->dest->bits.bits;
 +	stride = data->dest->bits.rowstride;
 +	xoff = yoff = 0;
 +    }
 +    else
 +    {
 +	bits = NULL;
 +	stride = 0;
 +	xoff = yoff = 0;
 +    }
-     
++
 +    if (fetchSrc		   &&
 +	fetchMask		   &&
 +	data->mask		   &&
- 	data->mask->common.type == BITS && 
++	data->mask->common.type == BITS &&
 +	data->mask->common.component_alpha &&
 +	PIXMAN_FORMAT_RGB (data->mask->bits.format))
 +    {
 +	uint32_t *mask_buffer = dest_buffer + data->width;
 +	CombineFuncC compose = pixman_composeFunctions.combineC[data->op];
 +	if (!compose)
 +	    return;
- 	
++
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
- 		    
++
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
- 		
++
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xffffffff);
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +	    }
- 	    
++
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
- 		
++
 +		/* blend */
 +		compose (dest_buffer, src_buffer, mask_buffer, data->width);
- 		
++
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		/* blend */
 +		compose (bits + (data->yDest + i+ yoff) * stride +
 +			 data->xDest + xoff,
 +			 src_buffer, mask_buffer, data->width);
 +	    }
 +	}
 +    }
 +    else
 +    {
 +	uint32_t *src_mask_buffer = 0, *mask_buffer = 0;
 +	CombineFuncU compose = pixman_composeFunctions.combineU[data->op];
 +	if (!compose)
 +	    return;
- 	
++
 +	if (fetchMask)
 +	    mask_buffer = dest_buffer + data->width;
- 	
++
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
- 		    
++
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
- 		
++
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
- 		    
++
 +		    if (mask_buffer)
 +		    {
 +			pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +			src_mask_buffer = mask_buffer;
 +		    }
 +		    else
 +			src_mask_buffer = src_buffer;
- 		    
++
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xff000000);
- 		    
++
 +		    if (mask_buffer)
 +			pixman_composeFunctions.combineMaskU (src_buffer,
 +							      mask_buffer,
 +							      data->width);
- 		    
++
 +		    src_mask_buffer = src_buffer;
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
- 		
++
 +		pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
- 		
++
 +		src_mask_buffer = mask_buffer;
 +	    }
- 	    
++
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
- 		
++
 +		/* blend */
 +		compose (dest_buffer, src_mask_buffer, data->width);
- 		
++
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		/* blend */
 +		compose (bits + (data->yDest + i+ yoff) * stride +
 +			 data->xDest + xoff,
 +			 src_mask_buffer, data->width);
 +	    }
 +	}
 +    }
-     
++
 +    if (!store)
 +	fbFinishAccess (data->dest->pDrawable);
 +}
 +
 +
 +void
 +PIXMAN_COMPOSITE_RECT_GENERAL_WIDE (const FbComposeData *data,
 +			            uint64_t *scanline_buffer)
 +{
 +    uint64_t *src_buffer = scanline_buffer;
 +    uint64_t *dest_buffer = src_buffer + data->width;
 +    int i;
 +    scanStoreProc64 store;
 +    scanFetchProc64 fetchSrc = NULL, fetchMask = NULL, fetchDest = NULL;
 +    unsigned int srcClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    unsigned int maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    uint32_t *bits;
 +    int32_t stride;
 +    int xoff, yoff;
-     
++
 +    if (data->op == PIXMAN_OP_CLEAR)
 +        fetchSrc = NULL;
 +    else if (IS_SOURCE_IMAGE (data->src))
 +    {
 +	fetchSrc = (scanFetchProc64)pixmanFetchSourcePict64;
 +	srcClass = SourcePictureClassify ((source_image_t *)data->src,
 +					  data->xSrc, data->ySrc,
 +					  data->width, data->height);
 +    }
 +    else
 +    {
 +	bits_image_t *bits = (bits_image_t *)data->src;
- 	
++
 +	if (bits->common.alpha_map)
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetchExternalAlpha64;
 +	}
 +	else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		 bits->width == 1 &&
 +		 bits->height == 1)
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetchSolid64;
 +	    srcClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	}
 +	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetch64;
 +	}
 +	else
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetchTransformed64;
 +	}
 +    }
-     
++
 +    if (!data->mask || data->op == PIXMAN_OP_CLEAR)
 +    {
 +	fetchMask = NULL;
 +    }
 +    else
 +    {
 +	if (IS_SOURCE_IMAGE (data->mask))
 +	{
 +	    fetchMask = (scanFetchProc64)pixmanFetchSourcePict64;
 +	    maskClass = SourcePictureClassify ((source_image_t *)data->mask,
 +					       data->xMask, data->yMask,
 +					       data->width, data->height);
 +	}
 +	else
 +	{
 +	    bits_image_t *bits = (bits_image_t *)data->mask;
- 	    
++
 +	    if (bits->common.alpha_map)
 +	    {
 +		fetchMask = (scanFetchProc64)fbFetchExternalAlpha64;
 +	    }
 +	    else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		     bits->width == 1 && bits->height == 1)
 +	    {
 +		fetchMask = (scanFetchProc64)fbFetchSolid64;
 +		maskClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	    }
 +	    else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +		fetchMask = (scanFetchProc64)fbFetch64;
 +	    else
 +		fetchMask = (scanFetchProc64)fbFetchTransformed64;
 +	}
 +    }
-     
++
 +    if (data->dest->common.alpha_map)
 +    {
 +	fetchDest = (scanFetchProc64)fbFetchExternalAlpha64;
 +	store = (scanStoreProc64)fbStoreExternalAlpha64;
- 	
++
 +	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 +	    fetchDest = NULL;
 +    }
 +    else
 +    {
 +	fetchDest = (scanFetchProc64)fbFetch64;
 +	store = (scanStoreProc64)fbStore64;
- 	
++
 +	switch (data->op)
 +	{
 +	case PIXMAN_OP_CLEAR:
 +	case PIXMAN_OP_SRC:
 +	    fetchDest = NULL;
 +	    break;
 +	}
 +    }
-     
++
 +    if (!store)
 +    {
 +	bits = data->dest->bits.bits;
 +	stride = data->dest->bits.rowstride;
 +	xoff = yoff = 0;
 +    }
 +    else
 +    {
 +	bits = NULL;
 +	stride = 0;
 +	xoff = yoff = 0;
 +    }
-     
++
 +    if (fetchSrc		   &&
 +	fetchMask		   &&
 +	data->mask		   &&
- 	data->mask->common.type == BITS && 
++	data->mask->common.type == BITS &&
 +	data->mask->common.component_alpha &&
 +	PIXMAN_FORMAT_RGB (data->mask->bits.format))
 +    {
 +	uint64_t *mask_buffer = dest_buffer + data->width;
 +	CombineFuncC64 compose = pixman_composeFunctions_wide.combineC[data->op];
 +	if (!compose)
 +	    return;
- 	
++
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
- 		    
++
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
- 		
++
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xffffffffffffffffLL);
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +	    }
- 	    
++
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
- 		
++
 +		/* blend */
 +		compose (dest_buffer, src_buffer, mask_buffer, data->width);
- 		
++
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		assert(!"need to have a storeproc with 64-bit internal format");
 +	    }
 +	}
 +    }
 +    else
 +    {
 +	uint64_t *src_mask_buffer = 0, *mask_buffer = 0;
 +	CombineFuncU64 compose = pixman_composeFunctions_wide.combineU[data->op];
 +	if (!compose)
 +	    return;
- 	
++
 +	if (fetchMask)
 +	    mask_buffer = dest_buffer + data->width;
- 	
++
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
- 		    
++
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
- 		
++
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
- 		    
++
 +		    if (mask_buffer)
 +		    {
 +			pixman_composeFunctions_wide.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +			src_mask_buffer = mask_buffer;
 +		    }
 +		    else
 +			src_mask_buffer = src_buffer;
- 		    
++
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xffff000000000000LL);
- 		    
++
 +		    if (mask_buffer)
 +			pixman_composeFunctions_wide.combineMaskU (src_buffer,
 +							      mask_buffer,
 +							      data->width);
- 		    
++
 +		    src_mask_buffer = src_buffer;
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
- 		
++
 +		pixman_composeFunctions_wide.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
- 		
++
 +		src_mask_buffer = mask_buffer;
 +	    }
- 	    
++
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
- 		
++
 +		/* blend */
 +		compose (dest_buffer, src_mask_buffer, data->width);
- 		
++
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		assert(!"need to have a storeproc with 64-bit internal format");
 +	    }
 +	}
 +    }
-     
++
 +    if (!store)
 +	fbFinishAccess (data->dest->pDrawable);
 +}
diff --cc pixman/pixman-compose.c
index 2ebb439,31ad490..2ddfceb
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@@ -144,12 -1875,13 +144,12 @@@ fbCombineAtopU (full_t *dest, const ful
  {
      int i;
      for (i = 0; i < width; ++i) {
 -        uint32_t s = *(src + i);
 -        uint32_t d = *(dest + i);
 -        uint32_t dest_a = Alpha(d);
 -        uint32_t src_ia = Alpha(~s);
 +        full_t s = *(src + i);
 +        full_t d = *(dest + i);
 +        full_t dest_a = Alpha(d);
 +        full_t src_ia = Alpha(~s);
- 	
+ 
 -        FbByteAddMul(s, dest_a, d, src_ia);
 -	*(dest + i) = s;
 +	*(dest + i) = FullAddMul(s, dest_a, d, src_ia);
      }
  }
  
@@@ -158,12 -1890,13 +158,12 @@@ fbCombineAtopReverseU (full_t *dest, co
  {
      int i;
      for (i = 0; i < width; ++i) {
 -        uint32_t s = *(src + i);
 -        uint32_t d = *(dest + i);
 -        uint32_t src_a = Alpha(s);
 -        uint32_t dest_ia = Alpha(~d);
 +        full_t s = *(src + i);
 +        full_t d = *(dest + i);
 +        full_t src_a = Alpha(s);
 +        full_t dest_ia = Alpha(~d);
- 	
+ 
 -        FbByteAddMul(s, dest_ia, d, src_a);
 -	*(dest + i) = s;
 +	*(dest + i) = FullAddMul(s, dest_ia, d, src_a);
      }
  }
  
@@@ -172,12 -1905,13 +172,12 @@@ fbCombineXorU (full_t *dest, const full
  {
      int i;
      for (i = 0; i < width; ++i) {
 -        uint32_t s = *(src + i);
 -        uint32_t d = *(dest + i);
 -        uint32_t src_ia = Alpha(~s);
 -        uint32_t dest_ia = Alpha(~d);
 +        full_t s = *(src + i);
 +        full_t d = *(dest + i);
 +        full_t src_ia = Alpha(~s);
 +        full_t dest_ia = Alpha(~d);
- 	
+ 
 -        FbByteAddMul(s, dest_ia, d, src_ia);
 -	*(dest + i) = s;
 +	*(dest + i) = FullAddMul(s, dest_ia, d, src_ia);
      }
  }
  
@@@ -197,18 -1932,19 +197,18 @@@ fbCombineSaturateU (full_t *dest, cons
  {
      int i;
      for (i = 0; i < width; ++i) {
 -        uint32_t  s = *(src + i);
 -        uint32_t d = *(dest + i);
 -        uint16_t  sa, da;
 +        full_t s = *(src + i);
 +        full_t d = *(dest + i);
 +        half_t sa, da;
- 	
+ 
 -        sa = s >> 24;
 -        da = ~d >> 24;
 +        sa = s >> (SHIFT * 3);
 +        da = ~d >> (SHIFT * 3);
          if (sa > da)
          {
 -            sa = FbIntDiv(da, sa);
 -            FbByteMul(s, sa);
 +            sa = IntDiv(da, sa);
 +            s = FullMul(s, sa);
          };
 -        FbByteAdd(d, s);
 -	*(dest + i) = d;
 +	*(dest + i) = FullAdd(d, s);
      }
  }
  
@@@ -252,15 -1988,15 +252,15 @@@
  #define CombineXor	(CombineAOut|CombineBOut)
  
  /* portion covered by a but not b */
 -static INLINE uint8_t
 -fbCombineDisjointOutPart (uint8_t a, uint8_t b)
 +static INLINE quart_t
 +fbCombineDisjointOutPart (quart_t a, quart_t b)
  {
      /* min (1, (1-b) / a) */
-     
+ 
      b = ~b;		    /* 1 - b */
      if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
 -	return 0xff;	    /* 1 */
 -    return FbIntDiv(b,a);   /* (1-b) / a */
 +	return QUARTER_MASK;/* 1 */
 +    return IntDiv(b,a);   /* (1-b) / a */
  }
  
  /* portion covered by both a and b */
@@@ -282,13 -2018,13 +282,13 @@@ fbCombineDisjointGeneralU (full_t *dest
  {
      int i;
      for (i = 0; i < width; ++i) {
 -        uint32_t s = *(src + i);
 -        uint32_t d = *(dest + i);
 -        uint32_t m,n,o,p;
 -        uint16_t Fa, Fb, t, u, v;
 -        uint8_t sa = s >> 24;
 -        uint8_t da = d >> 24;
 +        full_t s = *(src + i);
 +        full_t d = *(dest + i);
 +        full_t m,n,o,p;
 +        half_t Fa, Fb, t, u, v;
 +        quart_t sa = s >> (SHIFT * 3);
 +        quart_t da = d >> (SHIFT * 3);
- 	
+ 
          switch (combine & CombineA) {
          default:
              Fa = 0;
@@@ -300,10 -2036,10 +300,10 @@@
              Fa = fbCombineDisjointInPart (sa, da);
              break;
          case CombineA:
 -            Fa = 0xff;
 +            Fa = QUARTER_MASK;
              break;
          }
- 	
+ 
          switch (combine & CombineB) {
          default:
              Fb = 0;
@@@ -332,16 -2068,17 +332,16 @@@ fbCombineDisjointOverU (full_t *dest, c
  {
      int i;
      for (i = 0; i < width; ++i) {
 -        uint32_t  s = *(src + i);
 -        uint16_t  a = s >> 24;
 +        full_t  s = *(src + i);
 +        half_t  a = s >> (SHIFT * 3);
- 	
+ 
          if (a != 0x00)
          {
 -            if (a != 0xff)
 +            if (a != QUARTER_MASK)
              {
 -                uint32_t d = *(dest + i);
 -                a = fbCombineDisjointOutPart (d >> 24, a);
 -                FbByteMulAdd(d, a, s);
 -                s = d;
 +                full_t d = *(dest + i);
 +                a = fbCombineDisjointOutPart (d >> (SHIFT * 3), a);
 +                s = FullMulAdd(d, a, s);
              }
  	    *(dest + i) = s;
          }
@@@ -396,23 -2133,23 +396,23 @@@ fbCombineConjointOutPart (quart_t a, qu
  {
      /* max (1-b/a,0) */
      /* = 1-min(b/a,1) */
-     
+ 
      /* min (1, (1-b) / a) */
-     
+ 
      if (b >= a)		    /* b >= a -> b/a >= 1 */
  	return 0x00;	    /* 0 */
 -    return ~FbIntDiv(b,a);   /* 1 - b/a */
 +    return ~IntDiv(b,a);   /* 1 - b/a */
  }
  
  /* portion covered by both a and b */
 -static INLINE uint8_t
 -fbCombineConjointInPart (uint8_t a, uint8_t b)
 +static INLINE quart_t
 +fbCombineConjointInPart (quart_t a, quart_t b)
  {
      /* min (1,b/a) */
-     
+ 
      if (b >= a)		    /* b >= a -> b/a >= 1 */
 -	return 0xff;	    /* 1 */
 -    return FbIntDiv(b,a);   /* b/a */
 +	return QUARTER_MASK;	    /* 1 */
 +    return IntDiv(b,a);   /* b/a */
  }
  
  static FASTCALL void
@@@ -420,13 -2157,13 +420,13 @@@ fbCombineConjointGeneralU (full_t *dest
  {
      int i;
      for (i = 0; i < width; ++i) {
 -        uint32_t  s = *(src + i);
 -        uint32_t d = *(dest + i);
 -        uint32_t  m,n,o,p;
 -        uint16_t  Fa, Fb, t, u, v;
 -        uint8_t sa = s >> 24;
 -        uint8_t da = d >> 24;
 +        full_t  s = *(src + i);
 +        full_t  d = *(dest + i);
 +        full_t  m,n,o,p;
 +        half_t  Fa, Fb, t, u, v;
 +        quart_t sa = s >> (SHIFT * 3);
 +        quart_t da = d >> (SHIFT * 3);
- 	
+ 
          switch (combine & CombineA) {
          default:
              Fa = 0;
@@@ -438,10 -2175,10 +438,10 @@@
              Fa = fbCombineConjointInPart (sa, da);
              break;
          case CombineA:
 -            Fa = 0xff;
 +            Fa = QUARTER_MASK;
              break;
          }
- 	
+ 
          switch (combine & CombineB) {
          default:
              Fb = 0;
@@@ -522,76 -2259,127 +522,76 @@@ fbCombineConjointXorU (full_t *dest, co
      fbCombineConjointGeneralU (dest, src, width, CombineXor);
  }
  
 -static CombineFuncU pixman_fbCombineFuncU[] = {
 -    fbCombineClear,
 -    fbCombineSrcU,
 -    NULL, /* CombineDst */
 -    fbCombineOverU,
 -    fbCombineOverReverseU,
 -    fbCombineInU,
 -    fbCombineInReverseU,
 -    fbCombineOutU,
 -    fbCombineOutReverseU,
 -    fbCombineAtopU,
 -    fbCombineAtopReverseU,
 -    fbCombineXorU,
 -    fbCombineAddU,
 -    fbCombineSaturateU,
 -    NULL,
 -    NULL,
 -    fbCombineClear,
 -    fbCombineSrcU,
 -    NULL, /* CombineDst */
 -    fbCombineDisjointOverU,
 -    fbCombineSaturateU, /* DisjointOverReverse */
 -    fbCombineDisjointInU,
 -    fbCombineDisjointInReverseU,
 -    fbCombineDisjointOutU,
 -    fbCombineDisjointOutReverseU,
 -    fbCombineDisjointAtopU,
 -    fbCombineDisjointAtopReverseU,
 -    fbCombineDisjointXorU,
 -    NULL,
 -    NULL,
 -    NULL,
 -    NULL,
 -    fbCombineClear,
 -    fbCombineSrcU,
 -    NULL, /* CombineDst */
 -    fbCombineConjointOverU,
 -    fbCombineConjointOverReverseU,
 -    fbCombineConjointInU,
 -    fbCombineConjointInReverseU,
 -    fbCombineConjointOutU,
 -    fbCombineConjointOutReverseU,
 -    fbCombineConjointAtopU,
 -    fbCombineConjointAtopReverseU,
 -    fbCombineConjointXorU,
 -};
 +/******************************************************************************/
  
  static INLINE void
 -fbCombineMaskC (uint32_t *src, uint32_t *mask)
 +fbCombineMaskC (full_t *src, full_t *mask)
  {
 -    uint32_t a = *mask;
 +    full_t a = *mask;
-     
+ 
 -    uint32_t	x;
 -    uint16_t	xa;
 +    full_t	x;
 +    half_t	xa;
-     
+ 
      if (!a)
      {
  	*(src) = 0;
  	return;
      }
-     
+ 
      x = *(src);
 -    if (a == 0xffffffff)
 +    if (a == FULL_MASK)
      {
 -	x = x >> 24;
 -	x |= x << 8;
 -	x |= x << 16;
 +	x = x >> (SHIFT * 3);
 +	x |= x << SHIFT;
 +	x |= x << (SHIFT * 2);
  	*(mask) = x;
  	return;
      }
-     
+ 
 -    xa = x >> 24;
 -    FbByteMulC(x, a);
 -    *(src) = x;
 -    FbByteMul(a, xa);
 -    *(mask) = a;
 +    xa = x >> (SHIFT * 3);
 +    *(src) = FullMulC(x, a);
 +    *(mask) = FullMul(a, xa);
  }
  
  static INLINE void
 -fbCombineMaskValueC (uint32_t *src, const uint32_t *mask)
 +fbCombineMaskValueC (full_t *src, const full_t *mask)
  {
 -    uint32_t a = *mask;
 -    uint32_t	x;
 +    full_t a = *mask;
-     
+ 
      if (!a)
      {
  	*(src) = 0;
  	return;
      }
-     
+ 
 -    if (a == 0xffffffff)
 +    if (a == FULL_MASK)
  	return;
-     
+ 
 -    x = *(src);
 -    FbByteMulC(x, a);
 -    *(src) =x;
 +    *(src) = FullMulC(*(src), a);
  }
  
  static INLINE void
 -fbCombineMaskAlphaC (const uint32_t *src, uint32_t *mask)
 +fbCombineMaskAlphaC (const full_t *src, full_t *mask)
  {
 -    uint32_t a = *(mask);
 -    uint32_t	x;
 +    full_t a = *(mask);
 +    full_t	x;
-     
+ 
      if (!a)
  	return;
-     
+ 
 -    x = *(src) >> 24;
 -    if (x == 0xff)
 +    x = *(src) >> (SHIFT * 3);
 +    if (x == QUARTER_MASK)
  	return;
 -    if (a == 0xffffffff)
 +    if (a == FULL_MASK)
      {
 -	x = x >> 24;
 -	x |= x << 8;
 -	x |= x << 16;
 +	x = x >> (SHIFT * 3);
 +	x |= x << SHIFT;
 +	x |= x << (SHIFT * 2);
  	*(mask) = x;
  	return;
      }
-     
+ 
 -    FbByteMul(a, x);
 -    *(mask) = a;
 +    *(mask) = FullMul(a, x);
  }
  
  static FASTCALL void
@@@ -601,34 -2389,34 +601,34 @@@ fbCombineClearC (full_t *dest, full_t *
  }
  
  static FASTCALL void
 -fbCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +fbCombineSrcC (full_t *dest, full_t *src, full_t *mask, int width)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -	uint32_t s = *(src + i);
 -	uint32_t m = *(mask + i);
 +	full_t s = *(src + i);
 +	full_t m = *(mask + i);
- 	
+ 
  	fbCombineMaskValueC (&s, &m);
- 	
+ 
  	*(dest) = s;
      }
  }
  
  static FASTCALL void
 -fbCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +fbCombineOverC (full_t *dest, full_t *src, full_t *mask, int width)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -	uint32_t s = *(src + i);
 -	uint32_t m = *(mask + i);
 -	uint32_t a;
 +	full_t s = *(src + i);
 +	full_t m = *(mask + i);
 +	full_t a;
- 	
+ 
  	fbCombineMaskC (&s, &m);
- 	
+ 
  	a = ~m;
 -        if (a != 0xffffffff)
 +        if (a != FULL_MASK)
          {
              if (a)
              {
@@@ -641,24 -2430,24 +641,24 @@@
  }
  
  static FASTCALL void
 -fbCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +fbCombineOverReverseC (full_t *dest, full_t *src, full_t *mask, int width)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -        uint32_t d = *(dest + i);
 -        uint32_t a = ~d >> 24;
 +        full_t d = *(dest + i);
 +        full_t a = ~d >> (SHIFT * 3);
- 	
+ 
          if (a)
          {
 -            uint32_t s = *(src + i);
 -	    uint32_t m = *(mask + i);
 +            full_t s = *(src + i);
 +	    full_t m = *(mask + i);
- 	    
+ 
  	    fbCombineMaskValueC (&s, &m);
- 	    
+ 
 -            if (a != 0xff)
 +            if (a != QUARTER_MASK)
              {
 -                FbByteMulAdd(s, a, d);
 +                s = FullMulAdd(s, a, d);
              }
  	    *(dest + i) = s;
          }
@@@ -666,23 -2455,23 +666,23 @@@
  }
  
  static FASTCALL void
 -fbCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +fbCombineInC (full_t *dest, full_t *src, full_t *mask, int width)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -        uint32_t d = *(dest + i);
 -        uint16_t a = d >> 24;
 -        uint32_t s = 0;
 +        full_t d = *(dest + i);
 +        half_t a = d >> (SHIFT * 3);
 +        full_t s = 0;
          if (a)
          {
 -	    uint32_t m = *(mask + i);
 +	    full_t m = *(mask + i);
- 	    
+ 
  	    s = *(src + i);
  	    fbCombineMaskValueC (&s, &m);
 -            if (a != 0xff)
 +            if (a != QUARTER_MASK)
              {
 -                FbByteMul(s, a);
 +                s = FullMul(s, a);
              }
          }
  	*(dest + i) = s;
@@@ -690,50 -2479,50 +690,50 @@@
  }
  
  static FASTCALL void
 -fbCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +fbCombineInReverseC (full_t *dest, full_t *src, full_t *mask, int width)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -        uint32_t s = *(src + i);
 -        uint32_t m = *(mask + i);
 -        uint32_t a;
 +        full_t s = *(src + i);
 +        full_t m = *(mask + i);
 +        full_t a;
- 	
+ 
  	fbCombineMaskAlphaC (&s, &m);
- 	
+ 
  	a = m;
 -        if (a != 0xffffffff)
 +        if (a != FULL_MASK)
          {
 -            uint32_t d = 0;
 +            full_t d = 0;
              if (a)
              {
                  d = *(dest + i);
 -                FbByteMulC(d, a);
 +                d = FullMulC(d, a);
              }
- 	    *(dest + i) = d; 
+ 	    *(dest + i) = d;
          }
      }
  }
  
  static FASTCALL void
 -fbCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +fbCombineOutC (full_t *dest, full_t *src, full_t *mask, int width)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -        uint32_t d = *(dest + i);
 -        uint16_t a = ~d >> 24;
 -        uint32_t s = 0;
 +        full_t d = *(dest + i);
 +        half_t a = ~d >> (SHIFT * 3);
 +        full_t s = 0;
          if (a)
          {
 -	    uint32_t m = *(mask + i);
 +	    full_t m = *(mask + i);
- 	    
+ 
  	    s = *(src + i);
  	    fbCombineMaskValueC (&s, &m);
- 	    
+ 
 -            if (a != 0xff)
 +            if (a != QUARTER_MASK)
              {
 -                FbByteMul(s, a);
 +                s = FullMul(s, a);
              }
          }
  	*(dest + i) = s;
@@@ -741,21 -2530,21 +741,21 @@@
  }
  
  static FASTCALL void
 -fbCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +fbCombineOutReverseC (full_t *dest, full_t *src, full_t *mask, int width)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -	uint32_t s = *(src + i);
 -	uint32_t m = *(mask + i);
 -	uint32_t a;
 +	full_t s = *(src + i);
 +	full_t m = *(mask + i);
 +	full_t a;
- 	
+ 
  	fbCombineMaskAlphaC (&s, &m);
- 	
+ 
          a = ~m;
 -        if (a != 0xffffffff)
 +        if (a != FULL_MASK)
          {
 -            uint32_t d = 0;
 +            full_t d = 0;
              if (a)
              {
                  d = *(dest + i);
@@@ -767,151 -2556,155 +767,151 @@@
  }
  
  static FASTCALL void
 -fbCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +fbCombineAtopC (full_t *dest, full_t *src, full_t *mask, int width)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -        uint32_t d = *(dest + i);
 -        uint32_t s = *(src + i);
 -        uint32_t m = *(mask + i);
 -        uint32_t ad;
 -        uint16_t as = d >> 24;
 +        full_t d = *(dest + i);
 +        full_t s = *(src + i);
 +        full_t m = *(mask + i);
 +        full_t ad;
 +        half_t as = d >> (SHIFT * 3);
- 	
+ 
  	fbCombineMaskC (&s, &m);
- 	
+ 
          ad = ~m;
- 	
+ 
 -        FbByteAddMulC(d, ad, s, as);
 -	*(dest + i) = d;
 +	*(dest + i) = FullAddMulC(d, ad, s, as);
      }
  }
  
  static FASTCALL void
 -fbCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +fbCombineAtopReverseC (full_t *dest, full_t *src, full_t *mask, int width)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
- 	
+ 
 -        uint32_t d = *(dest + i);
 -        uint32_t s = *(src + i);
 -        uint32_t m = *(mask + i);
 -        uint32_t ad;
 -        uint16_t as = ~d >> 24;
 +        full_t d = *(dest + i);
 +        full_t s = *(src + i);
 +        full_t m = *(mask + i);
 +        full_t ad;
 +        half_t as = ~d >> (SHIFT * 3);
- 	
+ 
  	fbCombineMaskC (&s, &m);
- 	
+ 
  	ad = m;
- 	
+ 
 -        FbByteAddMulC(d, ad, s, as);
 -	*(dest + i) = d;
 +	*(dest + i) = FullAddMulC(d, ad, s, as);
      }
  }
  
  static FASTCALL void
 -fbCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +fbCombineXorC (full_t *dest, full_t *src, full_t *mask, int width)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -        uint32_t d = *(dest + i);
 -        uint32_t s = *(src + i);
 -        uint32_t m = *(mask + i);
 -        uint32_t ad;
 -        uint16_t as = ~d >> 24;
 +        full_t d = *(dest + i);
 +        full_t s = *(src + i);
 +        full_t m = *(mask + i);
 +        full_t ad;
 +        half_t as = ~d >> (SHIFT * 3);
- 	
+ 
  	fbCombineMaskC (&s, &m);
- 	
+ 
  	ad = ~m;
- 	
+ 
 -        FbByteAddMulC(d, ad, s, as);
 -	*(dest + i) = d;
 +	*(dest + i) = FullAddMulC(d, ad, s, as);
      }
  }
  
  static FASTCALL void
 -fbCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +fbCombineAddC (full_t *dest, full_t *src, full_t *mask, int width)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -        uint32_t s = *(src + i);
 -        uint32_t m = *(mask + i);
 -        uint32_t d = *(dest + i);
 +        full_t s = *(src + i);
 +        full_t m = *(mask + i);
 +        full_t d = *(dest + i);
- 	
+ 
  	fbCombineMaskValueC (&s, &m);
- 	
+ 
 -        FbByteAdd(d, s);
 -	*(dest + i) = d;
 +	*(dest + i) = FullAdd(d, s);
      }
  }
  
  static FASTCALL void
 -fbCombineSaturateC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +fbCombineSaturateC (full_t *dest, full_t *src, full_t *mask, int width)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -        uint32_t  s, d;
 -        uint16_t  sa, sr, sg, sb, da;
 -        uint16_t  t, u, v;
 -        uint32_t  m,n,o,p;
 +        full_t  s, d;
 +        half_t  sa, sr, sg, sb, da;
 +        half_t  t, u, v;
 +        full_t  m,n,o,p;
- 	
+ 
          d = *(dest + i);
          s = *(src + i);
  	m = *(mask + i);
- 	
+ 
  	fbCombineMaskC (&s, &m);
- 	
+ 
 -        sa = (m >> 24);
 -        sr = (m >> 16) & 0xff;
 -        sg = (m >>  8) & 0xff;
 -        sb = (m      ) & 0xff;
 -        da = ~d >> 24;
 +        sa = (m >> (SHIFT * 3));
 +        sr = (m >> (SHIFT * 2)) & QUARTER_MASK;
 +        sg = (m >>  SHIFT     ) & QUARTER_MASK;
 +        sb = (m               ) & QUARTER_MASK;
 +        da = ~d >> (SHIFT * 3);
- 	
+ 
          if (sb <= da)
 -            m = FbAdd(s,d,0,t);
 +            m = Add(s,d,0,t);
          else
 -            m = FbGen (s, d, 0, (da << 8) / sb, 0xff, t, u, v);
 +            m = Gen (s, d, 0, (da << SHIFT) / sb, QUARTER_MASK, t, u, v);
- 	
+ 
          if (sg <= da)
 -            n = FbAdd(s,d,8,t);
 +            n = Add(s,d,SHIFT,t);
          else
 -            n = FbGen (s, d, 8, (da << 8) / sg, 0xff, t, u, v);
 +            n = Gen (s, d, SHIFT, (da << SHIFT) / sg, QUARTER_MASK, t, u, v);
- 	
+ 
          if (sr <= da)
 -            o = FbAdd(s,d,16,t);
 +            o = Add(s,d,(SHIFT * 2),t);
          else
 -            o = FbGen (s, d, 16, (da << 8) / sr, 0xff, t, u, v);
 +            o = Gen (s, d, (SHIFT * 2), (da << SHIFT) / sr, QUARTER_MASK, t, u, v);
- 	
+ 
          if (sa <= da)
 -            p = FbAdd(s,d,24,t);
 +            p = Add(s,d,(SHIFT * 3),t);
          else
 -            p = FbGen (s, d, 24, (da << 8) / sa, 0xff, t, u, v);
 +            p = Gen (s, d, (SHIFT * 3), (da << SHIFT) / sa, QUARTER_MASK, t, u, v);
- 	
+ 
  	*(dest + i) = m|n|o|p;
      }
  }
  
  static FASTCALL void
 -fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
 +fbCombineDisjointGeneralC (full_t *dest, full_t *src, full_t *mask, int width, quart_t combine)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -        uint32_t  s, d;
 -        uint32_t  m,n,o,p;
 -        uint32_t  Fa, Fb;
 -        uint16_t  t, u, v;
 -        uint32_t  sa;
 -        uint8_t   da;
 +        full_t  s, d;
 +        full_t  m,n,o,p;
 +        full_t  Fa, Fb;
 +        half_t  t, u, v;
 +        full_t  sa;
 +        quart_t da;
- 	
+ 
          s = *(src + i);
          m = *(mask + i);
          d = *(dest + i);
 -        da = d >> 24;
 +        da = d >> (SHIFT * 3);
- 	
+ 
  	fbCombineMaskC (&s, &m);
- 	
+ 
  	sa = m;
- 	
+ 
          switch (combine & CombineA) {
          default:
              Fa = 0;
@@@ -931,10 -2724,10 +931,10 @@@
              Fa = m|n|o|p;
              break;
          case CombineA:
 -            Fa = 0xffffffff;
 +            Fa = FULL_MASK;
              break;
          }
- 	
+ 
          switch (combine & CombineB) {
          default:
              Fb = 0;
@@@ -1015,27 -2808,27 +1015,27 @@@ fbCombineDisjointXorC (full_t *dest, fu
  }
  
  static FASTCALL void
 -fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
 +fbCombineConjointGeneralC (full_t *dest, full_t *src, full_t *mask, int width, quart_t combine)
  {
      int i;
-     
+ 
      for (i = 0; i < width; ++i) {
 -        uint32_t  s, d;
 -        uint32_t  m,n,o,p;
 -        uint32_t  Fa, Fb;
 -        uint16_t  t, u, v;
 -        uint32_t  sa;
 -        uint8_t   da;
 +        full_t  s, d;
 +        full_t  m,n,o,p;
 +        full_t  Fa, Fb;
 +        half_t  t, u, v;
 +        full_t  sa;
 +        quart_t da;
- 	
+ 
          s = *(src + i);
          m = *(mask + i);
          d = *(dest + i);
 -        da = d >> 24;
 +        da = d >> (SHIFT * 3);
- 	
+ 
  	fbCombineMaskC (&s, &m);
- 	
+ 
          sa = m;
- 	
+ 
          switch (combine & CombineA) {
          default:
              Fa = 0;
@@@ -1055,10 -2848,10 +1055,10 @@@
              Fa = m|n|o|p;
              break;
          case CombineA:
 -            Fa = 0xffffffff;
 +            Fa = FULL_MASK;
              break;
          }
- 	
+ 
          switch (combine & CombineB) {
          default:
              Fb = 0;
commit 74a680aa4fdf0d056ec35b77ba2f8a176040f3e7
Merge: 723c2ac... 14f2a4c...
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Fri Oct 26 09:41:33 2007 -0700

    Merge commit '14f2a4c222bb6f0748a07e21663663b43beef466' into wide-composite-merge

commit 723c2acaf223d3d641eb6e8ffa58a3e80625e1e8
Merge: 86dd2c7... 562fa00...
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Fri Oct 26 09:39:42 2007 -0700

    Merge commit '562fa00e40b5466914b4f4cf49ae183e7b38e4f8' into wide-composite-merge
    
    Conflicts:
    
    	pixman/pixman-compose.c
    	pixman/pixman.h

diff --cc pixman/formats.pl
index 2235347,0000000..8153422
mode 100644,000000..100644
--- a/pixman/formats.pl
+++ b/pixman/formats.pl
@@@ -1,51 -1,0 +1,53 @@@
 +
 +# format structure:
 +# bpp, redbits, greenbits, bluebits, alphabits, redshift, greenshift, blueshift, alphashift, type, name
 +@formats = 
 +    (
 +     [32, 8, 8, 8, 8, 16, 8, 0, 24, "argb", "a8r8g8b8"],
 +     [32, 8, 8, 8, 0, 16, 8, 0, 0, "argb", "x8r8g8b8"],
 +     [32, 8, 8, 8, 8, 0, 8, 16, 24, "abgr", "a8b8g8r8"],
 +     [32, 8, 8, 8, 0, 0, 8, 16, 0, "abgr", "x8b8g8r8"],
 +     
 +     [32, 10, 10, 10, 2, 0, 10, 20, 30, "abgr", "a2b10g10r10"],
 +     [32, 10, 10, 10, 0, 0, 10, 20, 0, "abgr", "x2b10g10r10"],
 +     
 +     [16, 5, 6, 5, 0, 11, 5, 0, 0, "argb", "r5g6b5"],
 +     [16, 5, 6, 5, 0, 0, 5, 11, 0, "abgr", "b5g6r5"],
 +     
 +     [16, 5, 5, 5, 1, 10, 5, 0, 15, "argb", "a1r5g5b5"],
 +     [16, 5, 5, 5, 0, 10, 5, 0, 0, "argb", "x1r5g5b5"],
 +     [16, 5, 5, 5, 1, 0, 5, 10, 15, "abgr", "a1b5g5r5"],
 +     [16, 5, 5, 5, 0, 0, 5, 10, 0, "abgr", "x1b5g5r5"],
 +
 +     [16, 4, 4, 4, 4, 8, 4, 0, 12, "argb", "a4r4g4b4"],
 +     [16, 4, 4, 4, 0, 8, 4, 0, 0, "argb", "x4r4g4b4"],
 +     [16, 4, 4, 4, 4, 0, 4, 8, 12, "abgr", "a4b4g4r4"],
 +     [16, 4, 4, 4, 0, 0, 4, 8, 0, "abgr", "x4b4g4r4"],
 +     
 +     [8, 3, 3, 2, 0, 5, 2, 0, 0, "argb", "r3g3b2"],
 +     [8, 3, 3, 2, 0, 0, 3, 6, 0, "abgr", "b2g3r3"],
 +     [8, 2, 2, 2, 2, 4, 2, 0, 6, "argb", "a2r2g2b2"],
 +     [8, 2, 2, 2, 2, 0, 2, 4, 6, "abgr", "a2b2g2r2"],
 +     [8, 0, 0, 0, 8, 0, 0, 0, 0, "alpha", "a8"],
 +     [8, 0, 0, 0, 4, 0, 0, 0, 0, "alpha", "x4a4"],
 +     
 +     [4, 1, 2, 1, 0, 3, 1, 0, 0, "argb", "r1g2b1"],
 +     [4, 1, 2, 1, 0, 0, 1, 3, 0, "abgr", "b1g2r1"],
 +     [4, 1, 1, 1, 1, 2, 1, 0, 3, "argb", "a1r1g1b1"],
 +     [4, 1, 1, 1, 1, 0, 1, 2, 3, "abgr", "a1b1g1r1"],
 +     [4, 0, 0, 0, 4, 0, 0, 0, 0, "alpha", "a4"],
 +     );
 +
 +@handcode_formats =
 +    (
 +     "r8g8b8",
 +     "b8g8r8",
 +     "c8",
 +     "g8",
 +     "c4",
 +     "g4",
 +     "a1",
 +     "g1",
++     "yuy2",
++     "yv12",
 +     );
diff --cc pixman/pixman-access-handcode.c
index 142234d,0000000..41f95df
mode 100644,000000..100644
--- a/pixman/pixman-access-handcode.c
+++ b/pixman/pixman-access-handcode.c
@@@ -1,2755 -1,0 +1,2941 @@@
 +/*
 + *
 + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
 + *             2005 Lars Knoll & Zack Rusin, Trolltech
 + *
 + * Permission to use, copy, modify, distribute, and sell this software and its
 + * documentation for any purpose is hereby granted without fee, provided that
 + * the above copyright notice appear in all copies and that both that
 + * copyright notice and this permission notice appear in supporting
 + * documentation, and that the name of Keith Packard not be used in
 + * advertising or publicity pertaining to distribution of the software without
 + * specific, written prior permission.  Keith Packard makes no
 + * representations about the suitability of this software for any purpose.  It
 + * is provided "as is" without express or implied warranty.
 + *
 + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 + * SOFTWARE.
 + */
 +
 +#include <config.h>
 +
 +#include <stdlib.h>
 +#include <string.h>
 +#include <math.h>
 +#include <assert.h>
 +#include <limits.h>
 +
 +#include "pixman-private.h"
 +
 +
 +/*
 + *    FIXME:
 + *		The stuff here is added just to get it to compile. Something sensible needs to
 + *              be done before this can be used.
 + *
 + *   we should go through this code and clean up some of the weird stuff that have
 + *   resulted from unmacro-ifying it.
 + *
 + */
 +#define INLINE inline
 +
 +/*   End of stuff added to get it to compile
 + */ 
 +
 +static unsigned int
 +SourcePictureClassify (source_image_t *pict,
 +		       int	       x,
 +		       int	       y,
 +		       int	       width,
 +		       int	       height)
 +{
 +    if (pict->common.type == SOLID)
 +    {
 +	pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +    }
 +    else if (pict->common.type == LINEAR)
 +    {
 +	linear_gradient_t *linear = (linear_gradient_t *)pict;
 +	pixman_vector_t   v;
 +	pixman_fixed_32_32_t l;
 +	pixman_fixed_48_16_t dx, dy, a, b, off;
 +	pixman_fixed_48_16_t factors[4];
 +	int	     i;
 +	
 +	dx = linear->p2.x - linear->p1.x;
 +	dy = linear->p2.y - linear->p1.y;
 +	l = dx * dx + dy * dy;
 +	if (l)
 +	{
 +	    a = (dx << 32) / l;
 +	    b = (dy << 32) / l;
 +	}
 +	else
 +	{
 +	    a = b = 0;
 +	}
 +	
 +	off = (-a * linear->p1.x
 +	       -b * linear->p1.y) >> 16;
 +	
 +	for (i = 0; i < 3; i++)
 +	{
 +	    v.vector[0] = pixman_int_to_fixed ((i % 2) * (width  - 1) + x);
 +	    v.vector[1] = pixman_int_to_fixed ((i / 2) * (height - 1) + y);
 +	    v.vector[2] = pixman_fixed_1;
 +	    
 +	    if (pict->common.transform)
 +	    {
 +		if (!pixman_transform_point_3d (pict->common.transform, &v))
 +		    return SOURCE_IMAGE_CLASS_UNKNOWN;
 +	    }
 +	    
 +	    factors[i] = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off;
 +	}
 +	
 +	if (factors[2] == factors[0])
 +	    pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	else if (factors[1] == factors[0])
 +	    pict->class = SOURCE_IMAGE_CLASS_VERTICAL;
 +    }
 +    
 +    return pict->class;
 +}
 +
 +#define SCANLINE_BUFFER_LENGTH 2048
 +
++/*
++ * YV12 setup and access macros
++ */
++
++#define YV12_SETUP(pict) \
++	uint32_t *bits = pict->bits; \
++	int stride = pict->rowstride; \
++	int offset0 = stride < 0 ? \
++		((-stride) >> 1) * ((pict->height - 1) >> 1) - stride : \
++		stride * pict->height; \
++	int offset1 = stride < 0 ? \
++		offset0 + ((-stride) >> 1) * ((pict->height) >> 1) : \
++		offset0 + (offset0 >> 2); 
++
++#define YV12_Y(line)		\
++    ((uint8_t *) ((bits) + (stride) * (line)))
++
++#define YV12_U(line)	      \
++    ((uint8_t *) ((bits) + offset1 + \
++		((stride) >> 1) * ((line) >> 1)))
++
++#define YV12_V(line)	      \
++    ((uint8_t *) ((bits) + offset0 + \
++		((stride) >> 1) * ((line) >> 1)))
++
 +typedef FASTCALL void (*fetchProc_32)(bits_image_t *pict, int x, int y, int width, uint32_t *buffer);
 +
 +static fetchProc_32 fetchProcForPicture_32 (bits_image_t * pict);
 +
 +
 +typedef FASTCALL uint32_t (*fetchPixelProc_32)(bits_image_t *pict, int offset, int line);
 +
 +static fetchPixelProc_32 fetchPixelProcForPicture_32 (bits_image_t * pict);
 +
 +typedef FASTCALL void (*storeProc_32) (pixman_image_t *image,
 +				       uint32_t *bits, const uint32_t *values,
 +				       int x, int width,
 +				       const pixman_indexed_t * indexed);
 +
 +static storeProc_32 storeProcForPicture_32 (bits_image_t * pict);
 +
 +typedef FASTCALL void (*fetchProc_64)(bits_image_t *pict, int x, int y, int width, uint64_t *buffer);
 +
 +static fetchProc_64 fetchProcForPicture_64 (bits_image_t * pict);
 +
 +
 +typedef FASTCALL uint64_t (*fetchPixelProc_64)(bits_image_t *pict, int offset, int line);
 +
 +static fetchPixelProc_64 fetchPixelProcForPicture_64 (bits_image_t * pict);
 +
 +typedef FASTCALL void (*storeProc_64) (pixman_image_t *image,
 +				       uint32_t *bits, const uint64_t *values,
 +				       int x, int width,
 +				       const pixman_indexed_t * indexed);
 +
 +static storeProc_64 storeProcForPicture_64 (bits_image_t * pict);
 +
 +/* handcoded fetch/store functions. */
 +#define fbFetch_g8_32 fbFetch_c8_32
 +#define fbFetchPixel_g8_32 fbFetchPixel_c8_32
 +#define fbStore_g8_32 fbStore_c8_32
 +
 +#define fbFetch_g4_32 fbFetch_c4_32
 +#define fbFetchPixel_g4_32 fbFetchPixel_c4_32
 +#define fbStore_g4_32 fbStore_c4_32
 +
 +#define fbFetch_g8_64 fbFetch_c8_64
 +#define fbFetchPixel_g8_64 fbFetchPixel_c8_64
 +#define fbStore_g8_64 fbStore_c8_64
 +
 +#define fbFetch_g4_64 fbFetch_c4_64
 +#define fbFetchPixel_g4_64 fbFetchPixel_c4_64
 +#define fbStore_g4_64 fbStore_c4_64
 +
 +/*
 + * Used by READ/WRITE macros
 + */
 +#define image ((pixman_image_t *)pict)
 +
 +static FASTCALL void
 +fbFetch_r8g8b8_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint32_t b = Fetch24(pixel) | 0xff000000;
 +	pixel += 3;
 +	*buffer++ = b;
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_r8g8b8_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint64_t r = (uint64_t)READ(pixel) << 32;
 +	uint64_t g = (uint64_t)READ(pixel + 1) << 16;
 +	uint64_t b = (uint64_t)READ(pixel + 2);
 +	uint64_t p = r | g | b;
 +
 +	pixel += 3;
 +	*buffer++ = (p << 16) | p;
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_b8g8r8_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint32_t b = 0xff000000;
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +	b |= (READ(pixel++));
 +	b |= (READ(pixel++) << 8);
 +	b |= (READ(pixel++) << 16);
 +#else
 +	b |= (READ(pixel++) << 16);
 +	b |= (READ(pixel++) << 8);
 +	b |= (READ(pixel++));
 +#endif
 +	*buffer++ = b;
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_b8g8r8_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint64_t b = 0xffff000000000000LL;
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +	b |= ((uint64_t)READ(pixel++));
 +	b |= ((uint64_t)READ(pixel++) << 16);
 +	b |= ((uint64_t)READ(pixel++) << 32);
 +#else
 +	b |= ((uint64_t)READ(pixel++) << 32);
 +	b |= ((uint64_t)READ(pixel++) << 16);
 +	b |= ((uint64_t)READ(pixel++));
 +#endif
 +	*buffer++ = b | (b << 8);
 +    }
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_r8g8b8_32 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +    return (0xff000000 |
 +	    (READ(pixel + 0) << 16) |
 +	    (READ(pixel + 1) << 8) |
 +	    (READ(pixel + 2)));
 +#else
 +    return (0xff000000 |
 +	    (READ(pixel + 2) << 16) |
 +	    (READ(pixel + 1) << 8) |
 +	    (READ(pixel + 0)));
 +#endif
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_r8g8b8_64 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +    uint64_t p = (0xffff000000000000LL |
 +		  ((uint64_t)READ(pixel + 0) << 32) |
 +		  ((uint64_t)READ(pixel + 1) << 16) |
 +		  ((uint64_t)READ(pixel + 2)));
 +#else
 +    uint64_t p = (0xffff000000000000LL |
 +		  ((uint64_t)READ(pixel + 2) << 32) |
 +		  ((uint64_t)READ(pixel + 1) << 16) |
 +		  ((uint64_t)READ(pixel + 0)));
 +#endif
 +    return p | (p << 8);
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_b8g8r8_32 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +    return (0xff000000 |
 +	    (READ(pixel + 2) << 16) |
 +	    (READ(pixel + 1) << 8) |
 +	    (READ(pixel + 0)));
 +#else
 +    return (0xff000000 |
 +	    (READ(pixel + 0) << 16) |
 +	    (READ(pixel + 1) << 8) |
 +	    (READ(pixel + 2)));
 +#endif
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_b8g8r8_64 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +    uint64_t p = (0xffff000000000000LL |
 +		  ((uint64_t)READ(pixel + 2) << 32) |
 +		  ((uint64_t)READ(pixel + 1) << 16) |
 +		  ((uint64_t)READ(pixel + 0)));
 +#else
 +    uint64_t p = (0xffff000000000000LL |
 +		  ((uint64_t)READ(pixel + 0) << 32) |
 +		  ((uint64_t)READ(pixel + 1) << 16) |
 +		  ((uint64_t)READ(pixel + 2)));
 +#endif
 +    return p | (p << 8);
 +}
 +
 +#undef image
 +
 +static FASTCALL void
 +fbStore_r8g8b8_32 (pixman_image_t *image,
 +		   uint32_t *bits, const uint32_t *values, int x, int width,
 +		   const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	Store24(pixel, values[i]);
 +	pixel += 3;
 +    }
 +}
 +
 +/*
 + * Store 'width' 64-bit (16 bits per channel) pixels from 'values' as packed
 + * 24-bit r8g8b8 pixels, starting at pixel column x of the row at 'bits'.
 + *
 + * Only the high byte of each 16-bit colour channel is kept; alpha is dropped.
 + * 'indexed' is unused for this format.
 + */
 +static FASTCALL void
 +fbStore_r8g8b8_64 (pixman_image_t *image,
 +		   uint32_t *bits, const uint64_t *values, int x, int width,
 +		   const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t r = (values[i] >> 40) & 0xff;
 +	uint32_t g = (values[i] >> 24) & 0xff;
 +	uint32_t b = (values[i] >> 8) & 0xff;
 +	/* Store24 takes a 0xRRGGBB value, so each channel goes in its own byte. */
 +	Store24(pixel, ((r << 16) | (g << 8) | b));
 +	pixel += 3;
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_b8g8r8_32 (pixman_image_t *image,
 +		   uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t val = values[i];
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +	WRITE(pixel++, Blue(val));
 +	WRITE(pixel++, Green(val));
 +	WRITE(pixel++, Red(val));
 +#else
 +	WRITE(pixel++, Red(val));
 +	WRITE(pixel++, Green(val));
 +	WRITE(pixel++, Blue(val));
 +#endif
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_b8g8r8_64 (pixman_image_t *image,
 +		   uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t r = (values[i] >> 40) & 0xff;
 +	uint32_t g = (values[i] >> 24) & 0xff;
 +	uint32_t b = (values[i] >> 8) & 0xff;
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +	WRITE(pixel++, b);
 +	WRITE(pixel++, g);
 +	WRITE(pixel++, r);
 +#else
 +	WRITE(pixel++, r);
 +	WRITE(pixel++, g);
 +	WRITE(pixel++, b);
 +#endif
 +    }
 +}
 +
 +#define image ((pixman_image_t *)pict)
 +
 +static FASTCALL void
 +fbFetch_c8_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    const uint8_t *pixel = (const uint8_t *)bits + x;
 +    const uint8_t *end = pixel + width;
 +    while (pixel < end) {
 +	uint32_t  p = READ(pixel++);
 +	*buffer++ = indexed->rgba[p];
 +    }
 +}
 +
 +/*
 + * Fetch 'width' 8-bit palette-indexed pixels, starting at column x of row y,
 + * and widen each palette entry into a 64-bit pixel with 16 bits per channel.
 + *
 + * Each index is looked up in pict->indexed->rgba, as fbFetch_c8_32 and
 + * fbFetchPixel_c8_64 do, and the 32-bit entry is then expanded.
 + */
 +static FASTCALL void
 +fbFetch_c8_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    const uint8_t *pixel = (const uint8_t *)bits + x;
 +    const uint8_t *end = pixel + width;
 +    while (pixel < end) {
 +	uint64_t  p = indexed->rgba[READ(pixel++)];
 +	/* Move each 8-bit channel to the low byte of its own 16-bit lane. */
 +	uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +
 +	/* Copy each low byte into the high byte of the same lane. */
 +	*buffer++ = px | (px << 8);
 +    }
 +}
 +
 +#define Fetch8(l,o)    (READ((uint8_t *)(l) + ((o) >> 2)))
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +#define Fetch4(l,o)    ((o) & 2 ? Fetch8(l,o) & 0xf : Fetch8(l,o) >> 4)
 +#else
 +#define Fetch4(l,o)    ((o) & 2 ? Fetch8(l,o) >> 4 : Fetch8(l,o) & 0xf)
 +#endif
 +
 +static FASTCALL void
 +fbFetch_c4_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  p = Fetch4(bits, i + x);
 +	
 +	*buffer++ = indexed->rgba[p];
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_c4_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint64_t  p = indexed->rgba[Fetch4(bits, i + x)];
 +	uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +
 +	*buffer++ = px | (px << 8);
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_a1_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  p = READ(bits + ((i + x) >> 5));
 +	uint32_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = p >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = p >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +	a |= a << 1;
 +	a |= a << 2;
 +	a |= a << 4;
 +	*buffer++ = a << 24;
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_a1_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  p = READ(bits + ((i + x) >> 5));
 +	uint64_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = p >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = p >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +	a |= a << 1;
 +	a |= a << 2;
 +	a |= a << 4;
 +	a |= a << 8;
 +	*buffer++ = a << 48;
 +    }
 +}
 +
 +static FASTCALL void
 +fbFetch_g1_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t p = READ(bits + ((i+x) >> 5));
 +	uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = p >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = p >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +	*buffer++ = indexed->rgba[a];
 +    }
 +}
 +
 +/*
 + * Fetch 'width' 1-bit palette-indexed pixels, starting at column x of row y,
 + * and widen each palette entry into a 64-bit pixel with 16 bits per channel.
 + *
 + * The bit for each pixel is extracted according to BITMAP_BIT_ORDER and used
 + * as the index into pict->indexed->rgba, as fbFetch_g1_32 and
 + * fbFetchPixel_g1_64 do.
 + */
 +static FASTCALL void
 +fbFetch_g1_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t t = READ(bits + ((i+x) >> 5));
 +	uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = t >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = t >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +
 +	/* Index the palette with the extracted bit, not with a 4-bit fetch. */
 +	uint64_t  p = indexed->rgba[a];
 +	uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +	*buffer++ = px | (px << 8);
 +    }
 +}
 +
++static FASTCALL void
++fbFetch_yuy2_32 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
++{
++    int16_t y, u, v;
++    int32_t r, g, b;
++    int   i;
++
++    const uint32_t *bits = pict->bits + pict->rowstride * line;
++
++    for (i = 0; i < width; i++)
++    {
++	y = ((uint8_t *) bits)[(x + i) << 1] - 16;
++	u = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 1] - 128;
++	v = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 3] - 128;
++
++	/* R = 1.164(Y - 16) + 1.596(V - 128) */
++	r = 0x012b27 * y + 0x019a2e * v;
++	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
++	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
++	/* B = 1.164(Y - 16) + 2.018(U - 128) */
++	b = 0x012b27 * y + 0x0206a2 * u;
++
++    WRITE(buffer++, 0xff000000 |
++	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
++	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
++	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));
++    }
++}
++
++static FASTCALL void
++fbFetch_yuy2_64 (bits_image_t *pict, int x, int line, int width, uint64_t *buffer)
++{
++    /* [AGP] Unimplemented */
++}
++
++static FASTCALL void
++fbFetch_yv12_32 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
++{
++    YV12_SETUP(pict);
++    uint8_t *pY = YV12_Y (line);
++    uint8_t *pU = YV12_U (line);
++    uint8_t *pV = YV12_V (line);
++    int16_t y, u, v;
++    int32_t r, g, b;
++    int   i;
++
++    for (i = 0; i < width; i++)
++    {
++	y = pY[x + i] - 16;
++	u = pU[(x + i) >> 1] - 128;
++	v = pV[(x + i) >> 1] - 128;
++
++	/* R = 1.164(Y - 16) + 1.596(V - 128) */
++	r = 0x012b27 * y + 0x019a2e * v;
++	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
++	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
++	/* B = 1.164(Y - 16) + 2.018(U - 128) */
++	b = 0x012b27 * y + 0x0206a2 * u;
++
++	WRITE(buffer++, 0xff000000 |
++	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
++	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
++	    (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));
++    }
++}
++
++static FASTCALL void
++fbFetch_yv12_64 (bits_image_t *pict, int x, int line, int width, uint64_t *buffer)
++{
++    /* [AGP] Unimplemented */
++}
++
 +static FASTCALL uint32_t
 +fbFetchPixel_c8_32 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t   pixel = READ((uint8_t *) bits + offset);
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    return indexed->rgba[pixel];
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_c8_64 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    uint64_t p = indexed->rgba[READ((uint8_t *) bits + offset)];
 +    uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +
 +    return px | (px << 8);
 +}
 +
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_c4_32 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t  pixel = Fetch4(bits, offset);
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    
 +    return indexed->rgba[pixel];
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_c4_64 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    uint64_t p = indexed->rgba[Fetch4(bits, offset)];
 +    uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +
 +    return px | (px << 8);
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_a1_32 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t  pixel = READ(bits + (offset >> 5));
 +    uint32_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    a |= a << 1;
 +    a |= a << 2;
 +    a |= a << 4;
 +    return a << 24;
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_a1_64 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t  pixel = READ(bits + (offset >> 5));
 +    uint64_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    a |= a << 1;
 +    a |= a << 2;
 +    a |= a << 4;
 +    a |= a << 8;
 +    return a << 48;
 +}
 +
 +static FASTCALL uint32_t
 +fbFetchPixel_g1_32 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t pixel = READ(bits + (offset >> 5));
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    return indexed->rgba[a];
 +}
 +
 +static FASTCALL uint64_t
 +fbFetchPixel_g1_64 (bits_image_t *pict, int offset, int line)
 +{
 +    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t pixel = READ(bits + (offset >> 5));
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    uint64_t p = indexed->rgba[a];
 +    uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +    return px | (px << 8);
 +}
 +
++static FASTCALL uint32_t
++fbFetchPixel_yuy2_32 (bits_image_t *pict, int offset, int line)
++{
++    int16_t y, u, v;
++    int32_t r, g, b;
++
++    const uint32_t *bits = pict->bits + pict->rowstride * line;
++
++    y = ((uint8_t *) bits)[offset << 1] - 16;
++    u = ((uint8_t *) bits)[((offset << 1) & -4) + 1] - 128;
++    v = ((uint8_t *) bits)[((offset << 1) & -4) + 3] - 128;
++
++    /* R = 1.164(Y - 16) + 1.596(V - 128) */
++    r = 0x012b27 * y + 0x019a2e * v;
++    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
++    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
++    /* B = 1.164(Y - 16) + 2.018(U - 128) */
++    b = 0x012b27 * y + 0x0206a2 * u;
++
++    return 0xff000000 |
++	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
++	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
++	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
++}
++
++static FASTCALL uint64_t
++fbFetchPixel_yuy2_64 (bits_image_t *pict, int offset, int line)
++{
++    /* [AGP] Unimplemented */
++    return 0;
++}
++
++static FASTCALL uint32_t
++fbFetchPixel_yv12_32 (bits_image_t *pict, int offset, int line)
++{
++    YV12_SETUP(pict);
++    int16_t y = YV12_Y (line)[offset] - 16;
++    int16_t u = YV12_U (line)[offset >> 1] - 128;
++    int16_t v = YV12_V (line)[offset >> 1] - 128;
++    int32_t r, g, b;
++
++    /* R = 1.164(Y - 16) + 1.596(V - 128) */
++    r = 0x012b27 * y + 0x019a2e * v;
++    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
++    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
++    /* B = 1.164(Y - 16) + 2.018(U - 128) */
++    b = 0x012b27 * y + 0x0206a2 * u;
++
++    return 0xff000000 |
++	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
++	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
++	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
++}
++
++static FASTCALL uint64_t
++fbFetchPixel_yv12_64 (bits_image_t *pict, int offset, int line)
++{
++    /* [AGP] Unimplemented */
++    return 0;
++}
++
 +#undef image
 +
 +static FASTCALL void
 +fbStore_c8_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t   *pixel = ((uint8_t *) bits) + x;
 +    for (i = 0; i < width; ++i) {
 +	WRITE(pixel++, miIndexToEnt24(indexed,values[i]));
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_c8_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t   *pixel = ((uint8_t *) bits) + x;
 +    for (i = 0; i < width; ++i) {
 +	WRITE(pixel++, miIndexToEnt48(indexed,values[i]));
 +    }
 +}
 +
 +#define Store8(l,o,v)  (WRITE((uint8_t *)(l) + ((o) >> 3), (v)))
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +#define Store4(l,o,v)  Store8(l,o,((o) & 4 ?				\
 +				   (Fetch8(l,o) & 0xf0) | (v) :		\
 +				   (Fetch8(l,o) & 0x0f) | ((v) << 4)))
 +#else
 +#define Store4(l,o,v)  Store8(l,o,((o) & 4 ?			       \
 +				   (Fetch8(l,o) & 0x0f) | ((v) << 4) : \
 +				   (Fetch8(l,o) & 0xf0) | (v)))
 +#endif
 +
 +static FASTCALL void
 +fbStore_c4_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  pixel;
 +	
 +	pixel = miIndexToEnt24(indexed, values[i]);
 +	Store4(bits, i + x, pixel);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_c4_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  pixel;
 +
 +	pixel = miIndexToEnt48(indexed, values[i]);
 +	Store4(bits, i + x, pixel);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_a1_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +	
 +	uint32_t v = values[i] & 0x80000000 ? mask : 0;
 +	WRITE(pixel, (READ(pixel) & ~mask) | v);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_a1_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +
 +	uint64_t v = values[i] & 0x8000000000000000LL ? mask : 0;
 +	WRITE(pixel, (READ(pixel) & ~mask) | v);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_g1_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +	
 +	uint32_t v = miIndexToEntY24(indexed,values[i]) ? mask : 0;
 +	WRITE(pixel, (READ(pixel) & ~mask) | v);
 +    }
 +}
 +
 +/*
 + * Store width wide (64-bit) pixels from values into a 1-bit-per-pixel
 + * scanline.  A pixel's bit (as selected by FbStipMask() for position
 + * x + n) is set when miIndexToEntY48() yields a non-zero result for the
 + * value and cleared otherwise.
 + */
 +static FASTCALL void
 +fbStore_g1_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int n;
 +
 +    for (n = 0; n < width; n++)
 +    {
 +	const int  pos  = n + x;
 +	uint32_t  *word = ((uint32_t *) bits) + (pos >> 5);
 +	uint32_t   bit  = FbStipMask(pos & 0x1f, 1);
 +	uint32_t   on   = 0;
 +
 +	if (miIndexToEntY48(indexed,values[n]))
 +	    on = bit;
 +
 +	/* read-modify-write: only this pixel's bit in the word changes */
 +	WRITE(word, (READ(word) & ~bit) | on);
 +    }
 +}
 +
++static FASTCALL void
++fbStore_yuy2_32 (pixman_image_t *image,
++	         uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
++{
++    /* [AGP] Unimplemented: empty stub, every argument is ignored and nothing is written to bits. */
++}
++
++static FASTCALL void
++fbStore_yuy2_64 (pixman_image_t *image,
++	         uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
++{
++    /* [AGP] Unimplemented: empty stub, every argument is ignored and nothing is written to bits. */
++}
++
++static FASTCALL void
++fbStore_yv12_32 (pixman_image_t *image,
++	         uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
++{
++    /* [AGP] Unimplemented: empty stub, every argument is ignored and nothing is written to bits. */
++}
++
++static FASTCALL void
++fbStore_yv12_64 (pixman_image_t *image,
++	         uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
++{
++    /* [AGP] Unimplemented: empty stub, every argument is ignored and nothing is written to bits. */
++}
++
 +/* end of handcoded fetch/store functions */
 +
 +#define image ((pixman_image_t *)pict)
 +
 +/*
 + * Fill buffer with width copies of the pixel at (0, 0) of pict, read
 + * through the fetchPixelProc_32 returned by fetchPixelProcForPicture_32().
 + * x, y, mask and maskBits are not used.  The function has internal
 + * linkage only when PIXMAN_FB_ACCESSORS is defined.
 + */
 +#ifdef PIXMAN_FB_ACCESSORS
 +static
 +#endif
 +void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +    fetchPixelProc_32 fetch_pixel = fetchPixelProcForPicture_32(pict);
 +    const uint32_t solid = fetch_pixel(pict, 0, 0);
 +    int n;
 +
 +    for (n = 0; n < width; n++)
 +	buffer[n] = solid;
 +
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * Wide (64-bit) counterpart of fbFetchSolid: fill buffer with width
 + * copies of the pixel at (0, 0) of pict, read through the
 + * fetchPixelProc_64 returned by fetchPixelProcForPicture_64().
 + * x, y, mask and maskBits are not used.  The function has internal
 + * linkage only when PIXMAN_FB_ACCESSORS is defined.
 + */
 +#ifdef PIXMAN_FB_ACCESSORS
 +static
 +#endif
 +void fbFetchSolid64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    fetchPixelProc_64 fetch_pixel = fetchPixelProcForPicture_64(pict);
 +    const uint64_t solid = fetch_pixel(pict, 0, 0);
 +    int n;
 +
 +    for (n = 0; n < width; n++)
 +	buffer[n] = solid;
 +
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * Fetch width pixels starting at (x, y) of pict into buffer by calling
 + * the fetchProc_32 returned by fetchProcForPicture_32().  mask and
 + * maskBits are not used.
 + */
 +static void fbFetch(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +    (fetchProcForPicture_32(pict)) (pict, x, y, width, buffer);
 +}
 +
 +/*
 + * Fetch width wide (64-bit) pixels starting at (x, y) of pict into
 + * buffer by calling the fetchProc_64 returned by
 + * fetchProcForPicture_64().  mask and maskBits are not used.
 + */
 +static void fbFetch64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    (fetchProcForPicture_64(pict)) (pict, x, y, width, buffer);
 +}
 +/*
 + * Choose the symbol names behind PIXMAN_COMPOSITE_RECT_GENERAL and its
 + * _WIDE variant: the *_accessors names when PIXMAN_FB_ACCESSORS is
 + * defined, the *_no_accessors names otherwise.
 + */
 +#ifdef PIXMAN_FB_ACCESSORS
 +#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_accessors
 +#define PIXMAN_COMPOSITE_RECT_GENERAL_WIDE pixman_composite_rect_general_wide_accessors
 +#else /* !PIXMAN_FB_ACCESSORS */
 +#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
 +#define PIXMAN_COMPOSITE_RECT_GENERAL_WIDE pixman_composite_rect_general_wide_no_accessors
 +#endif /* PIXMAN_FB_ACCESSORS */
 +/*
 + * Cached state for sampling a gradient: the pair of colour stops that
 + * brackets the most recently requested position, plus the data needed
 + * to interpolate between them.  Filled in by _gradient_walker_init()
 + * and _gradient_walker_reset().
 + */
 +typedef struct
 +{
 +    uint32_t        left_ag;	/* left colour: 8-bit alpha in bits 16-23, green in bits 0-7 */
 +    uint32_t        left_rb;	/* left colour: 8-bit red in bits 16-23, blue in bits 0-7 */
 +    uint32_t        right_ag;	/* right colour, packed like left_ag */
 +    uint32_t        right_rb;	/* right colour, packed like left_rb */
 +    int32_t       left_x;	/* start of the cached interval (inclusive) */
 +    int32_t       right_x;	/* end of the cached interval (exclusive) */
 +    int32_t       stepper;	/* about (1 << 24) / (right_x - left_x), or 0 when no interpolation is needed */
 +    
 +    pixman_gradient_stop_t	*stops;	/* the gradient's stop array (not copied) */
 +    int                      num_stops;	/* number of entries in stops */
 +    unsigned int             spread;	/* repeat mode, compared against PIXMAN_REPEAT_* */
 +    
 +    int		  need_reset;	/* TRUE until the first _gradient_walker_reset() */
 +} GradientWalker;
 +
 +/*
 + * Bind walker to gradient's stop array and the given repeat mode, and
 + * mark it as needing a reset so that the first sample recomputes the
 + * cached interval.  The interval and colour fields only receive
 + * placeholder values here.
 + */
 +static void
 +_gradient_walker_init (GradientWalker  *walker,
 +		       gradient_t      *gradient,
 +		       unsigned int     spread)
 +{
 +    /* what to walk, and how it repeats */
 +    walker->stops     = gradient->stops;
 +    walker->num_stops = gradient->n_stops;
 +    walker->spread    = spread;
 +
 +    /* placeholder interval [0, 0x10000) with all-zero end colours */
 +    walker->left_x    = 0;
 +    walker->right_x   = 0x10000;
 +    walker->left_ag   = walker->left_rb  = 0;
 +    walker->right_ag  = walker->right_rb = 0;
 +    walker->stepper   = 0;
 +
 +    walker->need_reset = TRUE;
 +}
 +/*
 + * Locate the pair of colour stops that brackets position pos under the
 + * walker's repeat mode and cache the result in the walker: the interval
 + * [left_x, right_x), the two end colours packed as 8-bit alpha/green and
 + * red/blue pairs, and the interpolation step.  Clears need_reset.
 + *
 + * For the NORMAL and REFLECT modes the stop search uses the low 16 bits
 + * of pos and the interval is then shifted back by (pos - x).  For PAD and
 + * the default (no repeat) mode, positions before the first or after the
 + * last stop get an interval bounded by INT32_MIN / INT32_MAX; in the
 + * default mode both ends of such an interval are transparent black.
 + *
 + * NOTE(review): stops[0] / stops[count-1] are read without checking
 + * count, so this presumably relies on the gradient having at least one
 + * stop -- confirm against the code that creates gradients.
 + */
 +static void
 +_gradient_walker_reset (GradientWalker  *walker,
 +                        pixman_fixed_32_32_t     pos)
 +{
 +    int32_t                  x, left_x, right_x;
 +    pixman_color_t          *left_c, *right_c;
 +    int                      n, count = walker->num_stops;
 +    pixman_gradient_stop_t *      stops = walker->stops;
 +    
 +    static const pixman_color_t   transparent_black = { 0, 0, 0, 0 };
 +    
 +    switch (walker->spread)
 +    {
 +    case PIXMAN_REPEAT_NORMAL:
 +	x = (int32_t)pos & 0xFFFF; /* position within one 0x10000-wide period */
 +	for (n = 0; n < count; n++)
 +	    if (x < stops[n].x)
 +		break;
 +	if (n == 0) {
 +	    left_x =  stops[count-1].x - 0x10000; /* wrap: last stop of the previous period */
 +	    left_c = &stops[count-1].color;
 +	} else {
 +	    left_x =  stops[n-1].x;
 +	    left_c = &stops[n-1].color;
 +	}
 +	
 +	if (n == count) {
 +	    right_x =  stops[0].x + 0x10000; /* wrap: first stop of the next period */
 +	    right_c = &stops[0].color;
 +	} else {
 +	    right_x =  stops[n].x;
 +	    right_c = &stops[n].color;
 +	}
 +	left_x  += (pos - x);
 +	right_x += (pos - x);
 +	break;
 +	
 +    case PIXMAN_REPEAT_PAD:
 +	for (n = 0; n < count; n++)
 +	    if (pos < stops[n].x)
 +		break;
 +	
 +	if (n == 0) {
 +	    left_x =  INT32_MIN;
 +	    left_c = &stops[0].color;
 +	} else {
 +	    left_x =  stops[n-1].x;
 +	    left_c = &stops[n-1].color;
 +	}
 +	
 +	if (n == count) {
 +	    right_x =  INT32_MAX;
 +	    right_c = &stops[n-1].color;
 +	} else {
 +	    right_x =  stops[n].x;
 +	    right_c = &stops[n].color;
 +	}
 +	break;
 +	
 +    case PIXMAN_REPEAT_REFLECT:
 +	x = (int32_t)pos & 0xFFFF;
 +	if ((int32_t)pos & 0x10000) /* bit 16 set: this period runs mirrored */
 +	    x = 0x10000 - x;
 +	for (n = 0; n < count; n++)
 +	    if (x < stops[n].x)
 +		break;
 +	
 +	if (n == 0) {
 +	    left_x =  -stops[0].x;
 +	    left_c = &stops[0].color;
 +	} else {
 +	    left_x =  stops[n-1].x;
 +	    left_c = &stops[n-1].color;
 +	}
 +	
 +	if (n == count) {
 +	    right_x = 0x20000 - stops[n-1].x;
 +	    right_c = &stops[n-1].color;
 +	} else {
 +	    right_x =  stops[n].x;
 +	    right_c = &stops[n].color;
 +	}
 +	
 +	if ((int32_t)pos & 0x10000) { /* mirrored period: swap and flip the interval back */
 +	    pixman_color_t  *tmp_c;
 +	    int32_t          tmp_x;
 +	    
 +	    tmp_x   = 0x10000 - right_x;
 +	    right_x = 0x10000 - left_x;
 +	    left_x  = tmp_x;
 +	    
 +	    tmp_c   = right_c;
 +	    right_c = left_c;
 +	    left_c  = tmp_c;
 +	    
 +	    x = 0x10000 - x;
 +	}
 +	left_x  += (pos - x);
 +	right_x += (pos - x);
 +	break;
 +	
 +    default:  /* RepeatNone */
 +	for (n = 0; n < count; n++)
 +	    if (pos < stops[n].x)
 +		break;
 +	
 +	if (n == 0)
 +	{
 +	    left_x  =  INT32_MIN;
 +	    right_x =  stops[0].x;
 +	    left_c  = right_c = (pixman_color_t*) &transparent_black;
 +	}
 +	else if (n == count)
 +	{
 +	    left_x  = stops[n-1].x;
 +	    right_x = INT32_MAX;
 +	    left_c  = right_c = (pixman_color_t*) &transparent_black;
 +	}
 +	else
 +	{
 +	    left_x  =  stops[n-1].x;
 +	    right_x =  stops[n].x;
 +	    left_c  = &stops[n-1].color;
 +	    right_c = &stops[n].color;
 +	}
 +    }
 +    
 +    walker->left_x   = left_x;
 +    walker->right_x  = right_x;
 +    walker->left_ag  = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8); /* keep the top 8 bits of each 16-bit channel */
 +    walker->left_rb  = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
 +    walker->right_ag = ((right_c->alpha >> 8) << 16)  | (right_c->green >> 8);
 +    walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
 +    
 +    if ( walker->left_x == walker->right_x                ||
 +	 ( walker->left_ag == walker->right_ag &&
 +	   walker->left_rb == walker->right_rb )   )
 +    {
 +	walker->stepper = 0; /* empty or constant-colour interval: nothing to interpolate; the INT32_MIN/INT32_MAX intervals above all land here */
 +    }
 +    else
 +    {
 +	int32_t width = right_x - left_x;
 +	walker->stepper = ((1 << 24) + width/2)/width; /* (1 << 24) / width, rounded to nearest */
 +    }
 +    
 +    walker->need_reset = FALSE;
 +}
 +/* True when walker w must be reset before sampling position x: its
 + * need_reset flag is set, or x lies outside [left_x, right_x). */
 +#define  GRADIENT_WALKER_NEED_RESET(w,x)				\
 +    ( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x)
 +
 +#undef image
 +
 +/*
 + * Return the gradient colour at position x as a 32-bit pixel with alpha
 + * in bits 24-31, red in 16-23, green in 8-15 and blue in 0-7, with the
 + * colour channels multiplied by the interpolated alpha.
 + *
 + * The walker is reset first whenever GRADIENT_WALKER_NEED_RESET(walker, x)
 + * is true, so callers do not have to perform that check themselves.
 + */
 +static uint32_t
 +_gradient_walker_pixel (GradientWalker  *walker,
 +                        pixman_fixed_32_32_t     x)
 +{
 +    int  dist, idist;
 +    uint32_t  t1, t2, a, color;
 +    
 +    if (GRADIENT_WALKER_NEED_RESET (walker, x))
 +        _gradient_walker_reset (walker, x);
 +    
 +    dist  = ((int)(x - walker->left_x)*walker->stepper) >> 16; /* weight of the right colour, roughly 256 * (x - left_x) / (right_x - left_x) */
 +    idist = 256 - dist; /* weight of the left colour */
 +    
 +    /* combined INTERPOLATE and premultiply */
 +    t1 = walker->left_rb*idist + walker->right_rb*dist;
 +    t1 = (t1 >> 8) & 0xff00ff; /* interpolated red and blue, 8 bits each */
 +    
 +    t2  = walker->left_ag*idist + walker->right_ag*dist;
 +    t2 &= 0xff00ff00; /* interpolated alpha (bits 24-31) and green (bits 8-15) */
 +    
 +    color = t2 & 0xff000000;
 +    a     = t2 >> 24;
 +    
 +    t1  = t1*a + 0x800080; /* multiply red/blue by alpha; the bias and the fold below round the divide by 255 */
 +    t1  = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
 +    
 +    t2  = (t2 >> 8)*a + 0x800080; /* same for green */
 +    t2  = (t2 + ((t2 >> 8) & 0xff00ff));
 +    
 +    return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
 +}
 +
 +/*
 + * Fill buffer[0 .. width-1] with the colours of a source picture (solid
 + * fill, linear, radial or conical gradient) for the scanline that starts
 + * at destination pixel (x, y).
 + *
 + * pict     - the source image; pict->common.type selects the code path
 + * x, y     - first pixel of the scanline; sampling uses the pixel centre
 + *            and, when pict->common.transform is set, the transformed
 + *            position
 + * width    - number of pixels to produce
 + * buffer   - output pixels
 + * mask     - optional per-pixel mask; when non-NULL, gradient pixels whose
 + *            mask value has none of maskBits set are skipped and their
 + *            buffer entry is left untouched.  The solid path and the
 + *            SOURCE_IMAGE_CLASS_VERTICAL linear paths ignore the mask.
 + * maskBits - bits of *mask that enable a pixel
 + *
 + * Returns early, writing nothing, when pixman_transform_point_3d() fails.
 + *
 + * NOTE(review): the radial paths divide by 2 * radial->A without checking
 + * for zero; presumably such gradients are never created -- confirm.
 + */
 +static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +    GradientWalker  walker;
 +    uint32_t       *end = buffer + width;
 +    gradient_t	    *gradient;
 +    
 +    if (pict->common.type == SOLID)
 +    {
 +	register uint32_t color = ((solid_fill_t *)pict)->color;
 +	
 +	while (buffer < end)
 +	    *(buffer++) = color;
 +	
 +	return;
 +    }
 +    
 +    gradient = (gradient_t *)pict;
 +    
 +    _gradient_walker_init (&walker, gradient, pict->common.repeat);
 +    
 +    if (pict->common.type == LINEAR) {
 +	pixman_vector_t v, unit;
 +	pixman_fixed_32_32_t l;
 +	pixman_fixed_48_16_t dx, dy, a, b, off;
 +	linear_gradient_t *linear = (linear_gradient_t *)pict;
 +	
 +        /* reference point is the center of the pixel */
 +        v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
 +        v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
 +        v.vector[2] = pixman_fixed_1;
 +        if (pict->common.transform) {
 +            if (!pixman_transform_point_3d (pict->common.transform, &v))
 +                return;
 +            unit.vector[0] = pict->common.transform->matrix[0][0];
 +            unit.vector[1] = pict->common.transform->matrix[1][0];
 +            unit.vector[2] = pict->common.transform->matrix[2][0];
 +        } else {
 +            unit.vector[0] = pixman_fixed_1;
 +            unit.vector[1] = 0;
 +            unit.vector[2] = 0;
 +        }
 +	
 +        dx = linear->p2.x - linear->p1.x;
 +        dy = linear->p2.y - linear->p1.y;
 +        l = dx*dx + dy*dy;
 +        /* a, b and off are only set (and only used) when the gradient
 +         * vector has non-zero length */
 +        if (l != 0) {
 +            a = (dx << 32) / l;
 +            b = (dy << 32) / l;
 +            off = (-a*linear->p1.x - b*linear->p1.y)>>16;
 +        }
 +        if (l == 0  || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)) {
 +            pixman_fixed_48_16_t inc, t;
 +            /* affine transformation only */
 +            if (l == 0) {
 +                t = 0;
 +                inc = 0;
 +            } else {
 +                t = ((a*v.vector[0] + b*v.vector[1]) >> 16) + off;
 +                inc = (a * unit.vector[0] + b * unit.vector[1]) >> 16;
 +            }
 +	    
 +	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
 +	    {
 +		register uint32_t color;
 +		
 +		/* one colour for the whole scanline */
 +		color = _gradient_walker_pixel( &walker, t );
 +		while (buffer < end)
 +		    *(buffer++) = color;
 +	    }
 +	    else
 +	    {
 +                if (!mask) {
 +                    while (buffer < end)
 +                    {
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +                        buffer += 1;
 +                        t      += inc;
 +                    }
 +                } else {
 +                    while (buffer < end) {
 +                        if (*mask++ & maskBits)
 +                        {
 +			    *(buffer) = _gradient_walker_pixel (&walker, t);
 +                        }
 +                        buffer += 1;
 +                        t      += inc;
 +                    }
 +                }
 +	    }
 +	}
 +	else /* projective transformation */
 +	{
 +	    pixman_fixed_48_16_t t;
 +	    
 +	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
 +	    {
 +		register uint32_t color;
 +		
 +		if (v.vector[2] == 0)
 +		{
 +		    t = 0;
 +		}
 +		else
 +		{
 +		    pixman_fixed_48_16_t x, y;
 +		    
 +		    x = ((pixman_fixed_48_16_t) v.vector[0] << 16) / v.vector[2];
 +		    y = ((pixman_fixed_48_16_t) v.vector[1] << 16) / v.vector[2];
 +		    t = ((a * x + b * y) >> 16) + off;
 +		}
 +		
 + 		color = _gradient_walker_pixel( &walker, t );
 +		while (buffer < end)
 +		    *(buffer++) = color;
 +	    }
 +	    else
 +	    {
 +		while (buffer < end)
 +		{
 +		    if (!mask || *mask++ & maskBits)
 +		    {
 +			if (v.vector[2] == 0) {
 +			    t = 0;
 +			} else {
 +			    pixman_fixed_48_16_t x, y;
 +			    x = ((pixman_fixed_48_16_t)v.vector[0] << 16) / v.vector[2];
 +			    y = ((pixman_fixed_48_16_t)v.vector[1] << 16) / v.vector[2];
 +			    t = ((a*x + b*y) >> 16) + off;
 +			}
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    ++buffer;
 +		    v.vector[0] += unit.vector[0];
 +		    v.vector[1] += unit.vector[1];
 +		    v.vector[2] += unit.vector[2];
 +		}
 +            }
 +        }
 +    } else {
 +	
 +/*
 + * In the radial gradient problem we are given two circles (c₁,r₁) and
 + * (c₂,r₂) that define the gradient itself. Then, for any point p, we
 + * must compute the value(s) of t within [0.0, 1.0] representing the
 + * circle(s) that would color the point.
 + *
 + * There are potentially two values of t since the point p can be
 + * colored by both sides of the circle, (which happens whenever one
 + * circle is not entirely contained within the other).
 + *
 + * If we solve for a value of t that is outside of [0.0, 1.0] then we
 + * use the extend mode (NONE, REPEAT, REFLECT, or PAD) to map to a
 + * value within [0.0, 1.0].
 + *
 + * Here is an illustration of the problem:
 + *
 + *              p₂
 + *           p  •
 + *           •   ╲
 + *        ·       ╲r₂
 + *  p₁ ·           ╲
 + *  •              θ╲
 + *   ╲             ╌╌•
 + *    ╲r₁        ·   c₂
 + *    θ╲    ·
 + *    ╌╌•
 + *      c₁
 + *
 + * Given (c₁,r₁), (c₂,r₂) and p, we must find an angle θ such that two
 + * points p₁ and p₂ on the two circles are collinear with p. Then, the
 + * desired value of t is the ratio of the length of p₁p to the length
 + * of p₁p₂.
 + *
 + * So, we have six unknown values: (p₁x, p₁y), (p₂x, p₂y), θ and t.
 + * We can also write six equations that constrain the problem:
 + *
 + * Point p₁ is a distance r₁ from c₁ at an angle of θ:
 + *
 + *	1. p₁x = c₁x + r₁·cos θ
 + *	2. p₁y = c₁y + r₁·sin θ
 + *
 + * Point p₂ is a distance r₂ from c₂ at an angle of θ:
 + *
 + *	3. p₂x = c₂x + r₂·cos θ
 + *	4. p₂y = c₂y + r₂·sin θ
 + *
 + * Point p lies at a fraction t along the line segment p₁p₂:
 + *
 + *	5. px = t·p₂x + (1-t)·p₁x
 + *	6. py = t·p₂y + (1-t)·p₁y
 + *
 + * To solve, first substitute 1-4 into 5 and 6:
 + *
 + * px = t·(c₂x + r₂·cos θ) + (1-t)·(c₁x + r₁·cos θ)
 + * py = t·(c₂y + r₂·sin θ) + (1-t)·(c₁y + r₁·sin θ)
 + *
 + * Then solve each for cos θ and sin θ expressed as a function of t:
 + *
 + * cos θ = (-(c₂x - c₁x)·t + (px - c₁x)) / ((r₂-r₁)·t + r₁)
 + * sin θ = (-(c₂y - c₁y)·t + (py - c₁y)) / ((r₂-r₁)·t + r₁)
 + *
 + * To simplify this a bit, we define new variables for several of the
 + * common terms as shown below:
 + *
 + *              p₂
 + *           p  •
 + *           •   ╲
 + *        ·  ┆    ╲r₂
 + *  p₁ ·     ┆     ╲
 + *  •     pdy┆      ╲
 + *   ╲       ┆       •c₂
 + *    ╲r₁    ┆   ·   ┆
 + *     ╲    ·┆       ┆cdy
 + *      •╌╌╌╌┴╌╌╌╌╌╌╌┘
 + *    c₁  pdx   cdx
 + *
 + * cdx = (c₂x - c₁x)
 + * cdy = (c₂y - c₁y)
 + *  dr =  r₂-r₁
 + * pdx =  px - c₁x
 + * pdy =  py - c₁y
 + *
 + * Note that cdx, cdy, and dr do not depend on point p at all, so can
 + * be pre-computed for the entire gradient. The simplified equations
 + * are now:
 + *
 + * cos θ = (-cdx·t + pdx) / (dr·t + r₁)
 + * sin θ = (-cdy·t + pdy) / (dr·t + r₁)
 + *
 + * Finally, to get a single function of t and eliminate the last
 + * unknown θ, we use the identity sin²θ + cos²θ = 1. First, square
 + * each equation, (we knew a quadratic was coming since it must be
 + * possible to obtain two solutions in some cases):
 + *
 + * cos²θ = (cdx²t² - 2·cdx·pdx·t + pdx²) / (dr²·t² + 2·r₁·dr·t + r₁²)
 + * sin²θ = (cdy²t² - 2·cdy·pdy·t + pdy²) / (dr²·t² + 2·r₁·dr·t + r₁²)
 + *
 + * Then add both together, set the result equal to 1, and express as a
 + * standard quadratic equation in t of the form At² + Bt + C = 0
 + *
 + * (cdx² + cdy² - dr²)·t² - 2·(cdx·pdx + cdy·pdy + r₁·dr)·t + (pdx² + pdy² - r₁²) = 0
 + *
 + * In other words:
 + *
 + * A = cdx² + cdy² - dr²
 + * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
 + * C = pdx² + pdy² - r₁²
 + *
 + * And again, notice that A does not depend on p, so can be
 + * precomputed. From here we just use the quadratic formula to solve
 + * for t:
 + *
 + * t = (-B ± ⎷(B² - 4·A·C)) / 2·A
 + *
 + * The code below clamps a negative discriminant to zero and takes the
 + * "-" root when A is negative and the "+" root otherwise.
 + */
 +        /* radial or conical */
 +        pixman_bool_t affine = TRUE;
 +        double cx = 1.;
 +        double cy = 0.;
 +        double cz = 0.;
 +	double rx = x + 0.5;
 +	double ry = y + 0.5;
 +        double rz = 1.;
 +	
 +        if (pict->common.transform) {
 +            pixman_vector_t v;
 +            /* reference point is the center of the pixel */
 +            v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
 +            v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
 +            v.vector[2] = pixman_fixed_1;
 +            if (!pixman_transform_point_3d (pict->common.transform, &v))
 +                return;
 +	    
 +            cx = pict->common.transform->matrix[0][0]/65536.;
 +            cy = pict->common.transform->matrix[1][0]/65536.;
 +            cz = pict->common.transform->matrix[2][0]/65536.;
 +            rx = v.vector[0]/65536.;
 +            ry = v.vector[1]/65536.;
 +            rz = v.vector[2]/65536.;
 +            affine = pict->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1;
 +        }
 +	
 +        if (pict->common.type == RADIAL) {
 +	    radial_gradient_t *radial = (radial_gradient_t *)pict;
 +            if (affine) {
 +                while (buffer < end) {
 +		    if (!mask || *mask++ & maskBits)
 +		    {
 +			double pdx, pdy;
 +			double B, C;
 +			double det;
 +			double c1x = radial->c1.x / 65536.0;
 +			double c1y = radial->c1.y / 65536.0;
 +			double r1  = radial->c1.radius / 65536.0;
 +                        pixman_fixed_48_16_t t;
 +			
 +			pdx = rx - c1x;
 +			pdy = ry - c1y;
 +			
 +			B = -2 * (  pdx * radial->cdx
 +				    + pdy * radial->cdy
 +				    + r1 * radial->dr);
 +			C = (pdx * pdx + pdy * pdy - r1 * r1);
 +			
 +                        det = (B * B) - (4 * radial->A * C);
 +			if (det < 0.0)
 +			    det = 0.0;
 +			
 +			if (radial->A < 0)
 +			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
 +			else
 +			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
 +			
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    ++buffer;
 +		    
 +                    rx += cx;
 +                    ry += cy;
 +                }
 +            } else {
 +		/* projective */
 +                while (buffer < end) {
 +		    if (!mask || *mask++ & maskBits)
 +		    {
 +			double pdx, pdy;
 +			double B, C;
 +			double det;
 +			double c1x = radial->c1.x / 65536.0;
 +			double c1y = radial->c1.y / 65536.0;
 +			double r1  = radial->c1.radius / 65536.0;
 +                        pixman_fixed_48_16_t t;
 +			double x, y;
 +			
 +			if (rz != 0) {
 +			    x = rx/rz;
 +			    y = ry/rz;
 +			} else {
 +			    x = y = 0.;
 +			}
 +			
 +			pdx = x - c1x;
 +			pdy = y - c1y;
 +			
 +			B = -2 * (  pdx * radial->cdx
 +				    + pdy * radial->cdy
 +				    + r1 * radial->dr);
 +			C = (pdx * pdx + pdy * pdy - r1 * r1);
 +			
 +                        det = (B * B) - (4 * radial->A * C);
 +			if (det < 0.0)
 +			    det = 0.0;
 +			
 +			if (radial->A < 0)
 +			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
 +			else
 +			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
 +			
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    ++buffer;
 +		    
 +                    rx += cx;
 +                    ry += cy;
 +		    rz += cz;
 +                }
 +            }
 +        } else /* SourcePictTypeConical */ {
 +	    conical_gradient_t *conical = (conical_gradient_t *)pict;
 +	    /*
 +	     * conical->angle is a 16.16 fixed-point angle in degrees.  It is
 +	     * added to the result of atan2(), which is in radians, so it has
 +	     * to be converted with pi/180 (not just 1/180) before the sum is
 +	     * scaled by 65536 / (2*pi) into the walker's 16.16 position.
 +	     */
 +            double a = (conical->angle * M_PI) / (180. * 65536);
 +            if (affine) {
 +                rx -= conical->center.x/65536.;
 +                ry -= conical->center.y/65536.;
 +		
 +                while (buffer < end) {
 +		    double angle;
 +		    
 +                    if (!mask || *mask++ & maskBits)
 +		    {
 +                        pixman_fixed_48_16_t   t;
 +			
 +                        angle = atan2(ry, rx) + a;
 +			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
 +			
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    
 +                    ++buffer;
 +                    rx += cx;
 +                    ry += cy;
 +                }
 +            } else {
 +                while (buffer < end) {
 +                    double x, y;
 +                    double angle;
 +		    
 +                    if (!mask || *mask++ & maskBits)
 +                    {
 +			pixman_fixed_48_16_t  t;
 +			
 +			if (rz != 0) {
 +			    x = rx/rz;
 +			    y = ry/rz;
 +			} else {
 +			    x = y = 0.;
 +			}
 +			x -= conical->center.x/65536.;
 +			y -= conical->center.y/65536.;
 +			angle = atan2(y, x) + a;
 +			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
 +			
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    
 +                    ++buffer;
 +                    rx += cx;
 +                    ry += cy;
 +                    rz += cz;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +    uint32_t     *bits;
 +    int32_t    stride;
 +    fetchPixelProc_32   fetch;
 +    pixman_vector_t	v;
 +    pixman_vector_t  unit;
 +    int         i;
 +    pixman_box16_t box;
 +    pixman_bool_t affine = TRUE;
 +    
 +    fetch = fetchPixelProcForPicture_32(pict);
 +    
 +    bits = pict->bits;
 +    stride = pict->rowstride;
 +    
 +    /* reference point is the center of the pixel */
 +    v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1 / 2;
 +    v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1 / 2;
 +    v.vector[2] = pixman_fixed_1;
 +    
 +    /* when using convolution filters one might get here without a transform */
 +    if (pict->common.transform)
 +    {
 +        if (!pixman_transform_point_3d (pict->common.transform, &v))
 +	{
 +            fbFinishAccess (pict->pDrawable);
 +            return;
 +        }
 +        unit.vector[0] = pict->common.transform->matrix[0][0];
 +        unit.vector[1] = pict->common.transform->matrix[1][0];
 +        unit.vector[2] = pict->common.transform->matrix[2][0];
 +        affine = v.vector[2] == pixman_fixed_1 && unit.vector[2] == 0;
 +    }
 +    else
 +    {
 +        unit.vector[0] = pixman_fixed_1;
 +        unit.vector[1] = 0;
 +        unit.vector[2] = 0;
 +    }
 +    
 +    if (pict->common.filter == PIXMAN_FILTER_NEAREST || pict->common.filter == PIXMAN_FILTER_FAST)
 +    {
 +        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL) {
 +            if (pixman_region_n_rects (pict->common.src_clip) == 1) {
 +		for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = MOD(DIV(v.vector[1],v.vector[2]), pict->height);
 +				x = MOD(DIV(v.vector[0],v.vector[2]), pict->width);
 +			    } else {
 +				y = MOD(v.vector[1]>>16, pict->height);
 +				x = MOD(v.vector[0]>>16, pict->width);
 +			    }
 +			    *(buffer + i) = fetch(pict, x, y);
 +			}
 +		    }
 +		    
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = MOD(DIV(v.vector[1],v.vector[2]), pict->height);
 +				x = MOD(DIV(v.vector[0],v.vector[2]), pict->width);
 +			    } else {
 +				y = MOD(v.vector[1]>>16, pict->height);
 +				x = MOD(v.vector[0]>>16, pict->width);
 +			    }
 +			    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box))
 +				*(buffer + i) = fetch (pict, x, y);
 +			    else
 +				*(buffer + i) = 0;
 +			}
 +		    }
 +		    
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        } else {
 +            if (pixman_region_n_rects(pict->common.src_clip) == 1) {
 +                box = pict->common.src_clip->extents;
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = DIV(v.vector[1],v.vector[2]);
 +				x = DIV(v.vector[0],v.vector[2]);
 +			    } else {
 +				y = v.vector[1]>>16;
 +				x = v.vector[0]>>16;
 +			    }
 +			    *(buffer + i) = ((x < box.x1) | (x >= box.x2) | (y < box.y1) | (y >= box.y2)) ?
 +				0 : fetch(pict, x, y);
 +			}
 +		    }
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +                    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = DIV(v.vector[1],v.vector[2]);
 +				x = DIV(v.vector[0],v.vector[2]);
 +			    } else {
 +				y = v.vector[1]>>16;
 +				x = v.vector[0]>>16;
 +			    }
 +			    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box))
 +				*(buffer + i) = fetch(pict, x, y);
 +			    else
 +				*(buffer + i) = 0;
 +			}
 +		    }
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        }
 +    } else if (pict->common.filter == PIXMAN_FILTER_BILINEAR	||
 +	       pict->common.filter == PIXMAN_FILTER_GOOD	||
 +	       pict->common.filter == PIXMAN_FILTER_BEST)
 +    {
 +        /* adjust vector for maximum contribution at 0.5, 0.5 of each texel. */
 +        v.vector[0] -= v.vector[2] / 2;
 +        v.vector[1] -= v.vector[2] / 2;
 +        unit.vector[0] -= unit.vector[2] / 2;
 +        unit.vector[1] -= unit.vector[2] / 2;
 +	
 +        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL) {
 +            if (pixman_region_n_rects(pict->common.src_clip) == 1) {
 +                for (i = 0; i < width; ++i) {
 +                    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    uint32_t ft, fb;
 +			    
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
 +			    
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
 +			    
 +			    x1 = MOD (x1, pict->width);
 +			    x2 = MOD (x2, pict->width);
 +			    y1 = MOD (y1, pict->height);
 +			    y2 = MOD (y2, pict->height);
 +			    
 +			    tl = fetch(pict, x1, y1);
 +			    tr = fetch(pict, x2, y1);
 +			    bl = fetch(pict, x1, y2);
 +			    br = fetch(pict, x2, y2);
 +			    
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    uint32_t ft, fb;
 +			    
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
 +			    
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
 +			    
 +			    x1 = MOD (x1, pict->width);
 +			    x2 = MOD (x2, pict->width);
 +			    y1 = MOD (y1, pict->height);
 +			    y2 = MOD (y2, pict->height);
 +			    
 +			    tl = pixman_region_contains_point(pict->common.src_clip, x1, y1, &box)
 +				? fetch(pict, x1, y1) : 0;
 +			    tr = pixman_region_contains_point(pict->common.src_clip, x2, y1, &box)
 +				? fetch(pict, x2, y1) : 0;
 +			    bl = pixman_region_contains_point(pict->common.src_clip, x1, y2, &box)
 +				? fetch(pict, x1, y2) : 0;
 +			    br = pixman_region_contains_point(pict->common.src_clip, x2, y2, &box)
 +				? fetch(pict, x2, y2) : 0;
 +			    
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +		    
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        } else {
 +            if (pixman_region_n_rects(pict->common.src_clip) == 1) {
 +                box = pict->common.src_clip->extents;
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    pixman_bool_t x1_out, x2_out, y1_out, y2_out;
 +			    uint32_t ft, fb;
 +			    
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
 +			    
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
 +			    
 +			    x1_out = (x1 < box.x1) | (x1 >= box.x2);
 +			    x2_out = (x2 < box.x1) | (x2 >= box.x2);
 +			    y1_out = (y1 < box.y1) | (y1 >= box.y2);
 +			    y2_out = (y2 < box.y1) | (y2 >= box.y2);
 +			    
 +			    tl = x1_out|y1_out ? 0 : fetch(pict, x1, y1);
 +			    tr = x2_out|y1_out ? 0 : fetch(pict, x2, y1);
 +			    bl = x1_out|y2_out ? 0 : fetch(pict, x1, y2);
 +			    br = x2_out|y2_out ? 0 : fetch(pict, x2, y2);
 +			    
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +		    
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +                    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    uint32_t ft, fb;
 +			    
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
 +			    
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
 +			    
 +			    tl = pixman_region_contains_point(pict->common.src_clip, x1, y1, &box)
 +				? fetch(pict, x1, y1) : 0;
 +			    tr = pixman_region_contains_point(pict->common.src_clip, x2, y1, &box)
 +				? fetch(pict, x2, y1) : 0;
 +			    bl = pixman_region_contains_point(pict->common.src_clip, x1, y2, &box)
 +				? fetch(pict, x1, y2) : 0;
 +			    br = pixman_region_contains_point(pict->common.src_clip, x2, y2, &box)
 +				? fetch(pict, x2, y2) : 0;
 +			    
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +		    
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        }
 +    } else if (pict->common.filter == PIXMAN_FILTER_CONVOLUTION) {
 +        pixman_fixed_t *params = pict->common.filter_params;
 +        int32_t cwidth = pixman_fixed_to_int(params[0]);
 +        int32_t cheight = pixman_fixed_to_int(params[1]);
 +        int xoff = (params[0] - pixman_fixed_1) >> 1;
 +	int yoff = (params[1] - pixman_fixed_1) >> 1;
 +        params += 2;
 +        for (i = 0; i < width; ++i) {
 +	    if (!mask || mask[i] & maskBits)
 +	    {
 +		if (!v.vector[2]) {
 +		    *(buffer + i) = 0;
 +		} else {
 +		    int x1, x2, y1, y2, x, y;
 +		    int32_t srtot, sgtot, sbtot, satot;
 +		    pixman_fixed_t *p = params;
 +		    
 +		    if (!affine) {
 +			pixman_fixed_48_16_t tmp;
 +			tmp = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2] - xoff;
 +			x1 = pixman_fixed_to_int(tmp);
 +			tmp = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2] - yoff;
 +			y1 = pixman_fixed_to_int(tmp);
 +		    } else {
 +			x1 = pixman_fixed_to_int(v.vector[0] - xoff);
 +			y1 = pixman_fixed_to_int(v.vector[1] - yoff);
 +		    }
 +		    x2 = x1 + cwidth;
 +		    y2 = y1 + cheight;
 +		    
 +		    srtot = sgtot = sbtot = satot = 0;
 +		    
 +		    for (y = y1; y < y2; y++) {
 +			int ty = (pict->common.repeat == PIXMAN_REPEAT_NORMAL) ? MOD (y, pict->height) : y;
 +			for (x = x1; x < x2; x++) {
 +			    if (*p) {
 +				int tx = (pict->common.repeat == PIXMAN_REPEAT_NORMAL) ? MOD (x, pict->width) : x;
 +				if (pixman_region_contains_point (pict->common.src_clip, tx, ty, &box)) {
 +				    uint32_t c = fetch(pict, tx, ty);
 +				    
 +				    srtot += Red(c) * *p;
 +				    sgtot += Green(c) * *p;
 +				    sbtot += Blue(c) * *p;
 +				    satot += Alpha(c) * *p;
 +				}
 +			    }
 +			    p++;
 +			}
 +		    }
 +		    
 +		    satot >>= 16;
 +		    srtot >>= 16;
 +		    sgtot >>= 16;
 +		    sbtot >>= 16;
 +		    
 +		    if (satot < 0) satot = 0; else if (satot > 0xff) satot = 0xff;
 +		    if (srtot < 0) srtot = 0; else if (srtot > 0xff) srtot = 0xff;
 +		    if (sgtot < 0) sgtot = 0; else if (sgtot > 0xff) sgtot = 0xff;
 +		    if (sbtot < 0) sbtot = 0; else if (sbtot > 0xff) sbtot = 0xff;
 +		    
 +		    *(buffer + i) = ((satot << 24) |
 +				     (srtot << 16) |
 +				     (sgtot <<  8) |
 +				     (sbtot       ));
 +		}
 +	    }
 +            v.vector[0] += unit.vector[0];
 +            v.vector[1] += unit.vector[1];
 +            v.vector[2] += unit.vector[2];
 +        }
 +    }
 +    
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +
 +/*
 + * Fetch one scanline of `pict` and combine it with the alpha channel taken
 + * from pict->common.alpha_map.
 + *
 + * The colour scanline and the alpha-map scanline are both read through
 + * fbFetchTransformed(); the alpha map is sampled at (x, y) shifted by
 + * pict->common.alpha_origin.  For every pixel selected by mask/maskBits
 + * (or for every pixel when mask is NULL) the result takes its alpha from the
 + * top byte of the alpha-map pixel and its colour channels from the image
 + * pixel scaled by that alpha via div_255().  Pixels not selected by the mask
 + * are not modified by the combining loop.
 + *
 + * Without an alpha map this is a plain fbFetchTransformed() call.
 + *
 + * Scanlines wider than SCANLINE_BUFFER_LENGTH need a heap buffer for the
 + * alpha values.  If that allocation fails the function returns without
 + * touching `buffer`, because a void fetcher has no way to report the error.
 + */
 +static void fbFetchExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +    int i;
 +    uint32_t _alpha_buffer[SCANLINE_BUFFER_LENGTH];
 +    uint32_t *alpha_buffer = _alpha_buffer;
 +    
 +    if (!pict->common.alpha_map) {
 +        fbFetchTransformed (pict, x, y, width, buffer, mask, maskBits);
 +	return;
 +    }
 +    if (width > SCANLINE_BUFFER_LENGTH) {
 +        alpha_buffer = (uint32_t *) pixman_malloc_ab (width, sizeof(uint32_t));
 +	/* Allocation failed: bail out rather than write through NULL. */
 +	if (!alpha_buffer)
 +	    return;
 +    }
 +    
 +    fbFetchTransformed(pict, x, y, width, buffer, mask, maskBits);
 +    fbFetchTransformed((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x,
 +		       y - pict->common.alpha_origin.y, width, alpha_buffer,
 +		       mask, maskBits);
 +    for (i = 0; i < width; ++i) {
 +        if (!mask || mask[i] & maskBits)
 +	{
 +	    /* Unsigned so that `a << 24` cannot overflow a signed int. */
 +	    uint32_t a = alpha_buffer[i]>>24;
 +	    *(buffer + i) = (a << 24)
 +		| (div_255(Red(*(buffer + i)) * a) << 16)
 +		| (div_255(Green(*(buffer + i)) * a) << 8)
 +		| (div_255(Blue(*(buffer + i)) * a));
 +	}
 +    }
 +    
 +    /* Only release the buffer if it came from the heap. */
 +    if (alpha_buffer != _alpha_buffer)
 +        free(alpha_buffer);
 +}
 +
 +/*
 + * Write `width` 32-bit pixels from `buffer` into row `y` of `pict`,
 + * starting at column `x`, through the store routine that
 + * storeProcForPicture_32() selects for the image.
 + */
 +static void fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    storeProc_32 write_row = storeProcForPicture_32(pict);
 +    const pixman_indexed_t * palette = pict->indexed;
 +    const int32_t row_stride = pict->rowstride;
 +    /* rowstride is applied to a uint32_t pointer, i.e. counted in words. */
 +    uint32_t *row = pict->bits + y*row_stride;
 +    
 +    write_row((pixman_image_t *)pict, row, buffer, x, width, palette);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * Write `width` 64-bit pixels from `buffer` into row `y` of `pict`,
 + * starting at column `x`, through the store routine that
 + * storeProcForPicture_64() selects for the image.
 + */
 +static void fbStore64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    storeProc_64 write_row = storeProcForPicture_64(pict);
 +    const pixman_indexed_t * palette = pict->indexed;
 +    const int32_t row_stride = pict->rowstride;
 +    /* rowstride is applied to a uint32_t pointer, i.e. counted in words. */
 +    uint32_t *row = pict->bits + y*row_stride;
 +    
 +    write_row((pixman_image_t *)pict, row, buffer, x, width, palette);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * Store a 32-bit scanline into `pict` and, when the image has an external
 + * alpha map, into that alpha map as well.
 + *
 + * The same `buffer` is handed to both store routines; each is the routine
 + * storeProcForPicture_32() returns for the respective image.  The alpha map
 + * is addressed at (x, y) shifted by pict->common.alpha_origin.  Without an
 + * alpha map this is a plain fbStore().
 + */
 +static void fbStoreExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    uint32_t *bits, *alpha_bits;
 +    int32_t stride, astride;
 +    storeProc_32 store;
 +    storeProc_32 astore;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    const pixman_indexed_t * aindexed;
 +    
 +    if (!pict->common.alpha_map) {
 +        fbStore(pict, x, y, width, buffer);
 +	return;
 +    }
 +    
 +    store = storeProcForPicture_32(pict);
 +    astore = storeProcForPicture_32(pict->common.alpha_map);
 +    aindexed = pict->common.alpha_map->indexed;
 +    
 +    bits = pict->bits;
 +    stride = pict->rowstride;
 +    
 +    alpha_bits = pict->common.alpha_map->bits;
 +    astride = pict->common.alpha_map->rowstride;
 +    
 +    /* Advance both base pointers to the row being written. */
 +    bits       += y*stride;
 +    alpha_bits += (y - pict->common.alpha_origin.y)*astride;
 +    
 +    store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
 +    astore((pixman_image_t *)pict->common.alpha_map,
 +	   alpha_bits, buffer, x - pict->common.alpha_origin.x, width, aindexed);
 +    
 +    /* The alpha map hangs off `common`, as in every other access above. */
 +    fbFinishAccess (pict->common.alpha_map->pDrawable);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * Store a 64-bit scanline into `pict` and, when the image has an external
 + * alpha map, into that alpha map as well.
 + *
 + * The same `buffer` is handed to both store routines; each is the routine
 + * storeProcForPicture_64() returns for the respective image.  The alpha map
 + * is addressed at (x, y) shifted by pict->common.alpha_origin.  Without an
 + * alpha map this is a plain fbStore64().
 + */
 +static void fbStoreExternalAlpha64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    uint32_t *bits, *alpha_bits;
 +    int32_t stride, astride;
 +    storeProc_64 store;
 +    storeProc_64 astore;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    const pixman_indexed_t * aindexed;
 +    
 +    if (!pict->common.alpha_map) {
 +        fbStore64(pict, x, y, width, buffer);
 +	return;
 +    }
 +    
 +    store = storeProcForPicture_64(pict);
 +    astore = storeProcForPicture_64(pict->common.alpha_map);
 +    aindexed = pict->common.alpha_map->indexed;
 +    
 +    bits = pict->bits;
 +    stride = pict->rowstride;
 +    
 +    alpha_bits = pict->common.alpha_map->bits;
 +    astride = pict->common.alpha_map->rowstride;
 +    
 +    /* Advance both base pointers to the row being written. */
 +    bits       += y*stride;
 +    alpha_bits += (y - pict->common.alpha_origin.y)*astride;
 +    
 +    store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
 +    astore((pixman_image_t *)pict->common.alpha_map,
 +	   alpha_bits, buffer, x - pict->common.alpha_origin.x, width, aindexed);
 +    
 +    /* The alpha map hangs off `common`, as in every other access above. */
 +    fbFinishAccess (pict->common.alpha_map->pDrawable);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * Widen `width` pixels from four 8-bit channels (one uint32_t each) to four
 + * 16-bit channels (one uint64_t each), keeping the channel order.
 + *
 + * Each 8-bit value c becomes the 16-bit value (c << 8) | c, so 0x00 maps to
 + * 0x0000 and 0xff maps to 0xffff.  A plain shift would map 0xff to 0xff00,
 + * which is not full scale in the wide format.  fbContract() keeps the top
 + * byte of every channel, so contracting an expanded pixel still gives back
 + * the original.
 + *
 + * The 64-bit fetch wrappers in this file call this with `source` pointing
 + * into the upper half of `dest`.  That works because the loop runs forward
 + * and reads source[i] before writing dest[i]; keep both properties.
 + */
 +static void
 +fbExpand(uint32_t * source, uint64_t * dest, int width)
 +{
 +    int i;
 +    for(i = 0; i < width; i++) {
 +	uint32_t p = source[i];
 +	/* Place every channel in the high byte of its 16-bit lane ... */
 +	uint64_t r = (uint64_t)(p & 0x00ff0000) << 24;
 +	uint64_t g = (uint64_t)(p & 0x0000ff00) << 16;
 +	uint64_t b = (uint64_t)(p & 0x000000ff) << 8;
 +	uint64_t a = (uint64_t)(p & 0xff000000) << 32;
 +	uint64_t hi = r | g | b | a;
 +	    
 +	/* ... then copy that byte into the low byte of the same lane. */
 +	dest[i] = hi | (hi >> 8);
 +    }
 +}
 +
 +/*
 + * Narrow `width` pixels from four 16-bit channels (one uint64_t each) to
 + * four 8-bit channels (one uint32_t each) by keeping the most significant
 + * byte of every channel.  Channel order is unchanged.
 + */
 +static void
 +fbContract(uint64_t * source, uint32_t * dest, int width)
 +{
 +    int n = 0;
 +    while (n < width) {
 +	const uint64_t wide = source[n];
 +
 +	/* Each term picks the high byte of one 16-bit lane. */
 +	dest[n] = ((wide >> 32) & 0xff000000)
 +	        | ((wide >> 24) & 0x00ff0000)
 +	        | ((wide >> 16) & 0x0000ff00)
 +	        | ((wide >> 8)  & 0x000000ff);
 +	n++;
 +    }
 +}
 +
 +/*
 + * Narrow a single pixel from four 16-bit channels to four 8-bit channels by
 + * keeping the most significant byte of every channel.
 + */
 +static uint32_t
 +fbContractPixel(uint64_t p)
 +{
 +    /* Each term picks the high byte of one 16-bit lane. */
 +    return ((p >> 32) & 0xff000000)
 +         | ((p >> 24) & 0x00ff0000)
 +         | ((p >> 16) & 0x0000ff00)
 +         | ((p >> 8)  & 0x000000ff);
 +}
 +
 +/*
 + * 64-bit front end for pixmanFetchSourcePict().
 + *
 + * The wide output buffer doubles as scratch space: its first `width`
 + * 32-bit words hold the contracted mask (if any), and the following `width`
 + * words receive the 32-bit fetch result, which is then expanded in place
 + * over the whole buffer.
 + */
 +static void
 +pixmanFetchSourcePict64(source_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    uint32_t * const scratch = (uint32_t *) buffer;
 +    uint32_t * const narrow = scratch + width;
 +    uint32_t * narrow_mask = NULL;
 +    const uint32_t narrow_bits = fbContractPixel(maskBits);
 +
 +    if (mask) {
 +	narrow_mask = scratch;
 +	fbContract(mask, narrow_mask, width);
 +    }
 +    pixmanFetchSourcePict(pict, x, y, width, narrow, narrow_mask, narrow_bits);
 +    fbExpand(narrow, buffer, width);
 +}
 +
 +/*
 + * 64-bit front end for fbFetchExternalAlpha().
 + *
 + * The wide output buffer doubles as scratch space: its first `width`
 + * 32-bit words hold the contracted mask (if any), and the following `width`
 + * words receive the 32-bit fetch result, which is then expanded in place
 + * over the whole buffer.
 + */
 +static void
 +fbFetchExternalAlpha64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    uint32_t * const scratch = (uint32_t *) buffer;
 +    uint32_t * const narrow = scratch + width;
 +    uint32_t * narrow_mask = NULL;
 +    const uint32_t narrow_bits = fbContractPixel(maskBits);
 +
 +    if (mask) {
 +	narrow_mask = scratch;
 +	fbContract(mask, narrow_mask, width);
 +    }
 +    fbFetchExternalAlpha(pict, x, y, width, narrow, narrow_mask, narrow_bits);
 +    fbExpand(narrow, buffer, width);
 +}
 +
 +/*
 + * 64-bit front end for fbFetchTransformed().
 + *
 + * The wide output buffer doubles as scratch space: its first `width`
 + * 32-bit words hold the contracted mask (if any), and the following `width`
 + * words receive the 32-bit fetch result, which is then expanded in place
 + * over the whole buffer.
 + */
 +static void
 +fbFetchTransformed64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    uint32_t * const scratch = (uint32_t *) buffer;
 +    uint32_t * const narrow = scratch + width;
 +    uint32_t * narrow_mask = NULL;
 +    const uint32_t narrow_bits = fbContractPixel(maskBits);
 +
 +    if (mask) {
 +	narrow_mask = scratch;
 +	fbContract(mask, narrow_mask, width);
 +    }
 +    fbFetchTransformed(pict, x, y, width, narrow, narrow_mask, narrow_bits);
 +    fbExpand(narrow, buffer, width);
 +}
 +
 +/*
 + * Scanline callbacks used by the general compositing paths below, for
 + * 32-bit intermediates.  The fetch/store routines in this file are cast to
 + * these types; their arguments are (image, x, y, width, buffer) and, for
 + * fetchers, additionally (mask, maskBits).
 + */
 +typedef void (*scanStoreProc)(pixman_image_t *, int, int, int, uint32_t *);
 +typedef void (*scanFetchProc)(pixman_image_t *, int, int, int, uint32_t *,
 +			      uint32_t *, uint32_t);
 +
 +/* The same callbacks for 64-bit intermediates. */
 +typedef void (*scanStoreProc64)(pixman_image_t *, int, int, int, uint64_t *);
 +typedef void (*scanFetchProc64)(pixman_image_t *, int, int, int, uint64_t *,
 +				uint64_t *, uint64_t);
 +
 +//#ifndef PIXMAN_FB_ACCESSORS
 +//static
 +//#endif
 +/*
 + * General scanline compositing path using 32-bit intermediate pixels.
 + *
 + * For each of data->height rows this fetches the source (and mask, if any)
 + * into scratch buffers, combines them with the destination using the
 + * combiner registered for data->op, and writes the result back.
 + *
 + * scanline_buffer is carved into consecutive runs of data->width pixels:
 + * src_buffer, dest_buffer and, when a mask is fetched, mask_buffer.  The
 + * caller therefore has to provide room for up to 3 * data->width pixels.
 + *
 + * A mask that is a BITS image with component_alpha set and an RGB format
 + * goes through the combineC (per-component) combiners; everything else
 + * goes through the combineU (unified alpha) combiners.  If no combiner is
 + * registered for the operator the function returns without drawing.
 + */
 +void
 +PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 +			       uint32_t *scanline_buffer)
 +{
 +    uint32_t *src_buffer = scanline_buffer;
 +    uint32_t *dest_buffer = src_buffer + data->width;
 +    int i;
 +    scanStoreProc store;
 +    scanFetchProc fetchSrc = NULL, fetchMask = NULL, fetchDest = NULL;
 +    unsigned int srcClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    unsigned int maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    uint32_t *bits;
 +    int32_t stride;
 +    int xoff, yoff;
 +    
 +    /* Choose the source fetcher; PIXMAN_OP_CLEAR fetches no source. */
 +    if (data->op == PIXMAN_OP_CLEAR)
 +        fetchSrc = NULL;
 +    else if (IS_SOURCE_IMAGE (data->src))
 +    {
 +	fetchSrc = (scanFetchProc)pixmanFetchSourcePict;
 +	srcClass = SourcePictureClassify ((source_image_t *)data->src,
 +					  data->xSrc, data->ySrc,
 +					  data->width, data->height);
 +    }
 +    else
 +    {
 +	/* NOTE: this local shadows the outer `uint32_t *bits`. */
 +	bits_image_t *bits = (bits_image_t *)data->src;
 +	
 +	if (bits->common.alpha_map)
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetchExternalAlpha;
 +	}
 +	else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		 bits->width == 1 &&
 +		 bits->height == 1)
 +	{
 +	    /* A repeating 1x1 image is treated as a solid colour. */
 +	    fetchSrc = (scanFetchProc)fbFetchSolid;
 +	    srcClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	}
 +	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetch;
 +	}
 +	else
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetchTransformed;
 +	}
 +    }
 +    
 +    /* Choose the mask fetcher with the same rules as for the source. */
 +    if (!data->mask || data->op == PIXMAN_OP_CLEAR)
 +    {
 +	fetchMask = NULL;
 +    }
 +    else
 +    {
 +	if (IS_SOURCE_IMAGE (data->mask))
 +	{
 +	    fetchMask = (scanFetchProc)pixmanFetchSourcePict;
 +	    maskClass = SourcePictureClassify ((source_image_t *)data->mask,
 +					       data->xMask, data->yMask,
 +					       data->width, data->height);
 +	}
 +	else
 +	{
 +	    bits_image_t *bits = (bits_image_t *)data->mask;
 +	    
 +	    if (bits->common.alpha_map)
 +	    {
 +		fetchMask = (scanFetchProc)fbFetchExternalAlpha;
 +	    }
 +	    else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		     bits->width == 1 && bits->height == 1)
 +	    {
 +		fetchMask = (scanFetchProc)fbFetchSolid;
 +		maskClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	    }
 +	    else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +		fetchMask = (scanFetchProc)fbFetch;
 +	    else
 +		fetchMask = (scanFetchProc)fbFetchTransformed;
 +	}
 +    }
 +    
 +    /* Choose destination fetch/store; CLEAR and SRC never read the
 +       destination. */
 +    if (data->dest->common.alpha_map)
 +    {
 +	fetchDest = (scanFetchProc)fbFetchExternalAlpha;
 +	store = (scanStoreProc)fbStoreExternalAlpha;
 +	
 +	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 +	    fetchDest = NULL;
 +    }
 +    else
 +    {
 +	fetchDest = (scanFetchProc)fbFetch;
 +	store = (scanStoreProc)fbStore;
 +	
 +	switch (data->op)
 +	{
 +	case PIXMAN_OP_CLEAR:
 +	case PIXMAN_OP_SRC:
 +	    fetchDest = NULL;
 +#ifndef PIXMAN_FB_ACCESSORS
 +	    /* fall-through */
 +	case PIXMAN_OP_ADD:
 +	case PIXMAN_OP_OVER:
 +	    /* Without accessors, these operators on a8r8g8b8/x8r8g8b8
 +	       destinations combine straight into the destination bits:
 +	       store == NULL selects that path below. */
 +	    switch (data->dest->bits.format) {
 +	    case PIXMAN_a8r8g8b8:
 +	    case PIXMAN_x8r8g8b8:
 +		store = NULL;
 +		break;
 +	    default:
 +		break;
 +	    }
 +#endif
 +	    break;
 +	}
 +    }
 +    
 +    /* bits/stride are only used on the direct (store == NULL) path. */
 +    if (!store)
 +    {
 +	bits = data->dest->bits.bits;
 +	stride = data->dest->bits.rowstride;
 +	xoff = yoff = 0;
 +    }
 +    else
 +    {
 +	bits = NULL;
 +	stride = 0;
 +	xoff = yoff = 0;
 +    }
 +    
 +    if (fetchSrc		   &&
 +	fetchMask		   &&
 +	data->mask		   &&
 +	data->mask->common.type == BITS && 
 +	data->mask->common.component_alpha &&
 +	PIXMAN_FORMAT_RGB (data->mask->bits.format))
 +    {
 +	/* Component-alpha path: the combiner takes source and mask
 +	   separately. */
 +	uint32_t *mask_buffer = dest_buffer + data->width;
 +	CombineFuncC compose = pixman_composeFunctions.combineC[data->op];
 +	if (!compose)
 +	    return;
 +	
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
 +		    
 +		    /* A HORIZONTAL-class image is fetched once; clearing
 +		       the fetcher makes later rows reuse the buffer. */
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
 +		
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xffffffff);
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +	    }
 +	    
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
 +		
 +		/* blend */
 +		compose (dest_buffer, src_buffer, mask_buffer, data->width);
 +		
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		/* blend */
 +		compose (bits + (data->yDest + i+ yoff) * stride +
 +			 data->xDest + xoff,
 +			 src_buffer, mask_buffer, data->width);
 +	    }
 +	}
 +    }
 +    else
 +    {
 +	/* Unified-alpha path: the mask is folded into the source first,
 +	   and src_mask_buffer points at whichever buffer holds the
 +	   result. */
 +	uint32_t *src_mask_buffer = 0, *mask_buffer = 0;
 +	CombineFuncU compose = pixman_composeFunctions.combineU[data->op];
 +	if (!compose)
 +	    return;
 +	
 +	if (fetchMask)
 +	    mask_buffer = dest_buffer + data->width;
 +	
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
 +		    
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
 +		
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    /* Fetched once and kept unmasked in src_buffer; the
 +		       masked result is built in mask_buffer so later
 +		       rows can reuse src_buffer. */
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +		    
 +		    if (mask_buffer)
 +		    {
 +			pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +			src_mask_buffer = mask_buffer;
 +		    }
 +		    else
 +			src_mask_buffer = src_buffer;
 +		    
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xff000000);
 +		    
 +		    if (mask_buffer)
 +			pixman_composeFunctions.combineMaskU (src_buffer,
 +							      mask_buffer,
 +							      data->width);
 +		    
 +		    src_mask_buffer = src_buffer;
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		/* Reached on later rows, after a HORIZONTAL-class source
 +		   fetcher has been cleared above. */
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +		
 +		pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +		
 +		src_mask_buffer = mask_buffer;
 +	    }
 +	    
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
 +		
 +		/* blend */
 +		compose (dest_buffer, src_mask_buffer, data->width);
 +		
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		/* blend */
 +		compose (bits + (data->yDest + i+ yoff) * stride +
 +			 data->xDest + xoff,
 +			 src_mask_buffer, data->width);
 +	    }
 +	}
 +    }
 +    
 +    /* NOTE(review): the early returns above skip this call. */
 +    if (!store)
 +	fbFinishAccess (data->dest->pDrawable);
 +}
 +
 +
 +/*
 + * General scanline compositing path using 64-bit intermediate pixels.
 + *
 + * Mirrors PIXMAN_COMPOSITE_RECT_GENERAL, but uses the *64 fetch/store
 + * routines and the pixman_composeFunctions_wide combiners.
 + *
 + * scanline_buffer is carved into consecutive runs of data->width pixels:
 + * src_buffer, dest_buffer and, when a mask is fetched, mask_buffer.  The
 + * caller therefore has to provide room for up to 3 * data->width pixels.
 + *
 + * Unlike the 32-bit variant, `store` is always set to a store routine
 + * here, so the `!store` branches (including the asserts) are not reached
 + * as the code stands.
 + */
 +void
 +PIXMAN_COMPOSITE_RECT_GENERAL_WIDE (const FbComposeData *data,
 +			            uint64_t *scanline_buffer)
 +{
 +    uint64_t *src_buffer = scanline_buffer;
 +    uint64_t *dest_buffer = src_buffer + data->width;
 +    int i;
 +    scanStoreProc64 store;
 +    scanFetchProc64 fetchSrc = NULL, fetchMask = NULL, fetchDest = NULL;
 +    unsigned int srcClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    unsigned int maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    uint32_t *bits;
 +    int32_t stride;
 +    int xoff, yoff;
 +    
 +    /* Choose the source fetcher; PIXMAN_OP_CLEAR fetches no source. */
 +    if (data->op == PIXMAN_OP_CLEAR)
 +        fetchSrc = NULL;
 +    else if (IS_SOURCE_IMAGE (data->src))
 +    {
 +	fetchSrc = (scanFetchProc64)pixmanFetchSourcePict64;
 +	srcClass = SourcePictureClassify ((source_image_t *)data->src,
 +					  data->xSrc, data->ySrc,
 +					  data->width, data->height);
 +    }
 +    else
 +    {
 +	/* NOTE: this local shadows the outer `uint32_t *bits`. */
 +	bits_image_t *bits = (bits_image_t *)data->src;
 +	
 +	if (bits->common.alpha_map)
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetchExternalAlpha64;
 +	}
 +	else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		 bits->width == 1 &&
 +		 bits->height == 1)
 +	{
 +	    /* A repeating 1x1 image is treated as a solid colour. */
 +	    fetchSrc = (scanFetchProc64)fbFetchSolid64;
 +	    srcClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	}
 +	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetch64;
 +	}
 +	else
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetchTransformed64;
 +	}
 +    }
 +    
 +    /* Choose the mask fetcher with the same rules as for the source. */
 +    if (!data->mask || data->op == PIXMAN_OP_CLEAR)
 +    {
 +	fetchMask = NULL;
 +    }
 +    else
 +    {
 +	if (IS_SOURCE_IMAGE (data->mask))
 +	{
 +	    fetchMask = (scanFetchProc64)pixmanFetchSourcePict64;
 +	    maskClass = SourcePictureClassify ((source_image_t *)data->mask,
 +					       data->xMask, data->yMask,
 +					       data->width, data->height);
 +	}
 +	else
 +	{
 +	    bits_image_t *bits = (bits_image_t *)data->mask;
 +	    
 +	    if (bits->common.alpha_map)
 +	    {
 +		fetchMask = (scanFetchProc64)fbFetchExternalAlpha64;
 +	    }
 +	    else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		     bits->width == 1 && bits->height == 1)
 +	    {
 +		fetchMask = (scanFetchProc64)fbFetchSolid64;
 +		maskClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	    }
 +	    else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +		fetchMask = (scanFetchProc64)fbFetch64;
 +	    else
 +		fetchMask = (scanFetchProc64)fbFetchTransformed64;
 +	}
 +    }
 +    
 +    /* Choose destination fetch/store; CLEAR and SRC never read the
 +       destination. */
 +    if (data->dest->common.alpha_map)
 +    {
 +	fetchDest = (scanFetchProc64)fbFetchExternalAlpha64;
 +	store = (scanStoreProc64)fbStoreExternalAlpha64;
 +	
 +	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 +	    fetchDest = NULL;
 +    }
 +    else
 +    {
 +	fetchDest = (scanFetchProc64)fbFetch64;
 +	store = (scanStoreProc64)fbStore64;
 +	
 +	switch (data->op)
 +	{
 +	case PIXMAN_OP_CLEAR:
 +	case PIXMAN_OP_SRC:
 +	    fetchDest = NULL;
 +	    break;
 +	}
 +    }
 +    
 +    /* Kept parallel to the 32-bit variant; bits, stride, xoff and yoff
 +       are assigned here but not read anywhere in this function. */
 +    if (!store)
 +    {
 +	bits = data->dest->bits.bits;
 +	stride = data->dest->bits.rowstride;
 +	xoff = yoff = 0;
 +    }
 +    else
 +    {
 +	bits = NULL;
 +	stride = 0;
 +	xoff = yoff = 0;
 +    }
 +    
 +    if (fetchSrc		   &&
 +	fetchMask		   &&
 +	data->mask		   &&
 +	data->mask->common.type == BITS && 
 +	data->mask->common.component_alpha &&
 +	PIXMAN_FORMAT_RGB (data->mask->bits.format))
 +    {
 +	/* Component-alpha path: the combiner takes source and mask
 +	   separately. */
 +	uint64_t *mask_buffer = dest_buffer + data->width;
 +	CombineFuncC64 compose = pixman_composeFunctions_wide.combineC[data->op];
 +	if (!compose)
 +	    return;
 +	
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
 +		    
 +		    /* A HORIZONTAL-class image is fetched once; clearing
 +		       the fetcher makes later rows reuse the buffer. */
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
 +		
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xffffffffffffffffLL);
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +	    }
 +	    
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
 +		
 +		/* blend */
 +		compose (dest_buffer, src_buffer, mask_buffer, data->width);
 +		
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		assert(!"need to have a storeproc with 64-bit internal format");
 +	    }
 +	}
 +    }
 +    else
 +    {
 +	/* Unified-alpha path: the mask is folded into the source first,
 +	   and src_mask_buffer points at whichever buffer holds the
 +	   result. */
 +	uint64_t *src_mask_buffer = 0, *mask_buffer = 0;
 +	CombineFuncU64 compose = pixman_composeFunctions_wide.combineU[data->op];
 +	if (!compose)
 +	    return;
 +	
 +	if (fetchMask)
 +	    mask_buffer = dest_buffer + data->width;
 +	
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
 +		    
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
 +		
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    /* Fetched once and kept unmasked in src_buffer; the
 +		       masked result is built in mask_buffer so later
 +		       rows can reuse src_buffer. */
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +		    
 +		    if (mask_buffer)
 +		    {
 +			pixman_composeFunctions_wide.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +			src_mask_buffer = mask_buffer;
 +		    }
 +		    else
 +			src_mask_buffer = src_buffer;
 +		    
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xffff000000000000LL);
 +		    
 +		    if (mask_buffer)
 +			pixman_composeFunctions_wide.combineMaskU (src_buffer,
 +							      mask_buffer,
 +							      data->width);
 +		    
 +		    src_mask_buffer = src_buffer;
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		/* Reached on later rows, after a HORIZONTAL-class source
 +		   fetcher has been cleared above. */
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +		
 +		pixman_composeFunctions_wide.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +		
 +		src_mask_buffer = mask_buffer;
 +	    }
 +	    
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
 +		
 +		/* blend */
 +		compose (dest_buffer, src_mask_buffer, data->width);
 +		
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		assert(!"need to have a storeproc with 64-bit internal format");
 +	    }
 +	}
 +    }
 +    
 +    if (!store)
 +	fbFinishAccess (data->dest->pDrawable);
 +}
commit 86dd2c765cff3f18fa41fd3a0a51376667663104
Merge: e3e25cb... ba5fc60...
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Fri Oct 26 09:25:17 2007 -0700

    Merge commit 'ba5fc60317e177a4140d4ea2cf8cb538d1e4e07a' into wide-composite-merge
    
    Conflicts:
    
    	pixman/pixman-compose.c

diff --cc pixman/gen.pl
index dfb73b6,0000000..c4dff00
mode 100644,000000..100644
--- a/pixman/gen.pl
+++ b/pixman/gen.pl
@@@ -1,417 -1,0 +1,420 @@@
 +#!/usr/bin/perl
 +
 +# Copyright notice should go here.
 +
 +use strict;
 +use warnings;
 +
 +our @handcode_formats;
 +
 +sub max($$) { $_[0] > $_[1] ? $_[0] : $_[1]; }
 +sub min($$) { $_[0] < $_[1] ? $_[0] : $_[1]; }
 +# get_mask($offset, $size, $total)
 +# Build a C hex literal for a mask of $size one-bits starting $offset bits
 +# from the right of a $total-bit integer.  $total is consumed eight bits at
 +# a time, and the literal gets an "LL" suffix when $total exceeds 32.
 +sub get_mask($$$)
 +{
 +    my ($offset, $size, $total) = @_;
 +    my $left = $total - $offset - $size;
 +    my $bitstr = ("0" x $left) . ("1" x $size) . ("0" x $offset);
 +    my $suffix = $total > 32 ? "LL" : "";
 +    my $hexstr = "";
 +    for(my $i = 0; $i < $total; $i += 8) {
 +	# oct() understands the "0b" prefix, so no string eval is needed
 +	$hexstr .= sprintf("%02x", oct("0b" . substr($bitstr, $i, 8)));
 +    }
 +    return "0x$hexstr$suffix";
 +}
 +
 +# stub left for allowing more efficient code when we don't need  
 +# to split apart the components
 +sub unified_access($@) { 0 }
 +
 +sub access($$@)
 +{
 +    my ($comp, $dsize, @format) = @_;
 +    if($dsize != 32 && $dsize != 64) {
 +	die "only 32 and 64 bit intermediate formats are supported";
 +    }
 +    my $dtype = "uint${dsize}_t";
 +    my $dcs = $dsize / 4;
 +    my ($dst_off, $src_sz, $src_sh, $mask);
 +    if($comp eq "a") {
 +	if($format[4] == 0) {
 +	    $mask = get_mask($dcs * 3, $dcs, $dsize);
 +	    return "$dtype a = $mask;";
 +	}
 +	$dst_off = 3;
 +	$src_sz = $format[4];
 +	$src_sh = $format[8] + max($src_sz - $dcs, 0);
 +    } elsif ($comp eq "r") {
 +	$dst_off = 2;
 +	$src_sz = $format[1];
 +	$src_sh = $format[5] + max($src_sz - $dcs, 0);
 +    } elsif ($comp eq "g") {
 +	$dst_off = 1;
 +	$src_sz = $format[2];
 +	$src_sh = $format[6] + max($src_sz - $dcs, 0);
 +    } elsif ($comp eq "b") {
 +	$dst_off = 0;
 +	$src_sz = $format[3];
 +	$src_sh = $format[7] + max($src_sz - $dcs, 0);
 +    }
 +    return "" if $src_sz == 0;
 +    my $dst_sh = $dcs*($dst_off + 1) - min($dcs, $src_sz);
 +    
 +    # calculate shift based on src_sh, dst_sh
 +    my $sh;
 +    if ($src_sh < $dst_sh) {
 +	$sh = "<< " . ($dst_sh - $src_sh);
 +    } elsif ($src_sh > $dst_sh) {
 +	$sh = ">> " . ($src_sh - $dst_sh);
 +    } else {
 +	$sh = "";
 +    }
 +    # calculate mask based on format
 +    $mask = get_mask($src_sh, min($src_sz, $dcs), $format[0]);
 +    # put it all together
 +    "$dtype $comp = ($dtype)(p & $mask) $sh;";
 +}
 +
 +# stub left for more efficient code when we can
 +# expand after combining the components, rather than before
 +sub unified_expand($@)
 +{
 +    # conditions for unified expand: all the bits that need expanding
 +    # are the same size.
 +    0;
 +}
 +
 +sub expand($$@)
 +{
 +    my ($comp, $dsize, @format) = @_;
 +    my $acc = "";
 +    my $dcs = $dsize / 4;
 +    my ($dst_off, $src_sz);
 +
 +    if ($comp eq "a") {
 +	$dst_off = 3;
 +	$src_sz = $format[4];
 +    } elsif ($comp eq "r") {
 +	$dst_off = 2;
 +	$src_sz = $format[1];
 +    } elsif ($comp eq "g") {
 +	$dst_off = 1;
 +	$src_sz = $format[2];
 +    } elsif ($comp eq "b") {
 +	$dst_off = 0;
 +	$src_sz = $format[3];
 +    }
 +    my $dmask = get_mask($dst_off * $dcs, $dcs, $dsize);
 +    return "" if $src_sz == 0;
 +    my $expanded_bits = $src_sz;
 +    # do a loop to emit the necessary number of expand stages
 +    while ($expanded_bits < $dcs) {
 +	# we need a mask if the expanded bits overflow the dest field
 +	my $mask_str;
 +	if($expanded_bits * 2 > $dcs) {
 +	    $mask_str = "& $dmask"; 
 +	} else {
 +	    $mask_str = "";
 +	}
 +	$acc .= "$comp |= ($comp >> $expanded_bits)$mask_str; ";
 +	$expanded_bits *= 2;
 +    }
 +    return $acc;
 +}
 +
 +sub emit_accesses($@)
 +{
 +    my ($dsize, @format) = @_;
 +    my $acc = "";
 +    if(!unified_access($dsize, @format)) {
 +	for my $c ("r", "g", "b", "a") {
 +	    $acc .= "\t" . access($c, $dsize, @format) . "\n";
 +	}
 +    }
 +    $acc .= "\n";
 +    if(!unified_expand($dsize, @format)) {
 +	for my $c ("r", "g", "b", "a") {
 +	    my $t = expand($c, $dsize, @format);
 +	    $acc .= "\t$t\n" if $t;
 +	}
 +    }
 +    return $acc . "\n";
 +}
 +
 +sub combine(@)
 +{
 +    if ($_[9] eq "argb" || $_[9] eq "abgr") {
 +	"a | r | g | b";
 +    } elsif ($_[9] eq "alpha") {
 +	"a";
 +    }
 +}
 +
 +sub size_for_bpp($)
 +{
 +    my $bpp = shift;
 +    if    ($bpp > 32) { 64 }
 +    elsif ($bpp > 16) { 32 }
 +    elsif ($bpp > 8 ) { 16 }
 +    else { 8 }
 +}
 +
 +# readpixel($bpp)
 +# Return the C expression that reads pixel number i of the scanline inside
 +# a generated fetch function.  For 4 bpp the expression addresses the row
 +# through `bits` with the pixel index i + x, the same form the handcoded
 +# fbFetch_c4_* functions use; `pixel` has been advanced by x whole bytes
 +# and therefore cannot be used to address nibbles.
 +sub readpixel($)
 +{
 +    my $bpp = shift;
 +    if ($bpp >= 8) { "READ(pixel + i)" }
 +    elsif ($bpp == 4) { "Fetch4(bits, i + x)" }
 +    else { die "unsupported bpp $bpp"; }
 +}
 +
 +sub gen_fetch($@)
 +{
 +    my ($dsize, @format) = @_;
 +    my $name = $format[10];
 +    my $dtype = "uint${dsize}_t";
 +    my $stype = "uint" . size_for_bpp($format[0]) . "_t";
 +
 +    my $out = <<EOS;
 +static FASTCALL void
- fbFetch_${name}_$dsize (pixman_image_t *image,
-                       const uint32_t *bits, int x, int width, $dtype *buffer, const pixman_indexed_t * indexed)
++fbFetch_${name}_$dsize (bits_image_t *pict, int x, int y, int width, $dtype
++*buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const $stype *pixel = ($stype *)bits + x;
 +    int i;
 +
 +    for (i = 0; i < width; ++i) {
 +EOS
 +    $out .= "        $stype p = " . readpixel($format[0]) . ";\n";
 +    $out .= emit_accesses($dsize, @format);
 +
 +$out .= "        *buffer++ = " . combine(@format) . ";\n    }\n}\n";
 +
 +}
 +
 +# readonepixel($bpp)
 +# Return the C expression that reads the single requested pixel inside a
 +# generated fetchPixel function.  For 4 bpp the expression addresses the row
 +# through `bits` with the pixel index `offset`, the same form the handcoded
 +# fbFetchPixel_c4_* functions use; `pixel` has been advanced by `offset`
 +# whole bytes and therefore cannot be used to address nibbles.
 +sub readonepixel($)
 +{
 +    my $bpp = shift;
 +    if ($bpp >= 8) { "READ(pixel)" }
 +    elsif ($bpp == 4) { "Fetch4(bits, offset)" }
 +    else { die "unsupported bpp $bpp"; }
 +}
 +
 +sub gen_fetchpixel($@)
 +{
 +    my ($dsize, @format) = @_;
 +    my $name = $format[10];
 +    my $dtype = "uint${dsize}_t";
 +    my $stype = "uint" . size_for_bpp($format[0]) . "_t";
 +    
 +    my $out = <<EOS;
 +static FASTCALL $dtype
- fbFetchPixel_${name}_$dsize (pixman_image_t *image,
-                       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_${name}_$dsize (bits_image_t *pict, int offset, int line)
 +{
++    const uint32_t *bits = pict->bits + line*pict->rowstride;
 +    const $stype *pixel = ($stype *)bits + offset;
 +EOS
 +    $out .= "    $stype p = " . readonepixel($format[0]) . ";\n";
 +    $out .= emit_accesses($dsize, @format);
 +    $out .= "    return (". combine(@format) . ");\n}\n";
 +}
 +
 +sub unified_stexpand($@)
 +{
 +    my ($ssize, @format) = @_;
 +    return 0;
 +}
 +
 +sub stexpand($$@)
 +{
 +    my ($comp, $ssize, @format) = @_;
 +    my $acc = "";
 +    my $scs = $ssize / 4;
 +    my ($src_off, $dst_sz, $dst_off);
 +
 +    if ($comp eq "a") {
 +	return "" if ($format[4] == 0);
 +	$src_off = 3;
 +	$dst_sz = $format[4];
 +	$dst_off = $format[8];
 +    } elsif ($comp eq "r") {
 +	$src_off = 2;
 +	$dst_sz = $format[1];
 +	$dst_off = $format[5];
 +    } elsif ($comp eq "g") {
 +	$src_off = 1;
 +	$dst_sz = $format[2];
 +	$dst_off = $format[6];
 +    } elsif($comp eq "b") {
 +	$src_off = 0;
 +	$dst_sz = $format[3];
 +	$dst_off = $format[7];
 +    }
 +
 +    my $dmask = get_mask($dst_off, $dst_sz, $format[0]);
 +    my $expanded_bits = $scs;
 +    # do a loop to emit the necessary number of expand stages
 +    while ($expanded_bits < $dst_sz) {
 +	# we need a mask if the expanded bits overflow the dest field
 +	my $mask_str = "";
 +	if ($expanded_bits * 2 > $dst_sz) {
 +	    $mask_str = "& $dmask";
 +	}
 +	$acc .= "$comp |= ($comp >> $expanded_bits)$mask_str; ";
 +	$expanded_bits *= 2;
 +    }
 +    return $acc;
 +}
 +
 +
 +sub unified_store($@)
 +{
 +    my ($ssize, @format) = @_;
 +    return 0;
 +}
 +
 +
 +sub store($$@)
 +{
 +    my ($comp, $ssize, @format) = @_;
 +    if ($ssize != 32 && $ssize != 64) { die }
 +    my $stype = "uint${ssize}_t";
 +    my $scs = $ssize / 4;
 +    my ($src_off, $dst_sz, $dst_sh);
 +    
 +    if ($comp eq "a") {
 +	$src_off = 3;
 +	$dst_sz = $format[4];
 +	$dst_sh = $format[8] + max($dst_sz - $scs, 0);
 +    } elsif ($comp eq "r") {
 +	$src_off = 2;
 +	$dst_sz = $format[1];
 +	$dst_sh = $format[5] + max($dst_sz - $scs, 0);
 +    } elsif ($comp eq "g") {
 +	$src_off = 1;
 +	$dst_sz = $format[2];
 +	$dst_sh = $format[6] + max($dst_sz - $scs, 0);
 +    } elsif ($comp eq "b") {
 +	$src_off = 0;
 +	$dst_sz = $format[3];
 +	$dst_sh = $format[7] + max($dst_sz - $scs, 0);
 +    }
 +
 +    my $src_sh = $scs*($src_off + 1) - min($scs, $dst_sz);
 +    # calculate mask based on format
 +    my $mask = get_mask($dst_sh, min($dst_sz, $scs), $format[0]);
 +    
 +    return "$stype $comp = 0;" if $dst_sz == 0;
 +    # calculate shift based on src_sh, dst_sh
 +    my $sh = "";
 +    if ($src_sh < $dst_sh) {
 +	$sh = "<< " . ($dst_sh - $src_sh);
 +    } elsif ($src_sh > $dst_sh) {
 +	$sh = ">> " . ($src_sh - $dst_sh);
 +    }
 +    # put it all together
 +    return "$stype $comp = (p $sh) & $mask;";
 +}
 +
 +sub emit_stores($@)
 +{
 +    my ($ssize, @format) = @_;
 +    my $acc = "";
 +    if (!unified_store($ssize, @format)) {
 +	for my $c ("r", "g", "b", "a") {
 +	    $acc .= "\t" . store($c, $ssize, @format) . "\n";
 +	}
 +    }
 +    $acc .= "\n";
 +    if (!unified_stexpand($ssize, @format)) {
 +	for my $c ("r", "g", "b", "a") {
 +	    my $t = stexpand($c, $ssize, @format);
 +	    $acc .= "\t$t\n" if $t;
 +	}
 +    }
 +    return $acc . "\n";
 +}
 +
 +sub storepixel($)
 +{
 +    my $bpp = shift;
 +    if ($bpp >= 8) { "WRITE(pixel + i, (r | g | b | a))" }
 +    elsif ($bpp == 4) { "Store4(pixel, i, (r | g | b | a))" }
 +    else { die "unsupported bpp $bpp"; }
 +}
 +
 +sub gen_store($@)
 +{
 +    my ($ssize, @format) = @_;
 +    my $name = $format[10];
 +    my $stype = "uint${ssize}_t";
 +    my $dtype = "uint" . size_for_bpp($format[0]) . "_t";
 +
 +    my $out = <<EOS;
 +static FASTCALL void
 +fbStore_${name}_$ssize (pixman_image_t *image,
 +                 uint32_t *bits, const $stype *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    $dtype * pixel = ($dtype *)bits + x;
 +    for(i = 0; i < width; ++i) {
 +       $stype p = values[i];
 +EOS
 +    $out .= emit_stores($ssize, @format);
 +       $out .= "       " . storepixel($format[0]) . ";\n    }\n}\n";
 +   }
 +
 +
 +# type is one of fetch, store, fetchpixel
 +sub gen_switch($$@)
 +{
 +    my ($type, $size, @formatnames) = @_;
 +    my ($proctype, $procname);
 +    if ($type eq "fetch") {
 +	$proctype = "fetchProc";
 +	$procname = "fbFetch";
 +    } elsif ($type eq "fetchpixel") {
 +	$proctype = "fetchPixelProc";
 +	$procname = "fbFetchPixel";
 +    } elsif ($type eq "store") {
 +	$proctype = "storeProc";
 +	$procname = "fbStore";
 +    }
 +
 +    my $acc = "static ${proctype}_$size ${proctype}ForPicture_$size (bits_image_t * pict)\n";
 +    $acc .= "{\n    switch(pict->format) {\n";
 +    for my $formatname (@formatnames) {
 +	$acc .= "    case PIXMAN_$formatname: return ${procname}_${formatname}_${size};\n";
 +    }
 +
 +    $acc . "    }\n    return NULL;\n}\n\n";
 +}
 +
 +# gen_functions generates all the accessor functions for a given dsize
 +# and then the switch statement with an entry for each format
 +sub gen_functions($@)
 +{
 +    my ($dsize, @formats) = @_;
 +    my $out = "";
 +    for my $format (@formats) {
++	$out .= "#define image ((pixman_image_t *)pict)\n";
 +	$out .= gen_fetch($dsize, @{$format});
 +	$out .= gen_fetchpixel($dsize, @{$format});
++	$out .= "#undef image\n";
 +	$out .= gen_store($dsize, @{$format});
 +    }
 +    my @formatnames = map { $_->[10] } @formats;
 +    push @formatnames, @handcode_formats;
 +    $out .= gen_switch("fetch", $dsize, @formatnames);
 +    $out .= gen_switch("fetchpixel", $dsize, @formatnames);
 +    $out .= gen_switch("store", $dsize, @formatnames);
 +}
 +
 +our @formats;
 +require "formats.pl";
 +
 +print "#include \"pixman-access-handcode.c\"\n\n";
 +print gen_functions(32, @formats);
 +print gen_functions(64, @formats);
diff --cc pixman/pixman-access-handcode.c
index 82e8bae,0000000..142234d
mode 100644,000000..100644
--- a/pixman/pixman-access-handcode.c
+++ b/pixman/pixman-access-handcode.c
@@@ -1,2788 -1,0 +1,2755 @@@
 +/*
 + *
 + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
 + *             2005 Lars Knoll & Zack Rusin, Trolltech
 + *
 + * Permission to use, copy, modify, distribute, and sell this software and its
 + * documentation for any purpose is hereby granted without fee, provided that
 + * the above copyright notice appear in all copies and that both that
 + * copyright notice and this permission notice appear in supporting
 + * documentation, and that the name of Keith Packard not be used in
 + * advertising or publicity pertaining to distribution of the software without
 + * specific, written prior permission.  Keith Packard makes no
 + * representations about the suitability of this software for any purpose.  It
 + * is provided "as is" without express or implied warranty.
 + *
 + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 + * SOFTWARE.
 + */
 +
 +#include <config.h>
 +
 +#include <stdlib.h>
 +#include <string.h>
 +#include <math.h>
 +#include <assert.h>
 +#include <limits.h>
 +
 +#include "pixman-private.h"
 +
 +
 +/*
 + *    FIXME:
 + *		The stuff here is added just to get it to compile. Something sensible needs to
 + *              be done before this can be used.
 + *
 + *   We should go through this code and clean up some of the weird stuff that has
 + *   resulted from unmacro-ifying it.
 + *
 + */
 +#define INLINE inline
 +
 +/*   End of stuff added to get it to compile
 + */ 
 +
 +static unsigned int
 +SourcePictureClassify (source_image_t *pict,
 +		       int	       x,
 +		       int	       y,
 +		       int	       width,
 +		       int	       height)
 +{
 +    if (pict->common.type == SOLID)
 +    {
 +	pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +    }
 +    else if (pict->common.type == LINEAR)
 +    {
 +	linear_gradient_t *linear = (linear_gradient_t *)pict;
 +	pixman_vector_t   v;
 +	pixman_fixed_32_32_t l;
 +	pixman_fixed_48_16_t dx, dy, a, b, off;
 +	pixman_fixed_48_16_t factors[4];
 +	int	     i;
 +	
 +	dx = linear->p2.x - linear->p1.x;
 +	dy = linear->p2.y - linear->p1.y;
 +	l = dx * dx + dy * dy;
 +	if (l)
 +	{
 +	    a = (dx << 32) / l;
 +	    b = (dy << 32) / l;
 +	}
 +	else
 +	{
 +	    a = b = 0;
 +	}
 +	
 +	off = (-a * linear->p1.x
 +	       -b * linear->p1.y) >> 16;
 +	
 +	for (i = 0; i < 3; i++)
 +	{
 +	    v.vector[0] = pixman_int_to_fixed ((i % 2) * (width  - 1) + x);
 +	    v.vector[1] = pixman_int_to_fixed ((i / 2) * (height - 1) + y);
 +	    v.vector[2] = pixman_fixed_1;
 +	    
 +	    if (pict->common.transform)
 +	    {
 +		if (!pixman_transform_point_3d (pict->common.transform, &v))
 +		    return SOURCE_IMAGE_CLASS_UNKNOWN;
 +	    }
 +	    
 +	    factors[i] = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off;
 +	}
 +	
 +	if (factors[2] == factors[0])
 +	    pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	else if (factors[1] == factors[0])
 +	    pict->class = SOURCE_IMAGE_CLASS_VERTICAL;
 +    }
 +    
 +    return pict->class;
 +}
 +
 +#define SCANLINE_BUFFER_LENGTH 2048
 +
- typedef FASTCALL void (*fetchProc_32)(pixman_image_t *image,
- 				      const uint32_t *bits,
- 				      int x, int width,
- 				      uint32_t *buffer,
- 				      const pixman_indexed_t * indexed);
++typedef FASTCALL void (*fetchProc_32)(bits_image_t *pict, int x, int y, int width, uint32_t *buffer);
 +
 +static fetchProc_32 fetchProcForPicture_32 (bits_image_t * pict);
 +
 +
- typedef FASTCALL uint32_t (*fetchPixelProc_32)(pixman_image_t *image,
- 					       const uint32_t *bits, int offset,
- 					       const pixman_indexed_t * indexed);
++typedef FASTCALL uint32_t (*fetchPixelProc_32)(bits_image_t *pict, int offset, int line);
 +
 +static fetchPixelProc_32 fetchPixelProcForPicture_32 (bits_image_t * pict);
 +
 +typedef FASTCALL void (*storeProc_32) (pixman_image_t *image,
 +				       uint32_t *bits, const uint32_t *values,
 +				       int x, int width,
 +				       const pixman_indexed_t * indexed);
 +
 +static storeProc_32 storeProcForPicture_32 (bits_image_t * pict);
 +
- typedef FASTCALL void (*fetchProc_64)(pixman_image_t *image,
- 				      const uint32_t *bits,
- 				      int x, int width,
- 				      uint64_t *buffer,
- 				      const pixman_indexed_t * indexed);
++typedef FASTCALL void (*fetchProc_64)(bits_image_t *pict, int x, int y, int width, uint64_t *buffer);
 +
 +static fetchProc_64 fetchProcForPicture_64 (bits_image_t * pict);
 +
 +
- typedef FASTCALL uint64_t (*fetchPixelProc_64)(pixman_image_t *image,
- 					       const uint32_t *bits, int offset,
- 					       const pixman_indexed_t * indexed);
++typedef FASTCALL uint64_t (*fetchPixelProc_64)(bits_image_t *pict, int offset, int line);
 +
 +static fetchPixelProc_64 fetchPixelProcForPicture_64 (bits_image_t * pict);
 +
 +typedef FASTCALL void (*storeProc_64) (pixman_image_t *image,
 +				       uint32_t *bits, const uint64_t *values,
 +				       int x, int width,
 +				       const pixman_indexed_t * indexed);
 +
 +static storeProc_64 storeProcForPicture_64 (bits_image_t * pict);
 +
 +/* handcoded fetch/store functions. */
 +#define fbFetch_g8_32 fbFetch_c8_32
 +#define fbFetchPixel_g8_32 fbFetchPixel_c8_32
 +#define fbStore_g8_32 fbStore_c8_32
 +
 +#define fbFetch_g4_32 fbFetch_c4_32
 +#define fbFetchPixel_g4_32 fbFetchPixel_c4_32
 +#define fbStore_g4_32 fbStore_c4_32
 +
 +#define fbFetch_g8_64 fbFetch_c8_64
 +#define fbFetchPixel_g8_64 fbFetchPixel_c8_64
 +#define fbStore_g8_64 fbStore_c8_64
 +
 +#define fbFetch_g4_64 fbFetch_c4_64
 +#define fbFetchPixel_g4_64 fbFetchPixel_c4_64
 +#define fbStore_g4_64 fbStore_c4_64
 +
++/*
++ * Used by READ/WRITE macros
++ */
++#define image ((pixman_image_t *)pict)
 +
 +static FASTCALL void
- fbFetch_r8g8b8_32 (pixman_image_t *image,
- 		   const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
++fbFetch_r8g8b8_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint32_t b = Fetch24(pixel) | 0xff000000;
 +	pixel += 3;
 +	*buffer++ = b;
 +    }
 +}
 +
 +static FASTCALL void
- fbFetch_r8g8b8_64 (pixman_image_t *image,
- 		   const uint32_t *bits, int x, int width, uint64_t *buffer, const pixman_indexed_t * indexed)
++fbFetch_r8g8b8_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint64_t r = (uint64_t)READ(pixel) << 32;
 +	uint64_t g = (uint64_t)READ(pixel + 1) << 16;
 +	uint64_t b = (uint64_t)READ(pixel + 2);
 +	uint64_t p = r | g | b;
 +
 +	pixel += 3;
 +	*buffer++ = (p << 16) | p;
 +    }
 +}
 +
 +static FASTCALL void
- fbFetch_b8g8r8_32 (pixman_image_t *image,
- 		   const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
++fbFetch_b8g8r8_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint32_t b = 0xff000000;
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +	b |= (READ(pixel++));
 +	b |= (READ(pixel++) << 8);
 +	b |= (READ(pixel++) << 16);
 +#else
 +	b |= (READ(pixel++) << 16);
 +	b |= (READ(pixel++) << 8);
 +	b |= (READ(pixel++));
 +#endif
 +	*buffer++ = b;
 +    }
 +}
 +
 +static FASTCALL void
- fbFetch_b8g8r8_64 (pixman_image_t *image,
- 		   const uint32_t *bits, int x, int width, uint64_t *buffer, const pixman_indexed_t * indexed)
++fbFetch_b8g8r8_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
 +    const uint8_t *end = pixel + 3*width;
 +    while (pixel < end) {
 +	uint64_t b = 0xffff000000000000LL;
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +	b |= ((uint64_t)READ(pixel++));
 +	b |= ((uint64_t)READ(pixel++) << 16);
 +	b |= ((uint64_t)READ(pixel++) << 32);
 +#else
 +	b |= ((uint64_t)READ(pixel++) << 32);
 +	b |= ((uint64_t)READ(pixel++) << 16);
 +	b |= ((uint64_t)READ(pixel++));
 +#endif
 +	*buffer++ = b | (b << 8);
 +    }
 +}
 +
 +static FASTCALL uint32_t
- fbFetchPixel_r8g8b8_32 (pixman_image_t *image,
- 			const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_r8g8b8_32 (bits_image_t *pict, int offset, int line)
 +{
++    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +    return (0xff000000 |
 +	    (READ(pixel + 0) << 16) |
 +	    (READ(pixel + 1) << 8) |
 +	    (READ(pixel + 2)));
 +#else
 +    return (0xff000000 |
 +	    (READ(pixel + 2) << 16) |
 +	    (READ(pixel + 1) << 8) |
 +	    (READ(pixel + 0)));
 +#endif
 +}
 +
 +static FASTCALL uint64_t
- fbFetchPixel_r8g8b8_64 (pixman_image_t *image,
- 			const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_r8g8b8_64 (bits_image_t *pict, int offset, int line)
 +{
++    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +    uint64_t p = (0xffff000000000000LL |
 +		  ((uint64_t)READ(pixel + 0) << 32) |
 +		  ((uint64_t)READ(pixel + 1) << 16) |
 +		  ((uint64_t)READ(pixel + 2)));
 +#else
 +    uint64_t p = (0xffff000000000000LL |
 +		  ((uint64_t)READ(pixel + 2) << 32) |
 +		  ((uint64_t)READ(pixel + 1) << 16) |
 +		  ((uint64_t)READ(pixel + 0)));
 +#endif
 +    return p | (p << 8);
 +}
 +
 +static FASTCALL uint32_t
- fbFetchPixel_b8g8r8_32 (pixman_image_t *image,
- 			const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_b8g8r8_32 (bits_image_t *pict, int offset, int line)
 +{
++    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +    return (0xff000000 |
 +	    (READ(pixel + 2) << 16) |
 +	    (READ(pixel + 1) << 8) |
 +	    (READ(pixel + 0)));
 +#else
 +    return (0xff000000 |
 +	    (READ(pixel + 0) << 16) |
 +	    (READ(pixel + 1) << 8) |
 +	    (READ(pixel + 2)));
 +#endif
 +}
 +
 +static FASTCALL uint64_t
- fbFetchPixel_b8g8r8_64 (pixman_image_t *image,
- 			const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_b8g8r8_64 (bits_image_t *pict, int offset, int line)
 +{
++    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +    uint64_t p = (0xffff000000000000LL |
 +		  ((uint64_t)READ(pixel + 2) << 32) |
 +		  ((uint64_t)READ(pixel + 1) << 16) |
 +		  ((uint64_t)READ(pixel + 0)));
 +#else
 +    uint64_t p = (0xffff000000000000LL |
 +		  ((uint64_t)READ(pixel + 0) << 32) |
 +		  ((uint64_t)READ(pixel + 1) << 16) |
 +		  ((uint64_t)READ(pixel + 2)));
 +#endif
 +    return p | (p << 8);
 +}
 +
++#undef image
++
 +static FASTCALL void
 +fbStore_r8g8b8_32 (pixman_image_t *image,
 +		   uint32_t *bits, const uint32_t *values, int x, int width,
 +		   const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	Store24(pixel, values[i]);
 +	pixel += 3;
 +    }
 +}
 +
 +/*
 + * Store `width` 64-bit pixels (16 bits per channel) from values into an
 + * r8g8b8 scanline, starting at pixel column x of bits.  The top 8 bits of
 + * each colour channel are kept; alpha is dropped.  image and indexed are
 + * not referenced directly in this body.
 + */
 +static FASTCALL void
 +fbStore_r8g8b8_64 (pixman_image_t *image,
 +		   uint32_t *bits, const uint64_t *values, int x, int width,
 +		   const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	/*
 +	 * Move the high byte of each 16-bit channel to its position in a
 +	 * 0x00RRGGBB value, the layout fbStore_r8g8b8_32 hands to Store24.
 +	 */
 +	uint32_t r = (values[i] >> 24) & 0xff0000;
 +	uint32_t g = (values[i] >> 16) & 0xff00;
 +	uint32_t b = (values[i] >> 8) & 0xff;
 +	Store24(pixel, (r|g|b));
 +	pixel += 3;
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_b8g8r8_32 (pixman_image_t *image,
 +		   uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t val = values[i];
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +	WRITE(pixel++, Blue(val));
 +	WRITE(pixel++, Green(val));
 +	WRITE(pixel++, Red(val));
 +#else
 +	WRITE(pixel++, Red(val));
 +	WRITE(pixel++, Green(val));
 +	WRITE(pixel++, Blue(val));
 +#endif
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_b8g8r8_64 (pixman_image_t *image,
 +		   uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t r = (values[i] >> 40) & 0xff;
 +	uint32_t g = (values[i] >> 24) & 0xff;
 +	uint32_t b = (values[i] >> 8) & 0xff;
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +	WRITE(pixel++, b);
 +	WRITE(pixel++, g);
 +	WRITE(pixel++, r);
 +#else
 +	WRITE(pixel++, r);
 +	WRITE(pixel++, g);
 +	WRITE(pixel++, b);
 +#endif
 +    }
 +}
 +
++#define image ((pixman_image_t *)pict)
++
 +static FASTCALL void
- fbFetch_c8_32 (pixman_image_t *image,
- 	       const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
++fbFetch_c8_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
++    const pixman_indexed_t * indexed = pict->indexed;
 +    const uint8_t *pixel = (const uint8_t *)bits + x;
 +    const uint8_t *end = pixel + width;
 +    while (pixel < end) {
 +	uint32_t  p = READ(pixel++);
 +	*buffer++ = indexed->rgba[p];
 +    }
 +}
 +
 +/*
 + * Fetch `width` 8-bit indexed pixels starting at column x of scanline y.
 + * Each index is looked up in the image palette (pict->indexed) and the
 + * resulting 32-bit colour is widened to 16 bits per channel, matching
 + * fbFetchPixel_c8_64.
 + */
 +static FASTCALL void
- fbFetch_c8_64 (pixman_image_t *image,
- 	       const uint32_t *bits, int x, int width, uint64_t *buffer, const pixman_indexed_t * indexed)
++fbFetch_c8_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    const pixman_indexed_t * indexed = pict->indexed;
 +    const uint8_t *pixel = (const uint8_t *)bits + x;
 +    const uint8_t *end = pixel + width;
 +    while (pixel < end) {
 +	/* The stored byte is a palette index, not a colour value. */
 +	uint64_t  p = indexed->rgba[READ(pixel++)];
 +	/* Spread the four 8-bit channels into 16-bit slots ... */
 +	uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +
 +	/* ... then copy each byte into the upper half of its slot. */
 +	*buffer++ = px | (px << 8);
 +    }
 +}
 +
 +/*
 + * Fetch8 reads the byte at offset (o >> 2) from l.  Fetch4 returns one
 + * nibble of that byte, chosen by bit 1 of o and by IMAGE_BYTE_ORDER.
 + * Both macros evaluate their arguments more than once.
 + * NOTE(review): callers such as fbFetch_c4_32 pass i + x as o, which looks
 + * like a pixel index; at 4 bits per pixel that would suggest (o >> 1) and
 + * ((o) & 1) instead -- confirm the intended unit of o.
 + */
 +#define Fetch8(l,o)    (READ((uint8_t *)(l) + ((o) >> 2)))
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +#define Fetch4(l,o)    ((o) & 2 ? Fetch8(l,o) & 0xf : Fetch8(l,o) >> 4)
 +#else
 +#define Fetch4(l,o)    ((o) & 2 ? Fetch8(l,o) >> 4 : Fetch8(l,o) & 0xf)
 +#endif
 +
 +static FASTCALL void
- fbFetch_c4_32 (pixman_image_t *image,
- 	       const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
++fbFetch_c4_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
++    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  p = Fetch4(bits, i + x);
 +	
 +	*buffer++ = indexed->rgba[p];
 +    }
 +}
 +
 +static FASTCALL void
- fbFetch_c4_64 (pixman_image_t *image,
- 	       const uint32_t *bits, int x, int width, uint64_t *buffer, const pixman_indexed_t * indexed)
++fbFetch_c4_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
++    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint64_t  p = indexed->rgba[Fetch4(bits, i + x)];
 +	uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +
 +	*buffer++ = px | (px << 8);
 +    }
 +}
 +
 +static FASTCALL void
- fbFetch_a1_32 (pixman_image_t *image,
- 	       const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
++fbFetch_a1_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  p = READ(bits + ((i + x) >> 5));
 +	uint32_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = p >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = p >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +	a |= a << 1;
 +	a |= a << 2;
 +	a |= a << 4;
 +	*buffer++ = a << 24;
 +    }
 +}
 +
 +static FASTCALL void
- fbFetch_a1_64 (pixman_image_t *image,
- 	       const uint32_t *bits, int x, int width, uint64_t *buffer, const pixman_indexed_t * indexed)
++fbFetch_a1_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  p = READ(bits + ((i + x) >> 5));
 +	uint64_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = p >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = p >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +	a |= a << 1;
 +	a |= a << 2;
 +	a |= a << 4;
 +	a |= a << 8;
 +	*buffer++ = a << 48;
 +    }
 +}
 +
 +static FASTCALL void
- fbFetch_g1_32 (pixman_image_t *image,
- 	       const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
++fbFetch_g1_32 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
++    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t p = READ(bits + ((i+x) >> 5));
 +	uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = p >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = p >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +	*buffer++ = indexed->rgba[a];
 +    }
 +}
 +
 +/*
 + * Fetch `width` 1-bit indexed pixels starting at column x of scanline y.
 + * Each bit selects one of the first two entries of the image palette
 + * (pict->indexed), as in fbFetch_g1_32; the 32-bit colour is then widened
 + * to 16 bits per channel and written to buffer.
 + */
 +static FASTCALL void
- fbFetch_g1_64 (pixman_image_t *image,
- 	       const uint32_t *bits, int x, int width, uint64_t *buffer, const pixman_indexed_t * indexed)
++fbFetch_g1_64 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer)
 +{
++    const uint32_t *bits = pict->bits + y*pict->rowstride;
++    const pixman_indexed_t * indexed = pict->indexed;
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	/* Read the 32-bit word holding pixel i + x and isolate its bit. */
 +	uint32_t t = READ(bits + ((i+x) >> 5));
 +	uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +	a = t >> (0x1f - ((i+x) & 0x1f));
 +#else
 +	a = t >> ((i+x) & 0x1f);
 +#endif
 +	a = a & 1;
 +
 +	/* Index the palette with the extracted bit, not with a 4-bit fetch. */
 +	uint64_t  p = indexed->rgba[a];
 +	uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +	*buffer++ = px | (px << 8);
 +    }
 +}
 +
 +static FASTCALL uint32_t
- fbFetchPixel_c8_32 (pixman_image_t *image,
- 		    const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_c8_32 (bits_image_t *pict, int offset, int line)
 +{
++    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t   pixel = READ((uint8_t *) bits + offset);
++    const pixman_indexed_t * indexed = pict->indexed;
 +    return indexed->rgba[pixel];
 +}
 +
 +static FASTCALL uint64_t
- fbFetchPixel_c8_64 (pixman_image_t *image,
- 		    const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_c8_64 (bits_image_t *pict, int offset, int line)
 +{
++    uint32_t *bits = pict->bits + line*pict->rowstride;
++    const pixman_indexed_t * indexed = pict->indexed;
 +    uint64_t p = indexed->rgba[READ((uint8_t *) bits + offset)];
 +    uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +
 +    return px | (px << 8);
 +}
 +
 +
 +static FASTCALL uint32_t
- fbFetchPixel_c4_32 (pixman_image_t *image,
- 		    const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_c4_32 (bits_image_t *pict, int offset, int line)
 +{
++    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t  pixel = Fetch4(bits, offset);
++    const pixman_indexed_t * indexed = pict->indexed;
 +    
 +    return indexed->rgba[pixel];
 +}
 +
 +static FASTCALL uint64_t
- fbFetchPixel_c4_64 (pixman_image_t *image,
- 		    const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_c4_64 (bits_image_t *pict, int offset, int line)
 +{
++    uint32_t *bits = pict->bits + line*pict->rowstride;
++    const pixman_indexed_t * indexed = pict->indexed;
 +    uint64_t p = indexed->rgba[Fetch4(bits, offset)];
 +    uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +
 +    return px | (px << 8);
 +}
 +
 +static FASTCALL uint32_t
- fbFetchPixel_a1_32 (pixman_image_t *image,
- 		    const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_a1_32 (bits_image_t *pict, int offset, int line)
 +{
++    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t  pixel = READ(bits + (offset >> 5));
 +    uint32_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    a |= a << 1;
 +    a |= a << 2;
 +    a |= a << 4;
 +    return a << 24;
 +}
 +
 +static FASTCALL uint64_t
- fbFetchPixel_a1_64 (pixman_image_t *image,
- 		    const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_a1_64 (bits_image_t *pict, int offset, int line)
 +{
++    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t  pixel = READ(bits + (offset >> 5));
 +    uint64_t  a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    a |= a << 1;
 +    a |= a << 2;
 +    a |= a << 4;
 +    a |= a << 8;
 +    return a << 48;
 +}
 +
 +static FASTCALL uint32_t
- fbFetchPixel_g1_32 (pixman_image_t *image,
- 		    const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_g1_32 (bits_image_t *pict, int offset, int line)
 +{
++    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t pixel = READ(bits + (offset >> 5));
++    const pixman_indexed_t * indexed = pict->indexed;
 +    uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    return indexed->rgba[a];
 +}
 +
 +static FASTCALL uint64_t
- fbFetchPixel_g1_64 (pixman_image_t *image,
- 		    const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
++fbFetchPixel_g1_64 (bits_image_t *pict, int offset, int line)
 +{
++    uint32_t *bits = pict->bits + line*pict->rowstride;
 +    uint32_t pixel = READ(bits + (offset >> 5));
++    const pixman_indexed_t * indexed = pict->indexed;
 +    uint32_t a;
 +#if BITMAP_BIT_ORDER == MSBFirst
 +    a = pixel >> (0x1f - (offset & 0x1f));
 +#else
 +    a = pixel >> (offset & 0x1f);
 +#endif
 +    a = a & 1;
 +    uint64_t p = indexed->rgba[a];
 +    uint64_t px = (p & 0xff) | ((p & 0xff00) << 8) | ((p & 0xff0000) << 16) | ((p & 0xff000000) << 24);
 +    return px | (px << 8);
 +}
 +
++#undef image
++
 +static FASTCALL void
 +fbStore_c8_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t   *pixel = ((uint8_t *) bits) + x;
 +    for (i = 0; i < width; ++i) {
 +	WRITE(pixel++, miIndexToEnt24(indexed,values[i]));
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_c8_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    uint8_t   *pixel = ((uint8_t *) bits) + x;
 +    for (i = 0; i < width; ++i) {
 +	WRITE(pixel++, miIndexToEnt48(indexed,values[i]));
 +    }
 +}
 +
 +#define Store8(l,o,v)  (WRITE((uint8_t *)(l) + ((o) >> 3), (v)))
 +#if IMAGE_BYTE_ORDER == MSBFirst
 +#define Store4(l,o,v)  Store8(l,o,((o) & 4 ?				\
 +				   (Fetch8(l,o) & 0xf0) | (v) :		\
 +				   (Fetch8(l,o) & 0x0f) | ((v) << 4)))
 +#else
 +#define Store4(l,o,v)  Store8(l,o,((o) & 4 ?			       \
 +				   (Fetch8(l,o) & 0x0f) | ((v) << 4) : \
 +				   (Fetch8(l,o) & 0xf0) | (v)))
 +#endif
 +
 +static FASTCALL void
 +fbStore_c4_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  pixel;
 +	
 +	pixel = miIndexToEnt24(indexed, values[i]);
 +	Store4(bits, i + x, pixel);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_c4_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  pixel;
 +
 +	pixel = miIndexToEnt48(indexed, values[i]);
 +	Store4(bits, i + x, pixel);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_a1_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +	
 +	uint32_t v = values[i] & 0x80000000 ? mask : 0;
 +	WRITE(pixel, (READ(pixel) & ~mask) | v);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_a1_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +
 +	uint64_t v = values[i] & 0x8000000000000000LL ? mask : 0;
 +	WRITE(pixel, (READ(pixel) & ~mask) | v);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_g1_32 (pixman_image_t *image,
 +	       uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +	
 +	uint32_t v = miIndexToEntY24(indexed,values[i]) ? mask : 0;
 +	WRITE(pixel, (READ(pixel) & ~mask) | v);
 +    }
 +}
 +
 +static FASTCALL void
 +fbStore_g1_64 (pixman_image_t *image,
 +	       uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed)
 +{
 +    int i;
 +    for (i = 0; i < width; ++i) {
 +	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 +	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
 +
 +	uint32_t v = miIndexToEntY48(indexed,values[i]) ? mask : 0;
 +	WRITE(pixel, (READ(pixel) & ~mask) | v);
 +    }
 +}
 +
 +/* end of handcoded fetch/store functions */
 +
++#define image ((pixman_image_t *)pict)
 +
 +#ifdef PIXMAN_FB_ACCESSORS
 +static
 +#endif
 +void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
-     uint32_t *bits;
 +    uint32_t color;
 +    uint32_t *end;
 +    fetchPixelProc_32 fetch = fetchPixelProcForPicture_32(pict);
-     const pixman_indexed_t * indexed = pict->indexed;
 +    
-     bits = pict->bits;
-     
-     color = fetch((pixman_image_t *)pict, bits, 0, indexed);
++    color = fetch(pict, 0, 0);
 +    
 +    end = buffer + width;
 +    while (buffer < end)
 +	*(buffer++) = color;
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +#ifdef PIXMAN_FB_ACCESSORS
 +static
 +#endif
 +void fbFetchSolid64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
-     uint32_t *bits;
 +    uint64_t color;
 +    uint64_t *end;
 +    fetchPixelProc_64 fetch = fetchPixelProcForPicture_64(pict);
-     const pixman_indexed_t * indexed = pict->indexed;
-     
-     bits = pict->bits;
 +    
-     color = fetch((pixman_image_t *)pict, bits, 0, indexed);
++    color = fetch(pict, 0, 0);
 +    
 +    end = buffer + width;
 +    while (buffer < end)
 +	*(buffer++) = color;
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +static void fbFetch(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
-     uint32_t *bits;
-     int32_t stride;
 +    fetchProc_32 fetch = fetchProcForPicture_32(pict);
-     const pixman_indexed_t * indexed = pict->indexed;
-     
-     bits = pict->bits;
-     stride = pict->rowstride;
 +    
-     bits += y*stride;
-     
-     fetch((pixman_image_t *)pict, bits, x, width, buffer, indexed);
++    fetch(pict, x, y, width, buffer);
 +}
 +
 +static void fbFetch64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
-     uint32_t *bits;
-     int32_t stride;
 +    fetchProc_64 fetch = fetchProcForPicture_64(pict);
-     const pixman_indexed_t * indexed = pict->indexed;
 +    
-     bits = pict->bits;
-     stride = pict->rowstride;
-     
-     bits += y*stride;
-     
-     fetch((pixman_image_t *)pict, bits, x, width, buffer, indexed);
++    fetch(pict, x, y, width, buffer);
 +}
 +
 +#ifdef PIXMAN_FB_ACCESSORS
 +#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_accessors
 +#define PIXMAN_COMPOSITE_RECT_GENERAL_WIDE pixman_composite_rect_general_wide_accessors
 +#else
 +#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
 +#define PIXMAN_COMPOSITE_RECT_GENERAL_WIDE pixman_composite_rect_general_wide_no_accessors
 +#endif
 +
 +typedef struct
 +{
 +    uint32_t        left_ag;
 +    uint32_t        left_rb;
 +    uint32_t        right_ag;
 +    uint32_t        right_rb;
 +    int32_t       left_x;
 +    int32_t       right_x;
 +    int32_t       stepper;
 +    
 +    pixman_gradient_stop_t	*stops;
 +    int                      num_stops;
 +    unsigned int             spread;
 +    
 +    int		  need_reset;
 +} GradientWalker;
 +
 +static void
 +_gradient_walker_init (GradientWalker  *walker,
 +		       gradient_t      *gradient,
 +		       unsigned int     spread)
 +{
 +    walker->num_stops = gradient->n_stops;
 +    walker->stops     = gradient->stops;
 +    walker->left_x    = 0;
 +    walker->right_x   = 0x10000;
 +    walker->stepper   = 0;
 +    walker->left_ag   = 0;
 +    walker->left_rb   = 0;
 +    walker->right_ag  = 0;
 +    walker->right_rb  = 0;
 +    walker->spread    = spread;
 +    
 +    walker->need_reset = TRUE;
 +}
 +
 +static void
 +_gradient_walker_reset (GradientWalker  *walker,
 +                        pixman_fixed_32_32_t     pos)
 +{
 +    int32_t                  x, left_x, right_x;
 +    pixman_color_t          *left_c, *right_c;
 +    int                      n, count = walker->num_stops;
 +    pixman_gradient_stop_t *      stops = walker->stops;
 +    
 +    static const pixman_color_t   transparent_black = { 0, 0, 0, 0 };
 +    
 +    switch (walker->spread)
 +    {
 +    case PIXMAN_REPEAT_NORMAL:
 +	x = (int32_t)pos & 0xFFFF;
 +	for (n = 0; n < count; n++)
 +	    if (x < stops[n].x)
 +		break;
 +	if (n == 0) {
 +	    left_x =  stops[count-1].x - 0x10000;
 +	    left_c = &stops[count-1].color;
 +	} else {
 +	    left_x =  stops[n-1].x;
 +	    left_c = &stops[n-1].color;
 +	}
 +	
 +	if (n == count) {
 +	    right_x =  stops[0].x + 0x10000;
 +	    right_c = &stops[0].color;
 +	} else {
 +	    right_x =  stops[n].x;
 +	    right_c = &stops[n].color;
 +	}
 +	left_x  += (pos - x);
 +	right_x += (pos - x);
 +	break;
 +	
 +    case PIXMAN_REPEAT_PAD:
 +	for (n = 0; n < count; n++)
 +	    if (pos < stops[n].x)
 +		break;
 +	
 +	if (n == 0) {
 +	    left_x =  INT32_MIN;
 +	    left_c = &stops[0].color;
 +	} else {
 +	    left_x =  stops[n-1].x;
 +	    left_c = &stops[n-1].color;
 +	}
 +	
 +	if (n == count) {
 +	    right_x =  INT32_MAX;
 +	    right_c = &stops[n-1].color;
 +	} else {
 +	    right_x =  stops[n].x;
 +	    right_c = &stops[n].color;
 +	}
 +	break;
 +	
 +    case PIXMAN_REPEAT_REFLECT:
 +	x = (int32_t)pos & 0xFFFF;
 +	if ((int32_t)pos & 0x10000)
 +	    x = 0x10000 - x;
 +	for (n = 0; n < count; n++)
 +	    if (x < stops[n].x)
 +		break;
 +	
 +	if (n == 0) {
 +	    left_x =  -stops[0].x;
 +	    left_c = &stops[0].color;
 +	} else {
 +	    left_x =  stops[n-1].x;
 +	    left_c = &stops[n-1].color;
 +	}
 +	
 +	if (n == count) {
 +	    right_x = 0x20000 - stops[n-1].x;
 +	    right_c = &stops[n-1].color;
 +	} else {
 +	    right_x =  stops[n].x;
 +	    right_c = &stops[n].color;
 +	}
 +	
 +	if ((int32_t)pos & 0x10000) {
 +	    pixman_color_t  *tmp_c;
 +	    int32_t          tmp_x;
 +	    
 +	    tmp_x   = 0x10000 - right_x;
 +	    right_x = 0x10000 - left_x;
 +	    left_x  = tmp_x;
 +	    
 +	    tmp_c   = right_c;
 +	    right_c = left_c;
 +	    left_c  = tmp_c;
 +	    
 +	    x = 0x10000 - x;
 +	}
 +	left_x  += (pos - x);
 +	right_x += (pos - x);
 +	break;
 +	
 +    default:  /* RepeatNone */
 +	for (n = 0; n < count; n++)
 +	    if (pos < stops[n].x)
 +		break;
 +	
 +	if (n == 0)
 +	{
 +	    left_x  =  INT32_MIN;
 +	    right_x =  stops[0].x;
 +	    left_c  = right_c = (pixman_color_t*) &transparent_black;
 +	}
 +	else if (n == count)
 +	{
 +	    left_x  = stops[n-1].x;
 +	    right_x = INT32_MAX;
 +	    left_c  = right_c = (pixman_color_t*) &transparent_black;
 +	}
 +	else
 +	{
 +	    left_x  =  stops[n-1].x;
 +	    right_x =  stops[n].x;
 +	    left_c  = &stops[n-1].color;
 +	    right_c = &stops[n].color;
 +	}
 +    }
 +    
 +    walker->left_x   = left_x;
 +    walker->right_x  = right_x;
 +    walker->left_ag  = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8);
 +    walker->left_rb  = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
 +    walker->right_ag = ((right_c->alpha >> 8) << 16)  | (right_c->green >> 8);
 +    walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
 +    
 +    if ( walker->left_x == walker->right_x                ||
 +	 ( walker->left_ag == walker->right_ag &&
 +	   walker->left_rb == walker->right_rb )   )
 +    {
 +	walker->stepper = 0;
 +    }
 +    else
 +    {
 +	int32_t width = right_x - left_x;
 +	walker->stepper = ((1 << 24) + width/2)/width;
 +    }
 +    
 +    walker->need_reset = FALSE;
 +}
 +
 +#define  GRADIENT_WALKER_NEED_RESET(w,x)				\
 +    ( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x)
 +
++#undef image
 +
 +/* resets the walker first when GRADIENT_WALKER_NEED_RESET(w,x) is TRUE, so callers need not check */
 +static uint32_t
 +_gradient_walker_pixel (GradientWalker  *walker,
 +                        pixman_fixed_32_32_t     x)
 +{
 +    int  dist, idist;
 +    uint32_t  t1, t2, a, color;
 +    
 +    if (GRADIENT_WALKER_NEED_RESET (walker, x))
 +        _gradient_walker_reset (walker, x);
 +    
 +    dist  = ((int)(x - walker->left_x)*walker->stepper) >> 16;
 +    idist = 256 - dist;
 +    
 +    /* combined INTERPOLATE and premultiply */
 +    t1 = walker->left_rb*idist + walker->right_rb*dist;
 +    t1 = (t1 >> 8) & 0xff00ff;
 +    
 +    t2  = walker->left_ag*idist + walker->right_ag*dist;
 +    t2 &= 0xff00ff00;
 +    
 +    color = t2 & 0xff000000;
 +    a     = t2 >> 24;
 +    
 +    t1  = t1*a + 0x800080;
 +    t1  = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
 +    
 +    t2  = (t2 >> 8)*a + 0x800080;
 +    t2  = (t2 + ((t2 >> 8) & 0xff00ff));
 +    
 +    return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
 +}
 +
 +static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +#if 0
 +    SourcePictPtr   pGradient = pict->pSourcePict;
 +#endif
 +    GradientWalker  walker;
 +    uint32_t       *end = buffer + width;
 +    gradient_t	    *gradient;
 +    
 +    if (pict->common.type == SOLID)
 +    {
 +	register uint32_t color = ((solid_fill_t *)pict)->color;
 +	
 +	while (buffer < end)
 +	    *(buffer++) = color;
 +	
 +	return;
 +    }
 +    
 +    gradient = (gradient_t *)pict;
 +    
 +    _gradient_walker_init (&walker, gradient, pict->common.repeat);
 +    
 +    if (pict->common.type == LINEAR) {
 +	pixman_vector_t v, unit;
 +	pixman_fixed_32_32_t l;
 +	pixman_fixed_48_16_t dx, dy, a, b, off;
 +	linear_gradient_t *linear = (linear_gradient_t *)pict;
 +	
 +        /* reference point is the center of the pixel */
 +        v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
 +        v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
 +        v.vector[2] = pixman_fixed_1;
 +        if (pict->common.transform) {
 +            if (!pixman_transform_point_3d (pict->common.transform, &v))
 +                return;
 +            unit.vector[0] = pict->common.transform->matrix[0][0];
 +            unit.vector[1] = pict->common.transform->matrix[1][0];
 +            unit.vector[2] = pict->common.transform->matrix[2][0];
 +        } else {
 +            unit.vector[0] = pixman_fixed_1;
 +            unit.vector[1] = 0;
 +            unit.vector[2] = 0;
 +        }
 +	
 +        dx = linear->p2.x - linear->p1.x;
 +        dy = linear->p2.y - linear->p1.y;
 +        l = dx*dx + dy*dy;
 +        if (l != 0) {
 +            a = (dx << 32) / l;
 +            b = (dy << 32) / l;
 +            off = (-a*linear->p1.x - b*linear->p1.y)>>16;
 +        }
 +        if (l == 0  || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)) {
 +            pixman_fixed_48_16_t inc, t;
 +            /* affine transformation only */
 +            if (l == 0) {
 +                t = 0;
 +                inc = 0;
 +            } else {
 +                t = ((a*v.vector[0] + b*v.vector[1]) >> 16) + off;
 +                inc = (a * unit.vector[0] + b * unit.vector[1]) >> 16;
 +            }
 +	    
 +	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
 +	    {
 +		register uint32_t color;
 +		
 +		color = _gradient_walker_pixel( &walker, t );
 +		while (buffer < end)
 +		    *(buffer++) = color;
 +	    }
 +	    else
 +	    {
 +                if (!mask) {
 +                    while (buffer < end)
 +                    {
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +                        buffer += 1;
 +                        t      += inc;
 +                    }
 +                } else {
 +                    while (buffer < end) {
 +                        if (*mask++ & maskBits)
 +                        {
 +			    *(buffer) = _gradient_walker_pixel (&walker, t);
 +                        }
 +                        buffer += 1;
 +                        t      += inc;
 +                    }
 +                }
 +	    }
 +	}
 +	else /* projective transformation */
 +	{
 +	    pixman_fixed_48_16_t t;
 +	    
 +	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
 +	    {
 +		register uint32_t color;
 +		
 +		if (v.vector[2] == 0)
 +		{
 +		    t = 0;
 +		}
 +		else
 +		{
 +		    pixman_fixed_48_16_t x, y;
 +		    
 +		    x = ((pixman_fixed_48_16_t) v.vector[0] << 16) / v.vector[2];
 +		    y = ((pixman_fixed_48_16_t) v.vector[1] << 16) / v.vector[2];
 +		    t = ((a * x + b * y) >> 16) + off;
 +		}
 +		
 + 		color = _gradient_walker_pixel( &walker, t );
 +		while (buffer < end)
 +		    *(buffer++) = color;
 +	    }
 +	    else
 +	    {
 +		while (buffer < end)
 +		{
 +		    if (!mask || *mask++ & maskBits)
 +		    {
 +			if (v.vector[2] == 0) {
 +			    t = 0;
 +			} else {
 +			    pixman_fixed_48_16_t x, y;
 +			    x = ((pixman_fixed_48_16_t)v.vector[0] << 16) / v.vector[2];
 +			    y = ((pixman_fixed_48_16_t)v.vector[1] << 16) / v.vector[2];
 +			    t = ((a*x + b*y) >> 16) + off;
 +			}
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    ++buffer;
 +		    v.vector[0] += unit.vector[0];
 +		    v.vector[1] += unit.vector[1];
 +		    v.vector[2] += unit.vector[2];
 +		}
 +            }
 +        }
 +    } else {
 +	
 +/*
 + * In the radial gradient problem we are given two circles (c₁,r₁) and
 + * (c₂,r₂) that define the gradient itself. Then, for any point p, we
 + * must compute the value(s) of t within [0.0, 1.0] representing the
 + * circle(s) that would color the point.
 + *
 + * There are potentially two values of t since the point p can be
 + * colored by both sides of the circle, (which happens whenever one
 + * circle is not entirely contained within the other).
 + *
 + * If we solve for a value of t that is outside of [0.0, 1.0] then we
 + * use the extend mode (NONE, REPEAT, REFLECT, or PAD) to map to a
 + * value within [0.0, 1.0].
 + *
 + * Here is an illustration of the problem:
 + *
 + *              p₂
 + *           p  •
 + *           •   ╲
 + *        ·       ╲r₂
 + *  p₁ ·           ╲
 + *  •              θ╲
 + *   ╲             ╌╌•
 + *    ╲r₁        ·   c₂
 + *    θ╲    ·
 + *    ╌╌•
 + *      c₁
 + *
 + * Given (c₁,r₁), (c₂,r₂) and p, we must find an angle θ such that two
 + * points p₁ and p₂ on the two circles are collinear with p. Then, the
 + * desired value of t is the ratio of the length of p₁p to the length
 + * of p₁p₂.
 + *
 + * So, we have six unknown values: (p₁x, p₁y), (p₂x, p₂y), θ and t.
 + * We can also write six equations that constrain the problem:
 + *
 + * Point p₁ is a distance r₁ from c₁ at an angle of θ:
 + *
 + *	1. p₁x = c₁x + r₁·cos θ
 + *	2. p₁y = c₁y + r₁·sin θ
 + *
 + * Point p₂ is a distance r₂ from c₂ at an angle of θ:
 + *
 + *	3. p₂x = c₂x + r₂·cos θ
 + *	4. p₂y = c₂y + r₂·sin θ
 + *
 + * Point p lies at a fraction t along the line segment p₁p₂:
 + *
 + *	5. px = t·p₂x + (1-t)·p₁x
 + *	6. py = t·p₂y + (1-t)·p₁y
 + *
 + * To solve, first substitute 1-4 into 5 and 6:
 + *
 + * px = t·(c₂x + r₂·cos θ) + (1-t)·(c₁x + r₁·cos θ)
 + * py = t·(c₂y + r₂·sin θ) + (1-t)·(c₁y + r₁·sin θ)
 + *
 + * Then solve each for cos θ and sin θ expressed as a function of t:
 + *
 + * cos θ = (-(c₂x - c₁x)·t + (px - c₁x)) / ((r₂-r₁)·t + r₁)
 + * sin θ = (-(c₂y - c₁y)·t + (py - c₁y)) / ((r₂-r₁)·t + r₁)
 + *
 + * To simplify this a bit, we define new variables for several of the
 + * common terms as shown below:
 + *
 + *              p₂
 + *           p  •
 + *           •   ╲
 + *        ·  ┆    ╲r₂
 + *  p₁ ·     ┆     ╲
 + *  •     pdy┆      ╲
 + *   ╲       ┆       •c₂
 + *    ╲r₁    ┆   ·   ┆
 + *     ╲    ·┆       ┆cdy
 + *      •╌╌╌╌┴╌╌╌╌╌╌╌┘
 + *    c₁  pdx   cdx
 + *
 + * cdx = (c₂x - c₁x)
 + * cdy = (c₂y - c₁y)
 + *  dr =  r₂-r₁
 + * pdx =  px - c₁x
 + * pdy =  py - c₁y
 + *
 + * Note that cdx, cdy, and dr do not depend on point p at all, so can
 + * be pre-computed for the entire gradient. The simplified equations
 + * are now:
 + *
 + * cos θ = (-cdx·t + pdx) / (dr·t + r₁)
 + * sin θ = (-cdy·t + pdy) / (dr·t + r₁)
 + *
 + * Finally, to get a single function of t and eliminate the last
 + * unknown θ, we use the identity sin²θ + cos²θ = 1. First, square
 + * each equation, (we knew a quadratic was coming since it must be
 + * possible to obtain two solutions in some cases):
 + *
 + * cos²θ = (cdx²t² - 2·cdx·pdx·t + pdx²) / (dr²·t² + 2·r₁·dr·t + r₁²)
 + * sin²θ = (cdy²t² - 2·cdy·pdy·t + pdy²) / (dr²·t² + 2·r₁·dr·t + r₁²)
 + *
 + * Then add both together, set the result equal to 1, and express as a
 + * standard quadratic equation in t of the form At² + Bt + C = 0
 + *
 + * (cdx² + cdy² - dr²)·t² - 2·(cdx·pdx + cdy·pdy + r₁·dr)·t + (pdx² + pdy² - r₁²) = 0
 + *
 + * In other words:
 + *
 + * A = cdx² + cdy² - dr²
 + * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
 + * C = pdx² + pdy² - r₁²
 + *
 + * And again, notice that A does not depend on p, so can be
 + * precomputed. From here we just use the quadratic formula to solve
 + * for t:
 + *
 + * t = (-B ± ⎷(B² - 4·A·C)) / (2·A)
 + */
 +        /* radial or conical */
 +        pixman_bool_t affine = TRUE;
 +        double cx = 1.;
 +        double cy = 0.;
 +        double cz = 0.;
 +	double rx = x + 0.5;
 +	double ry = y + 0.5;
 +        double rz = 1.;
 +	
 +        if (pict->common.transform) {
 +            pixman_vector_t v;
 +            /* reference point is the center of the pixel */
 +            v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
 +            v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
 +            v.vector[2] = pixman_fixed_1;
 +            if (!pixman_transform_point_3d (pict->common.transform, &v))
 +                return;
 +	    
 +            cx = pict->common.transform->matrix[0][0]/65536.;
 +            cy = pict->common.transform->matrix[1][0]/65536.;
 +            cz = pict->common.transform->matrix[2][0]/65536.;
 +            rx = v.vector[0]/65536.;
 +            ry = v.vector[1]/65536.;
 +            rz = v.vector[2]/65536.;
 +            affine = pict->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1;
 +        }
 +	
 +        if (pict->common.type == RADIAL) {
 +	    radial_gradient_t *radial = (radial_gradient_t *)pict;
 +            if (affine) {
 +                while (buffer < end) {
 +		    if (!mask || *mask++ & maskBits)
 +		    {
 +			double pdx, pdy;
 +			double B, C;
 +			double det;
 +			double c1x = radial->c1.x / 65536.0;
 +			double c1y = radial->c1.y / 65536.0;
 +			double r1  = radial->c1.radius / 65536.0;
 +                        pixman_fixed_48_16_t t;
 +			
 +			pdx = rx - c1x;
 +			pdy = ry - c1y;
 +			
 +			B = -2 * (  pdx * radial->cdx
 +				    + pdy * radial->cdy
 +				    + r1 * radial->dr);
 +			C = (pdx * pdx + pdy * pdy - r1 * r1);
 +			
 +                        det = (B * B) - (4 * radial->A * C);
 +			if (det < 0.0)
 +			    det = 0.0;
 +			
 +			if (radial->A < 0)
 +			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
 +			else
 +			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
 +			
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    ++buffer;
 +		    
 +                    rx += cx;
 +                    ry += cy;
 +                }
 +            } else {
 +		/* projective */
 +                while (buffer < end) {
 +		    if (!mask || *mask++ & maskBits)
 +		    {
 +			double pdx, pdy;
 +			double B, C;
 +			double det;
 +			double c1x = radial->c1.x / 65536.0;
 +			double c1y = radial->c1.y / 65536.0;
 +			double r1  = radial->c1.radius / 65536.0;
 +                        pixman_fixed_48_16_t t;
 +			double x, y;
 +			
 +			if (rz != 0) {
 +			    x = rx/rz;
 +			    y = ry/rz;
 +			} else {
 +			    x = y = 0.;
 +			}
 +			
 +			pdx = x - c1x;
 +			pdy = y - c1y;
 +			
 +			B = -2 * (  pdx * radial->cdx
 +				    + pdy * radial->cdy
 +				    + r1 * radial->dr);
 +			C = (pdx * pdx + pdy * pdy - r1 * r1);
 +			
 +                        det = (B * B) - (4 * radial->A * C);
 +			if (det < 0.0)
 +			    det = 0.0;
 +			
 +			if (radial->A < 0)
 +			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
 +			else
 +			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
 +			
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    ++buffer;
 +		    
 +                    rx += cx;
 +                    ry += cy;
 +		    rz += cz;
 +                }
 +            }
 +        } else /* SourcePictTypeConical */ {
 +	    conical_gradient_t *conical = (conical_gradient_t *)pict;
 +            double a = conical->angle/(180.*65536);
 +            if (affine) {
 +                rx -= conical->center.x/65536.;
 +                ry -= conical->center.y/65536.;
 +		
 +                while (buffer < end) {
 +		    double angle;
 +		    
 +                    if (!mask || *mask++ & maskBits)
 +		    {
 +                        pixman_fixed_48_16_t   t;
 +			
 +                        angle = atan2(ry, rx) + a;
 +			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
 +			
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    
 +                    ++buffer;
 +                    rx += cx;
 +                    ry += cy;
 +                }
 +            } else {
 +                while (buffer < end) {
 +                    double x, y;
 +                    double angle;
 +		    
 +                    if (!mask || *mask++ & maskBits)
 +                    {
 +			pixman_fixed_48_16_t  t;
 +			
 +			if (rz != 0) {
 +			    x = rx/rz;
 +			    y = ry/rz;
 +			} else {
 +			    x = y = 0.;
 +			}
 +			x -= conical->center.x/65536.;
 +			y -= conical->center.y/65536.;
 +			angle = atan2(y, x) + a;
 +			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
 +			
 +			*(buffer) = _gradient_walker_pixel (&walker, t);
 +		    }
 +		    
 +                    ++buffer;
 +                    rx += cx;
 +                    ry += cy;
 +                    rz += cz;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +    uint32_t     *bits;
 +    int32_t    stride;
 +    fetchPixelProc_32   fetch;
 +    pixman_vector_t	v;
 +    pixman_vector_t  unit;
 +    int         i;
 +    pixman_box16_t box;
-     const pixman_indexed_t * indexed = pict->indexed;
 +    pixman_bool_t affine = TRUE;
 +    
 +    fetch = fetchPixelProcForPicture_32(pict);
 +    
 +    bits = pict->bits;
 +    stride = pict->rowstride;
 +    
 +    /* reference point is the center of the pixel */
 +    v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1 / 2;
 +    v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1 / 2;
 +    v.vector[2] = pixman_fixed_1;
 +    
 +    /* when using convolution filters one might get here without a transform */
 +    if (pict->common.transform)
 +    {
 +        if (!pixman_transform_point_3d (pict->common.transform, &v))
 +	{
 +            fbFinishAccess (pict->pDrawable);
 +            return;
 +        }
 +        unit.vector[0] = pict->common.transform->matrix[0][0];
 +        unit.vector[1] = pict->common.transform->matrix[1][0];
 +        unit.vector[2] = pict->common.transform->matrix[2][0];
 +        affine = v.vector[2] == pixman_fixed_1 && unit.vector[2] == 0;
 +    }
 +    else
 +    {
 +        unit.vector[0] = pixman_fixed_1;
 +        unit.vector[1] = 0;
 +        unit.vector[2] = 0;
 +    }
 +    
 +    if (pict->common.filter == PIXMAN_FILTER_NEAREST || pict->common.filter == PIXMAN_FILTER_FAST)
 +    {
 +        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL) {
 +            if (pixman_region_n_rects (pict->common.src_clip) == 1) {
 +		for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = MOD(DIV(v.vector[1],v.vector[2]), pict->height);
 +				x = MOD(DIV(v.vector[0],v.vector[2]), pict->width);
 +			    } else {
 +				y = MOD(v.vector[1]>>16, pict->height);
 +				x = MOD(v.vector[0]>>16, pict->width);
 +			    }
- 			    *(buffer + i) = fetch((pixman_image_t *)pict, bits + y * stride, x, indexed);
++			    *(buffer + i) = fetch(pict, x, y);
 +			}
 +		    }
 +		    
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = MOD(DIV(v.vector[1],v.vector[2]), pict->height);
 +				x = MOD(DIV(v.vector[0],v.vector[2]), pict->width);
 +			    } else {
 +				y = MOD(v.vector[1]>>16, pict->height);
 +				x = MOD(v.vector[0]>>16, pict->width);
 +			    }
 +			    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box))
- 				*(buffer + i) = fetch ((pixman_image_t *)pict, bits + y*stride, x, indexed);
++				*(buffer + i) = fetch (pict, x, y);
 +			    else
 +				*(buffer + i) = 0;
 +			}
 +		    }
 +		    
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        } else {
 +            if (pixman_region_n_rects(pict->common.src_clip) == 1) {
 +                box = pict->common.src_clip->extents;
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = DIV(v.vector[1],v.vector[2]);
 +				x = DIV(v.vector[0],v.vector[2]);
 +			    } else {
 +				y = v.vector[1]>>16;
 +				x = v.vector[0]>>16;
 +			    }
 +			    *(buffer + i) = ((x < box.x1) | (x >= box.x2) | (y < box.y1) | (y >= box.y2)) ?
- 				0 : fetch((pixman_image_t *)pict, bits + (y)*stride, x, indexed);
++				0 : fetch(pict, x, y);
 +			}
 +		    }
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +                    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    if (!affine) {
 +				y = DIV(v.vector[1],v.vector[2]);
 +				x = DIV(v.vector[0],v.vector[2]);
 +			    } else {
 +				y = v.vector[1]>>16;
 +				x = v.vector[0]>>16;
 +			    }
 +			    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box))
- 				*(buffer + i) = fetch((pixman_image_t *)pict, bits + y*stride, x, indexed);
++				*(buffer + i) = fetch(pict, x, y);
 +			    else
 +				*(buffer + i) = 0;
 +			}
 +		    }
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        }
 +    } else if (pict->common.filter == PIXMAN_FILTER_BILINEAR	||
 +	       pict->common.filter == PIXMAN_FILTER_GOOD	||
 +	       pict->common.filter == PIXMAN_FILTER_BEST)
 +    {
 +        /* adjust vector for maximum contribution at 0.5, 0.5 of each texel. */
 +        v.vector[0] -= v.vector[2] / 2;
 +        v.vector[1] -= v.vector[2] / 2;
 +        unit.vector[0] -= unit.vector[2] / 2;
 +        unit.vector[1] -= unit.vector[2] / 2;
 +	
 +        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL) {
 +            if (pixman_region_n_rects(pict->common.src_clip) == 1) {
 +                for (i = 0; i < width; ++i) {
 +                    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
- 			    uint32_t *b;
 +			    uint32_t tl, tr, bl, br, r;
 +			    uint32_t ft, fb;
 +			    
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
 +			    
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
 +			    
 +			    x1 = MOD (x1, pict->width);
 +			    x2 = MOD (x2, pict->width);
 +			    y1 = MOD (y1, pict->height);
 +			    y2 = MOD (y2, pict->height);
 +			    
- 			    b = bits + y1*stride;
- 			    
- 			    tl = fetch((pixman_image_t *)pict, b, x1, indexed);
- 			    tr = fetch((pixman_image_t *)pict, b, x2, indexed);
- 			    b = bits + y2*stride;
- 			    bl = fetch((pixman_image_t *)pict, b, x1, indexed);
- 			    br = fetch((pixman_image_t *)pict, b, x2, indexed);
++			    tl = fetch(pict, x1, y1);
++			    tr = fetch(pict, x2, y1);
++			    bl = fetch(pict, x1, y2);
++			    br = fetch(pict, x2, y2);
 +			    
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
 +			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
- 			    uint32_t *b;
 +			    uint32_t tl, tr, bl, br, r;
 +			    uint32_t ft, fb;
 +			    
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
 +			    
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
 +			    
 +			    x1 = MOD (x1, pict->width);
 +			    x2 = MOD (x2, pict->width);
 +			    y1 = MOD (y1, pict->height);
 +			    y2 = MOD (y2, pict->height);
 +			    
- 			    b = bits + y1*stride;
- 			    
 +			    tl = pixman_region_contains_point(pict->common.src_clip, x1, y1, &box)
- 				? fetch((pixman_image_t *)pict, b, x1, indexed) : 0;
++				? fetch(pict, x1, y1) : 0;
 +			    tr = pixman_region_contains_point(pict->common.src_clip, x2, y1, &box)
- 				? fetch((pixman_image_t *)pict, b, x2, indexed) : 0;
- 			    b = bits + (y2)*stride;
++				? fetch(pict, x2, y1) : 0;
 +			    bl = pixman_region_contains_point(pict->common.src_clip, x1, y2, &box)
- 				? fetch((pixman_image_t *)pict, b, x1, indexed) : 0;
++				? fetch(pict, x1, y2) : 0;
 +			    br = pixman_region_contains_point(pict->common.src_clip, x2, y2, &box)
- 				? fetch((pixman_image_t *)pict, b, x2, indexed) : 0;
++				? fetch(pict, x2, y2) : 0;
 +			    
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +		    
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        } else {
 +            if (pixman_region_n_rects(pict->common.src_clip) == 1) {
 +                box = pict->common.src_clip->extents;
 +                for (i = 0; i < width; ++i) {
 +		    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
- 			    int x1, x2, y1, y2, distx, idistx, disty, idisty, x_off;
- 			    uint32_t *b;
++			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    pixman_bool_t x1_out, x2_out, y1_out, y2_out;
 +			    uint32_t ft, fb;
 +			    
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
 +			    
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
 +			    
- 			    b = bits + (y1)*stride;
- 			    x_off = x1;
- 			    
 +			    x1_out = (x1 < box.x1) | (x1 >= box.x2);
 +			    x2_out = (x2 < box.x1) | (x2 >= box.x2);
 +			    y1_out = (y1 < box.y1) | (y1 >= box.y2);
 +			    y2_out = (y2 < box.y1) | (y2 >= box.y2);
 +			    
- 			    tl = x1_out|y1_out ? 0 : fetch((pixman_image_t *)pict, b, x_off, indexed);
- 			    tr = x2_out|y1_out ? 0 : fetch((pixman_image_t *)pict, b, x_off + 1, indexed);
- 			    b += stride;
- 			    bl = x1_out|y2_out ? 0 : fetch((pixman_image_t *)pict, b, x_off, indexed);
- 			    br = x2_out|y2_out ? 0 : fetch((pixman_image_t *)pict, b, x_off + 1, indexed);
++			    tl = x1_out|y1_out ? 0 : fetch(pict, x1, y1);
++			    tr = x2_out|y1_out ? 0 : fetch(pict, x2, y1);
++			    bl = x1_out|y2_out ? 0 : fetch(pict, x1, y2);
++			    br = x2_out|y2_out ? 0 : fetch(pict, x2, y2);
 +			    
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +		    
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            } else {
 +                for (i = 0; i < width; ++i) {
 +                    if (!mask || mask[i] & maskBits)
 +		    {
 +			if (!v.vector[2]) {
 +			    *(buffer + i) = 0;
 +			} else {
- 			    int x1, x2, y1, y2, distx, idistx, disty, idisty, x_off;
- 			    uint32_t *b;
++			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 +			    uint32_t tl, tr, bl, br, r;
 +			    uint32_t ft, fb;
 +			    
 +			    if (!affine) {
 +				pixman_fixed_48_16_t div;
 +				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
 +				x1 = div >> 16;
 +				distx = ((pixman_fixed_t)div >> 8) & 0xff;
 +				div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
 +				y1 = div >> 16;
 +				disty = ((pixman_fixed_t)div >> 8) & 0xff;
 +			    } else {
 +				x1 = v.vector[0] >> 16;
 +				distx = (v.vector[0] >> 8) & 0xff;
 +				y1 = v.vector[1] >> 16;
 +				disty = (v.vector[1] >> 8) & 0xff;
 +			    }
 +			    x2 = x1 + 1;
 +			    y2 = y1 + 1;
 +			    
 +			    idistx = 256 - distx;
 +			    idisty = 256 - disty;
 +			    
- 			    b = bits + (y1)*stride;
- 			    x_off = x1;
- 			    
 +			    tl = pixman_region_contains_point(pict->common.src_clip, x1, y1, &box)
- 				? fetch((pixman_image_t *)pict, b, x_off, indexed) : 0;
++				? fetch(pict, x1, y1) : 0;
 +			    tr = pixman_region_contains_point(pict->common.src_clip, x2, y1, &box)
- 				? fetch((pixman_image_t *)pict, b, x_off + 1, indexed) : 0;
- 			    b += stride;
++				? fetch(pict, x2, y1) : 0;
 +			    bl = pixman_region_contains_point(pict->common.src_clip, x1, y2, &box)
- 				? fetch((pixman_image_t *)pict, b, x_off, indexed) : 0;
++				? fetch(pict, x1, y2) : 0;
 +			    br = pixman_region_contains_point(pict->common.src_clip, x2, y2, &box)
- 				? fetch((pixman_image_t *)pict, b, x_off + 1, indexed) : 0;
++				? fetch(pict, x2, y2) : 0;
 +			    
 +			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 +			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 +			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
 +			    ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
 +			    fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
 +			    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
 +			    ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
 +			    fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
 +			    r |= (((ft * idisty + fb * disty)) & 0xff0000);
 +			    ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
 +			    fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
 +			    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 +			    *(buffer + i) = r;
 +			}
 +		    }
 +		    
 +                    v.vector[0] += unit.vector[0];
 +                    v.vector[1] += unit.vector[1];
 +                    v.vector[2] += unit.vector[2];
 +                }
 +            }
 +        }
 +    } else if (pict->common.filter == PIXMAN_FILTER_CONVOLUTION) {
 +        pixman_fixed_t *params = pict->common.filter_params;
 +        int32_t cwidth = pixman_fixed_to_int(params[0]);
 +        int32_t cheight = pixman_fixed_to_int(params[1]);
 +        int xoff = (params[0] - pixman_fixed_1) >> 1;
 +	int yoff = (params[1] - pixman_fixed_1) >> 1;
 +        params += 2;
 +        for (i = 0; i < width; ++i) {
 +	    if (!mask || mask[i] & maskBits)
 +	    {
 +		if (!v.vector[2]) {
 +		    *(buffer + i) = 0;
 +		} else {
 +		    int x1, x2, y1, y2, x, y;
 +		    int32_t srtot, sgtot, sbtot, satot;
 +		    pixman_fixed_t *p = params;
 +		    
 +		    if (!affine) {
 +			pixman_fixed_48_16_t tmp;
 +			tmp = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2] - xoff;
 +			x1 = pixman_fixed_to_int(tmp);
 +			tmp = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2] - yoff;
 +			y1 = pixman_fixed_to_int(tmp);
 +		    } else {
 +			x1 = pixman_fixed_to_int(v.vector[0] - xoff);
 +			y1 = pixman_fixed_to_int(v.vector[1] - yoff);
 +		    }
 +		    x2 = x1 + cwidth;
 +		    y2 = y1 + cheight;
 +		    
 +		    srtot = sgtot = sbtot = satot = 0;
 +		    
 +		    for (y = y1; y < y2; y++) {
 +			int ty = (pict->common.repeat == PIXMAN_REPEAT_NORMAL) ? MOD (y, pict->height) : y;
 +			for (x = x1; x < x2; x++) {
 +			    if (*p) {
 +				int tx = (pict->common.repeat == PIXMAN_REPEAT_NORMAL) ? MOD (x, pict->width) : x;
 +				if (pixman_region_contains_point (pict->common.src_clip, tx, ty, &box)) {
- 				    uint32_t *b = bits + (ty)*stride;
- 				    uint32_t c = fetch((pixman_image_t *)pict, b, tx, indexed);
++				    uint32_t c = fetch(pict, tx, ty);
 +				    
 +				    srtot += Red(c) * *p;
 +				    sgtot += Green(c) * *p;
 +				    sbtot += Blue(c) * *p;
 +				    satot += Alpha(c) * *p;
 +				}
 +			    }
 +			    p++;
 +			}
 +		    }
 +		    
 +		    satot >>= 16;
 +		    srtot >>= 16;
 +		    sgtot >>= 16;
 +		    sbtot >>= 16;
 +		    
 +		    if (satot < 0) satot = 0; else if (satot > 0xff) satot = 0xff;
 +		    if (srtot < 0) srtot = 0; else if (srtot > 0xff) srtot = 0xff;
 +		    if (sgtot < 0) sgtot = 0; else if (sgtot > 0xff) sgtot = 0xff;
 +		    if (sbtot < 0) sbtot = 0; else if (sbtot > 0xff) sbtot = 0xff;
 +		    
 +		    *(buffer + i) = ((satot << 24) |
 +				     (srtot << 16) |
 +				     (sgtot <<  8) |
 +				     (sbtot       ));
 +		}
 +	    }
 +            v.vector[0] += unit.vector[0];
 +            v.vector[1] += unit.vector[1];
 +            v.vector[2] += unit.vector[2];
 +        }
 +    }
 +    
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +
 +/*
 + * Fetch `width` 32-bit pixels of scanline `y`, starting at `x`, into `buffer`.
 + *
 + * If the image has no alpha map this is a plain fbFetchTransformed() call.
 + * Otherwise the matching scanline of the alpha map (offset by alpha_origin)
 + * is fetched as well; for every pixel selected by the mask the top byte of
 + * the alpha-map pixel becomes the pixel's alpha and the three colour
 + * channels are scaled by it with div_255().
 + *
 + * A pixel i is processed only when `mask` is NULL or (mask[i] & maskBits)
 + * is non-zero; other entries of `buffer` are left as the fetcher left them.
 + *
 + * Scanlines longer than SCANLINE_BUFFER_LENGTH need a heap buffer for the
 + * alpha values.  If that allocation fails the function returns with the
 + * image's own pixels in `buffer` (no alpha-map applied) instead of writing
 + * through a NULL pointer.
 + */
 +static void fbFetchExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 +{
 +    int i;
 +    uint32_t stack_alpha[SCANLINE_BUFFER_LENGTH];
 +    uint32_t *alpha_buffer = stack_alpha;
 +    
 +    /* Colour data is needed on every path, so fetch it up front. */
 +    fbFetchTransformed (pict, x, y, width, buffer, mask, maskBits);
 +    
 +    if (!pict->common.alpha_map)
 +	return;
 +    
 +    if (width > SCANLINE_BUFFER_LENGTH) {
 +        alpha_buffer = (uint32_t *) pixman_malloc_ab (width, sizeof(uint32_t));
 +	/* Out of memory: keep the un-multiplied pixels rather than crash. */
 +	if (!alpha_buffer)
 +	    return;
 +    }
 +    
 +    fbFetchTransformed((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x,
 +		       y - pict->common.alpha_origin.y, width, alpha_buffer,
 +		       mask, maskBits);
 +    for (i = 0; i < width; ++i) {
 +        if (!mask || mask[i] & maskBits)
 +	{
 +	    /* Only the top byte of the alpha-map pixel is used. */
 +	    int a = alpha_buffer[i]>>24;
 +	    *(buffer + i) = (a << 24)
 +		| (div_255(Red(*(buffer + i)) * a) << 16)
 +		| (div_255(Green(*(buffer + i)) * a) << 8)
 +		| (div_255(Blue(*(buffer + i)) * a));
 +	}
 +    }
 +    
 +    /* Release the heap buffer only if one replaced the stack array. */
 +    if (alpha_buffer != stack_alpha)
 +        free(alpha_buffer);
 +}
 +
 +/*
 + * Write `width` 32-bit pixels from `buffer` into scanline `y` of `pict`,
 + * starting at column `x`, using the store routine returned by
 + * storeProcForPicture_32() for this image.
 + */
 +static void fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    storeProc_32 write_row = storeProcForPicture_32(pict);
 +    const pixman_indexed_t * palette = pict->indexed;
 +    const int32_t row_stride = pict->rowstride;
 +    uint32_t *row = pict->bits;
 +    
 +    /* rowstride is counted in uint32_t units: it is added to a uint32_t *. */
 +    row += y*row_stride;
 +    
 +    write_row((pixman_image_t *)pict, row, buffer, x, width, palette);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * 64-bit counterpart of fbStore(): write `width` wide pixels from `buffer`
 + * into scanline `y` of `pict`, starting at column `x`, using the store
 + * routine returned by storeProcForPicture_64() for this image.
 + */
 +static void fbStore64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    storeProc_64 write_row = storeProcForPicture_64(pict);
 +    const pixman_indexed_t * palette = pict->indexed;
 +    const int32_t row_stride = pict->rowstride;
 +    uint32_t *row = pict->bits;
 +    
 +    /* The image storage is still addressed in uint32_t units. */
 +    row += y*row_stride;
 +    
 +    write_row((pixman_image_t *)pict, row, buffer, x, width, palette);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * Store one scanline of 32-bit pixels into `pict` and, when the image has
 + * an alpha map, store the same pixels into the alpha map as well.
 + *
 + * The alpha-map destination is shifted by alpha_origin in both x and y.
 + * Without an alpha map this simply forwards to fbStore().
 + */
 +static void fbStoreExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 +{
 +    storeProc_32 write_color;
 +    storeProc_32 write_alpha;
 +    uint32_t *color_row;
 +    uint32_t *alpha_row;
 +    int32_t color_stride;
 +    int32_t alpha_stride;
 +    const pixman_indexed_t * color_palette = pict->indexed;
 +    const pixman_indexed_t * alpha_palette;
 +    
 +    if (!pict->common.alpha_map) {
 +	fbStore(pict, x, y, width, buffer);
 +	return;
 +    }
 +    
 +    write_color = storeProcForPicture_32(pict);
 +    write_alpha = storeProcForPicture_32(pict->common.alpha_map);
 +    alpha_palette = pict->common.alpha_map->indexed;
 +    
 +    /* Locate the destination row in the colour image ... */
 +    color_row = pict->bits;
 +    color_stride = pict->rowstride;
 +    
 +    /* ... and the corresponding row in the alpha map. */
 +    alpha_row = pict->common.alpha_map->bits;
 +    alpha_stride = pict->common.alpha_map->rowstride;
 +    
 +    color_row += y*color_stride;
 +    alpha_row += (y - pict->common.alpha_origin.y)*alpha_stride;
 +    
 +    /* Both stores receive the same pixel buffer. */
 +    write_color((pixman_image_t *)pict, color_row, buffer, x, width, color_palette);
 +    write_alpha((pixman_image_t *)pict->common.alpha_map,
 +		alpha_row, buffer, x - pict->common.alpha_origin.x, width, alpha_palette);
 +    
 +    fbFinishAccess (pict->alpha_map->pDrawable);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * 64-bit counterpart of fbStoreExternalAlpha(): store one scanline of wide
 + * pixels into `pict` and, when the image has an alpha map, into the alpha
 + * map too (shifted by alpha_origin).  Without an alpha map this forwards to
 + * fbStore64().
 + */
 +static void fbStoreExternalAlpha64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer)
 +{
 +    storeProc_64 write_color;
 +    storeProc_64 write_alpha;
 +    uint32_t *color_row;
 +    uint32_t *alpha_row;
 +    int32_t color_stride;
 +    int32_t alpha_stride;
 +    const pixman_indexed_t * color_palette = pict->indexed;
 +    const pixman_indexed_t * alpha_palette;
 +    
 +    if (!pict->common.alpha_map) {
 +	fbStore64(pict, x, y, width, buffer);
 +	return;
 +    }
 +    
 +    write_color = storeProcForPicture_64(pict);
 +    write_alpha = storeProcForPicture_64(pict->common.alpha_map);
 +    alpha_palette = pict->common.alpha_map->indexed;
 +    
 +    /* Locate the destination row in the colour image ... */
 +    color_row = pict->bits;
 +    color_stride = pict->rowstride;
 +    
 +    /* ... and the corresponding row in the alpha map. */
 +    alpha_row = pict->common.alpha_map->bits;
 +    alpha_stride = pict->common.alpha_map->rowstride;
 +    
 +    color_row += y*color_stride;
 +    alpha_row += (y - pict->common.alpha_origin.y)*alpha_stride;
 +    
 +    /* Both stores receive the same pixel buffer. */
 +    write_color((pixman_image_t *)pict, color_row, buffer, x, width, color_palette);
 +    write_alpha((pixman_image_t *)pict->common.alpha_map,
 +		alpha_row, buffer, x - pict->common.alpha_origin.x, width, alpha_palette);
 +    
 +    fbFinishAccess (pict->alpha_map->pDrawable);
 +    fbFinishAccess (pict->pDrawable);
 +}
 +
 +/*
 + * Widen `width` 32-bit pixels (8 bits per channel, alpha in the top byte)
 + * from `source` into 64-bit pixels (16 bits per channel, alpha in the top
 + * 16 bits) in `dest`.
 + *
 + * Each 8-bit value is replicated into both bytes of its 16-bit channel, so
 + * 0x00 widens to 0x0000 and 0xff widens to 0xffff.  Merely shifting the
 + * byte into the high half would turn a fully opaque 0xff into 0xff00, which
 + * is no longer full intensity in the wide format.  fbContract() keeps only
 + * the high byte of every channel, so contract(expand(p)) == p still holds.
 + *
 + * The 64-bit fetch wrappers in this file call this with `source` pointing
 + * into the upper half of the `dest` buffer.  That is safe only because the
 + * loop runs forward and reads source[i] before writing dest[i]; do not
 + * reorder it or qualify the pointers as non-aliasing.
 + */
 +static void
 +fbExpand(uint32_t * source, uint64_t * dest, int width)
 +{
 +    int i;
 +    for(i = 0; i < width; i++) {
 +	/* Read the narrow pixel before the store below can overwrite it. */
 +	uint32_t p = source[i];
 +	uint64_t a = (p >> 24) & 0xff;
 +	uint64_t r = (p >> 16) & 0xff;
 +	uint64_t g = (p >> 8) & 0xff;
 +	uint64_t b = p & 0xff;
 +	    
 +	/* For an 8-bit v, v * 0x0101 == (v << 8) | v. */
 +	dest[i] = ((a * 0x0101) << 48) |
 +		  ((r * 0x0101) << 32) |
 +		  ((g * 0x0101) << 16) |
 +		  (b * 0x0101);
 +    }
 +}
 +
 +/*
 + * Narrow `width` 64-bit pixels (16 bits per channel) from `source` into
 + * 32-bit pixels (8 bits per channel) in `dest` by keeping the most
 + * significant byte of every channel.
 + */
 +static void
 +fbContract(uint64_t * source, uint32_t * dest, int width)
 +{
 +    int n;
 +    
 +    for (n = 0; n < width; n++) {
 +	const uint64_t wide = source[n];
 +	uint32_t narrow;
 +	
 +	narrow  = (uint32_t)((wide >> 56) & 0xff) << 24;	/* alpha */
 +	narrow |= (uint32_t)((wide >> 40) & 0xff) << 16;	/* red   */
 +	narrow |= (uint32_t)((wide >> 24) & 0xff) << 8;		/* green */
 +	narrow |= (uint32_t)((wide >> 8) & 0xff);		/* blue  */
 +	
 +	dest[n] = narrow;
 +    }
 +}
 +
 +/*
 + * Narrow a single 64-bit pixel (16 bits per channel) to a 32-bit pixel
 + * (8 bits per channel) by keeping the most significant byte of each channel.
 + */
 +static uint32_t
 +fbContractPixel(uint64_t p)
 +{
 +    const uint32_t alpha = (uint32_t)((p >> 56) & 0xff);
 +    const uint32_t red   = (uint32_t)((p >> 40) & 0xff);
 +    const uint32_t green = (uint32_t)((p >> 24) & 0xff);
 +    const uint32_t blue  = (uint32_t)((p >> 8) & 0xff);
 +       
 +    return (alpha << 24) | (red << 16) | (green << 8) | blue;
 +}
 +
 +/*
 + * 64-bit wrapper around pixmanFetchSourcePict(): narrow the mask to 32 bits,
 + * run the 32-bit fetcher, then widen the result back into `buffer`.
 + *
 + * No extra memory is used.  The `width` uint64_t entries of `buffer` are
 + * treated as 2 * width uint32_t slots: the first `width` hold the narrowed
 + * mask and the last `width` hold the 32-bit fetch result, which fbExpand()
 + * then widens in place.
 + */
 +static void
 +pixmanFetchSourcePict64(source_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    uint32_t * narrow_pixels = ((uint32_t *) buffer) + width;
 +    uint32_t * narrow_mask = NULL;
 +    uint32_t narrow_mask_bits = fbContractPixel(maskBits);
 +
 +    if (mask) {
 +	narrow_mask = (uint32_t *) buffer;
 +	fbContract(mask, narrow_mask, width);
 +    }
 +
 +    pixmanFetchSourcePict(pict, x, y, width, narrow_pixels, narrow_mask, narrow_mask_bits);
 +    fbExpand(narrow_pixels, buffer, width);
 +}
 +
 +/*
 + * 64-bit wrapper around fbFetchExternalAlpha(): narrow the mask to 32 bits,
 + * run the 32-bit fetcher, then widen the result back into `buffer`.
 + *
 + * The first half of `buffer` (viewed as uint32_t) holds the narrowed mask
 + * and the second half holds the 32-bit pixels until fbExpand() widens them
 + * in place.
 + */
 +static void
 +fbFetchExternalAlpha64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    uint32_t * narrow_pixels = ((uint32_t *) buffer) + width;
 +    uint32_t * narrow_mask = NULL;
 +    uint32_t narrow_mask_bits = fbContractPixel(maskBits);
 +
 +    if (mask) {
 +	narrow_mask = (uint32_t *) buffer;
 +	fbContract(mask, narrow_mask, width);
 +    }
 +
 +    fbFetchExternalAlpha(pict, x, y, width, narrow_pixels, narrow_mask, narrow_mask_bits);
 +    fbExpand(narrow_pixels, buffer, width);
 +}
 +
 +/*
 + * 64-bit wrapper around fbFetchTransformed(): narrow the mask to 32 bits,
 + * run the 32-bit fetcher, then widen the result back into `buffer`.
 + *
 + * The first half of `buffer` (viewed as uint32_t) holds the narrowed mask
 + * and the second half holds the 32-bit pixels until fbExpand() widens them
 + * in place.
 + */
 +static void
 +fbFetchTransformed64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, uint64_t *mask, uint64_t maskBits)
 +{
 +    uint32_t * narrow_pixels = ((uint32_t *) buffer) + width;
 +    uint32_t * narrow_mask = NULL;
 +    uint32_t narrow_mask_bits = fbContractPixel(maskBits);
 +
 +    if (mask) {
 +	narrow_mask = (uint32_t *) buffer;
 +	fbContract(mask, narrow_mask, width);
 +    }
 +
 +    fbFetchTransformed(pict, x, y, width, narrow_pixels, narrow_mask, narrow_mask_bits);
 +    fbExpand(narrow_pixels, buffer, width);
 +}
 +
 +/*
 + * Uniform scanline callback types used by the general compositing loops.
 + * The concrete fetch/store functions take bits_image_t * or source_image_t *
 + * as their first argument and are cast to these types when selected.
 + *
 + * Store: (image, x, y, width, pixels).
 + * Fetch: (image, x, y, width, pixels, mask, maskBits); a NULL mask means
 + * every pixel is fetched.
 + */
 +typedef void (*scanStoreProc)(pixman_image_t *, int, int, int, uint32_t *);
 +typedef void (*scanFetchProc)(pixman_image_t *, int, int, int, uint32_t *,
 +			      uint32_t *, uint32_t);
 +
 +/* 64-bit (16 bits per channel) variants of the two types above. */
 +typedef void (*scanStoreProc64)(pixman_image_t *, int, int, int, uint64_t *);
 +typedef void (*scanFetchProc64)(pixman_image_t *, int, int, int, uint64_t *,
 +				uint64_t *, uint64_t);
 +
 +/*
 + * General (slow path) compositing of a data->width x data->height rectangle,
 + * one scanline at a time, with 32-bit intermediate pixels.
 + *
 + * scanline_buffer is scratch space carved into consecutive runs of
 + * data->width pixels: source first, destination second and, when a mask is
 + * fetched, mask third.  NOTE(review): the caller therefore appears to need
 + * at least 3 * data->width entries -- confirm against the call site.
 + *
 + * Steps:
 + *   1. choose a fetch routine for source, mask and destination, and a store
 + *      routine for the destination, from the image type, alpha map, repeat,
 + *      transform and filter settings and from the operator;
 + *   2. per scanline: fetch mask, fetch source, fetch destination, combine,
 + *      store.
 + *
 + * A fetch pointer that is NULL means "nothing to fetch".  A NULL store
 + * means the combine function writes straight into the destination bits.
 + * The function returns silently when the operator has no combine function.
 + */
 +//#ifndef PIXMAN_FB_ACCESSORS
 +//static
 +//#endif
 +void
 +PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 +			       uint32_t *scanline_buffer)
 +{
 +    uint32_t *src_buffer = scanline_buffer;
 +    uint32_t *dest_buffer = src_buffer + data->width;
 +    int i;
 +    scanStoreProc store;
 +    scanFetchProc fetchSrc = NULL, fetchMask = NULL, fetchDest = NULL;
 +    unsigned int srcClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    unsigned int maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    uint32_t *bits;
 +    int32_t stride;
 +    int xoff, yoff;
 +    
 +    /* Select the source fetcher; CLEAR never fetches the source. */
 +    if (data->op == PIXMAN_OP_CLEAR)
 +        fetchSrc = NULL;
 +    else if (IS_SOURCE_IMAGE (data->src))
 +    {
 +	fetchSrc = (scanFetchProc)pixmanFetchSourcePict;
 +	srcClass = SourcePictureClassify ((source_image_t *)data->src,
 +					  data->xSrc, data->ySrc,
 +					  data->width, data->height);
 +    }
 +    else
 +    {
 +	/* Note: this local shadows the outer uint32_t *bits. */
 +	bits_image_t *bits = (bits_image_t *)data->src;
 +	
 +	if (bits->common.alpha_map)
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetchExternalAlpha;
 +	}
 +	else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		 bits->width == 1 &&
 +		 bits->height == 1)
 +	{
 +	    /* A repeating 1x1 image is treated as a solid colour. */
 +	    fetchSrc = (scanFetchProc)fbFetchSolid;
 +	    srcClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	}
 +	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetch;
 +	}
 +	else
 +	{
 +	    fetchSrc = (scanFetchProc)fbFetchTransformed;
 +	}
 +    }
 +    
 +    /* Select the mask fetcher with the same rules as for the source. */
 +    if (!data->mask || data->op == PIXMAN_OP_CLEAR)
 +    {
 +	fetchMask = NULL;
 +    }
 +    else
 +    {
 +	if (IS_SOURCE_IMAGE (data->mask))
 +	{
 +	    fetchMask = (scanFetchProc)pixmanFetchSourcePict;
 +	    maskClass = SourcePictureClassify ((source_image_t *)data->mask,
 +					       data->xMask, data->yMask,
 +					       data->width, data->height);
 +	}
 +	else
 +	{
 +	    bits_image_t *bits = (bits_image_t *)data->mask;
 +	    
 +	    if (bits->common.alpha_map)
 +	    {
 +		fetchMask = (scanFetchProc)fbFetchExternalAlpha;
 +	    }
 +	    else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		     bits->width == 1 && bits->height == 1)
 +	    {
 +		fetchMask = (scanFetchProc)fbFetchSolid;
 +		maskClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 +	    }
 +	    else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +		fetchMask = (scanFetchProc)fbFetch;
 +	    else
 +		fetchMask = (scanFetchProc)fbFetchTransformed;
 +	}
 +    }
 +    
 +    /* Select destination fetch/store; CLEAR and SRC never read the dest. */
 +    if (data->dest->common.alpha_map)
 +    {
 +	fetchDest = (scanFetchProc)fbFetchExternalAlpha;
 +	store = (scanStoreProc)fbStoreExternalAlpha;
 +	
 +	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 +	    fetchDest = NULL;
 +    }
 +    else
 +    {
 +	fetchDest = (scanFetchProc)fbFetch;
 +	store = (scanStoreProc)fbStore;
 +	
 +	switch (data->op)
 +	{
 +	case PIXMAN_OP_CLEAR:
 +	case PIXMAN_OP_SRC:
 +	    fetchDest = NULL;
 +#ifndef PIXMAN_FB_ACCESSORS
 +	    /* fall-through */
 +	case PIXMAN_OP_ADD:
 +	case PIXMAN_OP_OVER:
 +	    /* Without accessors, these operators combine directly into
 +	       32-bit destinations, so no separate store pass is needed. */
 +	    switch (data->dest->bits.format) {
 +	    case PIXMAN_a8r8g8b8:
 +	    case PIXMAN_x8r8g8b8:
 +		store = NULL;
 +		break;
 +	    default:
 +		break;
 +	    }
 +#endif
 +	    break;
 +	}
 +    }
 +    
 +    /* bits/stride are only used by the direct (store == NULL) path. */
 +    if (!store)
 +    {
 +	bits = data->dest->bits.bits;
 +	stride = data->dest->bits.rowstride;
 +	xoff = yoff = 0;
 +    }
 +    else
 +    {
 +	bits = NULL;
 +	stride = 0;
 +	xoff = yoff = 0;
 +    }
 +    
 +    /* Component-alpha path: the mask is a BITS image with per-channel
 +       alpha, so the three-operand combineC functions are used. */
 +    if (fetchSrc		   &&
 +	fetchMask		   &&
 +	data->mask		   &&
 +	data->mask->common.type == BITS && 
 +	data->mask->common.component_alpha &&
 +	PIXMAN_FORMAT_RGB (data->mask->bits.format))
 +    {
 +	uint32_t *mask_buffer = dest_buffer + data->width;
 +	CombineFuncC compose = pixman_composeFunctions.combineC[data->op];
 +	if (!compose)
 +	    return;
 +	
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
 +		    
 +		    /* A horizontal mask is identical on every row:
 +		       fetch it once and reuse the buffer afterwards. */
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
 +		
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    /* Fetched unmasked because the row is reused for all
 +		       following scanlines. */
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    /* Any non-zero mask channel makes the pixel needed. */
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xffffffff);
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +	    }
 +	    
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
 +		
 +		/* blend */
 +		compose (dest_buffer, src_buffer, mask_buffer, data->width);
 +		
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		/* blend */
 +		compose (bits + (data->yDest + i+ yoff) * stride +
 +			 data->xDest + xoff,
 +			 src_buffer, mask_buffer, data->width);
 +	    }
 +	}
 +    }
 +    else
 +    {
 +	/* Unified-alpha path: the mask is folded into the source first and
 +	   the two-operand combineU functions are used. */
 +	uint32_t *src_mask_buffer = 0, *mask_buffer = 0;
 +	CombineFuncU compose = pixman_composeFunctions.combineU[data->op];
 +	if (!compose)
 +	    return;
 +	
 +	if (fetchMask)
 +	    mask_buffer = dest_buffer + data->width;
 +	
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
 +		    
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
 +		
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +		    
 +		    /* src_buffer must stay pristine for later rows, so the
 +		       masked result is produced in mask_buffer instead. */
 +		    if (mask_buffer)
 +		    {
 +			pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +			src_mask_buffer = mask_buffer;
 +		    }
 +		    else
 +			src_mask_buffer = src_buffer;
 +		    
 +		    fetchSrc = NULL;
 +		}
 +		else
 +		{
 +		    /* Only the mask's alpha byte decides whether a source
 +		       pixel needs to be fetched. */
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xff000000);
 +		    
 +		    if (mask_buffer)
 +			pixman_composeFunctions.combineMaskU (src_buffer,
 +							      mask_buffer,
 +							      data->width);
 +		    
 +		    src_mask_buffer = src_buffer;
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		/* Source row was cached on the first scanline; only the
 +		   mask changes from row to row. */
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +		
 +		pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +		
 +		src_mask_buffer = mask_buffer;
 +	    }
 +	    
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
 +		
 +		/* blend */
 +		compose (dest_buffer, src_mask_buffer, data->width);
 +		
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		/* blend */
 +		compose (bits + (data->yDest + i+ yoff) * stride +
 +			 data->xDest + xoff,
 +			 src_mask_buffer, data->width);
 +	    }
 +	}
 +    }
 +    
 +    /* The direct path touched the destination bits without going through
 +       a store routine, so it finishes the access itself. */
 +    if (!store)
 +	fbFinishAccess (data->dest->pDrawable);
 +}
 +
 +/* Composite data->height rows through 64-bit-per-pixel scanline buffers: fetch source (and mask), combine with dest, store. */
 +void
 +PIXMAN_COMPOSITE_RECT_GENERAL_WIDE (const FbComposeData *data,
 +			            uint64_t *scanline_buffer)
 +{
 +    uint64_t *src_buffer = scanline_buffer; /* source row: first data->width entries of scanline_buffer */
 +    uint64_t *dest_buffer = src_buffer + data->width; /* destination row: the next data->width entries */
 +    int i;
 +    scanStoreProc64 store;
 +    scanFetchProc64 fetchSrc = NULL, fetchMask = NULL, fetchDest = NULL;
 +    unsigned int srcClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    unsigned int maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
 +    uint32_t *bits;
 +    int32_t stride;
 +    int xoff, yoff; /* bits, stride, xoff and yoff are assigned below but never read in this function */
 +    /* Select the source fetcher; PIXMAN_OP_CLEAR leaves it NULL. */
 +    if (data->op == PIXMAN_OP_CLEAR)
 +        fetchSrc = NULL;
 +    else if (IS_SOURCE_IMAGE (data->src))
 +    {
 +	fetchSrc = (scanFetchProc64)pixmanFetchSourcePict64;
 +	srcClass = SourcePictureClassify ((source_image_t *)data->src,
 +					  data->xSrc, data->ySrc,
 +					  data->width, data->height);
 +    }
 +    else
 +    {
 +	bits_image_t *bits = (bits_image_t *)data->src; /* NOTE: shadows the outer uint32_t *bits */
 +	/* Order of checks: alpha map, 1x1 normal-repeat image, no transform and no convolution filter, otherwise transformed. */
 +	if (bits->common.alpha_map)
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetchExternalAlpha64;
 +	}
 +	else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		 bits->width == 1 &&
 +		 bits->height == 1)
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetchSolid64;
 +	    srcClass = SOURCE_IMAGE_CLASS_HORIZONTAL; /* makes the row loop fetch this source only once */
 +	}
 +	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetch64;
 +	}
 +	else
 +	{
 +	    fetchSrc = (scanFetchProc64)fbFetchTransformed64;
 +	}
 +    }
 +    /* Same selection for the mask; a missing mask or PIXMAN_OP_CLEAR leaves fetchMask NULL. */
 +    if (!data->mask || data->op == PIXMAN_OP_CLEAR)
 +    {
 +	fetchMask = NULL;
 +    }
 +    else
 +    {
 +	if (IS_SOURCE_IMAGE (data->mask))
 +	{
 +	    fetchMask = (scanFetchProc64)pixmanFetchSourcePict64;
 +	    maskClass = SourcePictureClassify ((source_image_t *)data->mask,
 +					       data->xMask, data->yMask,
 +					       data->width, data->height);
 +	}
 +	else
 +	{
 +	    bits_image_t *bits = (bits_image_t *)data->mask; /* NOTE: shadows the outer uint32_t *bits */
 +	    
 +	    if (bits->common.alpha_map)
 +	    {
 +		fetchMask = (scanFetchProc64)fbFetchExternalAlpha64;
 +	    }
 +	    else if (bits->common.repeat == PIXMAN_REPEAT_NORMAL &&
 +		     bits->width == 1 && bits->height == 1)
 +	    {
 +		fetchMask = (scanFetchProc64)fbFetchSolid64;
 +		maskClass = SOURCE_IMAGE_CLASS_HORIZONTAL; /* makes the row loop fetch this mask only once */
 +	    }
 +	    else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION)
 +		fetchMask = (scanFetchProc64)fbFetch64;
 +	    else
 +		fetchMask = (scanFetchProc64)fbFetchTransformed64;
 +	}
 +    }
 +    /* Destination fetch/store procs; for PIXMAN_OP_CLEAR and PIXMAN_OP_SRC the destination fetch is disabled. */
 +    if (data->dest->common.alpha_map)
 +    {
 +	fetchDest = (scanFetchProc64)fbFetchExternalAlpha64;
 +	store = (scanStoreProc64)fbStoreExternalAlpha64;
 +	
 +	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 +	    fetchDest = NULL;
 +    }
 +    else
 +    {
 +	fetchDest = (scanFetchProc64)fbFetch64;
 +	store = (scanStoreProc64)fbStore64;
 +	
 +	switch (data->op)
 +	{
 +	case PIXMAN_OP_CLEAR:
 +	case PIXMAN_OP_SRC:
 +	    fetchDest = NULL;
 +	    break;
 +	}
 +    }
 +    /* NOTE(review): store was assigned in both branches above, so the !store case below appears unreachable. */
 +    if (!store)
 +    {
 +	bits = data->dest->bits.bits;
 +	stride = data->dest->bits.rowstride;
 +	xoff = yoff = 0;
 +    }
 +    else
 +    {
 +	bits = NULL;
 +	stride = 0;
 +	xoff = yoff = 0;
 +    }
 +    /* combineC path: both fetchers exist and the mask is a BITS image with component_alpha set and PIXMAN_FORMAT_RGB() non-zero. */
 +    if (fetchSrc		   &&
 +	fetchMask		   &&
 +	data->mask		   &&
 +	data->mask->common.type == BITS && 
 +	data->mask->common.component_alpha &&
 +	PIXMAN_FORMAT_RGB (data->mask->bits.format))
 +    {
 +	uint64_t *mask_buffer = dest_buffer + data->width; /* mask row: third data->width-sized region of scanline_buffer */
 +	CombineFuncC64 compose = pixman_composeFunctions_wide.combineC[data->op];
 +	if (!compose)
 +	    return;
 +	/* One iteration per row of the rectangle. */
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
 +		    /* a horizontal-class mask is fetched once; later rows reuse mask_buffer */
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
 +		
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +		    fetchSrc = NULL; /* fetched once; later rows reuse src_buffer */
 +		}
 +		else
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xffffffffffffffffLL);
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    { /* reached once fetchSrc has been cleared above: only the mask row is refreshed */
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +	    }
 +	    
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
 +		
 +		/* blend */
 +		compose (dest_buffer, src_buffer, mask_buffer, data->width);
 +		
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		assert(!"need to have a storeproc with 64-bit internal format");
 +	    }
 +	}
 +    }
 +    else
 +    {
 +	uint64_t *src_mask_buffer = 0, *mask_buffer = 0; /* src_mask_buffer: the buffer that compose() reads the source operand from */
 +	CombineFuncU64 compose = pixman_composeFunctions_wide.combineU[data->op];
 +	if (!compose)
 +	    return;
 +	/* combineU path: a mask buffer is set up only when there is a mask fetcher. */
 +	if (fetchMask)
 +	    mask_buffer = dest_buffer + data->width;
 +	
 +	for (i = 0; i < data->height; ++i) {
 +	    /* fill first half of scanline with source */
 +	    if (fetchSrc)
 +	    {
 +		if (fetchMask)
 +		{
 +		    /* fetch mask before source so that fetching of
 +		       source can be optimized */
 +		    fetchMask (data->mask, data->xMask, data->yMask + i,
 +			       data->width, mask_buffer, 0, 0);
 +		    
 +		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +			fetchMask = NULL;
 +		}
 +		
 +		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, 0, 0);
 +		    /* with a mask, run the IN combiner on (mask_buffer, src_buffer) and compose from mask_buffer */
 +		    if (mask_buffer)
 +		    {
 +			pixman_composeFunctions_wide.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +			src_mask_buffer = mask_buffer;
 +		    }
 +		    else
 +			src_mask_buffer = src_buffer;
 +		    
 +		    fetchSrc = NULL; /* fetched once; later rows reuse src_buffer */
 +		}
 +		else
 +		{
 +		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 +			      data->width, src_buffer, mask_buffer,
 +			      0xffff000000000000LL);
 +		    /* combineMaskU is called with (src_buffer, mask_buffer); the row is then composed from src_buffer */
 +		    if (mask_buffer)
 +			pixman_composeFunctions_wide.combineMaskU (src_buffer,
 +							      mask_buffer,
 +							      data->width);
 +		    
 +		    src_mask_buffer = src_buffer;
 +		}
 +	    }
 +	    else if (fetchMask)
 +	    {
 +		fetchMask (data->mask, data->xMask, data->yMask + i,
 +			   data->width, mask_buffer, 0, 0);
 +		/* fetchSrc was cleared on an earlier row, so src_buffer still holds the row fetched then */
 +		pixman_composeFunctions_wide.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 +		
 +		src_mask_buffer = mask_buffer;
 +	    }
 +	    
 +	    if (store)
 +	    {
 +		/* fill dest into second half of scanline */
 +		if (fetchDest)
 +		    fetchDest (data->dest, data->xDest, data->yDest + i,
 +			       data->width, dest_buffer, 0, 0);
 +		
 +		/* blend */
 +		compose (dest_buffer, src_mask_buffer, data->width);
 +		
 +		/* write back */
 +		store (data->dest, data->xDest, data->yDest + i, data->width,
 +		       dest_buffer);
 +	    }
 +	    else
 +	    {
 +		assert(!"need to have a storeproc with 64-bit internal format");
 +	    }
 +	}
 +    }
 +    /* NOTE(review): store is never NULL as written, so this fbFinishAccess call appears unreachable. */
 +    if (!store)
 +	fbFinishAccess (data->dest->pDrawable);
 +}
commit d4d78c8c1a73d1007ebaae3117923bb72d09118f
Merge: 85bccce... b39ca42...
Author: Alan Hourihane <alanh at tungstengraphics.com>
Date:   Wed Oct 24 21:39:34 2007 +0100

    Merge branch 'master' of git+ssh://git.freedesktop.org/git/pixman into yuv-porting

commit b39ca42fce85248f6c19459388f71bf73a147792
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Wed Oct 24 15:48:45 2007 -0400

    Bump version number

diff --git a/configure.ac b/configure.ac
index fded892..22a91ef 100644
--- a/configure.ac
+++ b/configure.ac
@@ -42,7 +42,7 @@ AC_PREREQ([2.57])
 
 m4_define([pixman_major], 0)
 m4_define([pixman_minor], 9)
-m4_define([pixman_micro], 5)
+m4_define([pixman_micro], 6)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 
commit 85bccce4d863b99be4b9ce62a8ac7d95f0acab3d
Merge: 8aa38d2... 2853243...
Author: Alan Hourihane <alanh at tungstengraphics.com>
Date:   Thu Oct 18 17:33:43 2007 +0100

    Merge branch 'master' of git+ssh://git.freedesktop.org/git/pixman into yuv-porting
    
    Conflicts:
    
    	pixman/pixman-compose.c
    	pixman/pixman.h

diff --cc pixman/pixman-compose.c
index 8dd0714,4c73822..ef49850
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@@ -4189,10 -4093,10 +4189,10 @@@ static void fbFetchExternalAlpha(bits_i
  static void fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
  {
      uint32_t *bits;
-     int stride;
+     int32_t stride;
      storeProc store = storeProcForPicture(pict);
      const pixman_indexed_t * indexed = pict->indexed;
 -    
 +
      bits = pict->bits;
      stride = pict->rowstride;
      bits += y*stride;
@@@ -4259,9 -4163,9 +4259,9 @@@ PIXMAN_COMPOSITE_RECT_GENERAL (const Fb
      unsigned int srcClass = SOURCE_IMAGE_CLASS_UNKNOWN;
      unsigned int maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
      uint32_t *bits;
-     int stride;
+     int32_t stride;
      int xoff, yoff;
 -    
 +
      if (data->op == PIXMAN_OP_CLEAR)
          fetchSrc = NULL;
      else if (IS_SOURCE_IMAGE (data->src))
diff --cc pixman/pixman.h
index 3b9601c,7b93214..604e4a8
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@@ -454,11 -452,7 +454,11 @@@ typedef enum 
  /* 1bpp formats */
      PIXMAN_a1 =		PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0),
      
 -    PIXMAN_g1 =		PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0)
 +    PIXMAN_g1 =		PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0),
 +
 +/* YUV formats */
 +    PIXMAN_yuy2 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0),
-     PIXMAN_yv12 =	PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0),
++    PIXMAN_yv12 =	PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0)
  } pixman_format_code_t;
  
  /* Constructors */
commit 8aa38d2256c191bf3437034f6176bae30c3c3d19
Merge: cea752b... 39a67d3...
Author: Alan Hourihane <alanh at tungstengraphics.com>
Date:   Wed Oct 17 19:39:41 2007 +0100

    Merge branch 'master' of git+ssh://git.freedesktop.org/git/pixman into yuv-porting
    
    Conflicts:
    
    	pixman/pixman-image.c

diff --cc pixman/pixman-image.c
index 0bcd901,d40234d..1c4abc2
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@@ -320,23 -345,26 +345,26 @@@ pixman_image_create_bits (pixman_format
      pixman_image_t *image;
      uint32_t *free_me = NULL;
  
 -    /* must be a whole number of uint32_t's 
 +    /* must be a whole number of uint32_t's
       */
      return_val_if_fail (bits == NULL ||
 -			(rowstride_bytes % sizeof (uint32_t)) == 0, NULL); 
 +			(rowstride_bytes % sizeof (uint32_t)) == 0, NULL);
  
-     if (!bits)
+     if (!bits && width && height)
      {
  	free_me = bits = create_bits (format, width, height, &rowstride_bytes);
  	if (!bits)
  	    return NULL;
      }
 -    
 +
      image = allocate_image();
  
-     if (!image)
+     if (!image) {
+ 	if (free_me)
+ 	    free (free_me);
  	return NULL;
- 
+     }
+     
      image->type = BITS;
      image->bits.format = format;
      image->bits.width = width;
commit cea752bdb80c8e6317a8c8af8510807c87d87694
Author: José Fonseca <jrfonseca at tungstengraphics.com>
Date:   Mon Sep 17 18:49:40 2007 +0100

    Prevent promotion to unsigned when calculating strides.

diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index c39ee9b..0bcd901 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -49,7 +49,7 @@ init_gradient (gradient_t     *gradient,
 	return FALSE;
 
     memcpy (gradient->stops, stops, n_stops * sizeof (pixman_gradient_stop_t));
-    
+
     gradient->n_stops = n_stops;
 
     gradient->stop_range = 0xffff;
@@ -73,7 +73,7 @@ static pixman_image_t *
 allocate_image (void)
 {
     pixman_image_t *image = malloc (sizeof (pixman_image_t));
-    
+
     if (image)
     {
 	image_common_t *common = &image->common;
@@ -132,7 +132,7 @@ pixman_image_unref (pixman_image_t *image)
 	if (image->type == BITS && image->bits.indexed)
 	    free (image->bits.indexed);
 #endif
-	
+
 #if 0
 	memset (image, 0xaa, sizeof (pixman_image_t));
 #endif
@@ -142,10 +142,10 @@ pixman_image_unref (pixman_image_t *image)
 		free (image->gradient.stops);
 	}
 
-	
+
 	if (image->type == BITS && image->bits.free_me)
 	    free (image->bits.free_me);
-	
+
 	free (image);
 
 	return TRUE;
@@ -161,9 +161,9 @@ pixman_image_create_solid_fill (pixman_color_t *color)
     pixman_image_t *img = allocate_image();
     if (!img)
 	return NULL;
-    
+
     init_source_image (&img->solid.common);
-    
+
     img->type = SOLID;
     img->solid.color = color_to_uint32 (color);
 
@@ -180,14 +180,14 @@ pixman_image_create_linear_gradient (pixman_point_fixed_t         *p1,
     linear_gradient_t *linear;
 
     return_val_if_fail (n_stops >= 2, NULL);
-    
+
     image = allocate_image();
-    
+
     if (!image)
 	return NULL;
 
     linear = &image->linear;
-    
+
     if (!init_gradient (&linear->common, stops, n_stops))
     {
 	free (image);
@@ -215,7 +215,7 @@ pixman_image_create_radial_gradient (pixman_point_fixed_t         *inner,
     radial_gradient_t *radial;
 
     return_val_if_fail (n_stops >= 2, NULL);
-    
+
     image = allocate_image();
 
     if (!image)
@@ -230,7 +230,7 @@ pixman_image_create_radial_gradient (pixman_point_fixed_t         *inner,
     }
 
     image->type = RADIAL;
-    
+
     radial->c1.x = inner->x;
     radial->c1.y = inner->y;
     radial->c1.radius = inner_radius;
@@ -243,7 +243,7 @@ pixman_image_create_radial_gradient (pixman_point_fixed_t         *inner,
     radial->A = (radial->cdx * radial->cdx
 		 + radial->cdy * radial->cdy
 		 - radial->dr  * radial->dr);
-    
+
     return image;
 }
 
@@ -260,7 +260,7 @@ pixman_image_create_conical_gradient (pixman_point_fixed_t *center,
 	return NULL;
 
     conical = &image->conical;
-    
+
     if (!init_gradient (&conical->common, stops, n_stops))
     {
 	free (image);
@@ -283,9 +283,9 @@ create_bits (pixman_format_code_t format,
     int stride;
     int buf_size;
     int bpp;
-    
+
     bpp = PIXMAN_FORMAT_BPP (format);
-    stride = ((width * bpp + FB_MASK) >> FB_SHIFT) * sizeof (uint32_t);
+    stride = ((width * bpp + FB_MASK) >> FB_SHIFT) * (int) sizeof (uint32_t);
     buf_size = height * stride;
 
     if (rowstride_bytes)
@@ -298,11 +298,11 @@ static void
 reset_clip_region (pixman_image_t *image)
 {
     pixman_region_fini (&image->common.clip_region);
-    
+
     if (image->type == BITS)
     {
 	pixman_region_init_rect (&image->common.clip_region, 0, 0,
-				 image->bits.width, image->bits.height);	
+				 image->bits.width, image->bits.height);
     }
     else
     {
@@ -320,10 +320,10 @@ pixman_image_create_bits (pixman_format_code_t  format,
     pixman_image_t *image;
     uint32_t *free_me = NULL;
 
-    /* must be a whole number of uint32_t's 
+    /* must be a whole number of uint32_t's
      */
     return_val_if_fail (bits == NULL ||
-			(rowstride_bytes % sizeof (uint32_t)) == 0, NULL); 
+			(rowstride_bytes % sizeof (uint32_t)) == 0, NULL);
 
     if (!bits)
     {
@@ -331,20 +331,20 @@ pixman_image_create_bits (pixman_format_code_t  format,
 	if (!bits)
 	    return NULL;
     }
-    
+
     image = allocate_image();
 
     if (!image)
 	return NULL;
-    
+
     image->type = BITS;
     image->bits.format = format;
     image->bits.width = width;
     image->bits.height = height;
     image->bits.bits = bits;
     image->bits.free_me = free_me;
-    
-    image->bits.rowstride = rowstride_bytes / sizeof (uint32_t); /* we store it in number
+
+    image->bits.rowstride = rowstride_bytes / (int) sizeof (uint32_t); /* we store it in number
 								  * of uint32_t's
 								  */
     image->bits.indexed = NULL;
@@ -370,7 +370,7 @@ pixman_image_set_clip_region (pixman_image_t    *image,
     else
     {
 	reset_clip_region (image);
-	
+
 	return TRUE;
     }
 }
@@ -395,7 +395,7 @@ pixman_image_set_transform (pixman_image_t           *image,
 	  { 0, 0, pixman_fixed_1 }
 	}
     };
-    
+
     image_common_t *common = (image_common_t *)image;
 
     if (common->transform == transform)
@@ -407,7 +407,7 @@ pixman_image_set_transform (pixman_image_t           *image,
 	common->transform = NULL;
 	return TRUE;
     }
-    
+
     if (common->transform == NULL)
 	common->transform = malloc (sizeof (pixman_transform_t));
     if (common->transform == NULL)
@@ -425,7 +425,7 @@ pixman_image_set_repeat (pixman_image_t  *image,
     image->common.repeat = repeat;
 }
 
-pixman_bool_t 
+pixman_bool_t
 pixman_image_set_filter (pixman_image_t       *image,
 			 pixman_filter_t       filter,
 			 const pixman_fixed_t *params,
@@ -449,7 +449,7 @@ pixman_image_set_filter (pixman_image_t       *image,
     }
 
     common->filter = filter;
-	
+
     if (common->filter_params)
 	free (common->filter_params);
 
@@ -490,7 +490,7 @@ pixman_image_set_alpha_map (pixman_image_t *image,
 			    int16_t         y)
 {
     image_common_t *common = (image_common_t *)image;
-    
+
     return_if_fail (!alpha_map || alpha_map->type == BITS);
 
     if (common->alpha_map != (bits_image_t *)alpha_map)
@@ -558,7 +558,7 @@ int
 pixman_image_get_stride (pixman_image_t *image)
 {
     if (image->type == BITS)
-	return image->bits.rowstride * sizeof (uint32_t);
+	return image->bits.rowstride * (int) sizeof (uint32_t);
 
     return 0;
 }
@@ -589,7 +589,7 @@ color_to_pixel (pixman_color_t *color,
     {
 	return FALSE;
     }
-    
+
     if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_ABGR)
     {
 	c = ((c & 0xff000000) >>  0) |
@@ -608,7 +608,7 @@ color_to_pixel (pixman_color_t *color,
     printf ("color: %x %x %x %x\n", color->alpha, color->red, color->green, color->blue);
     printf ("pixel: %x\n", c);
 #endif
-    
+
     *pixel = c;
     return TRUE;
 }
@@ -623,7 +623,7 @@ pixman_image_fill_rectangles (pixman_op_t		    op,
     pixman_image_t *solid;
     pixman_color_t c;
     int i;
-    
+
     if (color->alpha == 0xffff)
     {
 	if (op == PIXMAN_OP_OVER)
@@ -638,14 +638,14 @@ pixman_image_fill_rectangles (pixman_op_t		    op,
 	c.alpha = 0;
 
 	color = &c;
-	
+
 	op = PIXMAN_OP_SRC;
     }
 
     if (op == PIXMAN_OP_SRC)
     {
 	uint32_t pixel;
-	
+
 	if (color_to_pixel (color, &pixel, dest->bits.format))
 	{
 	    for (i = 0; i < n_rects; ++i)
@@ -653,7 +653,7 @@ pixman_image_fill_rectangles (pixman_op_t		    op,
 		pixman_region16_t fill_region;
 		int n_boxes, j;
 		pixman_box16_t *boxes;
-		
+
 		pixman_region_init_rect (&fill_region, rects[i].x, rects[i].y, rects[i].width, rects[i].height);
 		pixman_region_intersect (&fill_region, &fill_region, &dest->common.clip_region);
 
@@ -671,7 +671,7 @@ pixman_image_fill_rectangles (pixman_op_t		    op,
 	    return TRUE;
 	}
     }
-    
+
     solid = pixman_image_create_solid_fill (color);
     if (!solid)
 	return FALSE;
@@ -679,13 +679,13 @@ pixman_image_fill_rectangles (pixman_op_t		    op,
     for (i = 0; i < n_rects; ++i)
     {
 	const pixman_rectangle16_t *rect = &(rects[i]);
-	
+
 	pixman_image_composite (op, solid, NULL, dest,
 				0, 0, 0, 0,
 				rect->x, rect->y,
 				rect->width, rect->height);
     }
-    
+
     pixman_image_unref (solid);
 
     return TRUE;
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 5a752ce..30d18cf 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -1698,14 +1698,14 @@ pixman_fill_mmx (uint32_t *bits,
 
     if (bpp == 16)
     {
-	stride = stride * sizeof (uint32_t) / 2;
+	stride = stride * (int) sizeof (uint32_t) / 2;
 	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
 	byte_width = 2 * width;
 	stride *= 2;
     }
     else
     {
-	stride = stride * sizeof (uint32_t) / 4;
+	stride = stride * (int) sizeof (uint32_t) / 4;
 	byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
 	byte_width = 4 * width;
 	stride *= 4;
@@ -2765,16 +2765,16 @@ pixman_blt_mmx (uint32_t *src_bits,
 
     if (src_bpp == 16)
     {
-	src_stride = src_stride * sizeof (uint32_t) / 2;
-	dst_stride = dst_stride * sizeof (uint32_t) / 2;
+	src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
 	src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
 	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
 	byte_width = 2 * width;
 	src_stride *= 2;
 	dst_stride *= 2;
     } else if (src_bpp == 32) {
-	src_stride = src_stride * sizeof (uint32_t) / 4;
-	dst_stride = dst_stride * sizeof (uint32_t) / 4;
+	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
 	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
 	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
 	byte_width = 4 * width;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 56dec83..faaf830 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -712,7 +712,7 @@ union pixman_image
 	__bits__ = pict->bits.bits;					\
 	__stride__ = pict->bits.rowstride;				\
 	__bpp__ = PIXMAN_FORMAT_BPP(pict->bits.format);			\
-	(out_stride) = __stride__ * sizeof (uint32_t) / sizeof (type);	\
+	(out_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (type);	\
 	(line) = ((type *) __bits__) +					\
 	    (out_stride) * (y) + (mul) * (x);				\
     } while (0)
diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
index cdf115d..33b1ee6 100644
--- a/pixman/pixman-utils.c
+++ b/pixman/pixman-utils.c
@@ -15,7 +15,7 @@
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  *
  * Author:  Keith Packard, SuSE, Inc.
@@ -51,10 +51,10 @@ pixman_transform_point_3d (pixman_transform_t *transform,
 
 	result.vector[j] = (pixman_fixed_48_16_t) v;
     }
-    
+
     if (!result.vector[2])
 	return FALSE;
-    
+
     *vector = result;
     return TRUE;
 }
@@ -90,7 +90,7 @@ pixman_fill8 (uint32_t  *bits,
 	      int	height,
 	      uint32_t  xor)
 {
-    int byte_stride = stride * sizeof (uint32_t);
+    int byte_stride = stride * (int) sizeof (uint32_t);
     uint8_t *dst = (uint8_t *) bits;
     uint8_t v = xor & 0xff;
     int i;
@@ -115,7 +115,7 @@ pixman_fill16 (uint32_t *bits,
 	       int       height,
 	       uint32_t  xor)
 {
-    int short_stride = (stride * sizeof (uint32_t)) / sizeof (uint16_t);
+    int short_stride = (stride * (int) sizeof (uint32_t)) / (int) sizeof (uint16_t);
     uint16_t *dst = (uint16_t *)bits;
     uint16_t v = xor & 0xffff;
     int i;
@@ -141,9 +141,9 @@ pixman_fill32 (uint32_t *bits,
 	       uint32_t  xor)
 {
     int i;
-    
+
     bits = bits + y * stride + x;
-    
+
     while (height--)
     {
 	for (i = 0; i < width; ++i)
@@ -167,7 +167,7 @@ pixman_fill (uint32_t *bits,
     printf ("filling: %d %d %d %d (stride: %d, bpp: %d)   pixel: %x\n",
 	    x, y, width, height, stride, bpp, xor);
 #endif
-    
+
 #ifdef USE_MMX
     if (!pixman_have_mmx() || !pixman_fill_mmx (bits, stride, bpp, x, y, width, height, xor))
 #endif
@@ -177,11 +177,11 @@ pixman_fill (uint32_t *bits,
 	case 8:
 	    pixman_fill8 (bits, stride, x, y, width, height, xor);
 	    break;
-	    
+
 	case 16:
 	    pixman_fill16 (bits, stride, x, y, width, height, xor);
 	    break;
-	    
+
 	case 32:
 	    pixman_fill32 (bits, stride, x, y, width, height, xor);
 	    break;
@@ -191,10 +191,10 @@ pixman_fill (uint32_t *bits,
 	    break;
 	}
     }
-	
+
     return TRUE;
 }
-	    
+
 
 /*
  * Compute the smallest value no less than y which is on a
@@ -206,7 +206,7 @@ pixman_sample_ceil_y (pixman_fixed_t y, int n)
 {
     pixman_fixed_t   f = pixman_fixed_frac(y);
     pixman_fixed_t   i = pixman_fixed_floor(y);
-    
+
     f = ((f + Y_FRAC_FIRST(n)) / STEP_Y_SMALL(n)) * STEP_Y_SMALL(n) + Y_FRAC_FIRST(n);
     if (f > Y_FRAC_LAST(n))
     {
@@ -227,7 +227,7 @@ pixman_sample_floor_y (pixman_fixed_t y, int n)
 {
     pixman_fixed_t   f = pixman_fixed_frac(y);
     pixman_fixed_t   i = pixman_fixed_floor (y);
-    
+
     f = _div(f - Y_FRAC_FIRST(n), STEP_Y_SMALL(n)) * STEP_Y_SMALL(n) + Y_FRAC_FIRST(n);
     if (f < Y_FRAC_FIRST(n))
     {
@@ -246,9 +246,9 @@ pixman_edge_step (pixman_edge_t *e, int n)
     pixman_fixed_48_16_t	ne;
 
     e->x += n * e->stepx;
-    
+
     ne = e->e + n * (pixman_fixed_48_16_t) e->dx;
-    
+
     if (n >= 0)
     {
 	if (ne > 0)
@@ -278,7 +278,7 @@ _pixman_edge_tMultiInit (pixman_edge_t *e, int n, pixman_fixed_t *stepx_p, pixma
 {
     pixman_fixed_t	stepx;
     pixman_fixed_48_16_t	ne;
-    
+
     ne = n * (pixman_fixed_48_16_t) e->dx;
     stepx = n * e->stepx;
     if (ne > 0)
@@ -328,7 +328,7 @@ pixman_edge_init (pixman_edge_t	*e,
 	    e->dx = -dx % dy;
 	    e->e = 0;
 	}
-    
+
 	_pixman_edge_tMultiInit (e, STEP_Y_SMALL(n), &e->stepx_small, &e->dx_small);
 	_pixman_edge_tMultiInit (e, STEP_Y_BIG(n), &e->stepx_big, &e->dx_big);
     }
commit 1092cdd09f71748182e578957de9610b729df257
Author: José Fonseca <jrfonseca at tungstengraphics.com>
Date:   Mon Sep 17 17:08:31 2007 +0100

    Use a consistent type for stride and more importantly, use always a signed type.

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 31ad490..b2b977c 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -3674,7 +3674,7 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
 static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
     uint32_t     *bits;
-    uint32_t    stride;
+    int    stride;
     fetchPixelProc   fetch;
     pixman_vector_t	v;
     pixman_vector_t  unit;
@@ -4187,7 +4187,7 @@ static void fbFetchExternalAlpha(bits_image_t * pict, int x, int y, int width, u
 static void fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t *bits;
-    uint32_t stride;
+    int stride;
     storeProc store = storeProcForPicture(pict);
     const pixman_indexed_t * indexed = pict->indexed;
 
@@ -4201,7 +4201,7 @@ static void fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buff
 static void fbStoreExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t *bits, *alpha_bits;
-    uint32_t stride, astride;
+    int stride, astride;
     int ax, ay;
     storeProc store;
     storeProc astore;
@@ -4257,7 +4257,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     unsigned int srcClass = SOURCE_IMAGE_CLASS_UNKNOWN;
     unsigned int maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
     uint32_t *bits;
-    uint32_t stride;
+    int stride;
     int xoff, yoff;
 
     if (data->op == PIXMAN_OP_CLEAR)
diff --git a/pixman/pixman-edge-imp.h b/pixman/pixman-edge-imp.h
index c89a449..5a95abe 100644
--- a/pixman/pixman-edge-imp.h
+++ b/pixman/pixman-edge-imp.h
@@ -35,8 +35,8 @@ rasterizeEdges (pixman_image_t  *image,
     pixman_fixed_t  y = t;
     uint32_t  *line;
     uint32_t *buf = (image)->bits.bits;
-    int32_t stride = (image)->bits.rowstride;
-    int32_t width = (image)->bits.width;
+    int stride = (image)->bits.rowstride;
+    int width = (image)->bits.width;
 
     line = buf + pixman_fixed_to_int (y) * stride;
 
diff --git a/pixman/pixman-edge.c b/pixman/pixman-edge.c
index d9e2d9a..1e7acb0 100644
--- a/pixman/pixman-edge.c
+++ b/pixman/pixman-edge.c
@@ -128,8 +128,8 @@ fbRasterizeEdges8 (pixman_image_t       *image,
     int fill_start = -1, fill_end = -1;
     int fill_size = 0;
     uint32_t *buf = (image)->bits.bits;
-    int32_t stride = (image)->bits.rowstride;
-    int32_t width = (image)->bits.width;
+    int stride = (image)->bits.rowstride;
+    int width = (image)->bits.width;
 
     line = buf + pixman_fixed_to_int (y) * stride;
 
commit be4990f4a0a8d278b99df7669c5162136120ccd5
Author: José Fonseca <jrfonseca at tungstengraphics.com>
Date:   Fri Sep 14 15:48:18 2007 +0100

    Remove the macro magic from READ/WRITE macros.

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 0d4a0d8..31ad490 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -45,7 +45,7 @@
 #define INLINE inline
 
 /*   End of stuff added to get it to compile
- */ 
+ */
 
 static unsigned int
 SourcePictureClassify (source_image_t *pict,
@@ -66,7 +66,7 @@ SourcePictureClassify (source_image_t *pict,
 	pixman_fixed_48_16_t dx, dy, a, b, off;
 	pixman_fixed_48_16_t factors[4];
 	int	     i;
-	
+
 	dx = linear->p2.x - linear->p1.x;
 	dy = linear->p2.y - linear->p1.y;
 	l = dx * dx + dy * dy;
@@ -79,31 +79,31 @@ SourcePictureClassify (source_image_t *pict,
 	{
 	    a = b = 0;
 	}
-	
+
 	off = (-a * linear->p1.x
 	       -b * linear->p1.y) >> 16;
-	
+
 	for (i = 0; i < 3; i++)
 	{
 	    v.vector[0] = pixman_int_to_fixed ((i % 2) * (width  - 1) + x);
 	    v.vector[1] = pixman_int_to_fixed ((i / 2) * (height - 1) + y);
 	    v.vector[2] = pixman_fixed_1;
-	    
+
 	    if (pict->common.transform)
 	    {
 		if (!pixman_transform_point_3d (pict->common.transform, &v))
 		    return SOURCE_IMAGE_CLASS_UNKNOWN;
 	    }
-	    
+
 	    factors[i] = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off;
 	}
-	
+
 	if (factors[2] == factors[0])
 	    pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
 	else if (factors[1] == factors[0])
 	    pict->class = SOURCE_IMAGE_CLASS_VERTICAL;
     }
-    
+
     return pict->class;
 }
 
@@ -121,7 +121,7 @@ SourcePictureClassify (source_image_t *pict,
 		stride * pict->height; \
 	int offset1 = stride < 0 ? \
 		offset0 + ((-stride) >> 1) * ((pict->height) >> 1) : \
-		offset0 + (offset0 >> 2); 
+		offset0 + (offset0 >> 2);
 
 #define YV12_Y(line)		\
     ((uint8_t *) ((bits) + (stride) * (line)))
@@ -137,11 +137,6 @@ SourcePictureClassify (source_image_t *pict,
 typedef FASTCALL void (*fetchProc)(bits_image_t *pict, int x, int y, int width, uint32_t *buffer);
 
 /*
- * Used by READ/WRITE macros
- */
-#define image ((pixman_image_t *)pict)
-
-/*
  * All of the fetch functions
  */
 
@@ -149,7 +144,8 @@ static FASTCALL void
 fbFetch_a8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     const uint32_t *bits = pict->bits + y*pict->rowstride;
-    MEMCPY_WRAPPED(buffer, (const uint32_t *)bits + x,
+    MEMCPY_WRAPPED(pict,
+                   buffer, (const uint32_t *)bits + x,
 		   width*sizeof(uint32_t));
 }
 
@@ -160,7 +156,7 @@ fbFetch_x8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint32_t *pixel = (const uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
     while (pixel < end) {
-	*buffer++ = READ(pixel++) | 0xff000000;
+	*buffer++ = READ(pict, pixel++) | 0xff000000;
     }
 }
 
@@ -171,7 +167,7 @@ fbFetch_a8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint32_t *pixel = (uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t p = READ(pixel++);
+	uint32_t p = READ(pict, pixel++);
 	*buffer++ = (p & 0xff00ff00) |
 	            ((p >> 16) & 0xff) |
 	    ((p & 0xff) << 16);
@@ -185,7 +181,7 @@ fbFetch_x8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint32_t *pixel = (uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t p = READ(pixel++);
+	uint32_t p = READ(pict, pixel++);
 	*buffer++ = 0xff000000 |
 	    (p & 0x0000ff00) |
 	    ((p >> 16) & 0xff) |
@@ -200,7 +196,7 @@ fbFetch_r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint8_t *pixel = (const uint8_t *)bits + 3*x;
     const uint8_t *end = pixel + 3*width;
     while (pixel < end) {
-	uint32_t b = Fetch24(pixel) | 0xff000000;
+	uint32_t b = Fetch24(pict, pixel) | 0xff000000;
 	pixel += 3;
 	*buffer++ = b;
     }
@@ -215,13 +211,13 @@ fbFetch_b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     while (pixel < end) {
 	uint32_t b = 0xff000000;
 #if IMAGE_BYTE_ORDER == MSBFirst
-	b |= (READ(pixel++));
-	b |= (READ(pixel++) << 8);
-	b |= (READ(pixel++) << 16);
+	b |= (READ(pict, pixel++));
+	b |= (READ(pict, pixel++) << 8);
+	b |= (READ(pict, pixel++) << 16);
 #else
-	b |= (READ(pixel++) << 16);
-	b |= (READ(pixel++) << 8);
-	b |= (READ(pixel++));
+	b |= (READ(pict, pixel++) << 16);
+	b |= (READ(pict, pixel++) << 8);
+	b |= (READ(pict, pixel++));
 #endif
 	*buffer++ = b;
     }
@@ -234,8 +230,8 @@ fbFetch_r5g6b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t p = READ(pixel++);
-	uint32_t r = (((p) << 3) & 0xf8) | 
+	uint32_t p = READ(pict, pixel++);
+	uint32_t r = (((p) << 3) & 0xf8) |
 	    (((p) << 5) & 0xfc00) |
 	    (((p) << 8) & 0xf80000);
 	r |= (r >> 5) & 0x70007;
@@ -252,7 +248,7 @@ fbFetch_b5g6r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
+	uint32_t  p = READ(pict, pixel++);
 	b = ((p & 0xf800) | ((p & 0xe000) >> 5)) >> 8;
 	g = ((p & 0x07e0) | ((p & 0x0600) >> 6)) << 5;
 	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
@@ -268,8 +264,8 @@ fbFetch_a1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
-	
+	uint32_t  p = READ(pict, pixel++);
+
 	a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24;
 	r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
 	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
@@ -286,8 +282,8 @@ fbFetch_x1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
-	
+	uint32_t  p = READ(pict, pixel++);
+
 	r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
 	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
 	b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
@@ -303,8 +299,8 @@ fbFetch_a1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
-	
+	uint32_t  p = READ(pict, pixel++);
+
 	a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24;
 	b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
 	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
@@ -321,8 +317,8 @@ fbFetch_x1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
-	
+	uint32_t  p = READ(pict, pixel++);
+
 	b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
 	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
 	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
@@ -338,8 +334,8 @@ fbFetch_a4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
-	
+	uint32_t  p = READ(pict, pixel++);
+
 	a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
 	r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
 	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
@@ -356,8 +352,8 @@ fbFetch_x4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
-	
+	uint32_t  p = READ(pict, pixel++);
+
 	r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
 	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
 	b = ((p & 0x000f) | ((p & 0x000f) << 4));
@@ -373,8 +369,8 @@ fbFetch_a4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
-	
+	uint32_t  p = READ(pict, pixel++);
+
 	a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
 	b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
 	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
@@ -391,8 +387,8 @@ fbFetch_x4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
-	
+	uint32_t  p = READ(pict, pixel++);
+
 	b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
 	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
 	r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
@@ -407,7 +403,7 @@ fbFetch_a8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
-	*buffer++ = READ(pixel++) << 24;
+	*buffer++ = READ(pict, pixel++) << 24;
     }
 }
 
@@ -419,8 +415,8 @@ fbFetch_r3g3b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
-	
+	uint32_t  p = READ(pict, pixel++);
+
 	r = ((p & 0xe0) | ((p & 0xe0) >> 3) | ((p & 0xc0) >> 6)) << 16;
 	g = ((p & 0x1c) | ((p & 0x18) >> 3) | ((p & 0x1c) << 3)) << 8;
 	b = (((p & 0x03)     ) |
@@ -439,8 +435,8 @@ fbFetch_b2g3r3 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
-	
+	uint32_t  p = READ(pict, pixel++);
+
 	b = (((p & 0xc0)     ) |
 	     ((p & 0xc0) >> 2) |
 	     ((p & 0xc0) >> 4) |
@@ -461,8 +457,8 @@ fbFetch_a2r2g2b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
-	
+	uint32_t  p = READ(pict, pixel++);
+
 	a = ((p & 0xc0) * 0x55) << 18;
 	r = ((p & 0x30) * 0x55) << 12;
 	g = ((p & 0x0c) * 0x55) << 6;
@@ -479,8 +475,8 @@ fbFetch_a2b2g2r2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
-	
+	uint32_t  p = READ(pict, pixel++);
+
 	a = ((p & 0xc0) * 0x55) << 18;
 	b = ((p & 0x30) * 0x55) >> 6;
 	g = ((p & 0x0c) * 0x55) << 6;
@@ -497,7 +493,7 @@ fbFetch_c8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
-	uint32_t  p = READ(pixel++);
+	uint32_t  p = READ(pict, pixel++);
 	*buffer++ = indexed->rgba[p];
     }
 }
@@ -509,16 +505,16 @@ fbFetch_x4a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
-	uint8_t p = READ(pixel++) & 0xf;
+	uint8_t p = READ(pict, pixel++) & 0xf;
 	*buffer++ = (p | (p << 4)) << 24;
     }
 }
 
-#define Fetch8(l,o)    (READ((uint8_t *)(l) + ((o) >> 2)))
+#define Fetch8(img,l,o)    (READ(img, (uint8_t *)(l) + ((o) >> 2)))
 #if IMAGE_BYTE_ORDER == MSBFirst
-#define Fetch4(l,o)    ((o) & 2 ? Fetch8(l,o) & 0xf : Fetch8(l,o) >> 4)
+#define Fetch4(img,l,o)    ((o) & 2 ? Fetch8(img,l,o) & 0xf : Fetch8(img,l,o) >> 4)
 #else
-#define Fetch4(l,o)    ((o) & 2 ? Fetch8(l,o) >> 4 : Fetch8(l,o) & 0xf)
+#define Fetch4(img,l,o)    ((o) & 2 ? Fetch8(img,l,o) >> 4 : Fetch8(img,l,o) & 0xf)
 #endif
 
 static FASTCALL void
@@ -527,8 +523,8 @@ fbFetch_a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint32_t *bits = pict->bits + y*pict->rowstride;
     int i;
     for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(bits, i + x);
-	
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
 	p |= p << 4;
 	*buffer++ = p << 24;
     }
@@ -541,8 +537,8 @@ fbFetch_r1g2b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint32_t *bits = pict->bits + y*pict->rowstride;
     int i;
     for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(bits, i + x);
-	
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
 	r = ((p & 0x8) * 0xff) << 13;
 	g = ((p & 0x6) * 0x55) << 7;
 	b = ((p & 0x1) * 0xff);
@@ -557,8 +553,8 @@ fbFetch_b1g2r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint32_t *bits = pict->bits + y*pict->rowstride;
     int i;
     for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(bits, i + x);
-	
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
 	b = ((p & 0x8) * 0xff) >> 3;
 	g = ((p & 0x6) * 0x55) << 7;
 	r = ((p & 0x1) * 0xff) << 16;
@@ -573,8 +569,8 @@ fbFetch_a1r1g1b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint32_t *bits = pict->bits + y*pict->rowstride;
     int i;
     for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(bits, i + x);
-	
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
 	a = ((p & 0x8) * 0xff) << 21;
 	r = ((p & 0x4) * 0xff) << 14;
 	g = ((p & 0x2) * 0xff) << 7;
@@ -590,8 +586,8 @@ fbFetch_a1b1g1r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint32_t *bits = pict->bits + y*pict->rowstride;
     int i;
     for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(bits, i + x);
-	
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
 	a = ((p & 0x8) * 0xff) << 21;
 	r = ((p & 0x4) * 0xff) >> 3;
 	g = ((p & 0x2) * 0xff) << 7;
@@ -607,8 +603,8 @@ fbFetch_c4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const pixman_indexed_t * indexed = pict->indexed;
     int i;
     for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(bits, i + x);
-	
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
 	*buffer++ = indexed->rgba[p];
     }
 }
@@ -620,7 +616,7 @@ fbFetch_a1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const uint32_t *bits = pict->bits + y*pict->rowstride;
     int i;
     for (i = 0; i < width; ++i) {
-	uint32_t  p = READ(bits + ((i + x) >> 5));
+	uint32_t  p = READ(pict, bits + ((i + x) >> 5));
 	uint32_t  a;
 #if BITMAP_BIT_ORDER == MSBFirst
 	a = p >> (0x1f - ((i+x) & 0x1f));
@@ -642,7 +638,7 @@ fbFetch_g1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     const pixman_indexed_t * indexed = pict->indexed;
     int i;
     for (i = 0; i < width; ++i) {
-	uint32_t p = READ(bits + ((i+x) >> 5));
+	uint32_t p = READ(pict, bits + ((i+x) >> 5));
 	uint32_t a;
 #if BITMAP_BIT_ORDER == MSBFirst
 	a = p >> (0x1f - ((i+x) & 0x1f));
@@ -676,7 +672,7 @@ fbFetch_yuy2 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
 	/* B = 1.164(Y - 16) + 2.018(U - 128) */
 	b = 0x012b27 * y + 0x0206a2 * u;
 
-    WRITE(buffer++, 0xff000000 |
+    WRITE(pict, buffer++, 0xff000000 |
 	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
 	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
 	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));
@@ -707,7 +703,7 @@ fbFetch_yv12 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
 	/* B = 1.164(Y - 16) + 2.018(U - 128) */
 	b = 0x012b27 * y + 0x0206a2 * u;
 
-	WRITE(buffer++, 0xff000000 |
+	WRITE(pict, buffer++, 0xff000000 |
 	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
 	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
 	    (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));
@@ -721,15 +717,15 @@ static fetchProc fetchProcForPicture (bits_image_t * pict)
     case PIXMAN_x8r8g8b8: return fbFetch_x8r8g8b8;
     case PIXMAN_a8b8g8r8: return fbFetch_a8b8g8r8;
     case PIXMAN_x8b8g8r8: return fbFetch_x8b8g8r8;
-	
+
         /* 24bpp formats */
     case PIXMAN_r8g8b8: return fbFetch_r8g8b8;
     case PIXMAN_b8g8r8: return fbFetch_b8g8r8;
-	
+
         /* 16bpp formats */
     case PIXMAN_r5g6b5: return fbFetch_r5g6b5;
     case PIXMAN_b5g6r5: return fbFetch_b5g6r5;
-	
+
     case PIXMAN_a1r5g5b5: return fbFetch_a1r5g5b5;
     case PIXMAN_x1r5g5b5: return fbFetch_x1r5g5b5;
     case PIXMAN_a1b5g5r5: return fbFetch_a1b5g5r5;
@@ -738,7 +734,7 @@ static fetchProc fetchProcForPicture (bits_image_t * pict)
     case PIXMAN_x4r4g4b4: return fbFetch_x4r4g4b4;
     case PIXMAN_a4b4g4r4: return fbFetch_a4b4g4r4;
     case PIXMAN_x4b4g4r4: return fbFetch_x4b4g4r4;
-	
+
         /* 8bpp formats */
     case PIXMAN_a8: return  fbFetch_a8;
     case PIXMAN_r3g3b2: return fbFetch_r3g3b2;
@@ -748,7 +744,7 @@ static fetchProc fetchProcForPicture (bits_image_t * pict)
     case PIXMAN_c8: return  fbFetch_c8;
     case PIXMAN_g8: return  fbFetch_c8;
     case PIXMAN_x4a4: return fbFetch_x4a4;
-	
+
         /* 4bpp formats */
     case PIXMAN_a4: return  fbFetch_a4;
     case PIXMAN_r1g2b1: return fbFetch_r1g2b1;
@@ -757,7 +753,7 @@ static fetchProc fetchProcForPicture (bits_image_t * pict)
     case PIXMAN_a1b1g1r1: return fbFetch_a1b1g1r1;
     case PIXMAN_c4: return  fbFetch_c4;
     case PIXMAN_g4: return  fbFetch_c4;
-	
+
         /* 1bpp formats */
     case PIXMAN_a1: return  fbFetch_a1;
     case PIXMAN_g1: return  fbFetch_g1;
@@ -766,7 +762,7 @@ static fetchProc fetchProcForPicture (bits_image_t * pict)
     case PIXMAN_yuy2: return fbFetch_yuy2;
     case PIXMAN_yv12: return fbFetch_yv12;
     }
-    
+
     return NULL;
 }
 
@@ -780,22 +776,22 @@ static FASTCALL uint32_t
 fbFetchPixel_a8r8g8b8 (bits_image_t *pict, int offset, int line)
 {
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    return READ((uint32_t *)bits + offset);
+    return READ(pict, (uint32_t *)bits + offset);
 }
 
 static FASTCALL uint32_t
 fbFetchPixel_x8r8g8b8 (bits_image_t *pict, int offset, int line)
 {
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    return READ((uint32_t *)bits + offset) | 0xff000000;
+    return READ(pict, (uint32_t *)bits + offset) | 0xff000000;
 }
 
 static FASTCALL uint32_t
 fbFetchPixel_a8b8g8r8 (bits_image_t *pict, int offset, int line)
 {
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ((uint32_t *)bits + offset);
-    
+    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
+
     return ((pixel & 0xff000000) |
 	    ((pixel >> 16) & 0xff) |
 	    (pixel & 0x0000ff00) |
@@ -806,8 +802,8 @@ static FASTCALL uint32_t
 fbFetchPixel_x8b8g8r8 (bits_image_t *pict, int offset, int line)
 {
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ((uint32_t *)bits + offset);
-    
+    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
+
     return ((0xff000000) |
 	    ((pixel >> 16) & 0xff) |
 	    (pixel & 0x0000ff00) |
@@ -821,14 +817,14 @@ fbFetchPixel_r8g8b8 (bits_image_t *pict, int offset, int line)
     uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 #if IMAGE_BYTE_ORDER == MSBFirst
     return (0xff000000 |
-	    (READ(pixel + 0) << 16) |
-	    (READ(pixel + 1) << 8) |
-	    (READ(pixel + 2)));
+	    (READ(pict, pixel + 0) << 16) |
+	    (READ(pict, pixel + 1) << 8) |
+	    (READ(pict, pixel + 2)));
 #else
     return (0xff000000 |
-	    (READ(pixel + 2) << 16) |
-	    (READ(pixel + 1) << 8) |
-	    (READ(pixel + 0)));
+	    (READ(pict, pixel + 2) << 16) |
+	    (READ(pict, pixel + 1) << 8) |
+	    (READ(pict, pixel + 0)));
 #endif
 }
 
@@ -839,14 +835,14 @@ fbFetchPixel_b8g8r8 (bits_image_t *pict, int offset, int line)
     uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 #if IMAGE_BYTE_ORDER == MSBFirst
     return (0xff000000 |
-	    (READ(pixel + 2) << 16) |
-	    (READ(pixel + 1) << 8) |
-	    (READ(pixel + 0)));
+	    (READ(pict, pixel + 2) << 16) |
+	    (READ(pict, pixel + 1) << 8) |
+	    (READ(pict, pixel + 0)));
 #else
     return (0xff000000 |
-	    (READ(pixel + 0) << 16) |
-	    (READ(pixel + 1) << 8) |
-	    (READ(pixel + 2)));
+	    (READ(pict, pixel + 0) << 16) |
+	    (READ(pict, pixel + 1) << 8) |
+	    (READ(pict, pixel + 2)));
 #endif
 }
 
@@ -855,8 +851,8 @@ fbFetchPixel_r5g6b5 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ((uint16_t *) bits + offset);
-    
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
     r = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) << 8;
     g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
     b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
@@ -868,8 +864,8 @@ fbFetchPixel_b5g6r5 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ((uint16_t *) bits + offset);
-    
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
     b = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) >> 8;
     g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
     r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
@@ -881,8 +877,8 @@ fbFetchPixel_a1r5g5b5 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  a,r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ((uint16_t *) bits + offset);
-    
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
     a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
     r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
     g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
@@ -895,8 +891,8 @@ fbFetchPixel_x1r5g5b5 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ((uint16_t *) bits + offset);
-    
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
     r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
     g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
     b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
@@ -908,8 +904,8 @@ fbFetchPixel_a1b5g5r5 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  a,r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ((uint16_t *) bits + offset);
-    
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
     a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
     b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
     g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
@@ -922,8 +918,8 @@ fbFetchPixel_x1b5g5r5 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ((uint16_t *) bits + offset);
-    
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
     b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
     g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
     r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
@@ -935,8 +931,8 @@ fbFetchPixel_a4r4g4b4 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  a,r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ((uint16_t *) bits + offset);
-    
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
     a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
     r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
     g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
@@ -949,8 +945,8 @@ fbFetchPixel_x4r4g4b4 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ((uint16_t *) bits + offset);
-    
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
     r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
     g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
     b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4));
@@ -962,8 +958,8 @@ fbFetchPixel_a4b4g4r4 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  a,r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ((uint16_t *) bits + offset);
-    
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
     a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
     b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
     g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
@@ -976,8 +972,8 @@ fbFetchPixel_x4b4g4r4 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ((uint16_t *) bits + offset);
-    
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
     b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
     g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
     r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16;
@@ -988,8 +984,8 @@ static FASTCALL uint32_t
 fbFetchPixel_a8 (bits_image_t *pict, int offset, int line)
 {
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ((uint8_t *) bits + offset);
-    
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
     return pixel << 24;
 }
 
@@ -998,8 +994,8 @@ fbFetchPixel_r3g3b2 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ((uint8_t *) bits + offset);
-    
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
     r = ((pixel & 0xe0) | ((pixel & 0xe0) >> 3) | ((pixel & 0xc0) >> 6)) << 16;
     g = ((pixel & 0x1c) | ((pixel & 0x18) >> 3) | ((pixel & 0x1c) << 3)) << 8;
     b = (((pixel & 0x03)     ) |
@@ -1014,8 +1010,8 @@ fbFetchPixel_b2g3r3 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ((uint8_t *) bits + offset);
-    
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
     b = (((pixel & 0xc0)     ) |
 	 ((pixel & 0xc0) >> 2) |
 	 ((pixel & 0xc0) >> 4) |
@@ -1032,8 +1028,8 @@ fbFetchPixel_a2r2g2b2 (bits_image_t *pict, int offset, int line)
 {
     uint32_t   a,r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ((uint8_t *) bits + offset);
-    
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
     a = ((pixel & 0xc0) * 0x55) << 18;
     r = ((pixel & 0x30) * 0x55) << 12;
     g = ((pixel & 0x0c) * 0x55) << 6;
@@ -1046,8 +1042,8 @@ fbFetchPixel_a2b2g2r2 (bits_image_t *pict, int offset, int line)
 {
     uint32_t   a,r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ((uint8_t *) bits + offset);
-    
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
     a = ((pixel & 0xc0) * 0x55) << 18;
     b = ((pixel & 0x30) * 0x55) >> 6;
     g = ((pixel & 0x0c) * 0x55) << 6;
@@ -1059,7 +1055,7 @@ static FASTCALL uint32_t
 fbFetchPixel_c8 (bits_image_t *pict, int offset, int line)
 {
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ((uint8_t *) bits + offset);
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
     const pixman_indexed_t * indexed = pict->indexed;
     return indexed->rgba[pixel];
 }
@@ -1068,8 +1064,8 @@ static FASTCALL uint32_t
 fbFetchPixel_x4a4 (bits_image_t *pict, int offset, int line)
 {
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ((uint8_t *) bits + offset);
-    
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
     return ((pixel & 0xf) | ((pixel & 0xf) << 4)) << 24;
 }
 
@@ -1077,8 +1073,8 @@ static FASTCALL uint32_t
 fbFetchPixel_a4 (bits_image_t *pict, int offset, int line)
 {
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(bits, offset);
-    
+    uint32_t  pixel = Fetch4(pict, bits, offset);
+
     pixel |= pixel << 4;
     return pixel << 24;
 }
@@ -1088,8 +1084,8 @@ fbFetchPixel_r1g2b1 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(bits, offset);
-    
+    uint32_t  pixel = Fetch4(pict, bits, offset);
+
     r = ((pixel & 0x8) * 0xff) << 13;
     g = ((pixel & 0x6) * 0x55) << 7;
     b = ((pixel & 0x1) * 0xff);
@@ -1101,8 +1097,8 @@ fbFetchPixel_b1g2r1 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(bits, offset);
-    
+    uint32_t  pixel = Fetch4(pict, bits, offset);
+
     b = ((pixel & 0x8) * 0xff) >> 3;
     g = ((pixel & 0x6) * 0x55) << 7;
     r = ((pixel & 0x1) * 0xff) << 16;
@@ -1114,8 +1110,8 @@ fbFetchPixel_a1r1g1b1 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  a,r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(bits, offset);
-    
+    uint32_t  pixel = Fetch4(pict, bits, offset);
+
     a = ((pixel & 0x8) * 0xff) << 21;
     r = ((pixel & 0x4) * 0xff) << 14;
     g = ((pixel & 0x2) * 0xff) << 7;
@@ -1128,8 +1124,8 @@ fbFetchPixel_a1b1g1r1 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  a,r,g,b;
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(bits, offset);
-    
+    uint32_t  pixel = Fetch4(pict, bits, offset);
+
     a = ((pixel & 0x8) * 0xff) << 21;
     r = ((pixel & 0x4) * 0xff) >> 3;
     g = ((pixel & 0x2) * 0xff) << 7;
@@ -1141,9 +1137,9 @@ static FASTCALL uint32_t
 fbFetchPixel_c4 (bits_image_t *pict, int offset, int line)
 {
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(bits, offset);
+    uint32_t  pixel = Fetch4(pict, bits, offset);
     const pixman_indexed_t * indexed = pict->indexed;
-    
+
     return indexed->rgba[pixel];
 }
 
@@ -1152,7 +1148,7 @@ static FASTCALL uint32_t
 fbFetchPixel_a1 (bits_image_t *pict, int offset, int line)
 {
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(bits + (offset >> 5));
+    uint32_t  pixel = READ(pict, bits + (offset >> 5));
     uint32_t  a;
 #if BITMAP_BIT_ORDER == MSBFirst
     a = pixel >> (0x1f - (offset & 0x1f));
@@ -1170,7 +1166,7 @@ static FASTCALL uint32_t
 fbFetchPixel_g1 (bits_image_t *pict, int offset, int line)
 {
     uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t pixel = READ(bits + (offset >> 5));
+    uint32_t pixel = READ(pict, bits + (offset >> 5));
     const pixman_indexed_t * indexed = pict->indexed;
     uint32_t a;
 #if BITMAP_BIT_ORDER == MSBFirst
@@ -1236,15 +1232,15 @@ static fetchPixelProc fetchPixelProcForPicture (bits_image_t * pict)
     case PIXMAN_x8r8g8b8: return fbFetchPixel_x8r8g8b8;
     case PIXMAN_a8b8g8r8: return fbFetchPixel_a8b8g8r8;
     case PIXMAN_x8b8g8r8: return fbFetchPixel_x8b8g8r8;
-	
+
         /* 24bpp formats */
     case PIXMAN_r8g8b8: return fbFetchPixel_r8g8b8;
     case PIXMAN_b8g8r8: return fbFetchPixel_b8g8r8;
-	
+
         /* 16bpp formats */
     case PIXMAN_r5g6b5: return fbFetchPixel_r5g6b5;
     case PIXMAN_b5g6r5: return fbFetchPixel_b5g6r5;
-	
+
     case PIXMAN_a1r5g5b5: return fbFetchPixel_a1r5g5b5;
     case PIXMAN_x1r5g5b5: return fbFetchPixel_x1r5g5b5;
     case PIXMAN_a1b5g5r5: return fbFetchPixel_a1b5g5r5;
@@ -1253,7 +1249,7 @@ static fetchPixelProc fetchPixelProcForPicture (bits_image_t * pict)
     case PIXMAN_x4r4g4b4: return fbFetchPixel_x4r4g4b4;
     case PIXMAN_a4b4g4r4: return fbFetchPixel_a4b4g4r4;
     case PIXMAN_x4b4g4r4: return fbFetchPixel_x4b4g4r4;
-	
+
         /* 8bpp formats */
     case PIXMAN_a8: return  fbFetchPixel_a8;
     case PIXMAN_r3g3b2: return fbFetchPixel_r3g3b2;
@@ -1263,7 +1259,7 @@ static fetchPixelProc fetchPixelProcForPicture (bits_image_t * pict)
     case PIXMAN_c8: return  fbFetchPixel_c8;
     case PIXMAN_g8: return  fbFetchPixel_c8;
     case PIXMAN_x4a4: return fbFetchPixel_x4a4;
-	
+
         /* 4bpp formats */
     case PIXMAN_a4: return  fbFetchPixel_a4;
     case PIXMAN_r1g2b1: return fbFetchPixel_r1g2b1;
@@ -1272,7 +1268,7 @@ static fetchPixelProc fetchPixelProcForPicture (bits_image_t * pict)
     case PIXMAN_a1b1g1r1: return fbFetchPixel_a1b1g1r1;
     case PIXMAN_c4: return  fbFetchPixel_c4;
     case PIXMAN_g4: return  fbFetchPixel_c4;
-	
+
         /* 1bpp formats */
     case PIXMAN_a1: return  fbFetchPixel_a1;
     case PIXMAN_g1: return  fbFetchPixel_g1;
@@ -1281,12 +1277,10 @@ static fetchPixelProc fetchPixelProcForPicture (bits_image_t * pict)
     case PIXMAN_yuy2: return fbFetchPixel_yuy2;
     case PIXMAN_yv12: return fbFetchPixel_yv12;
     }
-    
+
     return NULL;
 }
 
-#undef image
-
 
 /*
  * All the store functions
@@ -1302,7 +1296,7 @@ static FASTCALL void
 fbStore_a8r8g8b8 (pixman_image_t *image,
 		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
 {
-    MEMCPY_WRAPPED(((uint32_t *)bits) + x, values, width*sizeof(uint32_t));
+    MEMCPY_WRAPPED(image, ((uint32_t *)bits) + x, values, width*sizeof(uint32_t));
 }
 
 static FASTCALL void
@@ -1312,7 +1306,7 @@ fbStore_x8r8g8b8 (pixman_image_t *image,
     int i;
     uint32_t *pixel = (uint32_t *)bits + x;
     for (i = 0; i < width; ++i)
-	WRITE(pixel++, values[i] & 0xffffff);
+	WRITE(image, pixel++, values[i] & 0xffffff);
 }
 
 static FASTCALL void
@@ -1322,7 +1316,7 @@ fbStore_a8b8g8r8 (pixman_image_t *image,
     int i;
     uint32_t *pixel = (uint32_t *)bits + x;
     for (i = 0; i < width; ++i)
-	WRITE(pixel++, (values[i] & 0xff00ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16));
+	WRITE(image, pixel++, (values[i] & 0xff00ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16));
 }
 
 static FASTCALL void
@@ -1332,7 +1326,7 @@ fbStore_x8b8g8r8 (pixman_image_t *image,
     int i;
     uint32_t *pixel = (uint32_t *)bits + x;
     for (i = 0; i < width; ++i)
-	WRITE(pixel++, (values[i] & 0x0000ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16));
+	WRITE(image, pixel++, (values[i] & 0x0000ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16));
 }
 
 static FASTCALL void
@@ -1343,7 +1337,7 @@ fbStore_r8g8b8 (pixman_image_t *image,
     int i;
     uint8_t *pixel = ((uint8_t *) bits) + 3*x;
     for (i = 0; i < width; ++i) {
-	Store24(pixel, values[i]);
+	Store24(image, pixel, values[i]);
 	pixel += 3;
     }
 }
@@ -1357,13 +1351,13 @@ fbStore_b8g8r8 (pixman_image_t *image,
     for (i = 0; i < width; ++i) {
 	uint32_t val = values[i];
 #if IMAGE_BYTE_ORDER == MSBFirst
-	WRITE(pixel++, Blue(val));
-	WRITE(pixel++, Green(val));
-	WRITE(pixel++, Red(val));
+	WRITE(image, pixel++, Blue(val));
+	WRITE(image, pixel++, Green(val));
+	WRITE(image, pixel++, Red(val));
 #else
-	WRITE(pixel++, Red(val));
-	WRITE(pixel++, Green(val));
-	WRITE(pixel++, Blue(val));
+	WRITE(image, pixel++, Red(val));
+	WRITE(image, pixel++, Green(val));
+	WRITE(image, pixel++, Blue(val));
 #endif
     }
 }
@@ -1376,7 +1370,7 @@ fbStore_r5g6b5 (pixman_image_t *image,
     uint16_t *pixel = ((uint16_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	uint32_t s = values[i];
-	WRITE(pixel++, ((s >> 3) & 0x001f) |
+	WRITE(image, pixel++, ((s >> 3) & 0x001f) |
 	      ((s >> 5) & 0x07e0) |
 	      ((s >> 8) & 0xf800));
     }
@@ -1390,7 +1384,7 @@ fbStore_b5g6r5 (pixman_image_t *image,
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	Split(values[i]);
-	WRITE(pixel++, ((b << 8) & 0xf800) |
+	WRITE(image, pixel++, ((b << 8) & 0xf800) |
 	      ((g << 3) & 0x07e0) |
 	      ((r >> 3)         ));
     }
@@ -1404,7 +1398,7 @@ fbStore_a1r5g5b5 (pixman_image_t *image,
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	Splita(values[i]);
-	WRITE(pixel++, ((a << 8) & 0x8000) |
+	WRITE(image, pixel++, ((a << 8) & 0x8000) |
 	      ((r << 7) & 0x7c00) |
 	      ((g << 2) & 0x03e0) |
 	      ((b >> 3)         ));
@@ -1419,7 +1413,7 @@ fbStore_x1r5g5b5 (pixman_image_t *image,
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	Split(values[i]);
-	WRITE(pixel++, ((r << 7) & 0x7c00) |
+	WRITE(image, pixel++, ((r << 7) & 0x7c00) |
 	      ((g << 2) & 0x03e0) |
 	      ((b >> 3)         ));
     }
@@ -1433,7 +1427,7 @@ fbStore_a1b5g5r5 (pixman_image_t *image,
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	Splita(values[i]);
-	WRITE(pixel++, ((a << 8) & 0x8000) |
+	WRITE(image, pixel++, ((a << 8) & 0x8000) |
 	      ((b << 7) & 0x7c00) |
 	      ((g << 2) & 0x03e0) |
 	      ((r >> 3)         ));
@@ -1448,7 +1442,7 @@ fbStore_x1b5g5r5 (pixman_image_t *image,
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	Split(values[i]);
-	WRITE(pixel++, ((b << 7) & 0x7c00) |
+	WRITE(image, pixel++, ((b << 7) & 0x7c00) |
 	      ((g << 2) & 0x03e0) |
 	      ((r >> 3)         ));
     }
@@ -1462,7 +1456,7 @@ fbStore_a4r4g4b4 (pixman_image_t *image,
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	Splita(values[i]);
-	WRITE(pixel++, ((a << 8) & 0xf000) |
+	WRITE(image, pixel++, ((a << 8) & 0xf000) |
 	      ((r << 4) & 0x0f00) |
 	      ((g     ) & 0x00f0) |
 	      ((b >> 4)         ));
@@ -1477,7 +1471,7 @@ fbStore_x4r4g4b4 (pixman_image_t *image,
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	Split(values[i]);
-	WRITE(pixel++, ((r << 4) & 0x0f00) |
+	WRITE(image, pixel++, ((r << 4) & 0x0f00) |
 	      ((g     ) & 0x00f0) |
 	      ((b >> 4)         ));
     }
@@ -1491,7 +1485,7 @@ fbStore_a4b4g4r4 (pixman_image_t *image,
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	Splita(values[i]);
-	WRITE(pixel++, ((a << 8) & 0xf000) |
+	WRITE(image, pixel++, ((a << 8) & 0xf000) |
 	      ((b << 4) & 0x0f00) |
 	      ((g     ) & 0x00f0) |
 	      ((r >> 4)         ));
@@ -1506,7 +1500,7 @@ fbStore_x4b4g4r4 (pixman_image_t *image,
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	Split(values[i]);
-	WRITE(pixel++, ((b << 4) & 0x0f00) |
+	WRITE(image, pixel++, ((b << 4) & 0x0f00) |
 	      ((g     ) & 0x00f0) |
 	      ((r >> 4)         ));
     }
@@ -1519,7 +1513,7 @@ fbStore_a8 (pixman_image_t *image,
     int i;
     uint8_t   *pixel = ((uint8_t *) bits) + x;
     for (i = 0; i < width; ++i) {
-	WRITE(pixel++, values[i] >> 24);
+	WRITE(image, pixel++, values[i] >> 24);
     }
 }
 
@@ -1531,7 +1525,7 @@ fbStore_r3g3b2 (pixman_image_t *image,
     uint8_t   *pixel = ((uint8_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	Split(values[i]);
-	WRITE(pixel++,
+	WRITE(image, pixel++,
 	      ((r     ) & 0xe0) |
 	      ((g >> 3) & 0x1c) |
 	      ((b >> 6)       ));
@@ -1546,7 +1540,7 @@ fbStore_b2g3r3 (pixman_image_t *image,
     uint8_t   *pixel = ((uint8_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	Split(values[i]);
-	WRITE(pixel++,
+	WRITE(image, pixel++,
 	      ((b     ) & 0xc0) |
 	      ((g >> 2) & 0x1c) |
 	      ((r >> 5)       ));
@@ -1561,7 +1555,7 @@ fbStore_a2r2g2b2 (pixman_image_t *image,
     uint8_t   *pixel = ((uint8_t *) bits) + x;
     for (i = 0; i < width; ++i) {
 	Splita(values[i]);
-	WRITE(pixel++, ((a     ) & 0xc0) |
+	WRITE(image, pixel++, ((a     ) & 0xc0) |
 	      ((r >> 2) & 0x30) |
 	      ((g >> 4) & 0x0c) |
 	      ((b >> 6)       ));
@@ -1575,7 +1569,7 @@ fbStore_c8 (pixman_image_t *image,
     int i;
     uint8_t   *pixel = ((uint8_t *) bits) + x;
     for (i = 0; i < width; ++i) {
-	WRITE(pixel++, miIndexToEnt24(indexed,values[i]));
+	WRITE(image, pixel++, miIndexToEnt24(indexed,values[i]));
     }
 }
 
@@ -1586,19 +1580,19 @@ fbStore_x4a4 (pixman_image_t *image,
     int i;
     uint8_t   *pixel = ((uint8_t *) bits) + x;
     for (i = 0; i < width; ++i) {
-	WRITE(pixel++, values[i] >> 28);
+	WRITE(image, pixel++, values[i] >> 28);
     }
 }
 
-#define Store8(l,o,v)  (WRITE((uint8_t *)(l) + ((o) >> 3), (v)))
+#define Store8(img,l,o,v)  (WRITE(img, (uint8_t *)(l) + ((o) >> 3), (v)))
 #if IMAGE_BYTE_ORDER == MSBFirst
-#define Store4(l,o,v)  Store8(l,o,((o) & 4 ?				\
-				   (Fetch8(l,o) & 0xf0) | (v) :		\
-				   (Fetch8(l,o) & 0x0f) | ((v) << 4)))
+#define Store4(img,l,o,v)  Store8(img,l,o,((o) & 4 ?				\
+				   (Fetch8(img,l,o) & 0xf0) | (v) :		\
+				   (Fetch8(img,l,o) & 0x0f) | ((v) << 4)))
 #else
-#define Store4(l,o,v)  Store8(l,o,((o) & 4 ?			       \
-				   (Fetch8(l,o) & 0x0f) | ((v) << 4) : \
-				   (Fetch8(l,o) & 0xf0) | (v)))
+#define Store4(img,l,o,v)  Store8(img,l,o,((o) & 4 ?			       \
+				   (Fetch8(img,l,o) & 0x0f) | ((v) << 4) : \
+				   (Fetch8(img,l,o) & 0xf0) | (v)))
 #endif
 
 static FASTCALL void
@@ -1607,7 +1601,7 @@ fbStore_a4 (pixman_image_t *image,
 {
     int i;
     for (i = 0; i < width; ++i) {
-	Store4(bits, i + x, values[i]>>28);
+	Store4(image, bits, i + x, values[i]>>28);
     }
 }
 
@@ -1618,12 +1612,12 @@ fbStore_r1g2b1 (pixman_image_t *image,
     int i;
     for (i = 0; i < width; ++i) {
 	uint32_t  pixel;
-	
+
 	Split(values[i]);
 	pixel = (((r >> 4) & 0x8) |
 		 ((g >> 5) & 0x6) |
 		 ((b >> 7)      ));
-	Store4(bits, i + x, pixel);
+	Store4(image, bits, i + x, pixel);
     }
 }
 
@@ -1634,12 +1628,12 @@ fbStore_b1g2r1 (pixman_image_t *image,
     int i;
     for (i = 0; i < width; ++i) {
 	uint32_t  pixel;
-	
+
 	Split(values[i]);
 	pixel = (((b >> 4) & 0x8) |
 		 ((g >> 5) & 0x6) |
 		 ((r >> 7)      ));
-	Store4(bits, i + x, pixel);
+	Store4(image, bits, i + x, pixel);
     }
 }
 
@@ -1655,7 +1649,7 @@ fbStore_a1r1g1b1 (pixman_image_t *image,
 		 ((r >> 5) & 0x4) |
 		 ((g >> 6) & 0x2) |
 		 ((b >> 7)      ));
-	Store4(bits, i + x, pixel);
+	Store4(image, bits, i + x, pixel);
     }
 }
 
@@ -1671,7 +1665,7 @@ fbStore_a1b1g1r1 (pixman_image_t *image,
 		 ((b >> 5) & 0x4) |
 		 ((g >> 6) & 0x2) |
 		 ((r >> 7)      ));
-	Store4(bits, i + x, pixel);
+	Store4(image, bits, i + x, pixel);
     }
 }
 
@@ -1682,9 +1676,9 @@ fbStore_c4 (pixman_image_t *image,
     int i;
     for (i = 0; i < width; ++i) {
 	uint32_t  pixel;
-	
+
 	pixel = miIndexToEnt24(indexed, values[i]);
-	Store4(bits, i + x, pixel);
+	Store4(image, bits, i + x, pixel);
     }
 }
 
@@ -1696,9 +1690,9 @@ fbStore_a1 (pixman_image_t *image,
     for (i = 0; i < width; ++i) {
 	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
-	
+
 	uint32_t v = values[i] & 0x80000000 ? mask : 0;
-	WRITE(pixel, (READ(pixel) & ~mask) | v);
+	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
     }
 }
 
@@ -1710,9 +1704,9 @@ fbStore_g1 (pixman_image_t *image,
     for (i = 0; i < width; ++i) {
 	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
 	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
-	
+
 	uint32_t v = miIndexToEntY24(indexed,values[i]) ? mask : 0;
-	WRITE(pixel, (READ(pixel) & ~mask) | v);
+	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
     }
 }
 
@@ -1724,15 +1718,15 @@ static storeProc storeProcForPicture (bits_image_t * pict)
     case PIXMAN_x8r8g8b8: return fbStore_x8r8g8b8;
     case PIXMAN_a8b8g8r8: return fbStore_a8b8g8r8;
     case PIXMAN_x8b8g8r8: return fbStore_x8b8g8r8;
-	
+
         /* 24bpp formats */
     case PIXMAN_r8g8b8: return fbStore_r8g8b8;
     case PIXMAN_b8g8r8: return fbStore_b8g8r8;
-	
+
         /* 16bpp formats */
     case PIXMAN_r5g6b5: return fbStore_r5g6b5;
     case PIXMAN_b5g6r5: return fbStore_b5g6r5;
-	
+
     case PIXMAN_a1r5g5b5: return fbStore_a1r5g5b5;
     case PIXMAN_x1r5g5b5: return fbStore_x1r5g5b5;
     case PIXMAN_a1b5g5r5: return fbStore_a1b5g5r5;
@@ -1741,7 +1735,7 @@ static storeProc storeProcForPicture (bits_image_t * pict)
     case PIXMAN_x4r4g4b4: return fbStore_x4r4g4b4;
     case PIXMAN_a4b4g4r4: return fbStore_a4b4g4r4;
     case PIXMAN_x4b4g4r4: return fbStore_x4b4g4r4;
-	
+
         /* 8bpp formats */
     case PIXMAN_a8: return  fbStore_a8;
     case PIXMAN_r3g3b2: return fbStore_r3g3b2;
@@ -1750,7 +1744,7 @@ static storeProc storeProcForPicture (bits_image_t * pict)
     case PIXMAN_c8: return  fbStore_c8;
     case PIXMAN_g8: return  fbStore_c8;
     case PIXMAN_x4a4: return fbStore_x4a4;
-	
+
         /* 4bpp formats */
     case PIXMAN_a4: return  fbStore_a4;
     case PIXMAN_r1g2b1: return fbStore_r1g2b1;
@@ -1759,7 +1753,7 @@ static storeProc storeProcForPicture (bits_image_t * pict)
     case PIXMAN_a1b1g1r1: return fbStore_a1b1g1r1;
     case PIXMAN_c4: return  fbStore_c4;
     case PIXMAN_g4: return  fbStore_c4;
-	
+
         /* 1bpp formats */
     case PIXMAN_a1: return  fbStore_a1;
     case PIXMAN_g1: return  fbStore_g1;
@@ -1809,7 +1803,7 @@ fbCombineOverU (uint32_t *dest, const uint32_t *src, int width)
         uint32_t s = *(src + i);
         uint32_t d = *(dest + i);
         uint32_t ia = Alpha(~s);
-	
+
         FbByteMulAdd(d, ia, s);
 	*(dest + i) = d;
     }
@@ -1885,7 +1879,7 @@ fbCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
         uint32_t d = *(dest + i);
         uint32_t dest_a = Alpha(d);
         uint32_t src_ia = Alpha(~s);
-	
+
         FbByteAddMul(s, dest_a, d, src_ia);
 	*(dest + i) = s;
     }
@@ -1900,7 +1894,7 @@ fbCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
         uint32_t d = *(dest + i);
         uint32_t src_a = Alpha(s);
         uint32_t dest_ia = Alpha(~d);
-	
+
         FbByteAddMul(s, dest_ia, d, src_a);
 	*(dest + i) = s;
     }
@@ -1915,7 +1909,7 @@ fbCombineXorU (uint32_t *dest, const uint32_t *src, int width)
         uint32_t d = *(dest + i);
         uint32_t src_ia = Alpha(~s);
         uint32_t dest_ia = Alpha(~d);
-	
+
         FbByteAddMul(s, dest_ia, d, src_ia);
 	*(dest + i) = s;
     }
@@ -1941,7 +1935,7 @@ fbCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
         uint32_t  s = *(src + i);
         uint32_t d = *(dest + i);
         uint16_t  sa, da;
-	
+
         sa = s >> 24;
         da = ~d >> 24;
         if (sa > da)
@@ -1956,12 +1950,12 @@ fbCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
 
 /*
  * All of the disjoint composing functions
- 
+
  The four entries in the first column indicate what source contributions
  come from each of the four areas of the picture -- areas covered by neither
  A nor B, areas covered only by A, areas covered only by B and finally
  areas covered by both A and B.
- 
+
  Disjoint			Conjoint
  Fa		Fb		Fa		Fb
  (0,0,0,0)	0		0		0		0
@@ -1976,7 +1970,7 @@ fbCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
  (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
  (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
  (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
- 
+
 */
 
 #define CombineAOut 1
@@ -1998,7 +1992,7 @@ static INLINE uint8_t
 fbCombineDisjointOutPart (uint8_t a, uint8_t b)
 {
     /* min (1, (1-b) / a) */
-    
+
     b = ~b;		    /* 1 - b */
     if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
 	return 0xff;	    /* 1 */
@@ -2012,7 +2006,7 @@ fbCombineDisjointInPart (uint8_t a, uint8_t b)
     /* max (1-(1-b)/a,0) */
     /*  = - min ((1-b)/a - 1, 0) */
     /*  = 1 - min (1, (1-b)/a) */
-    
+
     b = ~b;		    /* 1 - b */
     if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
 	return 0;	    /* 1 - 1 */
@@ -2030,7 +2024,7 @@ fbCombineDisjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8
         uint16_t Fa, Fb, t, u, v;
         uint8_t sa = s >> 24;
         uint8_t da = d >> 24;
-	
+
         switch (combine & CombineA) {
         default:
             Fa = 0;
@@ -2045,7 +2039,7 @@ fbCombineDisjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8
             Fa = 0xff;
             break;
         }
-	
+
         switch (combine & CombineB) {
         default:
             Fb = 0;
@@ -2076,7 +2070,7 @@ fbCombineDisjointOverU (uint32_t *dest, const uint32_t *src, int width)
     for (i = 0; i < width; ++i) {
         uint32_t  s = *(src + i);
         uint16_t  a = s >> 24;
-	
+
         if (a != 0x00)
         {
             if (a != 0xff)
@@ -2139,9 +2133,9 @@ fbCombineConjointOutPart (uint8_t a, uint8_t b)
 {
     /* max (1-b/a,0) */
     /* = 1-min(b/a,1) */
-    
+
     /* min (1, (1-b) / a) */
-    
+
     if (b >= a)		    /* b >= a -> b/a >= 1 */
 	return 0x00;	    /* 0 */
     return ~FbIntDiv(b,a);   /* 1 - b/a */
@@ -2152,7 +2146,7 @@ static INLINE uint8_t
 fbCombineConjointInPart (uint8_t a, uint8_t b)
 {
     /* min (1,b/a) */
-    
+
     if (b >= a)		    /* b >= a -> b/a >= 1 */
 	return 0xff;	    /* 1 */
     return FbIntDiv(b,a);   /* b/a */
@@ -2169,7 +2163,7 @@ fbCombineConjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8
         uint16_t  Fa, Fb, t, u, v;
         uint8_t sa = s >> 24;
         uint8_t da = d >> 24;
-	
+
         switch (combine & CombineA) {
         default:
             Fa = 0;
@@ -2184,7 +2178,7 @@ fbCombineConjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8
             Fa = 0xff;
             break;
         }
-	
+
         switch (combine & CombineB) {
         default:
             Fb = 0;
@@ -2316,16 +2310,16 @@ static INLINE void
 fbCombineMaskC (uint32_t *src, uint32_t *mask)
 {
     uint32_t a = *mask;
-    
+
     uint32_t	x;
     uint16_t	xa;
-    
+
     if (!a)
     {
 	*(src) = 0;
 	return;
     }
-    
+
     x = *(src);
     if (a == 0xffffffff)
     {
@@ -2335,7 +2329,7 @@ fbCombineMaskC (uint32_t *src, uint32_t *mask)
 	*(mask) = x;
 	return;
     }
-    
+
     xa = x >> 24;
     FbByteMulC(x, a);
     *(src) = x;
@@ -2348,16 +2342,16 @@ fbCombineMaskValueC (uint32_t *src, const uint32_t *mask)
 {
     uint32_t a = *mask;
     uint32_t	x;
-    
+
     if (!a)
     {
 	*(src) = 0;
 	return;
     }
-    
+
     if (a == 0xffffffff)
 	return;
-    
+
     x = *(src);
     FbByteMulC(x, a);
     *(src) =x;
@@ -2368,10 +2362,10 @@ fbCombineMaskAlphaC (const uint32_t *src, uint32_t *mask)
 {
     uint32_t a = *(mask);
     uint32_t	x;
-    
+
     if (!a)
 	return;
-    
+
     x = *(src) >> 24;
     if (x == 0xff)
 	return;
@@ -2383,7 +2377,7 @@ fbCombineMaskAlphaC (const uint32_t *src, uint32_t *mask)
 	*(mask) = x;
 	return;
     }
-    
+
     FbByteMul(a, x);
     *(mask) = a;
 }
@@ -2398,13 +2392,13 @@ static FASTCALL void
 fbCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
 	uint32_t s = *(src + i);
 	uint32_t m = *(mask + i);
-	
+
 	fbCombineMaskValueC (&s, &m);
-	
+
 	*(dest) = s;
     }
 }
@@ -2413,14 +2407,14 @@ static FASTCALL void
 fbCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
 	uint32_t s = *(src + i);
 	uint32_t m = *(mask + i);
 	uint32_t a;
-	
+
 	fbCombineMaskC (&s, &m);
-	
+
 	a = ~m;
         if (a != 0xffffffff)
         {
@@ -2439,18 +2433,18 @@ static FASTCALL void
 fbCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
         uint32_t d = *(dest + i);
         uint32_t a = ~d >> 24;
-	
+
         if (a)
         {
             uint32_t s = *(src + i);
 	    uint32_t m = *(mask + i);
-	    
+
 	    fbCombineMaskValueC (&s, &m);
-	    
+
             if (a != 0xff)
             {
                 FbByteMulAdd(s, a, d);
@@ -2464,7 +2458,7 @@ static FASTCALL void
 fbCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
         uint32_t d = *(dest + i);
         uint16_t a = d >> 24;
@@ -2472,7 +2466,7 @@ fbCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
         if (a)
         {
 	    uint32_t m = *(mask + i);
-	    
+
 	    s = *(src + i);
 	    fbCombineMaskValueC (&s, &m);
             if (a != 0xff)
@@ -2488,14 +2482,14 @@ static FASTCALL void
 fbCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
         uint32_t s = *(src + i);
         uint32_t m = *(mask + i);
         uint32_t a;
-	
+
 	fbCombineMaskAlphaC (&s, &m);
-	
+
 	a = m;
         if (a != 0xffffffff)
         {
@@ -2505,7 +2499,7 @@ fbCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
                 d = *(dest + i);
                 FbByteMulC(d, a);
             }
-	    *(dest + i) = d; 
+	    *(dest + i) = d;
         }
     }
 }
@@ -2514,7 +2508,7 @@ static FASTCALL void
 fbCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
         uint32_t d = *(dest + i);
         uint16_t a = ~d >> 24;
@@ -2522,10 +2516,10 @@ fbCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
         if (a)
         {
 	    uint32_t m = *(mask + i);
-	    
+
 	    s = *(src + i);
 	    fbCombineMaskValueC (&s, &m);
-	    
+
             if (a != 0xff)
             {
                 FbByteMul(s, a);
@@ -2539,14 +2533,14 @@ static FASTCALL void
 fbCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
 	uint32_t s = *(src + i);
 	uint32_t m = *(mask + i);
 	uint32_t a;
-	
+
 	fbCombineMaskAlphaC (&s, &m);
-	
+
         a = ~m;
         if (a != 0xffffffff)
         {
@@ -2565,18 +2559,18 @@ static FASTCALL void
 fbCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
         uint32_t d = *(dest + i);
         uint32_t s = *(src + i);
         uint32_t m = *(mask + i);
         uint32_t ad;
         uint16_t as = d >> 24;
-	
+
 	fbCombineMaskC (&s, &m);
-	
+
         ad = ~m;
-	
+
         FbByteAddMulC(d, ad, s, as);
 	*(dest + i) = d;
     }
@@ -2586,19 +2580,19 @@ static FASTCALL void
 fbCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
-	
+
         uint32_t d = *(dest + i);
         uint32_t s = *(src + i);
         uint32_t m = *(mask + i);
         uint32_t ad;
         uint16_t as = ~d >> 24;
-	
+
 	fbCombineMaskC (&s, &m);
-	
+
 	ad = m;
-	
+
         FbByteAddMulC(d, ad, s, as);
 	*(dest + i) = d;
     }
@@ -2608,18 +2602,18 @@ static FASTCALL void
 fbCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
         uint32_t d = *(dest + i);
         uint32_t s = *(src + i);
         uint32_t m = *(mask + i);
         uint32_t ad;
         uint16_t as = ~d >> 24;
-	
+
 	fbCombineMaskC (&s, &m);
-	
+
 	ad = ~m;
-	
+
         FbByteAddMulC(d, ad, s, as);
 	*(dest + i) = d;
     }
@@ -2629,14 +2623,14 @@ static FASTCALL void
 fbCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
         uint32_t s = *(src + i);
         uint32_t m = *(mask + i);
         uint32_t d = *(dest + i);
-	
+
 	fbCombineMaskValueC (&s, &m);
-	
+
         FbByteAdd(d, s);
 	*(dest + i) = d;
     }
@@ -2646,45 +2640,45 @@ static FASTCALL void
 fbCombineSaturateC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
         uint32_t  s, d;
         uint16_t  sa, sr, sg, sb, da;
         uint16_t  t, u, v;
         uint32_t  m,n,o,p;
-	
+
         d = *(dest + i);
         s = *(src + i);
 	m = *(mask + i);
-	
+
 	fbCombineMaskC (&s, &m);
-	
+
         sa = (m >> 24);
         sr = (m >> 16) & 0xff;
         sg = (m >>  8) & 0xff;
         sb = (m      ) & 0xff;
         da = ~d >> 24;
-	
+
         if (sb <= da)
             m = FbAdd(s,d,0,t);
         else
             m = FbGen (s, d, 0, (da << 8) / sb, 0xff, t, u, v);
-	
+
         if (sg <= da)
             n = FbAdd(s,d,8,t);
         else
             n = FbGen (s, d, 8, (da << 8) / sg, 0xff, t, u, v);
-	
+
         if (sr <= da)
             o = FbAdd(s,d,16,t);
         else
             o = FbGen (s, d, 16, (da << 8) / sr, 0xff, t, u, v);
-	
+
         if (sa <= da)
             p = FbAdd(s,d,24,t);
         else
             p = FbGen (s, d, 24, (da << 8) / sa, 0xff, t, u, v);
-	
+
 	*(dest + i) = m|n|o|p;
     }
 }
@@ -2693,7 +2687,7 @@ static FASTCALL void
 fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
         uint32_t  s, d;
         uint32_t  m,n,o,p;
@@ -2701,16 +2695,16 @@ fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int wi
         uint16_t  t, u, v;
         uint32_t  sa;
         uint8_t   da;
-	
+
         s = *(src + i);
         m = *(mask + i);
         d = *(dest + i);
         da = d >> 24;
-	
+
 	fbCombineMaskC (&s, &m);
-	
+
 	sa = m;
-	
+
         switch (combine & CombineA) {
         default:
             Fa = 0;
@@ -2733,7 +2727,7 @@ fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int wi
             Fa = 0xffffffff;
             break;
         }
-	
+
         switch (combine & CombineB) {
         default:
             Fb = 0;
@@ -2817,7 +2811,7 @@ static FASTCALL void
 fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
 {
     int i;
-    
+
     for (i = 0; i < width; ++i) {
         uint32_t  s, d;
         uint32_t  m,n,o,p;
@@ -2825,16 +2819,16 @@ fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int wi
         uint16_t  t, u, v;
         uint32_t  sa;
         uint8_t   da;
-	
+
         s = *(src + i);
         m = *(mask + i);
         d = *(dest + i);
         da = d >> 24;
-	
+
 	fbCombineMaskC (&s, &m);
-	
+
         sa = m;
-	
+
         switch (combine & CombineA) {
         default:
             Fa = 0;
@@ -2857,7 +2851,7 @@ fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int wi
             Fa = 0xffffffff;
             break;
         }
-	
+
         switch (combine & CombineB) {
         default:
             Fb = 0;
@@ -2996,9 +2990,9 @@ static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t
     uint32_t color;
     uint32_t *end;
     fetchPixelProc fetch = fetchPixelProcForPicture(pict);
-    
+
     color = fetch(pict, 0, 0);
-    
+
     end = buffer + width;
     while (buffer < end)
 	*(buffer++) = color;
@@ -3008,7 +3002,7 @@ static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t
 static void fbFetch(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
     fetchProc fetch = fetchProcForPicture(pict);
-    
+
     fetch(pict, x, y, width, buffer);
 }
 
@@ -3038,11 +3032,11 @@ typedef struct
     int32_t       left_x;
     int32_t       right_x;
     int32_t       stepper;
-    
+
     pixman_gradient_stop_t	*stops;
     int                      num_stops;
     unsigned int             spread;
-    
+
     int		  need_reset;
 } GradientWalker;
 
@@ -3061,7 +3055,7 @@ _gradient_walker_init (GradientWalker  *walker,
     walker->right_ag  = 0;
     walker->right_rb  = 0;
     walker->spread    = spread;
-    
+
     walker->need_reset = TRUE;
 }
 
@@ -3073,9 +3067,9 @@ _gradient_walker_reset (GradientWalker  *walker,
     pixman_color_t          *left_c, *right_c;
     int                      n, count = walker->num_stops;
     pixman_gradient_stop_t *      stops = walker->stops;
-    
+
     static const pixman_color_t   transparent_black = { 0, 0, 0, 0 };
-    
+
     switch (walker->spread)
     {
     case PIXMAN_REPEAT_NORMAL:
@@ -3090,7 +3084,7 @@ _gradient_walker_reset (GradientWalker  *walker,
 	    left_x =  stops[n-1].x;
 	    left_c = &stops[n-1].color;
 	}
-	
+
 	if (n == count) {
 	    right_x =  stops[0].x + 0x10000;
 	    right_c = &stops[0].color;
@@ -3101,12 +3095,12 @@ _gradient_walker_reset (GradientWalker  *walker,
 	left_x  += (pos - x);
 	right_x += (pos - x);
 	break;
-	
+
     case PIXMAN_REPEAT_PAD:
 	for (n = 0; n < count; n++)
 	    if (pos < stops[n].x)
 		break;
-	
+
 	if (n == 0) {
 	    left_x =  INT32_MIN;
 	    left_c = &stops[0].color;
@@ -3114,7 +3108,7 @@ _gradient_walker_reset (GradientWalker  *walker,
 	    left_x =  stops[n-1].x;
 	    left_c = &stops[n-1].color;
 	}
-	
+
 	if (n == count) {
 	    right_x =  INT32_MAX;
 	    right_c = &stops[n-1].color;
@@ -3123,7 +3117,7 @@ _gradient_walker_reset (GradientWalker  *walker,
 	    right_c = &stops[n].color;
 	}
 	break;
-	
+
     case PIXMAN_REPEAT_REFLECT:
 	x = (int32_t)pos & 0xFFFF;
 	if ((int32_t)pos & 0x10000)
@@ -3131,7 +3125,7 @@ _gradient_walker_reset (GradientWalker  *walker,
 	for (n = 0; n < count; n++)
 	    if (x < stops[n].x)
 		break;
-	
+
 	if (n == 0) {
 	    left_x =  -stops[0].x;
 	    left_c = &stops[0].color;
@@ -3139,7 +3133,7 @@ _gradient_walker_reset (GradientWalker  *walker,
 	    left_x =  stops[n-1].x;
 	    left_c = &stops[n-1].color;
 	}
-	
+
 	if (n == count) {
 	    right_x = 0x20000 - stops[n-1].x;
 	    right_c = &stops[n-1].color;
@@ -3147,30 +3141,30 @@ _gradient_walker_reset (GradientWalker  *walker,
 	    right_x =  stops[n].x;
 	    right_c = &stops[n].color;
 	}
-	
+
 	if ((int32_t)pos & 0x10000) {
 	    pixman_color_t  *tmp_c;
 	    int32_t          tmp_x;
-	    
+
 	    tmp_x   = 0x10000 - right_x;
 	    right_x = 0x10000 - left_x;
 	    left_x  = tmp_x;
-	    
+
 	    tmp_c   = right_c;
 	    right_c = left_c;
 	    left_c  = tmp_c;
-	    
+
 	    x = 0x10000 - x;
 	}
 	left_x  += (pos - x);
 	right_x += (pos - x);
 	break;
-	
+
     default:  /* RepeatNone */
 	for (n = 0; n < count; n++)
 	    if (pos < stops[n].x)
 		break;
-	
+
 	if (n == 0)
 	{
 	    left_x  =  INT32_MIN;
@@ -3191,14 +3185,14 @@ _gradient_walker_reset (GradientWalker  *walker,
 	    right_c = &stops[n].color;
 	}
     }
-    
+
     walker->left_x   = left_x;
     walker->right_x  = right_x;
     walker->left_ag  = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8);
     walker->left_rb  = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
     walker->right_ag = ((right_c->alpha >> 8) << 16)  | (right_c->green >> 8);
     walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
-    
+
     if ( walker->left_x == walker->right_x                ||
 	 ( walker->left_ag == walker->right_ag &&
 	   walker->left_rb == walker->right_rb )   )
@@ -3210,7 +3204,7 @@ _gradient_walker_reset (GradientWalker  *walker,
 	int32_t width = right_x - left_x;
 	walker->stepper = ((1 << 24) + width/2)/width;
     }
-    
+
     walker->need_reset = FALSE;
 }
 
@@ -3225,29 +3219,29 @@ _gradient_walker_pixel (GradientWalker  *walker,
 {
     int  dist, idist;
     uint32_t  t1, t2, a, color;
-    
+
     if (GRADIENT_WALKER_NEED_RESET (walker, x))
         _gradient_walker_reset (walker, x);
-    
+
     dist  = ((int)(x - walker->left_x)*walker->stepper) >> 16;
     idist = 256 - dist;
-    
+
     /* combined INTERPOLATE and premultiply */
     t1 = walker->left_rb*idist + walker->right_rb*dist;
     t1 = (t1 >> 8) & 0xff00ff;
-    
+
     t2  = walker->left_ag*idist + walker->right_ag*dist;
     t2 &= 0xff00ff00;
-    
+
     color = t2 & 0xff000000;
     a     = t2 >> 24;
-    
+
     t1  = t1*a + 0x800080;
     t1  = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
-    
+
     t2  = (t2 >> 8)*a + 0x800080;
     t2  = (t2 + ((t2 >> 8) & 0xff00ff));
-    
+
     return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
 }
 
@@ -3259,27 +3253,27 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
     GradientWalker  walker;
     uint32_t       *end = buffer + width;
     gradient_t	    *gradient;
-    
+
     if (pict->common.type == SOLID)
     {
 	register uint32_t color = ((solid_fill_t *)pict)->color;
-	
+
 	while (buffer < end)
 	    *(buffer++) = color;
-	
+
 	return;
     }
-    
+
     gradient = (gradient_t *)pict;
-    
+
     _gradient_walker_init (&walker, gradient, pict->common.repeat);
-    
+
     if (pict->common.type == LINEAR) {
 	pixman_vector_t v, unit;
 	pixman_fixed_32_32_t l;
 	pixman_fixed_48_16_t dx, dy, a, b, off;
 	linear_gradient_t *linear = (linear_gradient_t *)pict;
-	
+
         /* reference point is the center of the pixel */
         v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
         v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
@@ -3295,7 +3289,7 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
             unit.vector[1] = 0;
             unit.vector[2] = 0;
         }
-	
+
         dx = linear->p2.x - linear->p1.x;
         dy = linear->p2.y - linear->p1.y;
         l = dx*dx + dy*dy;
@@ -3314,11 +3308,11 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
                 t = ((a*v.vector[0] + b*v.vector[1]) >> 16) + off;
                 inc = (a * unit.vector[0] + b * unit.vector[1]) >> 16;
             }
-	    
+
 	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
 	    {
 		register uint32_t color;
-		
+
 		color = _gradient_walker_pixel( &walker, t );
 		while (buffer < end)
 		    *(buffer++) = color;
@@ -3347,11 +3341,11 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
 	else /* projective transformation */
 	{
 	    pixman_fixed_48_16_t t;
-	    
+
 	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
 	    {
 		register uint32_t color;
-		
+
 		if (v.vector[2] == 0)
 		{
 		    t = 0;
@@ -3359,12 +3353,12 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
 		else
 		{
 		    pixman_fixed_48_16_t x, y;
-		    
+
 		    x = ((pixman_fixed_48_16_t) v.vector[0] << 16) / v.vector[2];
 		    y = ((pixman_fixed_48_16_t) v.vector[1] << 16) / v.vector[2];
 		    t = ((a * x + b * y) >> 16) + off;
 		}
-		
+
  		color = _gradient_walker_pixel( &walker, t );
 		while (buffer < end)
 		    *(buffer++) = color;
@@ -3393,7 +3387,7 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
             }
         }
     } else {
-	
+
 /*
  * In the radial gradient problem we are given two circles (c₁,r₁) and
  * (c₂,r₂) that define the gradient itself. Then, for any point p, we
@@ -3516,7 +3510,7 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
 	double rx = x + 0.5;
 	double ry = y + 0.5;
         double rz = 1.;
-	
+
         if (pict->common.transform) {
             pixman_vector_t v;
             /* reference point is the center of the pixel */
@@ -3525,7 +3519,7 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
             v.vector[2] = pixman_fixed_1;
             if (!pixman_transform_point_3d (pict->common.transform, &v))
                 return;
-	    
+
             cx = pict->common.transform->matrix[0][0]/65536.;
             cy = pict->common.transform->matrix[1][0]/65536.;
             cz = pict->common.transform->matrix[2][0]/65536.;
@@ -3534,7 +3528,7 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
             rz = v.vector[2]/65536.;
             affine = pict->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1;
         }
-	
+
         if (pict->common.type == RADIAL) {
 	    radial_gradient_t *radial = (radial_gradient_t *)pict;
             if (affine) {
@@ -3548,28 +3542,28 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
 			double c1y = radial->c1.y / 65536.0;
 			double r1  = radial->c1.radius / 65536.0;
                         pixman_fixed_48_16_t t;
-			
+
 			pdx = rx - c1x;
 			pdy = ry - c1y;
-			
+
 			B = -2 * (  pdx * radial->cdx
 				    + pdy * radial->cdy
 				    + r1 * radial->dr);
 			C = (pdx * pdx + pdy * pdy - r1 * r1);
-			
+
                         det = (B * B) - (4 * radial->A * C);
 			if (det < 0.0)
 			    det = 0.0;
-			
+
 			if (radial->A < 0)
 			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
 			else
 			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
-			
+
 			*(buffer) = _gradient_walker_pixel (&walker, t);
 		    }
 		    ++buffer;
-		    
+
                     rx += cx;
                     ry += cy;
                 }
@@ -3586,35 +3580,35 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
 			double r1  = radial->c1.radius / 65536.0;
                         pixman_fixed_48_16_t t;
 			double x, y;
-			
+
 			if (rz != 0) {
 			    x = rx/rz;
 			    y = ry/rz;
 			} else {
 			    x = y = 0.;
 			}
-			
+
 			pdx = x - c1x;
 			pdy = y - c1y;
-			
+
 			B = -2 * (  pdx * radial->cdx
 				    + pdy * radial->cdy
 				    + r1 * radial->dr);
 			C = (pdx * pdx + pdy * pdy - r1 * r1);
-			
+
                         det = (B * B) - (4 * radial->A * C);
 			if (det < 0.0)
 			    det = 0.0;
-			
+
 			if (radial->A < 0)
 			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
 			else
 			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
-			
+
 			*(buffer) = _gradient_walker_pixel (&walker, t);
 		    }
 		    ++buffer;
-		    
+
                     rx += cx;
                     ry += cy;
 		    rz += cz;
@@ -3626,20 +3620,20 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
             if (affine) {
                 rx -= conical->center.x/65536.;
                 ry -= conical->center.y/65536.;
-		
+
                 while (buffer < end) {
 		    double angle;
-		    
+
                     if (!mask || *mask++ & maskBits)
 		    {
                         pixman_fixed_48_16_t   t;
-			
+
                         angle = atan2(ry, rx) + a;
 			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
-			
+
 			*(buffer) = _gradient_walker_pixel (&walker, t);
 		    }
-		    
+
                     ++buffer;
                     rx += cx;
                     ry += cy;
@@ -3648,11 +3642,11 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
                 while (buffer < end) {
                     double x, y;
                     double angle;
-		    
+
                     if (!mask || *mask++ & maskBits)
                     {
 			pixman_fixed_48_16_t  t;
-			
+
 			if (rz != 0) {
 			    x = rx/rz;
 			    y = ry/rz;
@@ -3663,10 +3657,10 @@ static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
 			y -= conical->center.y/65536.;
 			angle = atan2(y, x) + a;
 			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
-			
+
 			*(buffer) = _gradient_walker_pixel (&walker, t);
 		    }
-		    
+
                     ++buffer;
                     rx += cx;
                     ry += cy;
@@ -3687,17 +3681,17 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
     int         i;
     pixman_box16_t box;
     pixman_bool_t affine = TRUE;
-    
+
     fetch = fetchPixelProcForPicture(pict);
-    
+
     bits = pict->bits;
     stride = pict->rowstride;
-    
+
     /* reference point is the center of the pixel */
     v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1 / 2;
     v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1 / 2;
     v.vector[2] = pixman_fixed_1;
-    
+
     /* when using convolution filters one might get here without a transform */
     if (pict->common.transform)
     {
@@ -3717,7 +3711,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
         unit.vector[1] = 0;
         unit.vector[2] = 0;
     }
-    
+
     if (pict->common.filter == PIXMAN_FILTER_NEAREST || pict->common.filter == PIXMAN_FILTER_FAST)
     {
         if (pict->common.repeat == PIXMAN_REPEAT_NORMAL) {
@@ -3738,7 +3732,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    *(buffer + i) = fetch(pict, x, y);
 			}
 		    }
-		    
+
                     v.vector[0] += unit.vector[0];
                     v.vector[1] += unit.vector[1];
                     v.vector[2] += unit.vector[2];
@@ -3763,7 +3757,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 				*(buffer + i) = 0;
 			}
 		    }
-		    
+
                     v.vector[0] += unit.vector[0];
                     v.vector[1] += unit.vector[1];
                     v.vector[2] += unit.vector[2];
@@ -3828,7 +3822,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
         v.vector[1] -= v.vector[2] / 2;
         unit.vector[0] -= unit.vector[2] / 2;
         unit.vector[1] -= unit.vector[2] / 2;
-	
+
         if (pict->common.repeat == PIXMAN_REPEAT_NORMAL) {
             if (pixman_region_n_rects(pict->common.src_clip) == 1) {
                 for (i = 0; i < width; ++i) {
@@ -3840,7 +3834,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 			    uint32_t tl, tr, bl, br, r;
 			    uint32_t ft, fb;
-			    
+
 			    if (!affine) {
 				pixman_fixed_48_16_t div;
 				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
@@ -3857,20 +3851,20 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    }
 			    x2 = x1 + 1;
 			    y2 = y1 + 1;
-			    
+
 			    idistx = 256 - distx;
 			    idisty = 256 - disty;
-			    
+
 			    x1 = MOD (x1, pict->width);
 			    x2 = MOD (x2, pict->width);
 			    y1 = MOD (y1, pict->height);
 			    y2 = MOD (y2, pict->height);
-			    
+
 			    tl = fetch(pict, x1, y1);
 			    tr = fetch(pict, x2, y1);
 			    bl = fetch(pict, x1, y2);
 			    br = fetch(pict, x2, y2);
-			    
+
 			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
@@ -3900,7 +3894,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 			    uint32_t tl, tr, bl, br, r;
 			    uint32_t ft, fb;
-			    
+
 			    if (!affine) {
 				pixman_fixed_48_16_t div;
 				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
@@ -3917,15 +3911,15 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    }
 			    x2 = x1 + 1;
 			    y2 = y1 + 1;
-			    
+
 			    idistx = 256 - distx;
 			    idisty = 256 - disty;
-			    
+
 			    x1 = MOD (x1, pict->width);
 			    x2 = MOD (x2, pict->width);
 			    y1 = MOD (y1, pict->height);
 			    y2 = MOD (y2, pict->height);
-			    
+
 			    tl = pixman_region_contains_point(pict->common.src_clip, x1, y1, &box)
 				? fetch(pict, x1, y1) : 0;
 			    tr = pixman_region_contains_point(pict->common.src_clip, x2, y1, &box)
@@ -3934,7 +3928,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 				? fetch(pict, x1, y2) : 0;
 			    br = pixman_region_contains_point(pict->common.src_clip, x2, y2, &box)
 				? fetch(pict, x2, y2) : 0;
-			    
+
 			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
@@ -3950,7 +3944,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    *(buffer + i) = r;
 			}
 		    }
-		    
+
                     v.vector[0] += unit.vector[0];
                     v.vector[1] += unit.vector[1];
                     v.vector[2] += unit.vector[2];
@@ -3969,7 +3963,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    uint32_t tl, tr, bl, br, r;
 			    pixman_bool_t x1_out, x2_out, y1_out, y2_out;
 			    uint32_t ft, fb;
-			    
+
 			    if (!affine) {
 				pixman_fixed_48_16_t div;
 				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
@@ -3986,20 +3980,20 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    }
 			    x2 = x1 + 1;
 			    y2 = y1 + 1;
-			    
+
 			    idistx = 256 - distx;
 			    idisty = 256 - disty;
-			    
+
 			    x1_out = (x1 < box.x1) | (x1 >= box.x2);
 			    x2_out = (x2 < box.x1) | (x2 >= box.x2);
 			    y1_out = (y1 < box.y1) | (y1 >= box.y2);
 			    y2_out = (y2 < box.y1) | (y2 >= box.y2);
-			    
+
 			    tl = x1_out|y1_out ? 0 : fetch(pict, x1, y1);
 			    tr = x2_out|y1_out ? 0 : fetch(pict, x2, y1);
 			    bl = x1_out|y2_out ? 0 : fetch(pict, x1, y2);
 			    br = x2_out|y2_out ? 0 : fetch(pict, x2, y2);
-			    
+
 			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
@@ -4015,7 +4009,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    *(buffer + i) = r;
 			}
 		    }
-		    
+
                     v.vector[0] += unit.vector[0];
                     v.vector[1] += unit.vector[1];
                     v.vector[2] += unit.vector[2];
@@ -4030,7 +4024,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 			    uint32_t tl, tr, bl, br, r;
 			    uint32_t ft, fb;
-			    
+
 			    if (!affine) {
 				pixman_fixed_48_16_t div;
 				div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
@@ -4047,10 +4041,10 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    }
 			    x2 = x1 + 1;
 			    y2 = y1 + 1;
-			    
+
 			    idistx = 256 - distx;
 			    idisty = 256 - disty;
-			    
+
 			    tl = pixman_region_contains_point(pict->common.src_clip, x1, y1, &box)
 				? fetch(pict, x1, y1) : 0;
 			    tr = pixman_region_contains_point(pict->common.src_clip, x2, y1, &box)
@@ -4059,7 +4053,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 				? fetch(pict, x1, y2) : 0;
 			    br = pixman_region_contains_point(pict->common.src_clip, x2, y2, &box)
 				? fetch(pict, x2, y2) : 0;
-			    
+
 			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
 			    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
@@ -4075,7 +4069,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    *(buffer + i) = r;
 			}
 		    }
-		    
+
                     v.vector[0] += unit.vector[0];
                     v.vector[1] += unit.vector[1];
                     v.vector[2] += unit.vector[2];
@@ -4098,7 +4092,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 		    int x1, x2, y1, y2, x, y;
 		    int32_t srtot, sgtot, sbtot, satot;
 		    pixman_fixed_t *p = params;
-		    
+
 		    if (!affine) {
 			pixman_fixed_48_16_t tmp;
 			tmp = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2] - xoff;
@@ -4111,9 +4105,9 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 		    }
 		    x2 = x1 + cwidth;
 		    y2 = y1 + cheight;
-		    
+
 		    srtot = sgtot = sbtot = satot = 0;
-		    
+
 		    for (y = y1; y < y2; y++) {
 			int ty = (pict->common.repeat == PIXMAN_REPEAT_NORMAL) ? MOD (y, pict->height) : y;
 			for (x = x1; x < x2; x++) {
@@ -4121,7 +4115,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 				int tx = (pict->common.repeat == PIXMAN_REPEAT_NORMAL) ? MOD (x, pict->width) : x;
 				if (pixman_region_contains_point (pict->common.src_clip, tx, ty, &box)) {
 				    uint32_t c = fetch(pict, tx, ty);
-				    
+
 				    srtot += Red(c) * *p;
 				    sgtot += Green(c) * *p;
 				    sbtot += Blue(c) * *p;
@@ -4131,17 +4125,17 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    p++;
 			}
 		    }
-		    
+
 		    satot >>= 16;
 		    srtot >>= 16;
 		    sgtot >>= 16;
 		    sbtot >>= 16;
-		    
+
 		    if (satot < 0) satot = 0; else if (satot > 0xff) satot = 0xff;
 		    if (srtot < 0) srtot = 0; else if (srtot > 0xff) srtot = 0xff;
 		    if (sgtot < 0) sgtot = 0; else if (sgtot > 0xff) sgtot = 0xff;
 		    if (sbtot < 0) sbtot = 0; else if (sbtot > 0xff) sbtot = 0xff;
-		    
+
 		    *(buffer + i) = ((satot << 24) |
 				     (srtot << 16) |
 				     (sgtot <<  8) |
@@ -4153,7 +4147,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
             v.vector[2] += unit.vector[2];
         }
     }
-    
+
     fbFinishAccess (pict->pDrawable);
 }
 
@@ -4163,14 +4157,14 @@ static void fbFetchExternalAlpha(bits_image_t * pict, int x, int y, int width, u
     int i;
     uint32_t _alpha_buffer[SCANLINE_BUFFER_LENGTH];
     uint32_t *alpha_buffer = _alpha_buffer;
-    
+
     if (!pict->common.alpha_map) {
         fbFetchTransformed (pict, x, y, width, buffer, mask, maskBits);
 	return;
     }
     if (width > SCANLINE_BUFFER_LENGTH)
         alpha_buffer = (uint32_t *) pixman_malloc_ab (width, sizeof(uint32_t));
-    
+
     fbFetchTransformed(pict, x, y, width, buffer, mask, maskBits);
     fbFetchTransformed((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x,
 		       y - pict->common.alpha_origin.y, width, alpha_buffer,
@@ -4185,7 +4179,7 @@ static void fbFetchExternalAlpha(bits_image_t * pict, int x, int y, int width, u
 		| (div_255(Blue(*(buffer + i)) * a));
 	}
     }
-    
+
     if (alpha_buffer != _alpha_buffer)
         free(alpha_buffer);
 }
@@ -4196,7 +4190,7 @@ static void fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buff
     uint32_t stride;
     storeProc store = storeProcForPicture(pict);
     const pixman_indexed_t * indexed = pict->indexed;
-    
+
     bits = pict->bits;
     stride = pict->rowstride;
     bits += y*stride;
@@ -4213,33 +4207,33 @@ static void fbStoreExternalAlpha(bits_image_t * pict, int x, int y, int width, u
     storeProc astore;
     const pixman_indexed_t * indexed = pict->indexed;
     const pixman_indexed_t * aindexed;
-    
+
     if (!pict->common.alpha_map) {
         fbStore(pict, x, y, width, buffer);
 	return;
     }
-    
+
     store = storeProcForPicture(pict);
     astore = storeProcForPicture(pict->common.alpha_map);
     aindexed = pict->common.alpha_map->indexed;
-    
+
     ax = x;
     ay = y;
-    
+
     bits = pict->bits;
     stride = pict->rowstride;
-    
+
     alpha_bits = pict->common.alpha_map->bits;
     astride = pict->common.alpha_map->rowstride;
-    
+
     bits       += y*stride;
     alpha_bits += (ay - pict->common.alpha_origin.y)*astride;
-    
-    
+
+
     store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
     astore((pixman_image_t *)pict->common.alpha_map,
 	   alpha_bits, buffer, ax - pict->common.alpha_origin.x, width, aindexed);
-    
+
     fbFinishAccess (pict->alpha_map->pDrawable);
     fbFinishAccess (pict->pDrawable);
 }
@@ -4265,7 +4259,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     uint32_t *bits;
     uint32_t stride;
     int xoff, yoff;
-    
+
     if (data->op == PIXMAN_OP_CLEAR)
         fetchSrc = NULL;
     else if (IS_SOURCE_IMAGE (data->src))
@@ -4278,7 +4272,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     else
     {
 	bits_image_t *bits = (bits_image_t *)data->src;
-	
+
 	if (bits->common.alpha_map)
 	{
 	    fetchSrc = (scanFetchProc)fbFetchExternalAlpha;
@@ -4299,7 +4293,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	    fetchSrc = (scanFetchProc)fbFetchTransformed;
 	}
     }
-    
+
     if (!data->mask || data->op == PIXMAN_OP_CLEAR)
     {
 	fetchMask = NULL;
@@ -4316,7 +4310,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	else
 	{
 	    bits_image_t *bits = (bits_image_t *)data->mask;
-	    
+
 	    if (bits->common.alpha_map)
 	    {
 		fetchMask = (scanFetchProc)fbFetchExternalAlpha;
@@ -4333,12 +4327,12 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 		fetchMask = (scanFetchProc)fbFetchTransformed;
 	}
     }
-    
+
     if (data->dest->common.alpha_map)
     {
 	fetchDest = (scanFetchProc)fbFetchExternalAlpha;
 	store = (scanStoreProc)fbStoreExternalAlpha;
-	
+
 	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 	    fetchDest = NULL;
     }
@@ -4346,7 +4340,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     {
 	fetchDest = (scanFetchProc)fbFetch;
 	store = (scanStoreProc)fbStore;
-	
+
 	switch (data->op)
 	{
 	case PIXMAN_OP_CLEAR:
@@ -4368,7 +4362,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	    break;
 	}
     }
-    
+
     if (!store)
     {
 	bits = data->dest->bits.bits;
@@ -4381,11 +4375,11 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	stride = 0;
 	xoff = yoff = 0;
     }
-    
+
     if (fetchSrc		   &&
 	fetchMask		   &&
 	data->mask		   &&
-	data->mask->common.type == BITS && 
+	data->mask->common.type == BITS &&
 	data->mask->common.component_alpha &&
 	PIXMAN_FORMAT_RGB (data->mask->bits.format))
     {
@@ -4393,7 +4387,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	CombineFuncC compose = PIXMAN_COMPOSE_FUNCTIONS.combineC[data->op];
 	if (!compose)
 	    return;
-	
+
 	for (i = 0; i < data->height; ++i) {
 	    /* fill first half of scanline with source */
 	    if (fetchSrc)
@@ -4404,11 +4398,11 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 		       source can be optimized */
 		    fetchMask (data->mask, data->xMask, data->yMask + i,
 			       data->width, mask_buffer, 0, 0);
-		    
+
 		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 			fetchMask = NULL;
 		}
-		
+
 		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 		{
 		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
@@ -4427,17 +4421,17 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 		fetchMask (data->mask, data->xMask, data->yMask + i,
 			   data->width, mask_buffer, 0, 0);
 	    }
-	    
+
 	    if (store)
 	    {
 		/* fill dest into second half of scanline */
 		if (fetchDest)
 		    fetchDest (data->dest, data->xDest, data->yDest + i,
 			       data->width, dest_buffer, 0, 0);
-		
+
 		/* blend */
 		compose (dest_buffer, src_buffer, mask_buffer, data->width);
-		
+
 		/* write back */
 		store (data->dest, data->xDest, data->yDest + i, data->width,
 		       dest_buffer);
@@ -4457,10 +4451,10 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	CombineFuncU compose = PIXMAN_COMPOSE_FUNCTIONS.combineU[data->op];
 	if (!compose)
 	    return;
-	
+
 	if (fetchMask)
 	    mask_buffer = dest_buffer + data->width;
-	
+
 	for (i = 0; i < data->height; ++i) {
 	    /* fill first half of scanline with source */
 	    if (fetchSrc)
@@ -4471,16 +4465,16 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 		       source can be optimized */
 		    fetchMask (data->mask, data->xMask, data->yMask + i,
 			       data->width, mask_buffer, 0, 0);
-		    
+
 		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 			fetchMask = NULL;
 		}
-		
+
 		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 		{
 		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 			      data->width, src_buffer, 0, 0);
-		    
+
 		    if (mask_buffer)
 		    {
 			fbCombineInU (mask_buffer, src_buffer, data->width);
@@ -4488,7 +4482,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 		    }
 		    else
 			src_mask_buffer = src_buffer;
-		    
+
 		    fetchSrc = NULL;
 		}
 		else
@@ -4496,12 +4490,12 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 			      data->width, src_buffer, mask_buffer,
 			      0xff000000);
-		    
+
 		    if (mask_buffer)
 			PIXMAN_COMPOSE_FUNCTIONS.combineMaskU (src_buffer,
 							       mask_buffer,
 							       data->width);
-		    
+
 		    src_mask_buffer = src_buffer;
 		}
 	    }
@@ -4509,22 +4503,22 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	    {
 		fetchMask (data->mask, data->xMask, data->yMask + i,
 			   data->width, mask_buffer, 0, 0);
-		
+
 		fbCombineInU (mask_buffer, src_buffer, data->width);
-		
+
 		src_mask_buffer = mask_buffer;
 	    }
-	    
+
 	    if (store)
 	    {
 		/* fill dest into second half of scanline */
 		if (fetchDest)
 		    fetchDest (data->dest, data->xDest, data->yDest + i,
 			       data->width, dest_buffer, 0, 0);
-		
+
 		/* blend */
 		compose (dest_buffer, src_mask_buffer, data->width);
-		
+
 		/* write back */
 		store (data->dest, data->xDest, data->yDest + i, data->width,
 		       dest_buffer);
@@ -4538,7 +4532,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	    }
 	}
     }
-    
+
     if (!store)
 	fbFinishAccess (data->dest->pDrawable);
 }
diff --git a/pixman/pixman-edge-imp.h b/pixman/pixman-edge-imp.h
index c242aa4..c89a449 100644
--- a/pixman/pixman-edge-imp.h
+++ b/pixman/pixman-edge-imp.h
@@ -37,16 +37,16 @@ rasterizeEdges (pixman_image_t  *image,
     uint32_t *buf = (image)->bits.bits;
     int32_t stride = (image)->bits.rowstride;
     int32_t width = (image)->bits.width;
-    
+
     line = buf + pixman_fixed_to_int (y) * stride;
-    
+
     for (;;)
     {
 	pixman_fixed_t	lx;
 	pixman_fixed_t      rx;
 	int	lxi;
 	int rxi;
-	
+
 	/* clip X */
 	lx = l->x;
 	if (lx < 0)
@@ -54,15 +54,15 @@ rasterizeEdges (pixman_image_t  *image,
 	rx = r->x;
 	if (pixman_fixed_to_int (rx) >= width)
 	    rx = pixman_int_to_fixed (width);
-	
+
 	/* Skip empty (or backwards) sections */
 	if (rx > lx)
 	{
-	    
+
 	    /* Find pixel bounds for span */
 	    lxi = pixman_fixed_to_int (lx);
 	    rxi = pixman_fixed_to_int (rx);
-	    
+
 #if N_BITS == 1
 	    {
 		uint32_t  *a = line;
@@ -71,30 +71,30 @@ rasterizeEdges (pixman_image_t  *image,
 		int	    nmiddle;
 		int	    width = rxi - lxi;
 		int	    x = lxi;
-		
+
 		a += x >> FB_SHIFT;
 		x &= FB_MASK;
-		
+
 		FbMaskBits (x, width, startmask, nmiddle, endmask);
 		    if (startmask) {
-			WRITE(a, READ(a) | startmask);
+			WRITE(image, a, READ(image, a) | startmask);
 			a++;
 		    }
 		    while (nmiddle--)
-			WRITE(a++, FB_ALLONES);
+			WRITE(image, a++, FB_ALLONES);
 		    if (endmask)
-			WRITE(a, READ(a) | endmask);
+			WRITE(image, a, READ(image, a) | endmask);
 	    }
 #else
 	    {
 		DefineAlpha(line,lxi);
 		int	    lxs;
 		int     rxs;
-		
+
 		/* Sample coverage for edge pixels */
 		lxs = RenderSamplesX (lx, N_BITS);
 		rxs = RenderSamplesX (rx, N_BITS);
-		
+
 		/* Add coverage across row */
 		if (lxi == rxi)
 		{
@@ -103,7 +103,7 @@ rasterizeEdges (pixman_image_t  *image,
 		else
 		{
 		    int	xi;
-		    
+
 		    AddAlpha (N_X_FRAC(N_BITS) - lxs);
 		    StepAlpha;
 		    for (xi = lxi + 1; xi < rxi; xi++)
@@ -121,10 +121,10 @@ rasterizeEdges (pixman_image_t  *image,
 	    }
 #endif
 	}
-	
+
 	if (y == b)
 	    break;
-	
+
 #if N_BITS > 1
 	if (pixman_fixed_frac (y) != Y_FRAC_LAST(N_BITS))
 	{
diff --git a/pixman/pixman-edge.c b/pixman/pixman-edge.c
index 24758c3..d9e2d9a 100644
--- a/pixman/pixman-edge.c
+++ b/pixman/pixman-edge.c
@@ -54,10 +54,10 @@
 
 #define StepAlpha	((__ap += __ao), (__ao ^= 1))
 
-#define AddAlpha(a) {						\
-	uint8_t   __o = READ(__ap);				\
-	uint8_t   __a = (a) + Get4(__o, __ao);			\
-	WRITE(__ap, Put4 (__o, __ao, __a | (0 - ((__a) >> 4))));	\
+#define AddAlpha(a) {							\
+	uint8_t   __o = READ(image, __ap);				\
+	uint8_t   __a = (a) + Get4(__o, __ao);				\
+	WRITE(image, __ap, Put4 (__o, __ao, __a | (0 - ((__a) >> 4))));	\
     }
 
 #include "pixman-edge-imp.h"
@@ -100,7 +100,7 @@ clip255 (int x)
 								\
 	while (i__--)						\
 	{							\
-	    WRITE((buf__), clip255 (READ((buf__)) + (val__)));	\
+	    WRITE(image, (buf__), clip255 (READ(image, (buf__)) + (val__)));	\
 	    (buf__)++;						\
 	}							\
     } while (0)
@@ -127,18 +127,18 @@ fbRasterizeEdges8 (pixman_image_t       *image,
     uint32_t  *line;
     int fill_start = -1, fill_end = -1;
     int fill_size = 0;
-    uint32_t *buf = (image)->bits.bits;		
-    int32_t stride = (image)->bits.rowstride;	
+    uint32_t *buf = (image)->bits.bits;
+    int32_t stride = (image)->bits.rowstride;
     int32_t width = (image)->bits.width;
-    
+
     line = buf + pixman_fixed_to_int (y) * stride;
-    
+
     for (;;)
     {
         uint8_t *ap = (uint8_t *) line;
 	pixman_fixed_t	lx, rx;
 	int	lxi, rxi;
-	
+
 	/* clip X */
 	lx = l->x;
 	if (lx < 0)
@@ -146,32 +146,32 @@ fbRasterizeEdges8 (pixman_image_t       *image,
 	rx = r->x;
 	if (pixman_fixed_to_int (rx) >= width)
 	    rx = pixman_int_to_fixed (width);
-	
+
 	/* Skip empty (or backwards) sections */
 	if (rx > lx)
 	{
             int lxs, rxs;
-	    
+
 	    /* Find pixel bounds for span. */
 	    lxi = pixman_fixed_to_int (lx);
 	    rxi = pixman_fixed_to_int (rx);
-	    
+
             /* Sample coverage for edge pixels */
             lxs = RenderSamplesX (lx, 8);
             rxs = RenderSamplesX (rx, 8);
-	    
+
             /* Add coverage across row */
 	    if (lxi == rxi)
 	    {
-		WRITE(ap +lxi, clip255 (READ(ap + lxi) + rxs - lxs));
+		WRITE(image, ap +lxi, clip255 (READ(image, ap + lxi) + rxs - lxs));
 	    }
 	    else
 	    {
-		WRITE(ap + lxi, clip255 (READ(ap + lxi) + N_X_FRAC(8) - lxs));
-		
+		WRITE(image, ap + lxi, clip255 (READ(image, ap + lxi) + N_X_FRAC(8) - lxs));
+
 		/* Move forward so that lxi/rxi is the pixel span */
 		lxi++;
-		
+
 		/* Don't bother trying to optimize the fill unless
 		 * the span is longer than 4 pixels. */
 		if (rxi - lxi > 4)
@@ -209,7 +209,7 @@ fbRasterizeEdges8 (pixman_image_t       *image,
 				add_saturate_8 (ap + lxi, N_X_FRAC(8),
 						fill_start - lxi);
 			    }
-			    
+
 			    /* Update fill_end */
 			    if (rxi < fill_end)
 			    {
@@ -232,21 +232,21 @@ fbRasterizeEdges8 (pixman_image_t       *image,
 		{
 		    add_saturate_8 (ap + lxi, N_X_FRAC(8), rxi - lxi);
 		}
-		
+
 		/* Do not add in a 0 alpha here. This check is
 		 * necessary to avoid a buffer overrun, (when rx
 		 * is exactly on a pixel boundary). */
 		if (rxs)
-		    WRITE(ap + rxi, clip255 (READ(ap + rxi) + rxs));
+		    WRITE(image, ap + rxi, clip255 (READ(image, ap + rxi) + rxs));
 	    }
 	}
-	
+
 	if (y == b) {
             /* We're done, make sure we clean up any remaining fill. */
             if (fill_start != fill_end) {
 		if (fill_size == N_Y_FRAC(8))
 		{
-		    MEMSET_WRAPPED (ap + fill_start, 0xff, fill_end - fill_start);
+		    MEMSET_WRAPPED (image, ap + fill_start, 0xff, fill_end - fill_start);
 		}
 		else
 		{
@@ -256,7 +256,7 @@ fbRasterizeEdges8 (pixman_image_t       *image,
             }
 	    break;
         }
-	
+
 	if (pixman_fixed_frac (y) != Y_FRAC_LAST(8))
 	{
 	    RenderEdgeStepSmall (l);
@@ -272,7 +272,7 @@ fbRasterizeEdges8 (pixman_image_t       *image,
             {
 		if (fill_size == N_Y_FRAC(8))
 		{
-		    MEMSET_WRAPPED (ap + fill_start, 0xff, fill_end - fill_start);
+		    MEMSET_WRAPPED (image, ap + fill_start, 0xff, fill_end - fill_start);
 		}
 		else
 		{
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 8c7be6d..5a752ce 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -24,7 +24,7 @@
  *
  * Author:  Søren Sandmann (sandmann at redhat.com)
  * Minor Improvements: Nicholas Miell (nmiell at gmail.com)
- * MMX code paths for fbcompose.c by Lars Knoll (lars at trolltech.com) 
+ * MMX code paths for fbcompose.c by Lars Knoll (lars at trolltech.com)
  *
  * Based on work by Owen Taylor
  */
@@ -45,8 +45,8 @@
 
 #undef READ
 #undef WRITE
-#define READ(x) *(x)
-#define WRITE(ptr,v)   (*(ptr) = (v));
+#define READ(img,x) *(x)
+#define WRITE(img,ptr,v) (*(ptr) = (v));
 
 #define noVERBOSE
 
@@ -177,12 +177,12 @@ static inline __m64
 pix_multiply (__m64 a, __m64 b)
 {
     __m64 res;
-    
+
     res = _mm_mullo_pi16 (a, b);
     res = _mm_adds_pu16 (res, MC(4x0080));
     res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
     res = _mm_srli_pi16 (res, 8);
-    
+
     return res;
 }
 
@@ -204,7 +204,7 @@ static inline __m64
 expand_alpha_rev (__m64 pixel)
 {
     return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(0, 0, 0, 0));
-}    
+}
 
 static inline __m64
 invert_colors (__m64 pixel)
@@ -218,7 +218,7 @@ static inline __m64
 expand_alpha (__m64 pixel)
 {
     __m64 t1, t2;
-    
+
     t1 = shift (pixel, -48);
     t2 = shift (t1, 16);
     t1 = _mm_or_si64 (t1, t2);
@@ -278,7 +278,7 @@ over_rev_non_pre (__m64 src, __m64 dest)
 {
     __m64 srca = expand_alpha (src);
     __m64 srcfaaa = _mm_or_si64 (srca, MC(full_alpha));
-    
+
     return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest);
 }
 
@@ -331,14 +331,14 @@ store8888 (__m64 v)
 /* Expand 16 bits positioned at @pos (0-3) of a mmx register into
  *
  *    00RR00GG00BB
- * 
+ *
  * --- Expanding 565 in the low word ---
- * 
+ *
  * m = (m << (32 - 3)) | (m << (16 - 5)) | m;
  * m = m & (01f0003f001f);
  * m = m * (008404100840);
  * m = m >> 8;
- * 
+ *
  * Note the trick here - the top word is shifted by another nibble to
  * avoid it bumping into the middle word
  */
@@ -347,17 +347,17 @@ expand565 (__m64 pixel, int pos)
 {
     __m64 p = pixel;
     __m64 t1, t2;
-    
+
     /* move pixel to low 16 bit and zero the rest */
-    p = shift (shift (p, (3 - pos) * 16), -48); 
-    
+    p = shift (shift (p, (3 - pos) * 16), -48);
+
     t1 = shift (p, 36 - 11);
     t2 = shift (p, 16 - 5);
-    
+
     p = _mm_or_si64 (t1, p);
     p = _mm_or_si64 (t2, p);
     p = _mm_and_si64 (p, MC(565_rgb));
-    
+
     pixel = _mm_mullo_pi16 (p, MC(565_unpack_multiplier));
     return _mm_srli_pi16 (pixel, 8);
 }
@@ -377,15 +377,15 @@ pack565 (__m64 pixel, __m64 target, int pos)
     __m64 p = pixel;
     __m64 t = target;
     __m64 r, g, b;
-    
+
     r = _mm_and_si64 (p, MC(565_r));
     g = _mm_and_si64 (p, MC(565_g));
     b = _mm_and_si64 (p, MC(565_b));
-    
+
     r = shift (r, - (32 - 8) + pos * 16);
     g = shift (g, - (16 - 3) + pos * 16);
     b = shift (b, - (0  + 3) + pos * 16);
-    
+
     if (pos == 0)
 	t = _mm_and_si64 (t, MC(mask_0));
     else if (pos == 1)
@@ -394,10 +394,10 @@ pack565 (__m64 pixel, __m64 target, int pos)
 	t = _mm_and_si64 (t, MC(mask_2));
     else if (pos == 3)
 	t = _mm_and_si64 (t, MC(mask_3));
-    
+
     p = _mm_or_si64 (r, t);
     p = _mm_or_si64 (g, p);
-    
+
     return _mm_or_si64 (b, p);
 }
 
@@ -453,7 +453,7 @@ static FASTCALL void
 mmxCombineOverU (uint32_t *dest, const uint32_t *src, int width)
 {
     const uint32_t *end = dest + width;
-    
+
     while (dest < end) {
 	uint32_t ssrc = *src;
 	uint32_t a = ssrc >> 24;
@@ -691,9 +691,9 @@ mmxCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
         __m64 s = load8888(*src);
         __m64 d = load8888(*dest);
         __m64 sa = expand_alpha(s);
-	
+
 	*dest = store8888(in_over (s, sa, a, d));
-	
+
         ++src;
         ++dest;
         ++mask;
@@ -712,7 +712,7 @@ mmxCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width
         __m64 da = expand_alpha(d);
 
 	*dest = store8888(over (d, da, in (s, a)));
-	
+
         ++src;
         ++dest;
         ++mask;
@@ -808,7 +808,7 @@ mmxCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
         __m64 s = load8888(*src);
         __m64 d = load8888(*dest);
         __m64 da = expand_alpha(d);
-        __m64 sa = expand_alpha(s); 
+        __m64 sa = expand_alpha(s);
         s = pix_multiply(s, a);
         a = pix_multiply(a, sa);
         a = negate(a);
@@ -914,7 +914,7 @@ void fbComposeSetupMMX(void)
         pixman_composeFunctions.combineC[PIXMAN_OP_ADD] = mmxCombineAddC;
 
         pixman_composeFunctions.combineMaskU = mmxCombineMaskU;
-    } 
+    }
 }
 
 
@@ -939,62 +939,62 @@ fbCompositeSolid_nx8888mmx (pixman_op_t op,
     uint16_t	w;
     int	dstStride;
     __m64	vsrc, vsrca;
-    
+
     CHECKPOINT();
-    
+
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
-    
+
     if (src >> 24 == 0)
 	return;
-    
+
     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-    
+
     vsrc = load8888 (src);
     vsrca = expand_alpha (vsrc);
-    
+
     while (height--)
     {
 	dst = dstLine;
 	dstLine += dstStride;
 	w = width;
-	
+
 	CHECKPOINT();
-	
+
 	while (w && (unsigned long)dst & 7)
 	{
 	    *dst = store8888(over(vsrc, vsrca, load8888(*dst)));
-	    
+
 	    w--;
 	    dst++;
 	}
-	
+
 	while (w >= 2)
 	{
 	    __m64 vdest;
 	    __m64 dest0, dest1;
-	    
+
 	    vdest = *(__m64 *)dst;
-	    
+
 	    dest0 = over(vsrc, vsrca, expand8888(vdest, 0));
 	    dest1 = over(vsrc, vsrca, expand8888(vdest, 1));
-	    
+
 	    *(__m64 *)dst = pack8888(dest0, dest1);
-	    
+
 	    dst += 2;
 	    w -= 2;
 	}
-	
+
 	CHECKPOINT();
-	
+
 	while (w)
 	{
 	    *dst = store8888(over(vsrc, vsrca, load8888(*dst)));
-	    
+
 	    w--;
 	    dst++;
 	}
     }
-    
+
     _mm_empty();
 }
 
@@ -1017,69 +1017,69 @@ fbCompositeSolid_nx0565mmx (pixman_op_t op,
     uint16_t	w;
     int	dstStride;
     __m64	vsrc, vsrca;
-    
+
     CHECKPOINT();
-    
+
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
-    
+
     if (src >> 24 == 0)
 	return;
-    
+
     fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-    
+
     vsrc = load8888 (src);
     vsrca = expand_alpha (vsrc);
-    
+
     while (height--)
     {
 	dst = dstLine;
 	dstLine += dstStride;
 	w = width;
-	
+
 	CHECKPOINT();
-	
+
 	while (w && (unsigned long)dst & 7)
 	{
 	    ullong d = *dst;
 	    __m64 vdest = expand565 ((__m64)d, 0);
 	    vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
 	    *dst = (ullong)vdest;
-	    
+
 	    w--;
 	    dst++;
 	}
-	
+
 	while (w >= 4)
 	{
 	    __m64 vdest;
-	    
+
 	    vdest = *(__m64 *)dst;
-	    
+
 	    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 0)), vdest, 0);
 	    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 1)), vdest, 1);
 	    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 2)), vdest, 2);
 	    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 3)), vdest, 3);
-	    
+
 	    *(__m64 *)dst = vdest;
-	    
+
 	    dst += 4;
 	    w -= 4;
 	}
-	
+
 	CHECKPOINT();
-	
+
 	while (w)
 	{
 	    ullong d = *dst;
 	    __m64 vdest = expand565 ((__m64)d, 0);
 	    vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
 	    *dst = (ullong)vdest;
-	    
+
 	    w--;
 	    dst++;
 	}
     }
-    
+
     _mm_empty();
 }
 
@@ -1102,87 +1102,87 @@ fbCompositeSolidMask_nx8888x8888Cmmx (pixman_op_t op,
     uint32_t	*maskLine;
     int	dstStride, maskStride;
     __m64	vsrc, vsrca;
-    
+
     CHECKPOINT();
-    
+
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
-    
+
     srca = src >> 24;
     if (srca == 0)
 	return;
-    
+
     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
     fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1);
-    
+
     vsrc = load8888(src);
     vsrca = expand_alpha(vsrc);
-    
+
     while (height--)
     {
 	int twidth = width;
 	uint32_t *p = (uint32_t *)maskLine;
 	uint32_t *q = (uint32_t *)dstLine;
-	
+
 	while (twidth && (unsigned long)q & 7)
 	{
 	    uint32_t m = *(uint32_t *)p;
-	    
+
 	    if (m)
 	    {
 		__m64 vdest = load8888(*q);
 		vdest = in_over(vsrc, vsrca, load8888(m), vdest);
 		*q = store8888(vdest);
 	    }
-	    
+
 	    twidth--;
 	    p++;
 	    q++;
 	}
-	
+
 	while (twidth >= 2)
 	{
 	    uint32_t m0, m1;
 	    m0 = *p;
 	    m1 = *(p + 1);
-	    
+
 	    if (m0 | m1)
 	    {
 		__m64 dest0, dest1;
 		__m64 vdest = *(__m64 *)q;
-		
+
 		dest0 = in_over(vsrc, vsrca, load8888(m0),
 				expand8888 (vdest, 0));
 		dest1 = in_over(vsrc, vsrca, load8888(m1),
 				expand8888 (vdest, 1));
-		
+
 		*(__m64 *)q = pack8888(dest0, dest1);
 	    }
-	    
+
 	    p += 2;
 	    q += 2;
 	    twidth -= 2;
 	}
-	
+
 	while (twidth)
 	{
 	    uint32_t m = *(uint32_t *)p;
-	    
+
 	    if (m)
 	    {
 		__m64 vdest = load8888(*q);
 		vdest = in_over(vsrc, vsrca, load8888(m), vdest);
 		*q = store8888(vdest);
 	    }
-	    
+
 	    twidth--;
 	    p++;
 	    q++;
 	}
-	
+
 	dstLine += dstStride;
 	maskLine += maskStride;
     }
-    
+
     _mm_empty();
 }
 
@@ -1332,7 +1332,7 @@ fbCompositeSrc_x888xnx8888mmx (pixman_op_t op,
 	    __m64 vd5 = *(__m64 *)(dst + 10);
 	    __m64 vd6 = *(__m64 *)(dst + 12);
 	    __m64 vd7 = *(__m64 *)(dst + 14);
-	    
+
 	    __m64 vs0 = *(__m64 *)(src + 0);
 	    __m64 vs1 = *(__m64 *)(src + 2);
 	    __m64 vs2 = *(__m64 *)(src + 4);
@@ -1341,27 +1341,27 @@ fbCompositeSrc_x888xnx8888mmx (pixman_op_t op,
 	    __m64 vs5 = *(__m64 *)(src + 10);
 	    __m64 vs6 = *(__m64 *)(src + 12);
 	    __m64 vs7 = *(__m64 *)(src + 14);
-	    
+
 	    vd0 = pack8888 (
 		in_over (expand8888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
 		in_over (expand8888 (vs0, 1), srca, vmask, expand8888 (vd0, 1)));
-	    
+
 	    vd1 = pack8888 (
 		in_over (expand8888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)),
 		in_over (expand8888 (vs1, 1), srca, vmask, expand8888 (vd1, 1)));
-	    
+
 	    vd2 = pack8888 (
 		in_over (expand8888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)),
 		in_over (expand8888 (vs2, 1), srca, vmask, expand8888 (vd2, 1)));
-	    
+
 	    vd3 = pack8888 (
 		in_over (expand8888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)),
 		in_over (expand8888 (vs3, 1), srca, vmask, expand8888 (vd3, 1)));
-	    
+
 	    vd4 = pack8888 (
 		in_over (expand8888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)),
 		in_over (expand8888 (vs4, 1), srca, vmask, expand8888 (vd4, 1)));
-	    
+
 	    vd5 = pack8888 (
 		in_over (expand8888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)),
 		in_over (expand8888 (vs5, 1), srca, vmask, expand8888 (vd5, 1)));
@@ -1387,7 +1387,7 @@ fbCompositeSrc_x888xnx8888mmx (pixman_op_t op,
 	    dst += 16;
 	    src += 16;
 	}
-	
+
 	while (w)
 	{
 	    __m64 s = load8888 (*src | 0xff000000);
@@ -1424,9 +1424,9 @@ fbCompositeSrc_8888x8888mmx (pixman_op_t op,
     int	dstStride, srcStride;
     uint8_t     a;
     uint16_t	w;
-    
+
     CHECKPOINT();
-    
+
     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
     fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
 
@@ -1453,7 +1453,7 @@ fbCompositeSrc_8888x8888mmx (pixman_op_t op,
 	    dst++;
 	}
     }
-    _mm_empty(); 
+    _mm_empty();
 }
 
 void
@@ -1474,17 +1474,17 @@ fbCompositeSrc_8888x0565mmx (pixman_op_t op,
     uint32_t	*srcLine, *src;
     int	dstStride, srcStride;
     uint16_t	w;
-    
+
     CHECKPOINT();
-    
+
     fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
     fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-    
+
 #if 0
     /* FIXME */
     assert (pSrc->pDrawable == pMask->pDrawable);
 #endif
-    
+
     while (height--)
     {
 	dst = dstLine;
@@ -1492,26 +1492,26 @@ fbCompositeSrc_8888x0565mmx (pixman_op_t op,
 	src = srcLine;
 	srcLine += srcStride;
 	w = width;
-	
+
 	CHECKPOINT();
-	
+
 	while (w && (unsigned long)dst & 7)
 	{
 	    __m64 vsrc = load8888 (*src);
 	    ullong d = *dst;
 	    __m64 vdest = expand565 ((__m64)d, 0);
-	    
+
 	    vdest = pack565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0);
-	    
+
 	    *dst = (ullong)vdest;
-	    
+
 	    w--;
 	    dst++;
 	    src++;
 	}
-	
+
 	CHECKPOINT();
-	
+
 	while (w >= 4)
 	{
 	    __m64 vsrc0, vsrc1, vsrc2, vsrc3;
@@ -1523,12 +1523,12 @@ fbCompositeSrc_8888x0565mmx (pixman_op_t op,
 	    vsrc3 = load8888(*(src + 3));
 
 	    vdest = *(__m64 *)dst;
-	    
+
 	    vdest = pack565(over(vsrc0, expand_alpha(vsrc0), expand565(vdest, 0)), vdest, 0);
 	    vdest = pack565(over(vsrc1, expand_alpha(vsrc1), expand565(vdest, 1)), vdest, 1);
 	    vdest = pack565(over(vsrc2, expand_alpha(vsrc2), expand565(vdest, 2)), vdest, 2);
 	    vdest = pack565(over(vsrc3, expand_alpha(vsrc3), expand565(vdest, 3)), vdest, 3);
-	    
+
 	    *(__m64 *)dst = vdest;
 
 	    w -= 4;
@@ -1537,23 +1537,23 @@ fbCompositeSrc_8888x0565mmx (pixman_op_t op,
 	}
 
 	CHECKPOINT();
-	
+
 	while (w)
 	{
 	    __m64 vsrc = load8888 (*src);
 	    ullong d = *dst;
 	    __m64 vdest = expand565 ((__m64)d, 0);
-	    
+
 	    vdest = pack565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0);
-	    
+
 	    *dst = (ullong)vdest;
-	    
+
 	    w--;
 	    dst++;
 	    src++;
 	}
     }
-    
+
     _mm_empty();
 }
 
@@ -1578,23 +1578,23 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_op_t op,
     uint16_t	w;
     __m64	vsrc, vsrca;
     ullong	srcsrc;
-    
+
     CHECKPOINT();
-    
+
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
-    
+
     srca = src >> 24;
     if (srca == 0)
 	return;
-    
+
     srcsrc = (unsigned long long)src << 32 | src;
-    
+
     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
     fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
-    
+
     vsrc = load8888 (src);
     vsrca = expand_alpha (vsrc);
-    
+
     while (height--)
     {
 	dst = dstLine;
@@ -1602,32 +1602,32 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_op_t op,
 	mask = maskLine;
 	maskLine += maskStride;
 	w = width;
-	
+
 	CHECKPOINT();
-	
+
 	while (w && (unsigned long)dst & 7)
 	{
 	    ullong m = *mask;
-	    
+
 	    if (m)
 	    {
 		__m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), load8888(*dst));
 		*dst = store8888(vdest);
 	    }
-	    
+
 	    w--;
 	    mask++;
 	    dst++;
 	}
-	
+
 	CHECKPOINT();
-	
+
 	while (w >= 2)
 	{
 	    ullong m0, m1;
 	    m0 = *mask;
 	    m1 = *(mask + 1);
-	    
+
 	    if (srca == 0xff && (m0 & m1) == 0xff)
 	    {
 		*(unsigned long long *)dst = srcsrc;
@@ -1636,39 +1636,39 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_op_t op,
 	    {
 		__m64 vdest;
 		__m64 dest0, dest1;
-		
+
 		vdest = *(__m64 *)dst;
-		
+
 		dest0 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m0), expand8888(vdest, 0));
 		dest1 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m1), expand8888(vdest, 1));
-		
+
 		*(__m64 *)dst = pack8888(dest0, dest1);
 	    }
-	    
+
 	    mask += 2;
 	    dst += 2;
 	    w -= 2;
 	}
-	
+
 	CHECKPOINT();
-	
+
 	while (w)
 	{
 	    ullong m = *mask;
-	    
+
 	    if (m)
 	    {
 		__m64 vdest = load8888(*dst);
 		vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), vdest);
 		*dst = store8888(vdest);
 	    }
-	    
+
 	    w--;
 	    mask++;
 	    dst++;
 	}
     }
-    
+
     _mm_empty();
 }
 
@@ -1689,13 +1689,13 @@ pixman_fill_mmx (uint32_t *bits,
 #ifdef __GNUC__
     __m64	v1, v2, v3, v4, v5, v6, v7;
 #endif
-    
+
     if (bpp == 16 && (xor >> 16 != (xor & 0xffff)))
 	return FALSE;
-    
+
     if (bpp != 16 && bpp != 32)
 	return FALSE;
-    
+
     if (bpp == 16)
     {
 	stride = stride * sizeof (uint32_t) / 2;
@@ -1710,10 +1710,10 @@ pixman_fill_mmx (uint32_t *bits,
 	byte_width = 4 * width;
 	stride *= 4;
     }
-    
+
     fill = ((ullong)xor << 32) | xor;
     vfill = (__m64)fill;
-    
+
 #ifdef __GNUC__
     __asm__ (
 	"movq		%7,	%0\n"
@@ -1727,25 +1727,25 @@ pixman_fill_mmx (uint32_t *bits,
 	  "=y" (v4), "=y" (v5), "=y" (v6), "=y" (v7)
 	: "y" (vfill));
 #endif
-    
+
     while (height--)
     {
 	int w;
 	uint8_t *d = byte_line;
 	byte_line += stride;
 	w = byte_width;
-	
+
 	while (w >= 2 && ((unsigned long)d & 3))
 	{
 	    *(uint16_t *)d = xor;
 	    w -= 2;
 	    d += 2;
 	}
-	
+
 	while (w >= 4 && ((unsigned long)d & 7))
 	{
 	    *(uint32_t *)d = xor;
-	    
+
 	    w -= 4;
 	    d += 4;
 	}
@@ -1776,15 +1776,15 @@ pixman_fill_mmx (uint32_t *bits,
 	    *(__m64*) (d + 40) = vfill;
 	    *(__m64*) (d + 48) = vfill;
 	    *(__m64*) (d + 56) = vfill;
-#endif    
+#endif
 	    w -= 64;
 	    d += 64;
 	}
-	
+
 	while (w >= 4)
 	{
 	    *(uint32_t *)d = xor;
-	    
+
 	    w -= 4;
 	    d += 4;
 	}
@@ -1795,7 +1795,7 @@ pixman_fill_mmx (uint32_t *bits,
 	    d += 2;
 	}
     }
-    
+
     _mm_empty();
     return TRUE;
 }
@@ -1952,26 +1952,26 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
     uint16_t	w;
     __m64	vsrc, vsrca;
     unsigned long long srcsrcsrcsrc, src16;
-    
+
     CHECKPOINT();
-    
+
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
-    
+
     srca = src >> 24;
     if (srca == 0)
 	return;
-    
+
     fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
     fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
-    
+
     vsrc = load8888 (src);
     vsrca = expand_alpha (vsrc);
-    
+
     src16 = (ullong)pack565(vsrc, _mm_setzero_si64(), 0);
-    
+
     srcsrcsrcsrc = (ullong)src16 << 48 | (ullong)src16 << 32 |
 	(ullong)src16 << 16 | (ullong)src16;
-    
+
     while (height--)
     {
 	dst = dstLine;
@@ -1979,13 +1979,13 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
 	mask = maskLine;
 	maskLine += maskStride;
 	w = width;
-	
+
 	CHECKPOINT();
-	
+
 	while (w && (unsigned long)dst & 7)
 	{
 	    ullong m = *mask;
-	    
+
 	    if (m)
 	    {
 		ullong d = *dst;
@@ -1993,14 +1993,14 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
 		__m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), expand565(vd, 0));
 		*dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0);
 	    }
-	    
+
 	    w--;
 	    mask++;
 	    dst++;
 	}
-	
+
 	CHECKPOINT();
-	
+
 	while (w >= 4)
 	{
 	    ullong m0, m1, m2, m3;
@@ -2008,7 +2008,7 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
 	    m1 = *(mask + 1);
 	    m2 = *(mask + 2);
 	    m3 = *(mask + 3);
-	    
+
 	    if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff)
 	    {
 		*(unsigned long long *)dst = srcsrcsrcsrc;
@@ -2017,9 +2017,9 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
 	    {
 		__m64 vdest;
 		__m64 vm0, vm1, vm2, vm3;
-		
+
 		vdest = *(__m64 *)dst;
-		
+
 		vm0 = (__m64)m0;
 		vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm0), expand565(vdest, 0)), vdest, 0);
 		vm1 = (__m64)m1;
@@ -2028,21 +2028,21 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
 		vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm2), expand565(vdest, 2)), vdest, 2);
 		vm3 = (__m64)m3;
 		vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm3), expand565(vdest, 3)), vdest, 3);
-		
+
 		*(__m64 *)dst = vdest;
 	    }
-	    
+
 	    w -= 4;
 	    mask += 4;
 	    dst += 4;
 	}
-	
+
 	CHECKPOINT();
-	
+
 	while (w)
 	{
 	    ullong m = *mask;
-	    
+
 	    if (m)
 	    {
 		ullong d = *dst;
@@ -2050,13 +2050,13 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
 		__m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), expand565(vd, 0));
 		*dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0);
 	    }
-	    
+
 	    w--;
 	    mask++;
 	    dst++;
 	}
     }
-    
+
     _mm_empty();
 }
 
@@ -2078,17 +2078,17 @@ fbCompositeSrc_8888RevNPx0565mmx (pixman_op_t op,
     uint32_t	*srcLine, *src;
     int	dstStride, srcStride;
     uint16_t	w;
-    
+
     CHECKPOINT();
-    
+
     fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
     fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-    
+
 #if 0
     /* FIXME */
     assert (pSrc->pDrawable == pMask->pDrawable);
 #endif
-    
+
     while (height--)
     {
 	dst = dstLine;
@@ -2096,41 +2096,41 @@ fbCompositeSrc_8888RevNPx0565mmx (pixman_op_t op,
 	src = srcLine;
 	srcLine += srcStride;
 	w = width;
-	
+
 	CHECKPOINT();
-	
+
 	while (w && (unsigned long)dst & 7)
 	{
 	    __m64 vsrc = load8888 (*src);
 	    ullong d = *dst;
 	    __m64 vdest = expand565 ((__m64)d, 0);
-	    
+
 	    vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);
-	    
+
 	    *dst = (ullong)vdest;
-	    
+
 	    w--;
 	    dst++;
 	    src++;
 	}
-	
+
 	CHECKPOINT();
-	
+
 	while (w >= 4)
 	{
 	    uint32_t s0, s1, s2, s3;
 	    unsigned char a0, a1, a2, a3;
-	    
+
 	    s0 = *src;
 	    s1 = *(src + 1);
 	    s2 = *(src + 2);
 	    s3 = *(src + 3);
-	    
+
 	    a0 = (s0 >> 24);
 	    a1 = (s1 >> 24);
 	    a2 = (s2 >> 24);
 	    a3 = (s3 >> 24);
-	    
+
 	    if ((a0 & a1 & a2 & a3) == 0xFF)
 	    {
 		__m64 vdest;
@@ -2138,44 +2138,44 @@ fbCompositeSrc_8888RevNPx0565mmx (pixman_op_t op,
 		vdest = pack565(invert_colors(load8888(s1)), vdest, 1);
 		vdest = pack565(invert_colors(load8888(s2)), vdest, 2);
 		vdest = pack565(invert_colors(load8888(s3)), vdest, 3);
-		
+
 		*(__m64 *)dst = vdest;
 	    }
 	    else if (a0 | a1 | a2 | a3)
 	    {
 		__m64 vdest = *(__m64 *)dst;
-		
+
 		vdest = pack565(over_rev_non_pre(load8888(s0), expand565(vdest, 0)), vdest, 0);
 	        vdest = pack565(over_rev_non_pre(load8888(s1), expand565(vdest, 1)), vdest, 1);
 		vdest = pack565(over_rev_non_pre(load8888(s2), expand565(vdest, 2)), vdest, 2);
 		vdest = pack565(over_rev_non_pre(load8888(s3), expand565(vdest, 3)), vdest, 3);
-		
+
 		*(__m64 *)dst = vdest;
 	    }
-	    
+
 	    w -= 4;
 	    dst += 4;
 	    src += 4;
 	}
-	
+
 	CHECKPOINT();
-	
+
 	while (w)
 	{
 	    __m64 vsrc = load8888 (*src);
 	    ullong d = *dst;
 	    __m64 vdest = expand565 ((__m64)d, 0);
-	    
+
 	    vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);
-	    
+
 	    *dst = (ullong)vdest;
-	    
+
 	    w--;
 	    dst++;
 	    src++;
 	}
     }
-    
+
     _mm_empty();
 }
 
@@ -2199,17 +2199,17 @@ fbCompositeSrc_8888RevNPx8888mmx (pixman_op_t op,
     uint32_t	*srcLine, *src;
     int	dstStride, srcStride;
     uint16_t	w;
-    
+
     CHECKPOINT();
-    
+
     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
     fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-    
+
 #if 0
     /* FIXME */
     assert (pSrc->pDrawable == pMask->pDrawable);
 #endif
-    
+
     while (height--)
     {
 	dst = dstLine;
@@ -2217,66 +2217,66 @@ fbCompositeSrc_8888RevNPx8888mmx (pixman_op_t op,
 	src = srcLine;
 	srcLine += srcStride;
 	w = width;
-	
+
 	while (w && (unsigned long)dst & 7)
 	{
 	    __m64 s = load8888 (*src);
 	    __m64 d = load8888 (*dst);
-	    
+
 	    *dst = store8888 (over_rev_non_pre (s, d));
-	    
+
 	    w--;
 	    dst++;
 	    src++;
 	}
-	
+
 	while (w >= 2)
 	{
 	    ullong s0, s1;
 	    unsigned char a0, a1;
 	    __m64 d0, d1;
-	    
+
 	    s0 = *src;
 	    s1 = *(src + 1);
-	    
+
 	    a0 = (s0 >> 24);
 	    a1 = (s1 >> 24);
-	    
+
 	    if ((a0 & a1) == 0xFF)
 	    {
 		d0 = invert_colors(load8888(s0));
 		d1 = invert_colors(load8888(s1));
-		
+
 		*(__m64 *)dst = pack8888 (d0, d1);
 	    }
 	    else if (a0 | a1)
 	    {
 		__m64 vdest = *(__m64 *)dst;
-		
+
 		d0 = over_rev_non_pre (load8888(s0), expand8888 (vdest, 0));
 		d1 = over_rev_non_pre (load8888(s1), expand8888 (vdest, 1));
-		
+
 		*(__m64 *)dst = pack8888 (d0, d1);
 	    }
-	    
+
 	    w -= 2;
 	    dst += 2;
 	    src += 2;
 	}
-	
+
 	while (w)
 	{
 	    __m64 s = load8888 (*src);
 	    __m64 d = load8888 (*dst);
-	    
+
 	    *dst = store8888 (over_rev_non_pre (s, d));
-	    
+
 	    w--;
 	    dst++;
 	    src++;
 	}
     }
-    
+
     _mm_empty();
 }
 
@@ -2299,31 +2299,31 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_op_t op,
     uint32_t	*maskLine;
     int	dstStride, maskStride;
     __m64  vsrc, vsrca;
-    
+
     CHECKPOINT();
-    
+
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
-    
+
     srca = src >> 24;
     if (srca == 0)
 	return;
-    
+
     fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
     fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1);
-    
+
     vsrc = load8888 (src);
     vsrca = expand_alpha (vsrc);
-    
+
     while (height--)
     {
 	int twidth = width;
 	uint32_t *p = (uint32_t *)maskLine;
 	uint16_t *q = (uint16_t *)dstLine;
-	
+
 	while (twidth && ((unsigned long)q & 7))
 	{
 	    uint32_t m = *(uint32_t *)p;
-	    
+
 	    if (m)
 	    {
 		ullong d = *q;
@@ -2331,41 +2331,41 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_op_t op,
 		vdest = pack565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
 		*q = (ullong)vdest;
 	    }
-	    
+
 	    twidth--;
 	    p++;
 	    q++;
 	}
-	
+
 	while (twidth >= 4)
 	{
 	    uint32_t m0, m1, m2, m3;
-	    
+
 	    m0 = *p;
 	    m1 = *(p + 1);
 	    m2 = *(p + 2);
 	    m3 = *(p + 3);
-	    
+
 	    if ((m0 | m1 | m2 | m3))
 	    {
 		__m64 vdest = *(__m64 *)q;
-		
+
 		vdest = pack565(in_over(vsrc, vsrca, load8888(m0), expand565(vdest, 0)), vdest, 0);
 		vdest = pack565(in_over(vsrc, vsrca, load8888(m1), expand565(vdest, 1)), vdest, 1);
 		vdest = pack565(in_over(vsrc, vsrca, load8888(m2), expand565(vdest, 2)), vdest, 2);
 		vdest = pack565(in_over(vsrc, vsrca, load8888(m3), expand565(vdest, 3)), vdest, 3);
-		
+
 		*(__m64 *)q = vdest;
 	    }
 	    twidth -= 4;
 	    p += 4;
 	    q += 4;
 	}
-	
+
 	while (twidth)
 	{
 	    uint32_t m;
-	    
+
 	    m = *(uint32_t *)p;
 	    if (m)
 	    {
@@ -2374,16 +2374,16 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_op_t op,
 		vdest = pack565 (in_over(vsrc, vsrca, load8888(m), vdest), vdest, 0);
 		*q = (ullong)vdest;
 	    }
-	    
+
 	    twidth--;
 	    p++;
 	    q++;
 	}
-	
+
 	maskLine += maskStride;
 	dstLine += dstStride;
     }
-    
+
     _mm_empty ();
 }
 
@@ -2633,12 +2633,12 @@ fbCompositeSrcAdd_8000x8000mmx (pixman_op_t op,
     uint16_t	w;
     uint8_t	s, d;
     uint16_t	t;
-    
+
     CHECKPOINT();
-    
+
     fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
     fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
-    
+
     while (height--)
     {
 	dst = dstLine;
@@ -2646,7 +2646,7 @@ fbCompositeSrcAdd_8000x8000mmx (pixman_op_t op,
 	src = srcLine;
 	srcLine += srcStride;
 	w = width;
-	
+
 	while (w && (unsigned long)dst & 7)
 	{
 	    s = *src;
@@ -2654,12 +2654,12 @@ fbCompositeSrcAdd_8000x8000mmx (pixman_op_t op,
 	    t = d + s;
 	    s = t | (0 - (t >> 8));
 	    *dst = s;
-	    
+
 	    dst++;
 	    src++;
 	    w--;
 	}
-	
+
 	while (w >= 8)
 	{
 	    *(__m64*)dst = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
@@ -2667,7 +2667,7 @@ fbCompositeSrcAdd_8000x8000mmx (pixman_op_t op,
 	    src += 8;
 	    w -= 8;
 	}
-	
+
 	while (w)
 	{
 	    s = *src;
@@ -2675,13 +2675,13 @@ fbCompositeSrcAdd_8000x8000mmx (pixman_op_t op,
 	    t = d + s;
 	    s = t | (0 - (t >> 8));
 	    *dst = s;
-	    
+
 	    dst++;
 	    src++;
 	    w--;
 	}
     }
-    
+
     _mm_empty();
 }
 
@@ -2703,12 +2703,12 @@ fbCompositeSrcAdd_8888x8888mmx (pixman_op_t 	op,
     uint32_t	*srcLine, *src;
     int	dstStride, srcStride;
     uint16_t	w;
-    
+
     CHECKPOINT();
-    
+
     fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-    
+
     while (height--)
     {
 	dst = dstLine;
@@ -2716,7 +2716,7 @@ fbCompositeSrcAdd_8888x8888mmx (pixman_op_t 	op,
 	src = srcLine;
 	srcLine += srcStride;
 	w = width;
-	
+
 	while (w && (unsigned long)dst & 7)
 	{
 	    *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src),
@@ -2725,7 +2725,7 @@ fbCompositeSrcAdd_8888x8888mmx (pixman_op_t 	op,
 	    src++;
 	    w--;
 	}
-	
+
 	while (w >= 2)
 	{
 	    *(ullong*)dst = (ullong) _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
@@ -2733,19 +2733,19 @@ fbCompositeSrcAdd_8888x8888mmx (pixman_op_t 	op,
 	    src += 2;
 	    w -= 2;
 	}
-	
+
 	if (w)
 	{
 	    *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src),
 						 _mm_cvtsi32_si64(*dst)));
-	    
+
 	}
     }
-    
+
     _mm_empty();
 }
 
-pixman_bool_t 
+pixman_bool_t
 pixman_blt_mmx (uint32_t *src_bits,
 		uint32_t *dst_bits,
 		int src_stride,
@@ -2759,10 +2759,10 @@ pixman_blt_mmx (uint32_t *src_bits,
     uint8_t *	src_bytes;
     uint8_t *	dst_bytes;
     int		byte_width;
-    
+
     if (src_bpp != dst_bpp)
 	return FALSE;
-    
+
     if (src_bpp == 16)
     {
 	src_stride = src_stride * sizeof (uint32_t) / 2;
@@ -2792,7 +2792,7 @@ pixman_blt_mmx (uint32_t *src_bits,
 	src_bytes += src_stride;
 	dst_bytes += dst_stride;
 	w = byte_width;
-	
+
 	while (w >= 2 && ((unsigned long)d & 3))
 	{
 	    *(uint16_t *)d = *(uint16_t *)s;
@@ -2800,16 +2800,16 @@ pixman_blt_mmx (uint32_t *src_bits,
 	    s += 2;
 	    d += 2;
 	}
-	
+
 	while (w >= 4 && ((unsigned long)d & 7))
 	{
 	    *(uint32_t *)d = *(uint32_t *)s;
-	    
+
 	    w -= 4;
 	    s += 4;
 	    d += 4;
 	}
-	
+
 	while (w >= 64)
 	{
 #ifdef __GNUC__
@@ -2853,8 +2853,8 @@ pixman_blt_mmx (uint32_t *src_bits,
 	    *(__m64 *)(d + 40) = v5;
 	    *(__m64 *)(d + 48) = v6;
 	    *(__m64 *)(d + 56) = v7;
-#endif	    
-	    
+#endif
+
 	    w -= 64;
 	    s += 64;
 	    d += 64;
@@ -2875,7 +2875,7 @@ pixman_blt_mmx (uint32_t *src_bits,
 	    d += 2;
 	}
     }
-    
+
     _mm_empty();
 
     return TRUE;
@@ -2960,7 +2960,7 @@ fbCompositeOver_x888x8x8888mmx (pixman_op_t      op,
 		    *dst = store8888 (vdest);
 		}
 	    }
-	    
+
 	    mask++;
 	    dst++;
 	    src++;
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index c7d73fc..d007e4b 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -34,8 +34,8 @@
 
 #undef READ
 #undef WRITE
-#define READ(x) (*(x))
-#define WRITE(ptr,v) ((*(ptr)) = (v))
+#define READ(img,x) (*(x))
+#define WRITE(img,ptr,v) ((*(ptr)) = (v))
 
 typedef void (* CompositeFunc) (pixman_op_t,
 				pixman_image_t *, pixman_image_t *, pixman_image_t *,
@@ -127,24 +127,24 @@ fbCompositeOver_x888x8x8888 (pixman_op_t      op,
 	w = width;
 	while (w--)
 	{
-	    m = READ(mask++);
+	    m = READ(pMask, mask++);
 	    if (m)
 	    {
-		s = READ(src) | 0xff000000;
+		s = READ(pSrc, src) | 0xff000000;
 
 		if (m == 0xff)
-		    WRITE (dst, s);
+		    WRITE(pDst, dst, s);
 		else
 		{
 		    d = fbIn (s, m);
-		    WRITE(dst, fbOver (d, READ(dst)));
+		    WRITE(pDst, dst, fbOver (d, READ(pDst, dst)));
 		}
 	    }
 	    src++;
 	    dst++;
 	}
     }
-    
+
     fbFinishAccess (pMask->pDrawable);
     fbFinishAccess (pDst->pDrawable);
 }
@@ -169,12 +169,12 @@ fbCompositeSolidMaskIn_nx8x8 (pixman_op_t      op,
     int	dstStride, maskStride;
     uint16_t	w;
     uint16_t    t;
-    
+
     fbComposeGetSolid(iSrc, src, iDst->bits.format);
 
     dstMask = FbFullMask (PIXMAN_FORMAT_DEPTH (iDst->bits.format));
     srca = src >> 24;
-    
+
     fbComposeGetStart (iDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
     fbComposeGetStart (iMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
 
@@ -251,10 +251,10 @@ fbCompositeSrcIn_8x8 (pixman_op_t      op,
     uint16_t	w;
     uint8_t	s;
     uint16_t	t;
-    
+
     fbComposeGetStart (iSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
     fbComposeGetStart (iDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
-    
+
     while (height--)
     {
 	dst = dstLine;
@@ -262,7 +262,7 @@ fbCompositeSrcIn_8x8 (pixman_op_t      op,
 	src = srcLine;
 	srcLine += srcStride;
 	w = width;
-	
+
 	while (w--)
 	{
 	    s = *src++;
@@ -305,10 +305,10 @@ fbCompositeSolidMask_nx8x8888 (pixman_op_t      op,
     srca = src >> 24;
     if (src == 0)
 	return;
-    
+
     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
     fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
-    
+
     while (height--)
     {
 	dst = dstLine;
@@ -316,26 +316,26 @@ fbCompositeSolidMask_nx8x8888 (pixman_op_t      op,
 	mask = maskLine;
 	maskLine += maskStride;
 	w = width;
-	
+
 	while (w--)
 	{
-	    m = READ(mask++);
+	    m = READ(pMask, mask++);
 	    if (m == 0xff)
 	    {
 		if (srca == 0xff)
-		    WRITE(dst, src & dstMask);
+		    WRITE(pDst, dst, src & dstMask);
 		else
-		    WRITE(dst, fbOver (src, READ(dst)) & dstMask);
+		    WRITE(pDst, dst, fbOver (src, READ(pDst, dst)) & dstMask);
 	    }
 	    else if (m)
 	    {
 		d = fbIn (src, m);
-		WRITE(dst, fbOver (d, READ(dst)) & dstMask);
+		WRITE(pDst, dst, fbOver (d, READ(pDst, dst)) & dstMask);
 	    }
 	    dst++;
 	}
     }
-    
+
     fbFinishAccess (pMask->pDrawable);
     fbFinishAccess (pDst->pDrawable);
 }
@@ -381,17 +381,17 @@ fbCompositeSolidMask_nx8888x8888C (pixman_op_t op,
 
 	while (w--)
 	{
-	    ma = READ(mask++);
+	    ma = READ(pMask, mask++);
 	    if (ma == 0xffffffff)
 	    {
 		if (srca == 0xff)
-		    WRITE(dst, src & dstMask);
+		    WRITE(pDst, dst, src & dstMask);
 		else
-		    WRITE(dst, fbOver (src, READ(dst)) & dstMask);
+		    WRITE(pDst, dst, fbOver (src, READ(pDst, dst)) & dstMask);
 	    }
 	    else if (ma)
 	    {
-		d = READ(dst);
+		d = READ(pDst, dst);
 #define FbInOverC(src,srca,msk,dst,i,result) { \
     uint16_t  __a = FbGet8(msk,i); \
     uint32_t  __t, __ta; \
@@ -406,7 +406,7 @@ fbCompositeSolidMask_nx8888x8888C (pixman_op_t op,
 		FbInOverC (src, srca, ma, d, 8, n);
 		FbInOverC (src, srca, ma, d, 16, o);
 		FbInOverC (src, srca, ma, d, 24, p);
-		WRITE(dst, m|n|o|p);
+		WRITE(pDst, dst, m|n|o|p);
 	    }
 	    dst++;
 	}
@@ -456,22 +456,22 @@ fbCompositeSolidMask_nx8x0888 (pixman_op_t op,
 
 	while (w--)
 	{
-	    m = READ(mask++);
+	    m = READ(pMask, mask++);
 	    if (m == 0xff)
 	    {
 		if (srca == 0xff)
 		    d = src;
 		else
 		{
-		    d = Fetch24(dst);
+		    d = Fetch24(pDst, dst);
 		    d = fbOver24 (src, d);
 		}
-		Store24(dst,d);
+		Store24(pDst, dst,d);
 	    }
 	    else if (m)
 	    {
-		d = fbOver24 (fbIn(src,m), Fetch24(dst));
-		Store24(dst,d);
+		d = fbOver24 (fbIn(src,m), Fetch24(pDst, dst));
+		Store24(pDst, dst, d);
 	    }
 	    dst += 3;
 	}
@@ -521,23 +521,23 @@ fbCompositeSolidMask_nx8x0565 (pixman_op_t op,
 
 	while (w--)
 	{
-	    m = READ(mask++);
+	    m = READ(pMask, mask++);
 	    if (m == 0xff)
 	    {
 		if (srca == 0xff)
 		    d = src;
 		else
 		{
-		    d = READ(dst);
+		    d = READ(pDst, dst);
 		    d = fbOver24 (src, cvt0565to0888(d));
 		}
-		WRITE(dst, cvt8888to0565(d));
+		WRITE(pDst, dst, cvt8888to0565(d));
 	    }
 	    else if (m)
 	    {
-		d = READ(dst);
+		d = READ(pDst, dst);
 		d = fbOver24 (fbIn(src,m), cvt0565to0888(d));
-		WRITE(dst, cvt8888to0565(d));
+		WRITE(pDst, dst, cvt8888to0565(d));
 	    }
 	    dst++;
 	}
@@ -591,29 +591,29 @@ fbCompositeSolidMask_nx8888x0565C (pixman_op_t op,
 
 	while (w--)
 	{
-	    ma = READ(mask++);
+	    ma = READ(pMask, mask++);
 	    if (ma == 0xffffffff)
 	    {
 		if (srca == 0xff)
 		{
-		    WRITE(dst, src16);
+		    WRITE(pDst, dst, src16);
 		}
 		else
 		{
-		    d = READ(dst);
+		    d = READ(pDst, dst);
 		    d = fbOver24 (src, cvt0565to0888(d));
-		    WRITE(dst, cvt8888to0565(d));
+		    WRITE(pDst, dst, cvt8888to0565(d));
 		}
 	    }
 	    else if (ma)
 	    {
-		d = READ(dst);
+		d = READ(pDst, dst);
 		d = cvt0565to0888(d);
 		FbInOverC (src, srca, ma, d, 0, m);
 		FbInOverC (src, srca, ma, d, 8, n);
 		FbInOverC (src, srca, ma, d, 16, o);
 		d = m|n|o;
-		WRITE(dst, cvt8888to0565(d));
+		WRITE(pDst, dst, cvt8888to0565(d));
 	    }
 	    dst++;
 	}
@@ -658,12 +658,12 @@ fbCompositeSrc_8888x8888 (pixman_op_t op,
 
 	while (w--)
 	{
-	    s = READ(src++);
+	    s = READ(pSrc, src++);
 	    a = s >> 24;
 	    if (a == 0xff)
-		WRITE(dst, s & dstMask);
+		WRITE(pDst, dst, s & dstMask);
 	    else if (a)
-		WRITE(dst, fbOver (s, READ(dst)) & dstMask);
+		WRITE(pDst, dst, fbOver (s, READ(pDst, dst)) & dstMask);
 	    dst++;
 	}
     }
@@ -706,15 +706,15 @@ fbCompositeSrc_8888x0888 (pixman_op_t op,
 
 	while (w--)
 	{
-	    s = READ(src++);
+	    s = READ(pSrc, src++);
 	    a = s >> 24;
 	    if (a)
 	    {
 		if (a == 0xff)
 		    d = s;
 		else
-		    d = fbOver24 (s, Fetch24(dst));
-		Store24(dst,d);
+		    d = fbOver24 (s, Fetch24(pDst, dst));
+		Store24(pDst, dst, d);
 	    }
 	    dst += 3;
 	}
@@ -758,7 +758,7 @@ fbCompositeSrc_8888x0565 (pixman_op_t op,
 
 	while (w--)
 	{
-	    s = READ(src++);
+	    s = READ(pSrc, src++);
 	    a = s >> 24;
 	    if (a)
 	    {
@@ -766,10 +766,10 @@ fbCompositeSrc_8888x0565 (pixman_op_t op,
 		    d = s;
 		else
 		{
-		    d = READ(dst);
+		    d = READ(pDst, dst);
 		    d = fbOver24 (s, cvt0565to0888(d));
 		}
-		WRITE(dst, cvt8888to0565(d));
+		WRITE(pDst, dst, cvt8888to0565(d));
 	    }
 	    dst++;
 	}
@@ -813,16 +813,16 @@ fbCompositeSrcAdd_8000x8000 (pixman_op_t	op,
 
 	while (w--)
 	{
-	    s = READ(src++);
+	    s = READ(pSrc, src++);
 	    if (s)
 	    {
 		if (s != 0xff)
 		{
-		    d = READ(dst);
+		    d = READ(pDst, dst);
 		    t = d + s;
 		    s = t | (0 - (t >> 8));
 		}
-		WRITE(dst, s);
+		WRITE(pDst, dst, s);
 	    }
 	    dst++;
 	}
@@ -867,12 +867,12 @@ fbCompositeSrcAdd_8888x8888 (pixman_op_t	op,
 
 	while (w--)
 	{
-	    s = READ(src++);
+	    s = READ(pSrc, src++);
 	    if (s)
 	    {
 		if (s != 0xffffffff)
 		{
-		    d = READ(dst);
+		    d = READ(pDst, dst);
 		    if (d)
 		    {
 			m = FbAdd(s,d,0,t);
@@ -882,7 +882,7 @@ fbCompositeSrcAdd_8888x8888 (pixman_op_t	op,
 			s = m|n|o|p;
 		    }
 		}
-		WRITE(dst, s);
+		WRITE(pDst, dst, s);
 	    }
 	    dst++;
 	}
@@ -933,16 +933,16 @@ fbCompositeSrcAdd_8888x8x8 (pixman_op_t op,
 	    uint32_t	m, d;
 	    uint32_t	r;
 
-	    a = READ(mask++);
-	    d = READ(dst);
+	    a = READ(pMask, mask++);
+	    d = READ(pDst, dst);
 
 	    m = FbInU (sa, 0, a, tmp);
 	    r = FbAdd (m, d, 0, tmp);
 
-	    WRITE(dst++, r);
+	    WRITE(pDst, dst++, r);
 	}
     }
-    
+
     fbFinishAccess(pDst->pDrawable);
     fbFinishAccess(pMask->pDrawable);
 }
@@ -963,7 +963,7 @@ fbCompositeSrcAdd_1000x1000 (pixman_op_t	op,
 {
     /* FIXME */
 #if 0
-    
+
     uint32_t	*dstBits, *srcBits;
     int	dstStride, srcStride;
     int		dstBpp, srcBpp;
@@ -1105,14 +1105,14 @@ fbCompositeSrcSrc_nxn  (pixman_op_t	   op,
     int		dstBpp;
     pixman_bool_t	reverse = FALSE;
     pixman_bool_t	upsidedown = FALSE;
-    
+
     fbGetDrawable(pSrc->pDrawable,src,srcStride,srcBpp,srcXoff,srcYoff);
     fbGetDrawable(pDst->pDrawable,dst,dstStride,dstBpp,dstXoff,dstYoff);
-	
+
     fbBlt (src + (ySrc + srcYoff) * srcStride,
 	   srcStride,
 	   (xSrc + srcXoff) * srcBpp,
- 
+
 	   dst + (yDst + dstYoff) * dstStride,
 	   dstStride,
 	   (xDst + dstXoff) * dstBpp,
@@ -1126,7 +1126,7 @@ fbCompositeSrcSrc_nxn  (pixman_op_t	   op,
 
 	   reverse,
 	   upsidedown);
-    
+
     fbFinishAccess(pSrc->pDrawable);
     fbFinishAccess(pDst->pDrawable);
 #endif
@@ -1160,7 +1160,7 @@ fbCompositeSolidFill (pixman_op_t op,
 		      uint16_t     height)
 {
     uint32_t	src;
-    
+
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
 
     if (pDst->bits.format == PIXMAN_a8)
@@ -1205,7 +1205,7 @@ fbCompositeSrc_8888xx888 (pixman_op_t op,
 	dst += dstStride;
 	src += srcStride;
     }
-    
+
     fbFinishAccess(pSrc->pDrawable);
     fbFinishAccess(pDst->pDrawable);
 }
@@ -1242,7 +1242,7 @@ pixman_walk_composite_region (pixman_op_t op,
     }
 
     region = &reg;
-    
+
     pbox = pixman_region_rectangles (region, &n);
     while (n--)
     {
@@ -1307,7 +1307,7 @@ can_get_solid (pixman_image_t *image)
 {
     if (image->type == SOLID)
 	return TRUE;
-    
+
     if (image->type != BITS	||
 	image->bits.width != 1	||
 	image->bits.height != 1)
@@ -1356,7 +1356,7 @@ pixman_image_composite_rect  (pixman_op_t                   op,
 
     return_if_fail (src != NULL);
     return_if_fail (dest != NULL);
-    
+
     if (width > SCANLINE_BUFFER_LENGTH)
     {
 	scanline_buffer = (uint32_t *)pixman_malloc_abc (width, 3, sizeof (uint32_t));
@@ -1364,7 +1364,7 @@ pixman_image_composite_rect  (pixman_op_t                   op,
 	if (!scanline_buffer)
 	    return;
     }
-    
+
     compose_data.op = op;
     compose_data.src = src;
     compose_data.mask = mask;
@@ -1382,7 +1382,7 @@ pixman_image_composite_rect  (pixman_op_t                   op,
 
     if (scanline_buffer != _scanline_buffer)
 	free (scanline_buffer);
-}    
+}
 
 
 void
@@ -1421,7 +1421,7 @@ pixman_image_composite (pixman_op_t      op,
 	pSrc->bits.width == 1 &&
 	pSrc->bits.height == 1)
 	srcTransform = FALSE;
-    
+
     if (pMask && pMask->type == BITS)
     {
 	maskRepeat = pMask->common.repeat == PIXMAN_REPEAT_NORMAL;
@@ -1666,7 +1666,7 @@ pixman_image_composite (pixman_op_t      op,
 		    default:
 			break;
 		    }
-		    
+
 		    if (func)
 			maskRepeat = FALSE;
 		}
@@ -1985,10 +1985,10 @@ pixman_image_composite (pixman_op_t      op,
 	if (pSrc->bits.format == PIXMAN_a8 &&
 	    pDst->bits.format == PIXMAN_a8 &&
 	    !pMask)
-	{	
+	{
 #ifdef USE_MMX
 	    if (pixman_have_mmx())
-		func = fbCompositeIn_8x8mmx;	    
+		func = fbCompositeIn_8x8mmx;
 	    else
 #endif
 		func = fbCompositeSrcIn_8x8;
@@ -2029,7 +2029,7 @@ pixman_image_composite (pixman_op_t      op,
 	 */
 	func = NULL;
     }
-    
+
     if (!func) {
 	func = pixman_image_composite_rect;
 
@@ -2039,7 +2039,7 @@ pixman_image_composite (pixman_op_t      op,
 	{
 	    srcRepeat = FALSE;
 	}
-	
+
 	if (pMask && pMask->type == BITS &&
 	    pMask->bits.width == 1 && pMask->bits.height == 1)
 	{
@@ -2049,7 +2049,7 @@ pixman_image_composite (pixman_op_t      op,
 	/* if we are transforming, repeats are handled in fbFetchTransformed */
 	if (srcTransform)
 	    srcRepeat = FALSE;
-	
+
 	if (maskTransform)
 	    maskTransform = FALSE;
     }
@@ -2075,7 +2075,7 @@ pixman_image_composite (pixman_op_t      op,
 enum CPUFeatures {
     NoFeatures = 0,
     MMX = 0x1,
-    MMX_Extensions = 0x2, 
+    MMX_Extensions = 0x2,
     SSE = 0x6,
     SSE2 = 0x8,
     CMOV = 0x10
@@ -2141,9 +2141,9 @@ static unsigned int detectCPUFeatures(void) {
 	     "pop %%ebx\n"
              "1:\n"
              "mov %%edx, %0\n"
-             : "=r" (result), 
-               "=m" (vendor[0]), 
-               "=m" (vendor[4]), 
+             : "=r" (result),
+               "=m" (vendor[0]),
+               "=m" (vendor[4]),
                "=m" (vendor[8])
              :
              : "%eax", "%ecx", "%edx"
@@ -2186,7 +2186,7 @@ static unsigned int detectCPUFeatures(void) {
 #else
 #   error unsupported compiler
 #endif
-    
+
     features = 0;
     if (result) {
         /* result now contains the standard feature bits */
@@ -2254,8 +2254,8 @@ pixman_have_mmx (void)
 	mmx_present = (features & (MMX|MMX_Extensions)) == (MMX|MMX_Extensions);
         initialized = TRUE;
     }
-    
+
     return mmx_present;
 }
 #endif /* __amd64__ */
-#endif 
+#endif
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 775f7a7..56dec83 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -43,7 +43,7 @@
 #define FB_HALFUNIT (1 << (FB_SHIFT-1))
 #define FB_MASK     (FB_UNIT - 1)
 #define FB_ALLONES  ((uint32_t) -1)
-    
+
 /* Memory allocation helpers */
 void *pixman_malloc_ab (unsigned int n, unsigned int b);
 void *pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c);
@@ -197,7 +197,7 @@ struct solid_fill
     source_image_t	common;
     uint32_t		color;		/* FIXME: shouldn't this be a pixman_color_t? */
 };
-    
+
 struct gradient
 {
     source_image_t		common;
@@ -239,7 +239,7 @@ struct conical_gradient
     gradient_t			common;
     pixman_point_fixed_t	center;
     pixman_fixed_t		angle;
-}; 
+};
 
 struct bits_image
 {
@@ -307,23 +307,23 @@ union pixman_image
     }
 
 #if IMAGE_BYTE_ORDER == MSBFirst
-#define Fetch24(a)  ((unsigned long) (a) & 1 ?			      \
-		     ((READ(a) << 16) | READ((uint16_t *) ((a)+1))) : \
-		     ((READ((uint16_t *) (a)) << 8) | READ((a)+2)))
-#define Store24(a,v) ((unsigned long) (a) & 1 ?		\
-		      (WRITE(a, (uint8_t) ((v) >> 16)),		      \
-		       WRITE((uint16_t *) ((a)+1), (uint16_t) (v))) :  \
-		      (WRITE((uint16_t *) (a), (uint16_t) ((v) >> 8)), \
-		       WRITE((a)+2, (uint8_t) (v))))
+#define Fetch24(img, a)  ((unsigned long) (a) & 1 ?	      \
+    ((READ(img, a) << 16) | READ(img, (uint16_t *) ((a)+1))) : \
+    ((READ(img, (uint16_t *) (a)) << 8) | READ(img, (a)+2)))
+#define Store24(img,a,v) ((unsigned long) (a) & 1 ? \
+    (WRITE(img, a, (uint8_t) ((v) >> 16)),		  \
+     WRITE(img, (uint16_t *) ((a)+1), (uint16_t) (v))) :  \
+    (WRITE(img, (uint16_t *) (a), (uint16_t) ((v) >> 8)), \
+     WRITE(img, (a)+2, (uint8_t) (v))))
 #else
-#define Fetch24(a)  ((unsigned long) (a) & 1 ?			     \
-		     (READ(a) | (READ((uint16_t *) ((a)+1)) << 8)) : \
-		     (READ((uint16_t *) (a)) | (READ((a)+2) << 16)))
-#define Store24(a,v) ((unsigned long) (a) & 1 ? \
-		      (WRITE(a, (uint8_t) (v)),				\
-		       WRITE((uint16_t *) ((a)+1), (uint16_t) ((v) >> 8))) : \
-		      (WRITE((uint16_t *) (a), (uint16_t) (v)),		\
-		       WRITE((a)+2, (uint8_t) ((v) >> 16))))
+#define Fetch24(img,a)  ((unsigned long) (a) & 1 ?			     \
+    (READ(img, a) | (READ(img, (uint16_t *) ((a)+1)) << 8)) : \
+    (READ(img, (uint16_t *) (a)) | (READ(img, (a)+2) << 16)))
+#define Store24(img,a,v) ((unsigned long) (a) & 1 ? \
+    (WRITE(img, a, (uint8_t) (v)),				\
+     WRITE(img, (uint16_t *) ((a)+1), (uint16_t) ((v) >> 8))) : \
+    (WRITE(img, (uint16_t *) (a), (uint16_t) (v)),		\
+     WRITE(img, (a)+2, (uint8_t) ((v) >> 16))))
 #endif
 
 #define Alpha(x) ((x) >> 24)
@@ -612,26 +612,26 @@ union pixman_image
 
 #ifdef PIXMAN_FB_ACCESSORS
 
-#define READ(ptr)							\
-    (image->common.read_func ((ptr), sizeof(*(ptr))))
-#define WRITE(ptr,val)							\
-    (image->common.write_func ((ptr), (val), sizeof (*(ptr))))
+#define READ(img, ptr)							\
+    ((img)->common.read_func ((ptr), sizeof(*(ptr))))
+#define WRITE(img, ptr,val)						\
+    ((img)->common.write_func ((ptr), (val), sizeof (*(ptr))))
 
-#define MEMCPY_WRAPPED(dst, src, size)					\
+#define MEMCPY_WRAPPED(img, dst, src, size)				\
     do {								\
 	size_t _i;							\
 	uint8_t *_dst = (uint8_t*)(dst), *_src = (uint8_t*)(src);	\
 	for(_i = 0; _i < size; _i++) {					\
-	    WRITE(_dst +_i, READ(_src + _i));				\
+	    WRITE((img), _dst +_i, READ((img), _src + _i));		\
 	}								\
     } while (0)
-	
-#define MEMSET_WRAPPED(dst, val, size)					\
+
+#define MEMSET_WRAPPED(img, dst, val, size)				\
     do {								\
 	size_t _i;							\
 	uint8_t *_dst = (uint8_t*)(dst);				\
-	for(_i = 0; _i < (size_t) size; _i++) {                          \
-	    WRITE(_dst +_i, (val));					\
+	for(_i = 0; _i < (size_t) size; _i++) {				\
+	    WRITE((img), _dst +_i, (val));				\
 	}								\
     } while (0)
 
@@ -641,11 +641,11 @@ union pixman_image
 
 #else
 
-#define READ(ptr)		(*(ptr))
-#define WRITE(ptr, val)		(*(ptr) = (val))
-#define MEMCPY_WRAPPED(dst, src, size)					\
+#define READ(img, ptr)		(*(ptr))
+#define WRITE(img, ptr, val)	(*(ptr) = (val))
+#define MEMCPY_WRAPPED(img, dst, src, size)					\
     memcpy(dst, src, size)
-#define MEMSET_WRAPPED(dst, val, size)					\
+#define MEMSET_WRAPPED(img, dst, val, size)					\
     memset(dst, val, size)
 #define fbPrepareAccess(x)
 #define fbFinishAccess(x)
@@ -668,21 +668,21 @@ union pixman_image
 	    switch (PIXMAN_FORMAT_BPP((img)->bits.format))		\
 	    {								\
 	    case 32:							\
-		(res) = READ((uint32_t *)bits__);			\
+		(res) = READ(img, (uint32_t *)bits__);			\
 		break;							\
 	    case 24:							\
-		(res) = Fetch24 ((uint8_t *) bits__);			\
+		(res) = Fetch24(img, (uint8_t *) bits__);			\
 		break;							\
 	    case 16:							\
-		(res) = READ((uint16_t *) bits__);			\
+		(res) = READ(img, (uint16_t *) bits__);			\
 		(res) = cvt0565to0888(res);				\
 		break;							\
 	    case 8:							\
-		(res) = READ((uint8_t *) bits__);			\
+		(res) = READ(img, (uint8_t *) bits__);			\
 		(res) = (res) << 24;					\
 		break;							\
 	    case 1:							\
-		(res) = READ((uint32_t *) bits__);			\
+		(res) = READ(img, (uint32_t *) bits__);			\
 		(res) = FbLeftStipBits((res),1) ? 0xff000000 : 0x00000000; \
 		break;							\
 	    default:							\
@@ -812,7 +812,7 @@ void pixman_timer_register (PixmanTimer *timer);
 									\
 	timer##tname.n_times++;						\
 	begin##tname = OIL_STAMP();
-	
+
 #define TIMER_END(tname)						\
         timer##tname.total += OIL_STAMP() - begin##tname;		\
     }
commit 14f2a4c222bb6f0748a07e21663663b43beef466
Author: José Fonseca <jrfonseca at tungstengraphics.com>
Date:   Fri Sep 14 10:53:32 2007 +0100

    Remove the MMX code path for YV12 for the time being, as its pixel output
    is not exactly the same as that of the non-MMX code.
    
    This reverts commit c61d6ae39e5039dcb27bf95334a86520b562bbc5.

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 4fb2579..8c7be6d 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -30,9 +30,6 @@
  */
 #include <config.h>
 
-#include <stdlib.h>
-#include <limits.h>
-
 #ifdef USE_MMX
 
 #if defined(__amd64__) || defined(__x86_64__)
@@ -920,188 +917,6 @@ void fbComposeSetupMMX(void)
     } 
 }
 
-static __inline__ uint8_t
-interpolate_bilinear (int   distx,
-		      int   idistx,
-		      int   disty,
-		      int   idisty,
-		      uint8_t tl,
-		      uint8_t tr,
-		      uint8_t bl,
-		      uint8_t br)
-{
-    return ((tl * idistx + tr * distx) * idisty +
-	    (bl * idistx + br * distx) * disty) >> 16;
-}
-
-static __inline__ uint32_t
-interpolate_bilinear_8888 (int   distx,
-			   int   idistx,
-			   int   disty,
-			   int   idisty,
-			   uint8_t *l00,
-			   uint8_t *l01,
-			   uint8_t *l10,
-			   uint8_t *l11,
-			   int   x00,
-			   int   x01,
-			   int   x10,
-			   int   x11)
-{
-    uint8_t buffer[4];
-
-    buffer[0] = interpolate_bilinear (distx, idistx, disty, idisty,
-				      l00[x00], l01[x01],
-				      l10[x10], l11[x11]);
-
-    buffer[1] = interpolate_bilinear (distx, idistx, disty, idisty,
-				      l00[x00 + 1], l01[x01 + 1],
-				      l10[x10 + 1], l11[x11 + 1]);
-
-    buffer[2] = interpolate_bilinear (distx, idistx, disty, idisty,
-				      l00[x00 + 2], l01[x01 + 2],
-				      l10[x10 + 2], l11[x11 + 2]);
-
-    buffer[3] = interpolate_bilinear (distx, idistx, disty, idisty,
-				      l00[x00 + 3], l01[x01 + 3],
-				      l10[x10 + 3], l11[x11 + 3]);
-
-    return *((uint32_t *) buffer);
-}
-
-static __inline__ uint32_t
-fetch_bilinear2_8888 (int   distx,
-		      int   idistx,
-		      int   disty,
-		      int   idisty,
-		      uint8_t *l0,
-		      uint8_t *l1,
-		      int   x0,
-		      int   x1)
-{
-    return interpolate_bilinear_8888 (distx,
-				      idistx,
-				      disty,
-				      idisty,
-				      l0,
-				      l0,
-				      l1,
-				      l1,
-				      x0,
-				      x0 + 4,
-				      x1,
-				      x1 + 4);
-}
-
-static __inline__ uint32_t
-fetch_bilinear_8888 (int   distx,
-		     int   idistx,
-		     int   disty,
-		     int   idisty,
-		     uint8_t *l0,
-		     uint8_t *l1,
-		     int   x)
-{
-    return fetch_bilinear2_8888 (distx, idistx, disty, idisty, l0, l1, x, x);
-}
-
-static uint32_t _zero32x2[2] = { 0x0, 0x0 };
-static uint8_t  *_zero8x8 = (uint8_t *) _zero32x2;
-
-static __inline__ int
-set_scale_steps (uint32_t   *src,
-		 int srcStride,
-		 int	  xStart,
-		 int	  xStep,
-		 int	  width,
-		 int	  line,
-		 int	  lastLine,
-		 pixman_repeat_t repeatType,
-		 uint8_t	  **s0,
-		 uint8_t	  **s1,
-		 int	  *x0,
-		 int	  *x0Step,
-		 int	  *x1,
-		 int	  *x1Step)
-{
-    if (line < 0)
-    {
-	if (repeatType == PIXMAN_REPEAT_PAD)
-	{
-	    *s0 = (uint8_t *) src;
-	    *s1 = (uint8_t *) src;
-
-	    *x0     = xStart;
-	    *x0Step = xStep;
-	    *x1     = xStart;
-	    *x1Step = xStep;
-	}
-	else
-	{
-	    if (line == -1)
-	    {
-		*s0 = _zero8x8;
-
-		*x0     = 0;
-		*x0Step = 0;
-
-		*s1 = (uint8_t *) src;
-
-		*x1     = xStart;
-		*x1Step = xStep;
-	    }
-	    else
-	    {
-		return 0;
-	    }
-	}
-    }
-    else if (line >= lastLine)
-    {
-	if (repeatType == PIXMAN_REPEAT_PAD)
-	{
-	    *s0 = (uint8_t *) (src + srcStride * lastLine);
-	    *s1 = (uint8_t *) (src + srcStride * lastLine);
-
-	    *x0     = xStart;
-	    *x0Step = xStep;
-	    *x1     = xStart;
-	    *x1Step = xStep;
-	}
-	else
-	{
-	    if (line == lastLine)
-	    {
-		*s0 = (uint8_t *) (src + srcStride * line);
-
-		*x0     = xStart;
-		*x0Step = xStep;
-
-		*s1 = _zero8x8;
-
-		*x1     = 0;
-		*x1Step = 0;
-	    }
-	    else
-	    {
-		return 0;
-	    }
-	}
-    }
-    else
-    {
-	*s0 = (uint8_t *) (src + srcStride * line);
-	*s1 = (uint8_t *) (src + srcStride * (line + 1));
-
-	*x0     = xStart;
-	*x0Step = xStep;
-	*x1     = xStart;
-	*x1Step = xStep;
-    }
-
-    return width;
-}
-
 
 /* ------------------ MMX code paths called from fbpict.c ----------------------- */
 
@@ -3155,825 +2970,6 @@ fbCompositeOver_x888x8x8888mmx (pixman_op_t      op,
     _mm_empty();
 }
 
-typedef struct _ScanlineBuf {
-    pixman_bool_t lock[2];
-    int    y[2];
-    uint8_t *line[2];
-    int   height;
-    uint8_t *heap;
-} ScanlineBuf;
-
-static pixman_bool_t
-init_scanline_buffer (ScanlineBuf *slb,
-		      uint8_t	  *buffer,
-		      int	  size,
-		      int	  length,
-		      int	  height)
-{
-    int i, s;
-
-    s = length << 1;
-
-    if (size < s)
-    {
-	slb->heap = malloc (s);
-	if (!slb->heap)
-	    return FALSE;
-
-	buffer = slb->heap;
-    }
-    else
-    {
-	slb->heap = NULL;
-    }
-
-    for (i = 0; i < 2; i++)
-    {
-	slb->lock[i] = FALSE;
-	slb->y[i]    = SHRT_MAX;
-	slb->line[i] = buffer;
-
-	buffer += length;
-    }
-
-    slb->height = height;
-
-    return TRUE;
-}
-
-static void
-fini_scanline_buffer (ScanlineBuf *slb)
-{
-    if (slb->heap)
-	free (slb->heap);
-}
-
-static __inline__ void
-release_scanlines (ScanlineBuf *slb)
-{
-    int i;
-
-    for (i = 0; i < 2; i++)
-	slb->lock[i] = FALSE;
-}
-
-static __inline__ int
-_y_to_scanline (ScanlineBuf *slb,
-		int	    y)
-{
-    return (y < 0) ? 0 : (y >= slb->height) ? slb->height - 1 : y;
-}
-
-static __inline__ uint8_t *
-get_scanline (ScanlineBuf *slb,
-	      int	  y)
-{
-    int i;
-
-    y = _y_to_scanline (slb, y);
-
-    for (i = 0; i < 2; i++)
-    {
-	if (slb->y[i] == y)
-	{
-	    slb->lock[i] = TRUE;
-	    return slb->line[i];
-	}
-    }
-
-    return NULL;
-}
-
-typedef struct {
-    ullong subYw;
-    ullong U_green;
-    ullong U_blue;
-    ullong V_red;
-    ullong V_green;
-    ullong Y_coeff;
-    ullong mmx0080;
-    ullong mmx00ff;
-} YUVData;
-
-static const YUVData yuv = {
-    .subYw   = 0x1010101010101010ULL,
-    .U_green = 0xf377f377f377f377ULL,
-    .U_blue  = 0x408d408d408d408dULL,
-    .V_red   = 0x3313331333133313ULL,
-    .V_green = 0xe5fce5fce5fce5fcULL,
-    .Y_coeff = 0x2543254325432543ULL,
-    .mmx0080 = 0x0080008000800080ULL,
-    .mmx00ff = 0x00ff00ff00ff00ffULL
-};
-
-static __inline__ void
-mmx_loadyv12 (uint8_t *py,
-	      uint8_t *pu,
-	      uint8_t *pv)
-{
-    __asm__ __volatile__ (
-	"movq      %0,    %%mm6\n" /* mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-	"pxor      %%mm4, %%mm4\n" /* mm4 = 0                       */
-	"psubusb   %1,    %%mm6\n" /* Y -= 16                       */
-	"movd      %2,    %%mm0\n" /* mm0 = 00 00 00 00 U3 U2 U1 U0 */
-	"movq      %%mm6, %%mm7\n" /* mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-	"pand      %3,    %%mm6\n" /* mm6 =    Y6    Y4    Y2    Y0 */
-	"psrlw     %4,    %%mm7\n" /* mm7 =    Y7    Y5    Y3    Y1 */
-	"movd      %5,    %%mm1\n" /* mm1 = 00 00 00 00 V3 V2 V1 V0 */
-	"psllw     %6,    %%mm6\n" /* promote precision             */
-	"pmulhw    %7,    %%mm6\n" /* mm6 = luma_rgb even           */
-	"psllw     %8,    %%mm7\n" /* promote precision             */
-	"punpcklbw %%mm4, %%mm0\n" /* mm0 = U3 U2 U1 U0             */
-	"psubsw    %9,    %%mm0\n" /* U -= 128                      */
-	"punpcklbw %%mm4, %%mm1\n" /* mm1 = V3 V2 V1 V0             */
-	"pmulhw    %10,   %%mm7\n" /* mm7 = luma_rgb odd            */
-	"psllw     %11,   %%mm0\n" /* promote precision             */
-	"psubsw    %12,   %%mm1\n" /* V -= 128                      */
-	"movq      %%mm0, %%mm2\n" /* mm2 = U3 U2 U1 U0             */
-	"psllw     %13,   %%mm1\n" /* promote precision             */
-	"movq      %%mm1, %%mm4\n" /* mm4 = V3 V2 V1 V0             */
-	"pmulhw    %14,   %%mm0\n" /* mm0 = chroma_b                */
-	"pmulhw    %15,   %%mm1\n" /* mm1 = chroma_r                */
-	"movq      %%mm0, %%mm3\n" /* mm3 = chroma_b                */
-	"paddsw    %%mm6, %%mm0\n" /* mm0 = B6 B4 B2 B0             */
-	"paddsw    %%mm7, %%mm3\n" /* mm3 = B7 B5 B3 B1             */
-	"packuswb  %%mm0, %%mm0\n" /* saturate to 0-255             */
-	"pmulhw    %16,   %%mm2\n" /* mm2 = U * U_green             */
-	"packuswb  %%mm3, %%mm3\n" /* saturate to 0-255             */
-	"punpcklbw %%mm3, %%mm0\n" /* mm0 = B7 B6 B5 B4 B3 B2 B1 B0 */
-	"pmulhw    %17,   %%mm4\n" /* mm4 = V * V_green             */
-	"paddsw    %%mm4, %%mm2\n" /* mm2 = chroma_g                */
-	"movq      %%mm2, %%mm5\n" /* mm5 = chroma_g                */
-	"movq      %%mm1, %%mm4\n" /* mm4 = chroma_r                */
-	"paddsw    %%mm6, %%mm2\n" /* mm2 = G6 G4 G2 G0             */
-	"packuswb  %%mm2, %%mm2\n" /* saturate to 0-255             */
-	"paddsw    %%mm6, %%mm1\n" /* mm1 = R6 R4 R2 R0             */
-	"packuswb  %%mm1, %%mm1\n" /* saturate to 0-255             */
-	"paddsw    %%mm7, %%mm4\n" /* mm4 = R7 R5 R3 R1             */
-	"packuswb  %%mm4, %%mm4\n" /* saturate to 0-255             */
-	"paddsw    %%mm7, %%mm5\n" /* mm5 = G7 G5 G3 G1             */
-	"packuswb  %%mm5, %%mm5\n" /* saturate to 0-255             */
-	"punpcklbw %%mm4, %%mm1\n" /* mm1 = R7 R6 R5 R4 R3 R2 R1 R0 */
-	"punpcklbw %%mm5, %%mm2\n" /* mm2 = G7 G6 G5 G4 G3 G2 G1 G0 */
-	: /* no outputs */
-	: "m" (*py), "m" (yuv.subYw), "m" (*pu), "m" (yuv.mmx00ff),
-	  "i" (8), "m" (*pv), "i" (3), "m" (yuv.Y_coeff),
-	  "i" (3), "m" (yuv.mmx0080), "m" (yuv.Y_coeff), "i" (3),
-	  "m" (yuv.mmx0080), "i" (3), "m" (yuv.U_blue), "m" (yuv.V_red),
-	  "m" (yuv.U_green), "m" (yuv.V_green));
-}
-
-static __inline__ void
-mmx_pack8888 (uint8_t *image)
-{
-    __asm__ __volatile__ (
-	"pxor      %%mm3, %%mm3\n"
-	"movq      %%mm0, %%mm6\n"
-	"punpcklbw %%mm2, %%mm6\n"
-	"movq      %%mm1, %%mm7\n"
-	"punpcklbw %%mm3, %%mm7\n"
-	"movq      %%mm0, %%mm4\n"
-	"punpcklwd %%mm7, %%mm6\n"
-	"movq      %%mm1, %%mm5\n"
-	"movq      %%mm6, (%0)\n"
-	"movq      %%mm0, %%mm6\n"
-	"punpcklbw %%mm2, %%mm6\n"
-	"punpckhwd %%mm7, %%mm6\n"
-	"movq      %%mm6, 8(%0)\n"
-	"punpckhbw %%mm2, %%mm4\n"
-	"punpckhbw %%mm3, %%mm5\n"
-	"punpcklwd %%mm5, %%mm4\n"
-	"movq      %%mm4, 16(%0)\n"
-	"movq      %%mm0, %%mm4\n"
-	"punpckhbw %%mm2, %%mm4\n"
-	"punpckhwd %%mm5, %%mm4\n"
-	"movq      %%mm4, 24(%0)\n"
-	: /* no outputs */
-	: "r" (image) );
-}
-
-static __inline__ uint32_t
-loadyuv (uint8_t *py,
-	 uint8_t *pu,
-	 uint8_t *pv)
-{
-    int16_t y, u, v;
-    int32_t r, g, b;
-
-    y = *py - 16;
-    u = *pu - 128;
-    v = *pv - 128;
-
-    /* R = 1.164(Y - 16) + 1.596(V - 128) */
-    r = 0x012b27 * y + 0x019a2e * v;
-    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-    /* B = 1.164(Y - 16) + 2.018(U - 128) */
-    b = 0x012b27 * y + 0x0206a2 * u;
-
-    return 0xff000000 |
-	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
-}
-
-static __inline__ uint8_t *
-loadyv12_scanline (ScanlineBuf *slb,
-		   int	       y,
-		   uint8_t       *srcY,
-		   int	       yStride,
-		   uint8_t       *srcU,
-		   uint8_t       *srcV,
-		   int	       uvStride,
-		   int	       x,
-		   int	       width)
-{
-    uint8_t *py, *pu, *pv, *pd;
-    int   i, w;
-
-    y = _y_to_scanline (slb, y);
-
-    for (i = 0; slb->lock[i]; i++);
-
-    slb->y[i]    = y;
-    slb->lock[i] = TRUE;
-
-    py = srcY + yStride  * (y >> 0);
-    pu = srcU + uvStride * (y >> 1);
-    pv = srcV + uvStride * (y >> 1);
-
-    pd = slb->line[i];
-
-    w = width;
-
-    while (w && (unsigned long) py & 7)
-    {
-	*((uint32_t *) pd) = loadyuv (py, pu, pv);
-
-	pd += 4;
-	py += 1;
-
-	if (w & 1)
-	{
-	    pu += 1;
-	    pv += 1;
-	}
-
-	w--;
-    }
-
-    while (w >= 8)
-    {
-	mmx_loadyv12 (py, pu, pv);
-	mmx_pack8888 (pd);
-
-	py += 8;
-	pu += 4;
-	pv += 4;
-	pd += 32;
-
-	w -= 8;
-    }
-
-    while (w)
-    {
-	*((uint32_t *) pd) = loadyuv (py, pu, pv);
-
-	pd += 4;
-	py += 1;
-
-	if (w & 1)
-	{
-	    pu += 1;
-	    pv += 1;
-	}
-
-	w--;
-    }
-
-    return slb->line[i];
-}
-
-static __inline__ uint8_t *
-loadyuy2_scanline (ScanlineBuf *slb,
-		   int	       y,
-		   uint8_t       *src,
-		   int	       stride,
-		   int	       x,
-		   int	       width)
-{
-    uint8_t *py, *pu, *pv, *pd;
-    int   i, w;
-
-    y = _y_to_scanline (slb, y);
-
-    for (i = 0; slb->lock[i]; i++);
-
-    slb->y[i]    = y;
-    slb->lock[i] = TRUE;
-
-    py = src + stride * (y >> 0);
-    pu = py + 1;
-    pv = py + 3;
-
-    pd = slb->line[i];
-
-    w = width;
-
-    while (w)
-    {
-	*((uint32_t *) pd) = loadyuv (py, pu, pv);
-
-	pd += 4;
-	py += 2;
-
-	if (w & 1)
-	{
-	    pu += 4;
-	    pv += 4;
-	}
-
-	w--;
-    }
-
-    return slb->line[i];
-}
-
-/* TODO: MMX code for bilinear interpolation */
-void
-fbCompositeSrc_yv12x8888mmx (pixman_op_t      op,
-			     pixman_image_t * pSrc,
-			     pixman_image_t * pMask,
-			     pixman_image_t * pDst,
-			     int16_t      xSrc,
-			     int16_t      ySrc,
-			     int16_t      xMask,
-			     int16_t      yMask,
-			     int16_t      xDst,
-			     int16_t      yDst,
-			     uint16_t     width,
-			     uint16_t     height)
-{
-    pixman_transform_t *transform = pSrc->common.transform;
-    uint8_t	  *dst, *srcY, *srcU, *srcV;
-    uint32_t	  *srcBits = pSrc->bits.bits;
-    int		  srcStride, uvStride;
-    uint32_t	  *dstBits = pDst->bits.bits;
-    int		  dstStride;
-    int		  offset, w;
-    uint8_t	  *pd;
-
-    dst = (uint8_t *) dstBits;
-    dstStride = pDst->bits.rowstride * sizeof (uint32_t);
-
-    srcY = (uint8_t *) srcBits;
-    srcStride = pSrc->bits.rowstride;
-
-    if (srcStride < 0)
-    {
-	offset = ((-srcStride) >> 1) * ((pSrc->bits.height - 1) >> 1) -
-	    srcStride;
-	srcV = (uint8_t *) (srcBits + offset);
-	offset += ((-srcStride) >> 1) * ((pSrc->bits.height) >> 1);
-	srcU = (uint8_t *) (srcBits + offset);
-    }
-    else
-    {
-	offset = srcStride * pSrc->bits.height;
-
-	srcV = (uint8_t *) (srcBits + offset);
-	srcU = (uint8_t *) (srcBits + offset + (offset >> 2));
-    }
-
-    srcStride *= sizeof (uint32_t);
-    uvStride = srcStride >> 1;
-
-    if (transform)
-    {
-	/* transformation is a Y coordinate flip, this is achieved by
-	   moving start offsets for each plane and changing sign of stride */
-	if (transform->matrix[0][0] == (1 << 16)  &&
-	    transform->matrix[1][1] == -(1 << 16) &&
-	    transform->matrix[0][2] == 0          &&
-	    transform->matrix[1][2] == (pSrc->bits.height << 16))
-	{
-	    srcY = srcY + ((pSrc->bits.height >> 0) - 1) * srcStride;
-	    srcU = srcU + ((pSrc->bits.height >> 1) - 1) * uvStride;
-	    srcV = srcV + ((pSrc->bits.height >> 1) - 1) * uvStride;
-
-	    srcStride = -srcStride;
-	    uvStride  = -uvStride;
-
-	    transform = 0;
-	}
-    }
-
-    dst += dstStride * yDst + (xDst << 2);
-
-    if (transform)
-    {
-	ScanlineBuf slb;
-	uint8_t	    _scanline_buf[8192];
-	uint8_t	    *ps0, *ps1;
-	int	    x, x0, y, line, xStep, yStep;
-	int         distx, idistx, disty, idisty;
-	int	    srcEnd = pSrc->bits.width << 16;
-	int	    srcEndIndex = (pSrc->bits.width - 1) << 16;
-
-	xStep = transform->matrix[0][0];
-	yStep = transform->matrix[1][1];
-
-	x0 = transform->matrix[0][2] + xStep * xSrc;
-	y  = transform->matrix[1][2] + yStep * ySrc;
-
-	init_scanline_buffer (&slb,
-			      _scanline_buf, sizeof (_scanline_buf),
-			      pSrc->bits.width << 2,
-			      pSrc->bits.height);
-
-	while (height--)
-	{
-	    disty  = (y >> 8) & 0xff;
-	    idisty = 256 - disty;
-	    line   = y >> 16;
-
-	    ps0 = get_scanline (&slb, line);
-	    ps1 = get_scanline (&slb, line + 1);
-
-	    if (!ps0)
-		ps0 = loadyv12_scanline (&slb, line,
-					 srcY, srcStride, srcU, srcV, uvStride,
-					 0, pSrc->bits.width);
-
-	    if (!ps1)
-		ps1 = loadyv12_scanline (&slb, line + 1,
-					 srcY, srcStride, srcU, srcV, uvStride,
-					 0, pSrc->bits.width);
-
-	    pd = dst;
-
-	    x = x0;
-	    w = width;
-
-	    if (pSrc->common.filter == PIXMAN_FILTER_BILINEAR)
-	    {
-		while (w && x < 0)
-		{
-		    *(uint32_t *) pd = fetch_bilinear_8888 (0, 256, disty, idisty,
-							  ps0, ps1, 0);
-
-		    x  += xStep;
-		    pd += 4;
-		    w  -= 1;
-		}
-
-		while (w && x < srcEndIndex)
-		{
-		    distx  = (x >> 8) & 0xff;
-		    idistx = 256 - distx;
-
-		    *(uint32_t *) pd = fetch_bilinear_8888 (distx, idistx,
-							  disty, idisty,
-							  ps0, ps1,
-							  (x >> 14) & ~3);
-
-		    x  += xStep;
-		    pd += 4;
-		    w  -= 1;
-		}
-
-		while (w)
-		{
-		    *(uint32_t *) pd = fetch_bilinear_8888 (256, 0,
-							  disty, idisty,
-							  ps0, ps1,
-							  (x >> 14) & ~3);
-
-		    pd += 4;
-		    w  -= 1;
-		}
-	    }
-	    else
-	    {
-		while (w && x < 0)
-		{
-		    *(uint32_t *) pd = *(uint32_t *) ps0;
-
-		    x  += xStep;
-		    pd += 4;
-		    w  -= 1;
-		}
-
-		while (w && x < srcEnd)
-		{
-		    *(uint32_t *) pd = ((uint32_t *) ps0)[x >> 16];
-
-		    x  += xStep;
-		    pd += 4;
-		    w  -= 1;
-		}
-
-		while (w)
-		{
-		    *(uint32_t *) pd = ((uint32_t *) ps0)[x >> 16];
-
-		    pd += 4;
-		    w  -= 1;
-		}
-	    }
-
-	    y   += yStep;
-	    dst += dstStride;
-
-	    release_scanlines (&slb);
-	}
-
-	fini_scanline_buffer (&slb);
-    }
-    else
-    {
-	uint8_t *py, *pu, *pv;
-
-	srcY += srcStride * (ySrc >> 0) + (xSrc >> 0);
-	srcU += uvStride  * (ySrc >> 1) + (xSrc >> 1);
-	srcV += uvStride  * (ySrc >> 1) + (xSrc >> 1);
-
-	while (height)
-	{
-	    py = srcY;
-	    pu = srcU;
-	    pv = srcV;
-	    pd = dst;
-
-	    w = width;
-
-	    while (w && (unsigned long) py & 7)
-	    {
-		*((uint32_t *) pd) = loadyuv (py, pu, pv);
-
-		pd += 4;
-		py += 1;
-
-		if (w & 1)
-		{
-		    pu += 1;
-		    pv += 1;
-		}
-
-		w--;
-	    }
-
-	    while (w >= 8)
-	    {
-		mmx_loadyv12 (py, pu, pv);
-		mmx_pack8888 (pd);
-
-		py += 8;
-		pu += 4;
-		pv += 4;
-		pd += 32;
-
-		w -= 8;
-	    }
-
-	    while (w)
-	    {
-		*((uint32_t *) pd) = loadyuv (py, pu, pv);
-
-		pd += 4;
-		py += 1;
-
-		if (w & 1)
-		{
-		    pu += 1;
-		    pv += 1;
-		}
-
-		w--;
-	    }
-
-	    dst  += dstStride;
-	    srcY += srcStride;
-
-	    if (height & 1)
-	    {
-		srcU += uvStride;
-		srcV += uvStride;
-	    }
-
-	    height--;
-	}
-    }
-
-    _mm_empty ();
-}
-
-/* TODO: MMX code for yuy2 */
-void
-fbCompositeSrc_yuy2x8888mmx (pixman_op_t      op,
-			     pixman_image_t * pSrc,
-			     pixman_image_t * pMask,
-			     pixman_image_t * pDst,
-			     int16_t      xSrc,
-			     int16_t      ySrc,
-			     int16_t      xMask,
-			     int16_t      yMask,
-			     int16_t      xDst,
-			     int16_t      yDst,
-			     uint16_t     width,
-			     uint16_t     height)
-{
-    pixman_transform_t *transform = pSrc->common.transform;
-    uint8_t	  *dst, *src;
-    uint32_t	  *srcBits = pSrc->bits.bits;
-    int		  srcStride;
-    uint32_t	  *dstBits = pDst->bits.bits;
-    int		  dstStride;
-    int		  w;
-    uint8_t	  *pd;
-
-    dst = (uint8_t *) dstBits;
-    dstStride = pDst->bits.rowstride * sizeof (uint32_t);
-
-    src = (uint8_t *) srcBits;
-    srcStride = pSrc->bits.rowstride * sizeof (uint32_t);
-
-    if (transform)
-    {
-	/* transformation is a Y coordinate flip, this is achieved by
-	   moving start offsets for each plane and changing sign of stride */
-	if (transform->matrix[0][0] == (1 << 16)  &&
-	    transform->matrix[1][1] == -(1 << 16) &&
-	    transform->matrix[0][2] == 0          &&
-	    transform->matrix[1][2] == (pSrc->bits.height << 16))
-	{
-	    src = src + (pSrc->bits.height - 1) * srcStride;
-
-	    srcStride = -srcStride;
-
-	    transform = 0;
-	}
-    }
-
-    dst += dstStride * yDst + (xDst << 2);
-
-    if (transform)
-    {
-	ScanlineBuf slb;
-	uint8_t	    _scanline_buf[8192];
-	uint8_t	    *ps0, *ps1;
-	int	    x, x0, y, line, xStep, yStep;
-	int         distx, idistx, disty, idisty;
-	int	    srcEnd = pSrc->bits.width << 16;
-	int	    srcEndIndex = (pSrc->bits.width - 1) << 16;
-
-	xStep = transform->matrix[0][0];
-	yStep = transform->matrix[1][1];
-
-	x0 = transform->matrix[0][2] + xStep * xSrc;
-	y  = transform->matrix[1][2] + yStep * ySrc;
-
-	init_scanline_buffer (&slb,
-			      _scanline_buf, sizeof (_scanline_buf),
-			      pSrc->bits.width << 2,
-			      pSrc->bits.height);
-
-	while (height--)
-	{
-	    disty  = (y >> 8) & 0xff;
-	    idisty = 256 - disty;
-	    line   = y >> 16;
-
-	    ps0 = get_scanline (&slb, line);
-	    ps1 = get_scanline (&slb, line + 1);
-
-	    if (!ps0)
-		ps0 = loadyuy2_scanline (&slb, line,
-					 src, srcStride,
-					 0, pSrc->bits.width);
-
-	    if (!ps1)
-		ps1 = loadyuy2_scanline (&slb, line + 1,
-					 src, srcStride,
-					 0, pSrc->bits.width);
-
-	    pd = dst;
-
-	    x = x0;
-	    w = width;
-
-	    if (pSrc->common.filter == PIXMAN_FILTER_BILINEAR)
-	    {
-		while (w && x < 0)
-		{
-		    *(uint32_t *) pd = fetch_bilinear_8888 (0, 256, disty, idisty,
-							  ps0, ps1, 0);
-
-		    x  += xStep;
-		    pd += 4;
-		    w  -= 1;
-		}
-
-		while (w && x < srcEndIndex)
-		{
-		    distx  = (x >> 8) & 0xff;
-		    idistx = 256 - distx;
-
-		    *(uint32_t *) pd = fetch_bilinear_8888 (distx, idistx,
-							  disty, idisty,
-							  ps0, ps1,
-							  (x >> 14) & ~3);
-
-		    x  += xStep;
-		    pd += 4;
-		    w  -= 1;
-		}
-
-		while (w)
-		{
-		    *(uint32_t *) pd = fetch_bilinear_8888 (256, 0, disty, idisty,
-							  ps0, ps1,
-							  (x >> 14) & ~3);
-
-		    pd += 4;
-		    w  -= 1;
-		}
-	    }
-	    else
-	    {
-		while (w && x < 0)
-		{
-		    *(uint32_t *) pd = *(uint32_t *) ps0;
-
-		    x  += xStep;
-		    pd += 4;
-		    w  -= 1;
-		}
-
-		while (w && x < srcEnd)
-		{
-		    *(uint32_t *) pd = ((uint32_t *) ps0)[x >> 16];
-
-		    x  += xStep;
-		    pd += 4;
-		    w  -= 1;
-		}
-
-		while (w)
-		{
-		    *(uint32_t *) pd = ((uint32_t *) ps0)[x >> 16];
-
-		    pd += 4;
-		    w  -= 1;
-		}
-	    }
-
-	    y   += yStep;
-	    dst += dstStride;
-
-	    release_scanlines (&slb);
-	}
-
-	fini_scanline_buffer (&slb);
-    }
-    else
-    {
-	uint8_t *py, *pu, *pv;
-
-	src += srcStride * (ySrc >> 0) + xSrc;
-
-	while (height)
-	{
-	    py = src;
-	    pu = src + 1;
-	    pv = src + 3;
-	    pd = dst;
-
-	    w = width;
-
-	    while (w)
-	    {
-		*((uint32_t *) pd) = loadyuv (py, pu, pv);
-
-		pd += 4;
-		py += 2;
-
-		if (w & 1)
-		{
-		    pu += 4;
-		    pv += 4;
-		}
-
-		w--;
-	    }
-
-	    dst += dstStride;
-	    src += srcStride;
-
-	    height--;
-	}
-    }
-}
 
 
 #endif /* USE_MMX */
diff --git a/pixman/pixman-mmx.h b/pixman/pixman-mmx.h
index a8e27e4..a74d4ba 100644
--- a/pixman/pixman-mmx.h
+++ b/pixman/pixman-mmx.h
@@ -312,32 +312,4 @@ fbCompositeOver_x888x8x8888mmx (pixman_op_t      op,
 				uint16_t     width,
 				uint16_t     height);
 
-void
-fbCompositeSrc_yv12x8888mmx (pixman_op_t      op,
-			     pixman_image_t * pSrc,
-			     pixman_image_t * pMask,
-			     pixman_image_t * pDst,
-			     int16_t    xSrc,
-			     int16_t    ySrc,
-			     int16_t    xMask,
-			     int16_t    yMask,
-			     int16_t    xDst,
-			     int16_t    yDst,
-			     uint16_t   width,
-			     uint16_t   height);
-
-void
-fbCompositeSrc_yuy2x8888mmx (pixman_op_t      op,
-			     pixman_image_t * pSrc,
-			     pixman_image_t * pMask,
-			     pixman_image_t * pDst,
-			     int16_t    xSrc,
-			     int16_t    ySrc,
-			     int16_t    xMask,
-			     int16_t    yMask,
-			     int16_t    xDst,
-			     int16_t    yDst,
-			     uint16_t   width,
-			     uint16_t   height);
-
 #endif /* USE_MMX */
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index d2bc3d3..c7d73fc 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -1437,35 +1437,7 @@ pixman_image_composite (pixman_op_t      op,
 	    maskTransform = FALSE;
     }
 
-    /* YUV is only used internally for XVideo */
-    if (pSrc->bits.format == PIXMAN_yv12 || pSrc->bits.format == PIXMAN_yuy2)
-    {
-#ifdef USE_MMX
-	/* non rotating transformation */
-	if (!pSrc->common.transform ||
-	    (pSrc->common.transform->matrix[0][1] == 0 &&
-	     pSrc->common.transform->matrix[1][0] == 0 &&
-	     pSrc->common.transform->matrix[2][0] == 0 &&
-	     pSrc->common.transform->matrix[2][1] == 0 &&
-	     pSrc->common.transform->matrix[2][2] == 1 << 16))
-	{
-	    switch (pDst->bits.format) {
-	    case PIXMAN_a8r8g8b8:
-	    case PIXMAN_x8r8g8b8:
-		if (pixman_have_mmx())
-		{
-		    if (pSrc->bits.format == PIXMAN_yv12)
-			func = fbCompositeSrc_yv12x8888mmx;
-		    else
-			func = fbCompositeSrc_yuy2x8888mmx;
-		}
-	    default:
-		break;
-	    }
-	}
-#endif
-    }
-    else if ((pSrc->type == BITS || can_get_solid (pSrc)) && (!pMask || pMask->type == BITS)
+    if ((pSrc->type == BITS || can_get_solid (pSrc)) && (!pMask || pMask->type == BITS)
         && !srcTransform && !maskTransform
         && !maskAlphaMap && !srcAlphaMap && !dstAlphaMap
         && (pSrc->common.filter != PIXMAN_FILTER_CONVOLUTION)
commit c61d6ae39e5039dcb27bf95334a86520b562bbc5
Author: José Fonseca <jrfonseca at tungstengraphics.com>
Date:   Fri Sep 7 10:52:01 2007 +0100

    MMX code path for YV12 copy, taken from the xserver glucose-2 branch.

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 8c7be6d..4fb2579 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -30,6 +30,9 @@
  */
 #include <config.h>
 
+#include <stdlib.h>
+#include <limits.h>
+
 #ifdef USE_MMX
 
 #if defined(__amd64__) || defined(__x86_64__)
@@ -917,6 +920,188 @@ void fbComposeSetupMMX(void)
     } 
 }
 
+/*
+ * Blend four 8-bit samples: tl/tr are the top-left and top-right values,
+ * bl/br the bottom-left and bottom-right ones.  Each row is mixed with the
+ * weights idistx (left) and distx (right), the two rows are then mixed with
+ * idisty (top) and disty (bottom), and the sum is shifted right by 16 before
+ * being truncated to uint8_t.
+ */
+static __inline__ uint8_t
+interpolate_bilinear (int distx, int idistx, int disty, int idisty,
+		      uint8_t tl, uint8_t tr, uint8_t bl, uint8_t br)
+{
+    int top    = tl * idistx + tr * distx;
+    int bottom = bl * idistx + br * distx;
+
+    return (top * idisty + bottom * disty) >> 16;
+}
+
+/*
+ * Bilinearly interpolate one 4-byte pixel, one byte channel at a time.
+ *
+ * l00/l01 are the byte rows holding the upper-left and upper-right samples,
+ * l10/l11 the rows holding the lower-left and lower-right ones; x00..x11 are
+ * the byte offsets of the first channel of each sample within its row.
+ * distx/idistx and disty/idisty are passed unchanged to
+ * interpolate_bilinear() for every channel.
+ *
+ * The four result bytes are returned in memory order, i.e. the value is what
+ * a uint32_t load from a 4-byte array holding them would yield.  A union is
+ * used for that conversion: casting a uint8_t array to uint32_t * violates
+ * the strict-aliasing rule and gives no alignment guarantee.
+ */
+static __inline__ uint32_t
+interpolate_bilinear_8888 (int   distx,
+			   int   idistx,
+			   int   disty,
+			   int   idisty,
+			   uint8_t *l00,
+			   uint8_t *l01,
+			   uint8_t *l10,
+			   uint8_t *l11,
+			   int   x00,
+			   int   x01,
+			   int   x10,
+			   int   x11)
+{
+    union {
+	uint8_t  channel[4];
+	uint32_t pixel;
+    } out;
+    int c;
+
+    for (c = 0; c < 4; c++)
+    {
+	out.channel[c] = interpolate_bilinear (distx, idistx, disty, idisty,
+					       l00[x00 + c], l01[x01 + c],
+					       l10[x10 + c], l11[x11 + c]);
+    }
+
+    return out.pixel;
+}
+
+/*
+ * Interpolate a pixel from two rows of 4-byte pixels.  l0 is the upper row
+ * and x0 the byte offset of the left sample in it; l1 and x1 play the same
+ * role for the lower row.  The right-hand sample of each row is taken four
+ * bytes after the left one.
+ */
+static __inline__ uint32_t
+fetch_bilinear2_8888 (int distx, int idistx, int disty, int idisty,
+		      uint8_t *l0, uint8_t *l1, int x0, int x1)
+{
+    const int upperRight = x0 + 4;
+    const int lowerRight = x1 + 4;
+
+    return interpolate_bilinear_8888 (distx, idistx, disty, idisty,
+				      l0, l0, l1, l1,
+				      x0, upperRight, x1, lowerRight);
+}
+
+/*
+ * Convenience form of fetch_bilinear2_8888() for the case where both rows
+ * are sampled at the same byte offset x.
+ */
+static __inline__ uint32_t
+fetch_bilinear_8888 (int distx, int idistx, int disty, int idisty,
+		     uint8_t *l0, uint8_t *l1, int x)
+{
+    const int upperOffset = x;
+    const int lowerOffset = x;
+
+    return fetch_bilinear2_8888 (distx, idistx, disty, idisty,
+				 l0, l1, upperOffset, lowerOffset);
+}
+
+static uint32_t _zero32x2[2] = { 0x0, 0x0 };	/* eight zero bytes, declared as uint32_t */
+static uint8_t  *_zero8x8 = (uint8_t *) _zero32x2;	/* byte view of the above; set_scale_steps() hands it out as an all-zero row */
+
+/*
+ * Choose the two source rows (*s0 above, *s1 below) to sample for `line`,
+ * together with the start offset and per-pixel step to use in each.
+ *
+ *  - 0 <= line < lastLine: rows line and line + 1.
+ *  - outside that range with PIXMAN_REPEAT_PAD: both rows are the nearest
+ *    edge row (row 0 or row lastLine).
+ *  - line == -1 or line == lastLine without padding: the row that falls
+ *    outside the image is replaced by _zero8x8 with offset and step 0.
+ *  - anything else: nothing is written and 0 is returned.
+ *
+ * Returns width whenever the outputs were filled in.
+ */
+static __inline__ int
+set_scale_steps (uint32_t   *src,
+		 int srcStride,
+		 int	  xStart,
+		 int	  xStep,
+		 int	  width,
+		 int	  line,
+		 int	  lastLine,
+		 pixman_repeat_t repeatType,
+		 uint8_t	  **s0,
+		 uint8_t	  **s1,
+		 int	  *x0,
+		 int	  *x0Step,
+		 int	  *x1,
+		 int	  *x1Step)
+{
+    uint8_t *upper, *lower;
+    int upperIsImage = 1;	/* 0 when the row is the zero row */
+    int lowerIsImage = 1;
+
+    if (line >= 0 && line < lastLine)
+    {
+	upper = (uint8_t *) (src + srcStride * line);
+	lower = (uint8_t *) (src + srcStride * (line + 1));
+    }
+    else if (repeatType == PIXMAN_REPEAT_PAD)
+    {
+	int edge = (line < 0) ? 0 : lastLine;
+
+	upper = (uint8_t *) (src + srcStride * edge);
+	lower = upper;
+    }
+    else if (line == -1)
+    {
+	upper = _zero8x8;
+	upperIsImage = 0;
+	lower = (uint8_t *) src;
+    }
+    else if (line == lastLine)
+    {
+	upper = (uint8_t *) (src + srcStride * line);
+	lower = _zero8x8;
+	lowerIsImage = 0;
+    }
+    else
+    {
+	return 0;
+    }
+
+    *s0     = upper;
+    *x0     = upperIsImage ? xStart : 0;
+    *x0Step = upperIsImage ? xStep  : 0;
+
+    *s1     = lower;
+    *x1     = lowerIsImage ? xStart : 0;
+    *x1Step = lowerIsImage ? xStep  : 0;
+
+    return width;
+}
+
 
 /* ------------------ MMX code paths called from fbpict.c ----------------------- */
 
@@ -2970,6 +3155,825 @@ fbCompositeOver_x888x8x8888mmx (pixman_op_t      op,
     _mm_empty();
 }
 
+typedef struct _ScanlineBuf {	/* two-slot cache of converted source rows */
+    pixman_bool_t lock[2];	/* set when a slot is handed out, cleared by release_scanlines() */
+    int    y[2];	/* clamped source row held by each slot; SHRT_MAX after init */
+    uint8_t *line[2];	/* storage for each slot, `length` bytes apart (see init_scanline_buffer) */
+    int   height;	/* row count used by _y_to_scanline() for clamping */
+    uint8_t *heap;	/* malloc'ed backing store, or NULL when the caller's buffer is used */
+} ScanlineBuf;
+
+/*
+ * Prepare slb to cache two rows of `length` bytes each.
+ *
+ * The caller-supplied buffer (of `size` bytes) is used when it can hold
+ * both rows; otherwise the storage is malloc'ed and remembered in slb->heap
+ * so that fini_scanline_buffer() can release it.  Both slots start out
+ * unlocked and tagged with row SHRT_MAX.
+ *
+ * Returns FALSE, with only slb->heap written, when the allocation fails.
+ */
+static pixman_bool_t
+init_scanline_buffer (ScanlineBuf *slb,
+		      uint8_t	  *buffer,
+		      int	  size,
+		      int	  length,
+		      int	  height)
+{
+    const int needed = length << 1;
+
+    slb->heap = NULL;
+
+    if (size < needed)
+    {
+	slb->heap = malloc (needed);
+	if (slb->heap == NULL)
+	    return FALSE;
+
+	buffer = slb->heap;
+    }
+
+    slb->lock[0] = FALSE;
+    slb->y[0]    = SHRT_MAX;
+    slb->line[0] = buffer;
+
+    slb->lock[1] = FALSE;
+    slb->y[1]    = SHRT_MAX;
+    slb->line[1] = buffer + length;
+
+    slb->height = height;
+
+    return TRUE;
+}
+
+/* Release the heap storage owned by slb, if any (free (NULL) is a no-op). */
+static void
+fini_scanline_buffer (ScanlineBuf *slb)
+{
+    free (slb->heap);
+}
+
+/* Mark both cache slots as free for reuse; their contents are kept. */
+static __inline__ void
+release_scanlines (ScanlineBuf *slb)
+{
+    slb->lock[0] = FALSE;
+    slb->lock[1] = FALSE;
+}
+
+/* Clamp row number y to the range [0, slb->height - 1]. */
+static __inline__ int
+_y_to_scanline (ScanlineBuf *slb,
+		int	    y)
+{
+    if (y < 0)
+	return 0;
+
+    if (y >= slb->height)
+	return slb->height - 1;
+
+    return y;
+}
+
+/*
+ * Look up row y (after clamping) in the two-slot cache.  On a hit the slot
+ * is locked and its buffer returned; on a miss NULL is returned and the
+ * cache is left untouched.
+ */
+static __inline__ uint8_t *
+get_scanline (ScanlineBuf *slb,
+	      int	  y)
+{
+    const int row = _y_to_scanline (slb, y);
+    int slot;
+
+    for (slot = 0; slot < 2; slot++)
+    {
+	if (slb->y[slot] != row)
+	    continue;
+
+	slb->lock[slot] = TRUE;
+	return slb->line[slot];
+    }
+
+    return NULL;
+}
+
+typedef struct {	/* 64-bit constants used as memory operands by mmx_loadyv12() */
+    ullong subYw;	/* subtracted from the packed Y bytes (psubusb) */
+    ullong U_green;	/* pmulhw factor applied to U for the green term */
+    ullong U_blue;	/* pmulhw factor applied to U for the blue term */
+    ullong V_red;	/* pmulhw factor applied to V for the red term */
+    ullong V_green;	/* pmulhw factor applied to V for the green term */
+    ullong Y_coeff;	/* pmulhw factor applied to Y (even and odd pixels) */
+    ullong mmx0080;	/* subtracted from the widened U and V words (psubsw) */
+    ullong mmx00ff;	/* mask that keeps the even-numbered Y bytes (pand) */
+} YUVData;
+
+static const YUVData yuv = {	/* operands are shifted left by 3 before pmulhw, so factors are scaled by 2^13 */
+    .subYw   = 0x1010101010101010ULL,	/* 16 in every byte */
+    .U_green = 0xf377f377f377f377ULL,	/* signed 16-bit -3209, about -0.391 * 2^13 */
+    .U_blue  = 0x408d408d408d408dULL,	/* 16525, about 2.018 * 2^13 */
+    .V_red   = 0x3313331333133313ULL,	/* 13075, about 1.596 * 2^13 */
+    .V_green = 0xe5fce5fce5fce5fcULL,	/* signed 16-bit -6660, about -0.813 * 2^13 */
+    .Y_coeff = 0x2543254325432543ULL,	/* 9539, about 1.164 * 2^13 */
+    .mmx0080 = 0x0080008000800080ULL,	/* 128 in every 16-bit word */
+    .mmx00ff = 0x00ff00ff00ff00ffULL	/* low byte of every 16-bit word */
+};
+
+static __inline__ void
+mmx_loadyv12 (uint8_t *py,	/* 8 luma bytes are loaded from here (movq) */
+	      uint8_t *pu,	/* 4 U bytes are loaded from here (movd) */
+	      uint8_t *pv)	/* 4 V bytes are loaded from here (movd) */
+{	/* Converts 8 pixels to RGB; results stay in MMX registers: mm0 = B, mm1 = R, mm2 = G */
+    __asm__ __volatile__ (
+	"movq      %0,    %%mm6\n" /* mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+	"pxor      %%mm4, %%mm4\n" /* mm4 = 0                       */
+	"psubusb   %1,    %%mm6\n" /* Y -= 16                       */
+	"movd      %2,    %%mm0\n" /* mm0 = 00 00 00 00 U3 U2 U1 U0 */
+	"movq      %%mm6, %%mm7\n" /* mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+	"pand      %3,    %%mm6\n" /* mm6 =    Y6    Y4    Y2    Y0 */
+	"psrlw     %4,    %%mm7\n" /* mm7 =    Y7    Y5    Y3    Y1 */
+	"movd      %5,    %%mm1\n" /* mm1 = 00 00 00 00 V3 V2 V1 V0 */
+	"psllw     %6,    %%mm6\n" /* promote precision             */
+	"pmulhw    %7,    %%mm6\n" /* mm6 = luma_rgb even           */
+	"psllw     %8,    %%mm7\n" /* promote precision             */
+	"punpcklbw %%mm4, %%mm0\n" /* mm0 = U3 U2 U1 U0             */
+	"psubsw    %9,    %%mm0\n" /* U -= 128                      */
+	"punpcklbw %%mm4, %%mm1\n" /* mm1 = V3 V2 V1 V0             */
+	"pmulhw    %10,   %%mm7\n" /* mm7 = luma_rgb odd            */
+	"psllw     %11,   %%mm0\n" /* promote precision             */
+	"psubsw    %12,   %%mm1\n" /* V -= 128                      */
+	"movq      %%mm0, %%mm2\n" /* mm2 = U3 U2 U1 U0             */
+	"psllw     %13,   %%mm1\n" /* promote precision             */
+	"movq      %%mm1, %%mm4\n" /* mm4 = V3 V2 V1 V0             */
+	"pmulhw    %14,   %%mm0\n" /* mm0 = chroma_b                */
+	"pmulhw    %15,   %%mm1\n" /* mm1 = chroma_r                */
+	"movq      %%mm0, %%mm3\n" /* mm3 = chroma_b                */
+	"paddsw    %%mm6, %%mm0\n" /* mm0 = B6 B4 B2 B0             */
+	"paddsw    %%mm7, %%mm3\n" /* mm3 = B7 B5 B3 B1             */
+	"packuswb  %%mm0, %%mm0\n" /* saturate to 0-255             */
+	"pmulhw    %16,   %%mm2\n" /* mm2 = U * U_green             */
+	"packuswb  %%mm3, %%mm3\n" /* saturate to 0-255             */
+	"punpcklbw %%mm3, %%mm0\n" /* mm0 = B7 B6 B5 B4 B3 B2 B1 B0 */
+	"pmulhw    %17,   %%mm4\n" /* mm4 = V * V_green             */
+	"paddsw    %%mm4, %%mm2\n" /* mm2 = chroma_g                */
+	"movq      %%mm2, %%mm5\n" /* mm5 = chroma_g                */
+	"movq      %%mm1, %%mm4\n" /* mm4 = chroma_r                */
+	"paddsw    %%mm6, %%mm2\n" /* mm2 = G6 G4 G2 G0             */
+	"packuswb  %%mm2, %%mm2\n" /* saturate to 0-255             */
+	"paddsw    %%mm6, %%mm1\n" /* mm1 = R6 R4 R2 R0             */
+	"packuswb  %%mm1, %%mm1\n" /* saturate to 0-255             */
+	"paddsw    %%mm7, %%mm4\n" /* mm4 = R7 R5 R3 R1             */
+	"packuswb  %%mm4, %%mm4\n" /* saturate to 0-255             */
+	"paddsw    %%mm7, %%mm5\n" /* mm5 = G7 G5 G3 G1             */
+	"packuswb  %%mm5, %%mm5\n" /* saturate to 0-255             */
+	"punpcklbw %%mm4, %%mm1\n" /* mm1 = R7 R6 R5 R4 R3 R2 R1 R0 */
+	"punpcklbw %%mm5, %%mm2\n" /* mm2 = G7 G6 G5 G4 G3 G2 G1 G0 */
+	: /* no outputs; NOTE(review): no clobber list either, although mm0-mm7 are written */
+	: "m" (*py), "m" (yuv.subYw), "m" (*pu), "m" (yuv.mmx00ff),	/* %0 - %3 */
+	  "i" (8), "m" (*pv), "i" (3), "m" (yuv.Y_coeff),	/* %4 - %7 */
+	  "i" (3), "m" (yuv.mmx0080), "m" (yuv.Y_coeff), "i" (3),	/* %8 - %11 */
+	  "m" (yuv.mmx0080), "i" (3), "m" (yuv.U_blue), "m" (yuv.V_red),	/* %12 - %15 */
+	  "m" (yuv.U_green), "m" (yuv.V_green));	/* %16 - %17 */
+}
+
+static __inline__ void
+mmx_pack8888 (uint8_t *image)	/* receives 32 bytes: 8 pixels of 4 bytes each */
+{	/* Interleaves the planar bytes in mm0/mm1/mm2 (B/R/G as left by mmx_loadyv12) into 4-byte pixels */
+    __asm__ __volatile__ (
+	"pxor      %%mm3, %%mm3\n" /* mm3 = 0, supplies the fourth byte of every pixel */
+	"movq      %%mm0, %%mm6\n" /* mm6 = B7..B0 */
+	"punpcklbw %%mm2, %%mm6\n" /* mm6 = G3 B3 G2 B2 G1 B1 G0 B0 */
+	"movq      %%mm1, %%mm7\n" /* mm7 = R7..R0 */
+	"punpcklbw %%mm3, %%mm7\n" /* mm7 = 00 R3 00 R2 00 R1 00 R0 */
+	"movq      %%mm0, %%mm4\n" /* mm4 = B7..B0, kept for pixels 4-5 */
+	"punpcklwd %%mm7, %%mm6\n" /* mm6 = 00 R1 G1 B1 00 R0 G0 B0 */
+	"movq      %%mm1, %%mm5\n" /* mm5 = R7..R0, kept for pixels 4-7 */
+	"movq      %%mm6, (%0)\n" /* store pixels 0 and 1 */
+	"movq      %%mm0, %%mm6\n" /* rebuild the G/B pairs for the low half */
+	"punpcklbw %%mm2, %%mm6\n" /* mm6 = G3 B3 G2 B2 G1 B1 G0 B0 */
+	"punpckhwd %%mm7, %%mm6\n" /* mm6 = 00 R3 G3 B3 00 R2 G2 B2 */
+	"movq      %%mm6, 8(%0)\n" /* store pixels 2 and 3 */
+	"punpckhbw %%mm2, %%mm4\n" /* mm4 = G7 B7 G6 B6 G5 B5 G4 B4 */
+	"punpckhbw %%mm3, %%mm5\n" /* mm5 = 00 R7 00 R6 00 R5 00 R4 */
+	"punpcklwd %%mm5, %%mm4\n" /* mm4 = 00 R5 G5 B5 00 R4 G4 B4 */
+	"movq      %%mm4, 16(%0)\n" /* store pixels 4 and 5 */
+	"movq      %%mm0, %%mm4\n" /* rebuild the G/B pairs for the high half */
+	"punpckhbw %%mm2, %%mm4\n" /* mm4 = G7 B7 G6 B6 G5 B5 G4 B4 */
+	"punpckhwd %%mm5, %%mm4\n" /* mm4 = 00 R7 G7 B7 00 R6 G6 B6 */
+	"movq      %%mm4, 24(%0)\n" /* store pixels 6 and 7 */
+	: /* no outputs; NOTE(review): memory is written but no "memory" clobber is declared */
+	: "r" (image) );	/* NOTE(review): top byte of each pixel is 0 here, whereas loadyuv() sets it to 0xff - confirm intended */
+}
+
+/*
+ * Convert one Y/U/V sample triple to a 32-bit pixel with the top byte set
+ * to 0xff, red in bits 16-23, green in bits 8-15 and blue in bits 0-7.
+ *
+ * The sums are kept scaled by 2^16, so a channel is in range when its sum
+ * lies in [0, 0x1000000); values outside are clamped to 0 or 255.
+ */
+static __inline__ uint32_t
+loadyuv (uint8_t *py,
+	 uint8_t *pu,
+	 uint8_t *pv)
+{
+    const int16_t y = *py - 16;
+    const int16_t u = *pu - 128;
+    const int16_t v = *pv - 128;
+
+    /* R = 1.164(Y - 16) + 1.596(V - 128) */
+    const int32_t r = 0x012b27 * y + 0x019a2e * v;
+    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+    const int32_t g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+    /* B = 1.164(Y - 16) + 2.018(U - 128) */
+    const int32_t b = 0x012b27 * y + 0x0206a2 * u;
+
+    uint32_t pixel = 0xff000000;
+
+    if (r >= 0x1000000)
+	pixel |= 0xff0000;
+    else if (r >= 0)
+	pixel |= r & 0xff0000;
+
+    if (g >= 0x1000000)
+	pixel |= 0x00ff00;
+    else if (g >= 0)
+	pixel |= (g >> 8) & 0x00ff00;
+
+    if (b >= 0x1000000)
+	pixel |= 0x0000ff;
+    else if (b >= 0)
+	pixel |= (b >> 16) & 0x0000ff;
+
+    return pixel;
+}
+
+static __inline__ uint8_t *
+loadyv12_scanline (ScanlineBuf *slb,
+		   int	       y,
+		   uint8_t       *srcY,
+		   int	       yStride,
+		   uint8_t       *srcU,
+		   uint8_t       *srcV,
+		   int	       uvStride,
+		   int	       x,	/* not referenced in the body */
+		   int	       width)
+{	/* Converts `width` pixels of planar source row y (clamped) into a free cache slot and returns that slot's buffer */
+    uint8_t *py, *pu, *pv, *pd;
+    int   i, w;
+
+    y = _y_to_scanline (slb, y);
+
+    for (i = 0; slb->lock[i]; i++);	/* first unlocked slot; NOTE(review): i is not bounded, relies on a slot being free */
+
+    slb->y[i]    = y;
+    slb->lock[i] = TRUE;
+
+    py = srcY + yStride  * (y >> 0);
+    pu = srcU + uvStride * (y >> 1);	/* one chroma row serves two luma rows */
+    pv = srcV + uvStride * (y >> 1);
+
+    pd = slb->line[i];
+
+    w = width;
+
+    while (w && (unsigned long) py & 7)	/* scalar pixels until py is 8-byte aligned */
+    {
+	*((uint32_t *) pd) = loadyuv (py, pu, pv);
+
+	pd += 4;
+	py += 1;
+
+	if (w & 1)	/* chroma advances when the remaining count is odd; NOTE(review): pairing thus depends on width parity - confirm for odd widths */
+	{
+	    pu += 1;
+	    pv += 1;
+	}
+
+	w--;
+    }
+
+    while (w >= 8)	/* 8 pixels per iteration: 8 Y bytes, 4 U and 4 V bytes in, 32 bytes out */
+    {
+	mmx_loadyv12 (py, pu, pv);
+	mmx_pack8888 (pd);
+
+	py += 8;
+	pu += 4;
+	pv += 4;
+	pd += 32;
+
+	w -= 8;
+    }
+
+    while (w)	/* remaining pixels, scalar */
+    {
+	*((uint32_t *) pd) = loadyuv (py, pu, pv);
+
+	pd += 4;
+	py += 1;
+
+	if (w & 1)
+	{
+	    pu += 1;
+	    pv += 1;
+	}
+
+	w--;
+    }
+
+    return slb->line[i];
+}
+
+static __inline__ uint8_t *
+loadyuy2_scanline (ScanlineBuf *slb,
+		   int	       y,
+		   uint8_t       *src,
+		   int	       stride,
+		   int	       x,	/* not referenced in the body */
+		   int	       width)
+{	/* Converts `width` pixels of packed source row y (clamped) into a free cache slot and returns that slot's buffer */
+    uint8_t *py, *pu, *pv, *pd;
+    int   i, w;
+
+    y = _y_to_scanline (slb, y);
+
+    for (i = 0; slb->lock[i]; i++);	/* first unlocked slot; NOTE(review): i is not bounded, relies on a slot being free */
+
+    slb->y[i]    = y;
+    slb->lock[i] = TRUE;
+
+    py = src + stride * (y >> 0);
+    pu = py + 1;	/* U is read from byte 1 of each 4-byte group */
+    pv = py + 3;	/* V is read from byte 3 of each 4-byte group */
+
+    pd = slb->line[i];
+
+    w = width;
+
+    while (w)
+    {
+	*((uint32_t *) pd) = loadyuv (py, pu, pv);
+
+	pd += 4;
+	py += 2;	/* luma samples are two bytes apart */
+
+	if (w & 1)	/* chroma advances when the remaining count is odd; NOTE(review): pairing thus depends on width parity - confirm for odd widths */
+	{
+	    pu += 4;
+	    pv += 4;
+	}
+
+	w--;
+    }
+
+    return slb->line[i];
+}
+
+/*
+ * Convert a rectangle of a planar YV12 source into 32-bit destination pixels.
+ *
+ * op, pMask, xMask and yMask are accepted for signature compatibility and
+ * are not used.  Three cases are handled:
+ *
+ *  - no transform: rows are converted directly into the destination, using
+ *    the MMX path for 8-pixel groups and loadyuv() for the remainder;
+ *  - a pure vertical flip (unit X scale, -1 Y scale, Y offset == height):
+ *    folded into the plane pointers and strides, then treated as above;
+ *  - any other transform: only matrix[0][0], matrix[1][1], matrix[0][2] and
+ *    matrix[1][2] are read.  Source rows are converted on demand into a
+ *    two-row cache and sampled per pixel, bilinearly when the source filter
+ *    is PIXMAN_FILTER_BILINEAR and by nearest row/column otherwise.
+ *
+ * If the row cache cannot be allocated the function returns without
+ * touching the destination.
+ *
+ * TODO: MMX code for bilinear interpolation
+ */
+void
+fbCompositeSrc_yv12x8888mmx (pixman_op_t      op,
+			     pixman_image_t * pSrc,
+			     pixman_image_t * pMask,
+			     pixman_image_t * pDst,
+			     int16_t      xSrc,
+			     int16_t      ySrc,
+			     int16_t      xMask,
+			     int16_t      yMask,
+			     int16_t      xDst,
+			     int16_t      yDst,
+			     uint16_t     width,
+			     uint16_t     height)
+{
+    pixman_transform_t *transform = pSrc->common.transform;
+    uint8_t	  *dst, *srcY, *srcU, *srcV;
+    uint32_t	  *srcBits = pSrc->bits.bits;
+    int		  srcStride, uvStride;
+    uint32_t	  *dstBits = pDst->bits.bits;
+    int		  dstStride;
+    int		  offset, w;
+    uint8_t	  *pd;
+
+    dst = (uint8_t *) dstBits;
+    dstStride = pDst->bits.rowstride * sizeof (uint32_t);
+
+    srcY = (uint8_t *) srcBits;
+    srcStride = pSrc->bits.rowstride;
+
+    /* Locate the two chroma planes; offsets are in uint32_t units here. */
+    if (srcStride < 0)
+    {
+	offset = ((-srcStride) >> 1) * ((pSrc->bits.height - 1) >> 1) -
+	    srcStride;
+	srcV = (uint8_t *) (srcBits + offset);
+	offset += ((-srcStride) >> 1) * ((pSrc->bits.height) >> 1);
+	srcU = (uint8_t *) (srcBits + offset);
+    }
+    else
+    {
+	offset = srcStride * pSrc->bits.height;
+
+	srcV = (uint8_t *) (srcBits + offset);
+	srcU = (uint8_t *) (srcBits + offset + (offset >> 2));
+    }
+
+    /* From here on strides are in bytes. */
+    srcStride *= sizeof (uint32_t);
+    uvStride = srcStride >> 1;
+
+    if (transform)
+    {
+	/* transformation is a Y coordinate flip, this is achieved by
+	   moving start offsets for each plane and changing sign of stride */
+	if (transform->matrix[0][0] == (1 << 16)  &&
+	    transform->matrix[1][1] == -(1 << 16) &&
+	    transform->matrix[0][2] == 0          &&
+	    transform->matrix[1][2] == (pSrc->bits.height << 16))
+	{
+	    srcY = srcY + ((pSrc->bits.height >> 0) - 1) * srcStride;
+	    srcU = srcU + ((pSrc->bits.height >> 1) - 1) * uvStride;
+	    srcV = srcV + ((pSrc->bits.height >> 1) - 1) * uvStride;
+
+	    srcStride = -srcStride;
+	    uvStride  = -uvStride;
+
+	    transform = 0;
+	}
+    }
+
+    dst += dstStride * yDst + (xDst << 2);
+
+    if (transform)
+    {
+	ScanlineBuf slb;
+	uint8_t	    _scanline_buf[8192];
+	uint8_t	    *ps0, *ps1;
+	int	    x, x0, y, line, xStep, yStep;
+	int         distx, idistx, disty, idisty;
+	int	    srcEnd = pSrc->bits.width << 16;
+	int	    srcEndIndex = (pSrc->bits.width - 1) << 16;
+
+	xStep = transform->matrix[0][0];
+	yStep = transform->matrix[1][1];
+
+	x0 = transform->matrix[0][2] + xStep * xSrc;
+	y  = transform->matrix[1][2] + yStep * ySrc;
+
+	/* Without the row cache nothing below can run: on failure slb is
+	   left uninitialised, so bail out before it is read.  No MMX
+	   instruction has been issued yet, hence no _mm_empty () here. */
+	if (!init_scanline_buffer (&slb,
+				   _scanline_buf, sizeof (_scanline_buf),
+				   pSrc->bits.width << 2,
+				   pSrc->bits.height))
+	    return;
+
+	while (height--)
+	{
+	    /* y is 16.16 fixed point: bits 8-15 give the vertical weight */
+	    disty  = (y >> 8) & 0xff;
+	    idisty = 256 - disty;
+	    line   = y >> 16;
+
+	    /* Reuse rows converted for the previous output line if possible */
+	    ps0 = get_scanline (&slb, line);
+	    ps1 = get_scanline (&slb, line + 1);
+
+	    if (!ps0)
+		ps0 = loadyv12_scanline (&slb, line,
+					 srcY, srcStride, srcU, srcV, uvStride,
+					 0, pSrc->bits.width);
+
+	    if (!ps1)
+		ps1 = loadyv12_scanline (&slb, line + 1,
+					 srcY, srcStride, srcU, srcV, uvStride,
+					 0, pSrc->bits.width);
+
+	    pd = dst;
+
+	    x = x0;
+	    w = width;
+
+	    if (pSrc->common.filter == PIXMAN_FILTER_BILINEAR)
+	    {
+		/* left of the image: all horizontal weight on column 0 */
+		while (w && x < 0)
+		{
+		    *(uint32_t *) pd = fetch_bilinear_8888 (0, 256, disty, idisty,
+							  ps0, ps1, 0);
+
+		    x  += xStep;
+		    pd += 4;
+		    w  -= 1;
+		}
+
+		/* inside: (x >> 14) & ~3 is the byte offset of column x >> 16 */
+		while (w && x < srcEndIndex)
+		{
+		    distx  = (x >> 8) & 0xff;
+		    idistx = 256 - distx;
+
+		    *(uint32_t *) pd = fetch_bilinear_8888 (distx, idistx,
+							  disty, idisty,
+							  ps0, ps1,
+							  (x >> 14) & ~3);
+
+		    x  += xStep;
+		    pd += 4;
+		    w  -= 1;
+		}
+
+		/* NOTE(review): x is not advanced in this loop, and with
+		   distx == 256 all weight goes to the sample four bytes
+		   after the computed offset - confirm this stays inside
+		   the converted row. */
+		while (w)
+		{
+		    *(uint32_t *) pd = fetch_bilinear_8888 (256, 0,
+							  disty, idisty,
+							  ps0, ps1,
+							  (x >> 14) & ~3);
+
+		    pd += 4;
+		    w  -= 1;
+		}
+	    }
+	    else
+	    {
+		while (w && x < 0)
+		{
+		    *(uint32_t *) pd = *(uint32_t *) ps0;
+
+		    x  += xStep;
+		    pd += 4;
+		    w  -= 1;
+		}
+
+		while (w && x < srcEnd)
+		{
+		    *(uint32_t *) pd = ((uint32_t *) ps0)[x >> 16];
+
+		    x  += xStep;
+		    pd += 4;
+		    w  -= 1;
+		}
+
+		/* NOTE(review): x is not advanced here either; the same
+		   column (x >> 16) is repeated for the rest of the row. */
+		while (w)
+		{
+		    *(uint32_t *) pd = ((uint32_t *) ps0)[x >> 16];
+
+		    pd += 4;
+		    w  -= 1;
+		}
+	    }
+
+	    y   += yStep;
+	    dst += dstStride;
+
+	    release_scanlines (&slb);
+	}
+
+	fini_scanline_buffer (&slb);
+    }
+    else
+    {
+	uint8_t *py, *pu, *pv;
+
+	srcY += srcStride * (ySrc >> 0) + (xSrc >> 0);
+	srcU += uvStride  * (ySrc >> 1) + (xSrc >> 1);
+	srcV += uvStride  * (ySrc >> 1) + (xSrc >> 1);
+
+	while (height)
+	{
+	    py = srcY;
+	    pu = srcU;
+	    pv = srcV;
+	    pd = dst;
+
+	    w = width;
+
+	    /* scalar pixels until py is 8-byte aligned */
+	    while (w && (unsigned long) py & 7)
+	    {
+		*((uint32_t *) pd) = loadyuv (py, pu, pv);
+
+		pd += 4;
+		py += 1;
+
+		if (w & 1)
+		{
+		    pu += 1;
+		    pv += 1;
+		}
+
+		w--;
+	    }
+
+	    /* 8 pixels per iteration through the MMX converter */
+	    while (w >= 8)
+	    {
+		mmx_loadyv12 (py, pu, pv);
+		mmx_pack8888 (pd);
+
+		py += 8;
+		pu += 4;
+		pv += 4;
+		pd += 32;
+
+		w -= 8;
+	    }
+
+	    while (w)
+	    {
+		*((uint32_t *) pd) = loadyuv (py, pu, pv);
+
+		pd += 4;
+		py += 1;
+
+		if (w & 1)
+		{
+		    pu += 1;
+		    pv += 1;
+		}
+
+		w--;
+	    }
+
+	    dst  += dstStride;
+	    srcY += srcStride;
+
+	    /* chroma rows advance when the remaining row count is odd */
+	    if (height & 1)
+	    {
+		srcU += uvStride;
+		srcV += uvStride;
+	    }
+
+	    height--;
+	}
+    }
+
+    _mm_empty ();
+}
+
+/*
+ * Convert a rectangle of a packed YUY2 source into 32-bit destination pixels.
+ *
+ * op, pMask, xMask and yMask are accepted for signature compatibility and
+ * are not used.  Three cases are handled:
+ *
+ *  - no transform: rows are converted directly with loadyuv();
+ *  - a pure vertical flip (unit X scale, -1 Y scale, Y offset == height):
+ *    folded into the source pointer and stride, then treated as above;
+ *  - any other transform: only matrix[0][0], matrix[1][1], matrix[0][2] and
+ *    matrix[1][2] are read.  Source rows are converted on demand into a
+ *    two-row cache and sampled per pixel, bilinearly when the source filter
+ *    is PIXMAN_FILTER_BILINEAR and by nearest row/column otherwise.
+ *
+ * If the row cache cannot be allocated the function returns without
+ * touching the destination.
+ *
+ * TODO: MMX code for yuy2
+ */
+void
+fbCompositeSrc_yuy2x8888mmx (pixman_op_t      op,
+			     pixman_image_t * pSrc,
+			     pixman_image_t * pMask,
+			     pixman_image_t * pDst,
+			     int16_t      xSrc,
+			     int16_t      ySrc,
+			     int16_t      xMask,
+			     int16_t      yMask,
+			     int16_t      xDst,
+			     int16_t      yDst,
+			     uint16_t     width,
+			     uint16_t     height)
+{
+    pixman_transform_t *transform = pSrc->common.transform;
+    uint8_t	  *dst, *src;
+    uint32_t	  *srcBits = pSrc->bits.bits;
+    int		  srcStride;
+    uint32_t	  *dstBits = pDst->bits.bits;
+    int		  dstStride;
+    int		  w;
+    uint8_t	  *pd;
+
+    dst = (uint8_t *) dstBits;
+    dstStride = pDst->bits.rowstride * sizeof (uint32_t);
+
+    src = (uint8_t *) srcBits;
+    srcStride = pSrc->bits.rowstride * sizeof (uint32_t);
+
+    if (transform)
+    {
+	/* transformation is a Y coordinate flip, this is achieved by
+	   moving start offsets for each plane and changing sign of stride */
+	if (transform->matrix[0][0] == (1 << 16)  &&
+	    transform->matrix[1][1] == -(1 << 16) &&
+	    transform->matrix[0][2] == 0          &&
+	    transform->matrix[1][2] == (pSrc->bits.height << 16))
+	{
+	    src = src + (pSrc->bits.height - 1) * srcStride;
+
+	    srcStride = -srcStride;
+
+	    transform = 0;
+	}
+    }
+
+    dst += dstStride * yDst + (xDst << 2);
+
+    if (transform)
+    {
+	ScanlineBuf slb;
+	uint8_t	    _scanline_buf[8192];
+	uint8_t	    *ps0, *ps1;
+	int	    x, x0, y, line, xStep, yStep;
+	int         distx, idistx, disty, idisty;
+	int	    srcEnd = pSrc->bits.width << 16;
+	int	    srcEndIndex = (pSrc->bits.width - 1) << 16;
+
+	xStep = transform->matrix[0][0];
+	yStep = transform->matrix[1][1];
+
+	x0 = transform->matrix[0][2] + xStep * xSrc;
+	y  = transform->matrix[1][2] + yStep * ySrc;
+
+	/* Without the row cache nothing below can run: on failure slb is
+	   left uninitialised, so bail out before it is read. */
+	if (!init_scanline_buffer (&slb,
+				   _scanline_buf, sizeof (_scanline_buf),
+				   pSrc->bits.width << 2,
+				   pSrc->bits.height))
+	    return;
+
+	while (height--)
+	{
+	    /* y is 16.16 fixed point: bits 8-15 give the vertical weight */
+	    disty  = (y >> 8) & 0xff;
+	    idisty = 256 - disty;
+	    line   = y >> 16;
+
+	    /* Reuse rows converted for the previous output line if possible */
+	    ps0 = get_scanline (&slb, line);
+	    ps1 = get_scanline (&slb, line + 1);
+
+	    if (!ps0)
+		ps0 = loadyuy2_scanline (&slb, line,
+					 src, srcStride,
+					 0, pSrc->bits.width);
+
+	    if (!ps1)
+		ps1 = loadyuy2_scanline (&slb, line + 1,
+					 src, srcStride,
+					 0, pSrc->bits.width);
+
+	    pd = dst;
+
+	    x = x0;
+	    w = width;
+
+	    if (pSrc->common.filter == PIXMAN_FILTER_BILINEAR)
+	    {
+		/* left of the image: all horizontal weight on column 0 */
+		while (w && x < 0)
+		{
+		    *(uint32_t *) pd = fetch_bilinear_8888 (0, 256, disty, idisty,
+							  ps0, ps1, 0);
+
+		    x  += xStep;
+		    pd += 4;
+		    w  -= 1;
+		}
+
+		/* inside: (x >> 14) & ~3 is the byte offset of column x >> 16 */
+		while (w && x < srcEndIndex)
+		{
+		    distx  = (x >> 8) & 0xff;
+		    idistx = 256 - distx;
+
+		    *(uint32_t *) pd = fetch_bilinear_8888 (distx, idistx,
+							  disty, idisty,
+							  ps0, ps1,
+							  (x >> 14) & ~3);
+
+		    x  += xStep;
+		    pd += 4;
+		    w  -= 1;
+		}
+
+		/* NOTE(review): x is not advanced in this loop, and with
+		   distx == 256 all weight goes to the sample four bytes
+		   after the computed offset - confirm this stays inside
+		   the converted row. */
+		while (w)
+		{
+		    *(uint32_t *) pd = fetch_bilinear_8888 (256, 0, disty, idisty,
+							  ps0, ps1,
+							  (x >> 14) & ~3);
+
+		    pd += 4;
+		    w  -= 1;
+		}
+	    }
+	    else
+	    {
+		while (w && x < 0)
+		{
+		    *(uint32_t *) pd = *(uint32_t *) ps0;
+
+		    x  += xStep;
+		    pd += 4;
+		    w  -= 1;
+		}
+
+		while (w && x < srcEnd)
+		{
+		    *(uint32_t *) pd = ((uint32_t *) ps0)[x >> 16];
+
+		    x  += xStep;
+		    pd += 4;
+		    w  -= 1;
+		}
+
+		/* NOTE(review): x is not advanced here either; the same
+		   column (x >> 16) is repeated for the rest of the row. */
+		while (w)
+		{
+		    *(uint32_t *) pd = ((uint32_t *) ps0)[x >> 16];
+
+		    pd += 4;
+		    w  -= 1;
+		}
+	    }
+
+	    y   += yStep;
+	    dst += dstStride;
+
+	    release_scanlines (&slb);
+	}
+
+	fini_scanline_buffer (&slb);
+    }
+    else
+    {
+	uint8_t *py, *pu, *pv;
+
+	/* NOTE(review): luma samples are stepped two bytes apart below, yet
+	   xSrc is added here as a plain byte count - confirm whether the
+	   horizontal offset should be xSrc * 2. */
+	src += srcStride * (ySrc >> 0) + xSrc;
+
+	while (height)
+	{
+	    py = src;
+	    pu = src + 1;
+	    pv = src + 3;
+	    pd = dst;
+
+	    w = width;
+
+	    while (w)
+	    {
+		*((uint32_t *) pd) = loadyuv (py, pu, pv);
+
+		pd += 4;
+		py += 2;
+
+		/* chroma advances when the remaining count is odd */
+		if (w & 1)
+		{
+		    pu += 4;
+		    pv += 4;
+		}
+
+		w--;
+	    }
+
+	    dst += dstStride;
+	    src += srcStride;
+
+	    height--;
+	}
+    }
+}
 
 
 #endif /* USE_MMX */
diff --git a/pixman/pixman-mmx.h b/pixman/pixman-mmx.h
index a74d4ba..a8e27e4 100644
--- a/pixman/pixman-mmx.h
+++ b/pixman/pixman-mmx.h
@@ -312,4 +312,32 @@ fbCompositeOver_x888x8x8888mmx (pixman_op_t      op,
 				uint16_t     width,
 				uint16_t     height);
 
+void
+fbCompositeSrc_yv12x8888mmx (pixman_op_t      op,
+			     pixman_image_t * pSrc,
+			     pixman_image_t * pMask,
+			     pixman_image_t * pDst,
+			     int16_t    xSrc,
+			     int16_t    ySrc,
+			     int16_t    xMask,
+			     int16_t    yMask,
+			     int16_t    xDst,
+			     int16_t    yDst,
+			     uint16_t   width,
+			     uint16_t   height);
+
+void
+fbCompositeSrc_yuy2x8888mmx (pixman_op_t      op,
+			     pixman_image_t * pSrc,
+			     pixman_image_t * pMask,
+			     pixman_image_t * pDst,
+			     int16_t    xSrc,
+			     int16_t    ySrc,
+			     int16_t    xMask,
+			     int16_t    yMask,
+			     int16_t    xDst,
+			     int16_t    yDst,
+			     uint16_t   width,
+			     uint16_t   height);
+
 #endif /* USE_MMX */
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index c7d73fc..d2bc3d3 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -1437,7 +1437,35 @@ pixman_image_composite (pixman_op_t      op,
 	    maskTransform = FALSE;
     }
 
-    if ((pSrc->type == BITS || can_get_solid (pSrc)) && (!pMask || pMask->type == BITS)
+    /* YUV is only used internally for XVideo */
+    if (pSrc->bits.format == PIXMAN_yv12 || pSrc->bits.format == PIXMAN_yuy2)
+    {
+#ifdef USE_MMX
+	/* non rotating transformation */
+	if (!pSrc->common.transform ||
+	    (pSrc->common.transform->matrix[0][1] == 0 &&
+	     pSrc->common.transform->matrix[1][0] == 0 &&
+	     pSrc->common.transform->matrix[2][0] == 0 &&
+	     pSrc->common.transform->matrix[2][1] == 0 &&
+	     pSrc->common.transform->matrix[2][2] == 1 << 16))
+	{
+	    switch (pDst->bits.format) {
+	    case PIXMAN_a8r8g8b8:
+	    case PIXMAN_x8r8g8b8:
+		if (pixman_have_mmx())
+		{
+		    if (pSrc->bits.format == PIXMAN_yv12)
+			func = fbCompositeSrc_yv12x8888mmx;
+		    else
+			func = fbCompositeSrc_yuy2x8888mmx;
+		}
+	    default:
+		break;
+	    }
+	}
+#endif
+    }
+    else if ((pSrc->type == BITS || can_get_solid (pSrc)) && (!pMask || pMask->type == BITS)
         && !srcTransform && !maskTransform
         && !maskAlphaMap && !srcAlphaMap && !dstAlphaMap
         && (pSrc->common.filter != PIXMAN_FILTER_CONVOLUTION)
commit 562fa00e40b5466914b4f4cf49ae183e7b38e4f8
Author: José Fonseca <jrfonseca at tungstengraphics.com>
Date:   Fri Sep 7 10:49:21 2007 +0100

    YUV formats fetch support.

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index d39c662..0d4a0d8 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -109,6 +109,31 @@ SourcePictureClassify (source_image_t *pict,
 
 #define SCANLINE_BUFFER_LENGTH 2048
 
+/*
+ * YV12 setup and access macros
+ *
+ * YV12_SETUP declares the locals bits, stride, offset0 and offset1 that
+ * YV12_Y, YV12_U and YV12_V expand to, so it has to appear in the same scope
+ * before any of them is used.  offset0 and offset1 are offsets, in uint32_t
+ * units from the start of the image bits, of the planes read by YV12_V and
+ * YV12_U respectively.  Each accessor yields a byte pointer to the start of
+ * the row that holds `line`; the chroma planes use half the stride and one
+ * row per two lines.
+ *
+ * The macro argument is parenthesised so that any pointer expression can be
+ * passed, not only a plain identifier.
+ */
+
+#define YV12_SETUP(pict) \
+	uint32_t *bits = (pict)->bits; \
+	int stride = (pict)->rowstride; \
+	int offset0 = stride < 0 ? \
+		((-stride) >> 1) * (((pict)->height - 1) >> 1) - stride : \
+		stride * (pict)->height; \
+	int offset1 = stride < 0 ? \
+		offset0 + ((-stride) >> 1) * (((pict)->height) >> 1) : \
+		offset0 + (offset0 >> 2);
+
+#define YV12_Y(line)		\
+    ((uint8_t *) ((bits) + (stride) * (line)))
+
+#define YV12_U(line)	      \
+    ((uint8_t *) ((bits) + offset1 + \
+		((stride) >> 1) * ((line) >> 1)))
+
+#define YV12_V(line)	      \
+    ((uint8_t *) ((bits) + offset0 + \
+		((stride) >> 1) * ((line) >> 1)))
+
+
 typedef FASTCALL void (*fetchProc)(bits_image_t *pict, int x, int y, int width, uint32_t *buffer);
 
 /*
@@ -629,6 +654,66 @@ fbFetch_g1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
     }
 }
 
+/*
+ * Fetch `width` pixels starting at column x of row `line` from a packed
+ * YUY2 image and write them to buffer as 0xffRRGGBB values.
+ *
+ * Luma for pixel p is read from byte 2 * p of the row; U and V come from
+ * bytes 1 and 3 of the 4-byte group that contains that luma byte.
+ */
+static FASTCALL void
+fbFetch_yuy2 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + pict->rowstride * line;
+    uint8_t *row = (uint8_t *) bits;
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	const int lumaAt  = (x + i) << 1;
+	const int groupAt = lumaAt & -4;
+
+	const int16_t y = row[lumaAt] - 16;
+	const int16_t u = row[groupAt + 1] - 128;
+	const int16_t v = row[groupAt + 3] - 128;
+
+	/* R = 1.164(Y - 16) + 1.596(V - 128) */
+	const int32_t r = 0x012b27 * y + 0x019a2e * v;
+	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+	const int32_t g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+	/* B = 1.164(Y - 16) + 2.018(U - 128) */
+	const int32_t b = 0x012b27 * y + 0x0206a2 * u;
+
+	/* Sums are scaled by 2^16; clamp each channel to 0..255 */
+	uint32_t pixel = 0xff000000;
+
+	if (r >= 0x1000000)
+	    pixel |= 0xff0000;
+	else if (r >= 0)
+	    pixel |= r & 0xff0000;
+
+	if (g >= 0x1000000)
+	    pixel |= 0x00ff00;
+	else if (g >= 0)
+	    pixel |= (g >> 8) & 0x00ff00;
+
+	if (b >= 0x1000000)
+	    pixel |= 0x0000ff;
+	else if (b >= 0)
+	    pixel |= (b >> 16) & 0x0000ff;
+
+	WRITE(buffer++, pixel);
+    }
+}
+
+/*
+ * Fetch `width` pixels starting at column x of row `line` from a planar
+ * YV12 image and write them to buffer as 0xffRRGGBB values.
+ *
+ * Luma is read per pixel; the U and V samples are indexed with the column
+ * halved, so two neighbouring pixels share one chroma pair.
+ */
+static FASTCALL void
+fbFetch_yv12 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
+{
+    YV12_SETUP(pict);
+    uint8_t *lumaRow = YV12_Y (line);
+    uint8_t *uRow    = YV12_U (line);
+    uint8_t *vRow    = YV12_V (line);
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	const int col = x + i;
+
+	const int16_t y = lumaRow[col] - 16;
+	const int16_t u = uRow[col >> 1] - 128;
+	const int16_t v = vRow[col >> 1] - 128;
+
+	/* R = 1.164(Y - 16) + 1.596(V - 128) */
+	const int32_t r = 0x012b27 * y + 0x019a2e * v;
+	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+	const int32_t g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+	/* B = 1.164(Y - 16) + 2.018(U - 128) */
+	const int32_t b = 0x012b27 * y + 0x0206a2 * u;
+
+	/* Sums are scaled by 2^16; clamp each channel to 0..255 */
+	uint32_t pixel = 0xff000000;
+
+	if (r >= 0x1000000)
+	    pixel |= 0xff0000;
+	else if (r >= 0)
+	    pixel |= r & 0xff0000;
+
+	if (g >= 0x1000000)
+	    pixel |= 0x00ff00;
+	else if (g >= 0)
+	    pixel |= (g >> 8) & 0x00ff00;
+
+	if (b >= 0x1000000)
+	    pixel |= 0x0000ff;
+	else if (b >= 0)
+	    pixel |= (b >> 16) & 0x0000ff;
+
+	WRITE(buffer++, pixel);
+    }
+}
+
 static fetchProc fetchProcForPicture (bits_image_t * pict)
 {
     switch(pict->format) {
@@ -676,6 +761,10 @@ static fetchProc fetchProcForPicture (bits_image_t * pict)
         /* 1bpp formats */
     case PIXMAN_a1: return  fbFetch_a1;
     case PIXMAN_g1: return  fbFetch_g1;
+
+        /* YUV formats */
+    case PIXMAN_yuy2: return fbFetch_yuy2;
+    case PIXMAN_yv12: return fbFetch_yv12;
     }
     
     return NULL;
@@ -1093,6 +1182,53 @@ fbFetchPixel_g1 (bits_image_t *pict, int offset, int line)
     return indexed->rgba[a];
 }
 
+/*
+ * Return the pixel at column `offset` of row `line` of a packed YUY2 image
+ * as a 0xffRRGGBB value.
+ *
+ * Luma is read from byte 2 * offset of the row; U and V come from bytes 1
+ * and 3 of the 4-byte group that contains that luma byte.
+ */
+static FASTCALL uint32_t
+fbFetchPixel_yuy2 (bits_image_t *pict, int offset, int line)
+{
+    const uint32_t *bits = pict->bits + pict->rowstride * line;
+    uint8_t *row = (uint8_t *) bits;
+
+    const int lumaAt  = offset << 1;
+    const int groupAt = lumaAt & -4;
+
+    const int16_t y = row[lumaAt] - 16;
+    const int16_t u = row[groupAt + 1] - 128;
+    const int16_t v = row[groupAt + 3] - 128;
+
+    /* R = 1.164(Y - 16) + 1.596(V - 128) */
+    const int32_t r = 0x012b27 * y + 0x019a2e * v;
+    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+    const int32_t g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+    /* B = 1.164(Y - 16) + 2.018(U - 128) */
+    const int32_t b = 0x012b27 * y + 0x0206a2 * u;
+
+    /* Sums are scaled by 2^16; clamp each channel to 0..255 */
+    uint32_t pixel = 0xff000000;
+
+    if (r >= 0x1000000)
+	pixel |= 0xff0000;
+    else if (r >= 0)
+	pixel |= r & 0xff0000;
+
+    if (g >= 0x1000000)
+	pixel |= 0x00ff00;
+    else if (g >= 0)
+	pixel |= (g >> 8) & 0x00ff00;
+
+    if (b >= 0x1000000)
+	pixel |= 0x0000ff;
+    else if (b >= 0)
+	pixel |= (b >> 16) & 0x0000ff;
+
+    return pixel;
+}
+
+/*
+ * Return the pixel at column `offset` of row `line` of a planar YV12 image
+ * as a 0xffRRGGBB value.  The chroma samples are indexed with the column
+ * halved.
+ */
+static FASTCALL uint32_t
+fbFetchPixel_yv12 (bits_image_t *pict, int offset, int line)
+{
+    YV12_SETUP(pict);
+    const int16_t y = YV12_Y (line)[offset] - 16;
+    const int16_t u = YV12_U (line)[offset >> 1] - 128;
+    const int16_t v = YV12_V (line)[offset >> 1] - 128;
+
+    /* R = 1.164(Y - 16) + 1.596(V - 128) */
+    const int32_t r = 0x012b27 * y + 0x019a2e * v;
+    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+    const int32_t g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+    /* B = 1.164(Y - 16) + 2.018(U - 128) */
+    const int32_t b = 0x012b27 * y + 0x0206a2 * u;
+
+    /* Sums are scaled by 2^16; clamp each channel to 0..255 */
+    uint32_t pixel = 0xff000000;
+
+    if (r >= 0x1000000)
+	pixel |= 0xff0000;
+    else if (r >= 0)
+	pixel |= r & 0xff0000;
+
+    if (g >= 0x1000000)
+	pixel |= 0x00ff00;
+    else if (g >= 0)
+	pixel |= (g >> 8) & 0x00ff00;
+
+    if (b >= 0x1000000)
+	pixel |= 0x0000ff;
+    else if (b >= 0)
+	pixel |= (b >> 16) & 0x0000ff;
+
+    return pixel;
+}
+
 static fetchPixelProc fetchPixelProcForPicture (bits_image_t * pict)
 {
     switch(pict->format) {
@@ -1140,6 +1276,10 @@ static fetchPixelProc fetchPixelProcForPicture (bits_image_t * pict)
         /* 1bpp formats */
     case PIXMAN_a1: return  fbFetchPixel_a1;
     case PIXMAN_g1: return  fbFetchPixel_g1;
+
+        /* YUV formats */
+    case PIXMAN_yuy2: return fbFetchPixel_yuy2;
+    case PIXMAN_yv12: return fbFetchPixel_yv12;
     }
     
     return NULL;
diff --git a/pixman/pixman.h b/pixman/pixman.h
index d76cc12..7f1be2b 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -397,6 +397,8 @@ struct pixman_indexed
 #define PIXMAN_TYPE_ABGR	3
 #define PIXMAN_TYPE_COLOR	4
 #define PIXMAN_TYPE_GRAY	5
+#define PIXMAN_TYPE_YUY2	6
+#define PIXMAN_TYPE_YV12	7
 
 #define PIXMAN_FORMAT_COLOR(f)	(PIXMAN_FORMAT_TYPE(f) & 2)
 
@@ -453,6 +455,10 @@ typedef enum {
     PIXMAN_a1 =		PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0),
     
     PIXMAN_g1 =		PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0),
+
+/* YUV formats */
+    PIXMAN_yuy2 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0),
+    PIXMAN_yv12 =	PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0),
 } pixman_format_code_t;
 
 /* Constructors */
commit 469dd84cf8862a54ac31f0fc91ee539f3ad439e2
Author: José Fonseca <jrfonseca at tungstengraphics.com>
Date:   Thu Sep 6 12:27:12 2007 +0100

    New test for image fetching.

diff --git a/test/Makefile.am b/test/Makefile.am
index 6548ba5..3db77b3 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -1,7 +1,8 @@
 if HAVE_GTK
 TESTPROGRAMS = \
 	composite-test		\
-	gradient-test
+	gradient-test		\
+	fetch-test
 
 noinst_PROGRAMS = $(TESTPROGRAMS)
 
@@ -9,5 +10,6 @@ INCLUDES = -I$(top_srcdir)/pixman $(GTK_CFLAGS)
 
 composite_test_LDADD =	$(top_builddir)/pixman/libpixman-1.la $(GTK_LIBS)
 gradient_test_LDADD = $(top_builddir)/pixman/libpixman-1.la $(GTK_LIBS)
+fetch_test_LDADD = $(top_builddir)/pixman/libpixman-1.la
 
-endif
\ No newline at end of file
+endif
diff --git a/test/fetch-test.c b/test/fetch-test.c
new file mode 100644
index 0000000..717f6d4
--- /dev/null
+++ b/test/fetch-test.c
@@ -0,0 +1,163 @@
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "pixman.h"
+
+
+#define SIZE 1024
+
+
+pixman_indexed_t mono_pallete = {
+    .rgba = { 0x00000000, 0x00ffffff },
+};
+
+
+typedef struct {	/* one fetch test: a source image and the pixels expected from it */
+    pixman_format_code_t format;	/* pixel format of src */
+    int width, height;			/* image size in pixels */
+    int stride;				/* src row stride as passed to pixman_image_create_bits (bytes, judging by the a8r8g8b8 case) */
+    uint32_t src[SIZE];			/* raw source image data */
+    uint32_t dst[SIZE];			/* expected a8r8g8b8 pixels, row-major with no padding */
+    pixman_indexed_t *indexed;		/* palette handed to pixman_image_set_indexed, or NULL */
+} testcase_t;
+
+testcase_t testcases[] = {
+    {
+	.format = PIXMAN_a8r8g8b8,
+	.width = 2, .height = 2,
+	.stride = 8,
+	.src = { 0x00112233, 0x44556677, 
+	         0x8899aabb, 0xccddeeff },
+	.dst = { 0x00112233, 0x44556677, 
+	         0x8899aabb, 0xccddeeff },
+	.indexed = NULL,
+    },
+    {
+	.format = PIXMAN_g1,
+	.width = 8, .height = 2,
+	.stride = 4,
+	.src = { 0x00000055, 
+	         0x000000aa },
+	.dst = { 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000,
+	         0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff },
+	.indexed = &mono_pallete,
+    },
+#if 0
+    {
+	.format = PIXMAN_g8,
+	.width = 4, .height = 2,
+	.stride = 4,
+	.src = { 0x01234567, 
+	         0x89abcdef },
+	.dst = { 0x00010101, 0x00232323, 0x00454545, 0x00676767, 
+	         0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, },
+    },
+#endif
+    {
+	.format = PIXMAN_yv12,
+	.width = 8, .height = 2,
+	.stride = 8,
+	.src = { 0xff00ff00, 0xff00ff00, 
+	         0x00ff00ff, 0x00ff00ff, 
+	         0x0080ff80, 
+		 0xff800080},
+	.dst = { 
+		0xff000000, 0xffffffff, 0xffb80000, 0xffffe113,
+		0xff000000, 0xffffffff, 0xff0023ee, 0xff4affff,
+		0xffffffff, 0xff000000, 0xffffe113, 0xffb80000,
+		0xffffffff, 0xff000000, 0xff4affff, 0xff0023ee,
+	},
+    },
+};
+
+const int ntestcases = sizeof(testcases)/sizeof(testcases[0]);
+
+
+/* Read accessor given to pixman_image_set_accessors: loads a size-byte (1, 2 or 4) value from src. */
+static uint32_t
+reader (const void *src, int size)
+{
+    switch (size) {
+    case 1:
+	return *(const uint8_t *)src;
+    case 2:
+	return *(const uint16_t *)src;
+    case 4:
+	return *(const uint32_t *)src;
+    }
+    assert(0);	/* unsupported access width */
+    return 0;	/* reached only when built with NDEBUG; keeps the result defined */
+}
+
+
+/*
+ * Write accessor given to pixman_image_set_accessors: stores 'value',
+ * truncated to 'size' bytes (1, 2 or 4), at the address 'src'.  Any other
+ * width trips the assertion.
+ * (Despite its name, 'src' is the destination of the store.)
+ */
+static void
+writer (void *src, uint32_t value, int size)
+{
+    if (size == 1)
+	*(uint8_t *)src = value;
+    else if (size == 2)
+	*(uint16_t *)src = value;
+    else if (size == 4)
+	*(uint32_t *)src = value;
+    else
+	assert(0);
+}
+
+
+/* Composite each testcase into dst, directly and then via accessors; returns 1 on any mismatch. */
+int
+main (int argc, char **argv)
+{
+    uint32_t dst[SIZE];
+    pixman_image_t *src_img;
+    pixman_image_t *dst_img;
+    int i, j, x, y;
+    int ret = 0;
+
+    for (i = 0; i < ntestcases; ++i) {
+	for (j = 0; j < 2; ++j) {
+	    /* Poison dst: pixels left by an earlier pass must not mask a failed composite. */
+	    for (x = 0; x < SIZE; ++x)
+		dst[x] = 0xdeadbeef;
+	    src_img = pixman_image_create_bits (testcases[i].format,
+						testcases[i].width,
+						testcases[i].height,
+						testcases[i].src,
+						testcases[i].stride);
+	    pixman_image_set_indexed(src_img, testcases[i].indexed);
+	    dst_img = pixman_image_create_bits (PIXMAN_a8r8g8b8,
+						testcases[i].width,
+						testcases[i].height,
+						dst,
+						testcases[i].width*4);
+	    if (j) {	/* second pass goes through the reader/writer accessors */
+		pixman_image_set_accessors (src_img, reader, writer);
+		pixman_image_set_accessors (dst_img, reader, writer);
+	    }
+	    pixman_image_composite (PIXMAN_OP_SRC, src_img, NULL, dst_img,
+				    0, 0, 0, 0, 0, 0, testcases[i].width, testcases[i].height);
+	    pixman_image_unref (src_img);
+	    pixman_image_unref (dst_img);
+
+	    for (y = 0; y < testcases[i].height; ++y)
+		for (x = 0; x < testcases[i].width; ++x) {
+		    int offset = y*testcases[i].width + x;
+		    if (dst[offset] != testcases[i].dst[offset]) {
+			printf ("test %i%c: pixel mismatch at (x=%d,y=%d): %08x expected, %08x obtained\n",
+			        i + 1, 'a' + j,
+			        x, y,
+			        testcases[i].dst[offset], dst[offset]);
+			ret = 1;
+		    }
+		}
+	}
+    }
+
+    return ret;
+}
commit ba5fc60317e177a4140d4ea2cf8cb538d1e4e07a
Author: José Fonseca <jrfonseca at tungstengraphics.com>
Date:   Wed Sep 5 21:56:24 2007 +0100

    Perform stride calculations inside the callback functions.

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index b48251d..d39c662 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -109,28 +109,29 @@ SourcePictureClassify (source_image_t *pict,
 
 #define SCANLINE_BUFFER_LENGTH 2048
 
-typedef FASTCALL void (*fetchProc)(pixman_image_t *image,
-				   const uint32_t *bits,
-				   int x, int width,
-				   uint32_t *buffer,
-				   const pixman_indexed_t * indexed);
+typedef FASTCALL void (*fetchProc)(bits_image_t *pict, int x, int y, int width, uint32_t *buffer);
+
+/*
+ * Used by READ/WRITE macros
+ */
+#define image ((pixman_image_t *)pict)
 
 /*
  * All of the fetch functions
  */
 
 static FASTCALL void
-fbFetch_a8r8g8b8 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     MEMCPY_WRAPPED(buffer, (const uint32_t *)bits + x,
 		   width*sizeof(uint32_t));
 }
 
 static FASTCALL void
-fbFetch_x8r8g8b8 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_x8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint32_t *pixel = (const uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
     while (pixel < end) {
@@ -139,9 +140,9 @@ fbFetch_x8r8g8b8 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_a8b8g8r8 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint32_t *pixel = (uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
     while (pixel < end) {
@@ -153,9 +154,9 @@ fbFetch_a8b8g8r8 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_x8b8g8r8 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_x8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint32_t *pixel = (uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
     while (pixel < end) {
@@ -168,9 +169,9 @@ fbFetch_x8b8g8r8 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_r8g8b8 (pixman_image_t *image,
-		const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + 3*x;
     const uint8_t *end = pixel + 3*width;
     while (pixel < end) {
@@ -181,9 +182,9 @@ fbFetch_r8g8b8 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_b8g8r8 (pixman_image_t *image,
-		const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + 3*x;
     const uint8_t *end = pixel + 3*width;
     while (pixel < end) {
@@ -202,10 +203,9 @@ fbFetch_b8g8r8 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_r5g6b5 (pixman_image_t *image,
-		const uint32_t *bits, int x, int width, uint32_t *buffer,
-		const pixman_indexed_t * indexed)
+fbFetch_r5g6b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
@@ -220,12 +220,10 @@ fbFetch_r5g6b5 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_b5g6r5 (pixman_image_t *image,
-		const uint32_t *bits, int x, int width, uint32_t *buffer,
-		const pixman_indexed_t * indexed)
+fbFetch_b5g6r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b;
-    
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
@@ -238,11 +236,10 @@ fbFetch_b5g6r5 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_a1r5g5b5 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b, a;
-    
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
@@ -257,11 +254,10 @@ fbFetch_a1r5g5b5 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_x1r5g5b5 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_x1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b;
-    
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
@@ -275,11 +271,10 @@ fbFetch_x1r5g5b5 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_a1b5g5r5 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b, a;
-    
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
@@ -294,11 +289,10 @@ fbFetch_a1b5g5r5 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_x1b5g5r5 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_x1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b;
-    
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
@@ -312,10 +306,10 @@ fbFetch_x1b5g5r5 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_a4r4g4b4 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b, a;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
@@ -330,11 +324,10 @@ fbFetch_a4r4g4b4 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_x4r4g4b4 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_x4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b;
-    
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
@@ -348,11 +341,10 @@ fbFetch_x4r4g4b4 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_a4b4g4r4 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b, a;
-    
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
@@ -367,11 +359,10 @@ fbFetch_a4b4g4r4 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_x4b4g4r4 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_x4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b;
-    
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
     while (pixel < end) {
@@ -385,9 +376,9 @@ fbFetch_x4b4g4r4 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_a8 (pixman_image_t *image,
-	    const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
@@ -396,11 +387,10 @@ fbFetch_a8 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_r3g3b2 (pixman_image_t *image,
-		const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_r3g3b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b;
-    
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
@@ -417,11 +407,10 @@ fbFetch_r3g3b2 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_b2g3r3 (pixman_image_t *image,
-		const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_b2g3r3 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b;
-    
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
@@ -440,10 +429,10 @@ fbFetch_b2g3r3 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_a2r2g2b2 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a2r2g2b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t   a,r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
@@ -458,10 +447,10 @@ fbFetch_a2r2g2b2 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_a2b2g2r2 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a2b2g2r2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t   a,r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
@@ -476,9 +465,10 @@ fbFetch_a2b2g2r2 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_c8 (pixman_image_t *image,
-	    const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_c8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const pixman_indexed_t * indexed = pict->indexed;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
@@ -488,9 +478,9 @@ fbFetch_c8 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_x4a4 (pixman_image_t *image,
-	      const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_x4a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
     while (pixel < end) {
@@ -507,9 +497,9 @@ fbFetch_x4a4 (pixman_image_t *image,
 #endif
 
 static FASTCALL void
-fbFetch_a4 (pixman_image_t *image,
-	    const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     int i;
     for (i = 0; i < width; ++i) {
 	uint32_t  p = Fetch4(bits, i + x);
@@ -520,10 +510,10 @@ fbFetch_a4 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_r1g2b1 (pixman_image_t *image,
-		const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_r1g2b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     int i;
     for (i = 0; i < width; ++i) {
 	uint32_t  p = Fetch4(bits, i + x);
@@ -536,10 +526,10 @@ fbFetch_r1g2b1 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_b1g2r1 (pixman_image_t *image,
-		const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_b1g2r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     int i;
     for (i = 0; i < width; ++i) {
 	uint32_t  p = Fetch4(bits, i + x);
@@ -552,10 +542,10 @@ fbFetch_b1g2r1 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_a1r1g1b1 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a1r1g1b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  a,r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     int i;
     for (i = 0; i < width; ++i) {
 	uint32_t  p = Fetch4(bits, i + x);
@@ -569,10 +559,10 @@ fbFetch_a1r1g1b1 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_a1b1g1r1 (pixman_image_t *image,
-		  const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a1b1g1r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t  a,r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     int i;
     for (i = 0; i < width; ++i) {
 	uint32_t  p = Fetch4(bits, i + x);
@@ -586,9 +576,10 @@ fbFetch_a1b1g1r1 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_c4 (pixman_image_t *image,
-	    const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_c4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const pixman_indexed_t * indexed = pict->indexed;
     int i;
     for (i = 0; i < width; ++i) {
 	uint32_t  p = Fetch4(bits, i + x);
@@ -599,9 +590,9 @@ fbFetch_c4 (pixman_image_t *image,
 
 
 static FASTCALL void
-fbFetch_a1 (pixman_image_t *image,
-	    const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_a1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
     int i;
     for (i = 0; i < width; ++i) {
 	uint32_t  p = READ(bits + ((i + x) >> 5));
@@ -620,9 +611,10 @@ fbFetch_a1 (pixman_image_t *image,
 }
 
 static FASTCALL void
-fbFetch_g1 (pixman_image_t *image,
-	    const uint32_t *bits, int x, int width, uint32_t *buffer, const pixman_indexed_t * indexed)
+fbFetch_g1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const pixman_indexed_t * indexed = pict->indexed;
     int i;
     for (i = 0; i < width; ++i) {
 	uint32_t p = READ(bits + ((i+x) >> 5));
@@ -693,28 +685,26 @@ static fetchProc fetchProcForPicture (bits_image_t * pict)
  * Pixel wise fetching
  */
 
-typedef FASTCALL uint32_t (*fetchPixelProc)(pixman_image_t *image,
-					    const uint32_t *bits, int offset,
-					    const pixman_indexed_t * indexed);
+typedef FASTCALL uint32_t (*fetchPixelProc)(bits_image_t *pict, int offset, int line);
 
 static FASTCALL uint32_t
-fbFetchPixel_a8r8g8b8 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a8r8g8b8 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     return READ((uint32_t *)bits + offset);
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_x8r8g8b8 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_x8r8g8b8 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     return READ((uint32_t *)bits + offset) | 0xff000000;
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_a8b8g8r8 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a8b8g8r8 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ((uint32_t *)bits + offset);
     
     return ((pixel & 0xff000000) |
@@ -724,9 +714,9 @@ fbFetchPixel_a8b8g8r8 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_x8b8g8r8 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_x8b8g8r8 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ((uint32_t *)bits + offset);
     
     return ((0xff000000) |
@@ -736,9 +726,9 @@ fbFetchPixel_x8b8g8r8 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_r8g8b8 (pixman_image_t *image,
-		     const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_r8g8b8 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 #if IMAGE_BYTE_ORDER == MSBFirst
     return (0xff000000 |
@@ -754,9 +744,9 @@ fbFetchPixel_r8g8b8 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_b8g8r8 (pixman_image_t *image,
-		     const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_b8g8r8 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
 #if IMAGE_BYTE_ORDER == MSBFirst
     return (0xff000000 |
@@ -772,10 +762,10 @@ fbFetchPixel_b8g8r8 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_r5g6b5 (pixman_image_t *image,
-		     const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_r5g6b5 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ((uint16_t *) bits + offset);
     
     r = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) << 8;
@@ -785,10 +775,10 @@ fbFetchPixel_r5g6b5 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_b5g6r5 (pixman_image_t *image,
-		     const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_b5g6r5 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ((uint16_t *) bits + offset);
     
     b = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) >> 8;
@@ -798,10 +788,10 @@ fbFetchPixel_b5g6r5 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_a1r5g5b5 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a1r5g5b5 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ((uint16_t *) bits + offset);
     
     a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
@@ -812,10 +802,10 @@ fbFetchPixel_a1r5g5b5 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_x1r5g5b5 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_x1r5g5b5 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ((uint16_t *) bits + offset);
     
     r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
@@ -825,10 +815,10 @@ fbFetchPixel_x1r5g5b5 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_a1b5g5r5 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a1b5g5r5 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ((uint16_t *) bits + offset);
     
     a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
@@ -839,10 +829,10 @@ fbFetchPixel_a1b5g5r5 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_x1b5g5r5 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_x1b5g5r5 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ((uint16_t *) bits + offset);
     
     b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
@@ -852,10 +842,10 @@ fbFetchPixel_x1b5g5r5 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_a4r4g4b4 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a4r4g4b4 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ((uint16_t *) bits + offset);
     
     a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
@@ -866,10 +856,10 @@ fbFetchPixel_a4r4g4b4 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_x4r4g4b4 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_x4r4g4b4 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ((uint16_t *) bits + offset);
     
     r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
@@ -879,10 +869,10 @@ fbFetchPixel_x4r4g4b4 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_a4b4g4r4 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a4b4g4r4 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ((uint16_t *) bits + offset);
     
     a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
@@ -893,10 +883,10 @@ fbFetchPixel_a4b4g4r4 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_x4b4g4r4 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_x4b4g4r4 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ((uint16_t *) bits + offset);
     
     b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
@@ -906,19 +896,19 @@ fbFetchPixel_x4b4g4r4 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_a8 (pixman_image_t *image,
-		 const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a8 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t   pixel = READ((uint8_t *) bits + offset);
     
     return pixel << 24;
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_r3g3b2 (pixman_image_t *image,
-		     const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_r3g3b2 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t   pixel = READ((uint8_t *) bits + offset);
     
     r = ((pixel & 0xe0) | ((pixel & 0xe0) >> 3) | ((pixel & 0xc0) >> 6)) << 16;
@@ -931,10 +921,10 @@ fbFetchPixel_r3g3b2 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_b2g3r3 (pixman_image_t *image,
-		     const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_b2g3r3 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t   pixel = READ((uint8_t *) bits + offset);
     
     b = (((pixel & 0xc0)     ) |
@@ -949,10 +939,10 @@ fbFetchPixel_b2g3r3 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_a2r2g2b2 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a2r2g2b2 (bits_image_t *pict, int offset, int line)
 {
     uint32_t   a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t   pixel = READ((uint8_t *) bits + offset);
     
     a = ((pixel & 0xc0) * 0x55) << 18;
@@ -963,10 +953,10 @@ fbFetchPixel_a2r2g2b2 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_a2b2g2r2 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a2b2g2r2 (bits_image_t *pict, int offset, int line)
 {
     uint32_t   a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t   pixel = READ((uint8_t *) bits + offset);
     
     a = ((pixel & 0xc0) * 0x55) << 18;
@@ -977,26 +967,27 @@ fbFetchPixel_a2b2g2r2 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_c8 (pixman_image_t *image,
-		 const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_c8 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t   pixel = READ((uint8_t *) bits + offset);
+    const pixman_indexed_t * indexed = pict->indexed;
     return indexed->rgba[pixel];
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_x4a4 (pixman_image_t *image,
-		   const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_x4a4 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t   pixel = READ((uint8_t *) bits + offset);
     
     return ((pixel & 0xf) | ((pixel & 0xf) << 4)) << 24;
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_a4 (pixman_image_t *image,
-		 const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a4 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = Fetch4(bits, offset);
     
     pixel |= pixel << 4;
@@ -1004,10 +995,10 @@ fbFetchPixel_a4 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_r1g2b1 (pixman_image_t *image,
-		     const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_r1g2b1 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = Fetch4(bits, offset);
     
     r = ((pixel & 0x8) * 0xff) << 13;
@@ -1017,10 +1008,10 @@ fbFetchPixel_r1g2b1 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_b1g2r1 (pixman_image_t *image,
-		     const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_b1g2r1 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = Fetch4(bits, offset);
     
     b = ((pixel & 0x8) * 0xff) >> 3;
@@ -1030,10 +1021,10 @@ fbFetchPixel_b1g2r1 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_a1r1g1b1 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a1r1g1b1 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = Fetch4(bits, offset);
     
     a = ((pixel & 0x8) * 0xff) << 21;
@@ -1044,10 +1035,10 @@ fbFetchPixel_a1r1g1b1 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_a1b1g1r1 (pixman_image_t *image,
-		       const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a1b1g1r1 (bits_image_t *pict, int offset, int line)
 {
     uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = Fetch4(bits, offset);
     
     a = ((pixel & 0x8) * 0xff) << 21;
@@ -1058,19 +1049,20 @@ fbFetchPixel_a1b1g1r1 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_c4 (pixman_image_t *image,
-		 const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_c4 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = Fetch4(bits, offset);
+    const pixman_indexed_t * indexed = pict->indexed;
     
     return indexed->rgba[pixel];
 }
 
 
 static FASTCALL uint32_t
-fbFetchPixel_a1 (pixman_image_t *image,
-		 const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_a1 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t  pixel = READ(bits + (offset >> 5));
     uint32_t  a;
 #if BITMAP_BIT_ORDER == MSBFirst
@@ -1086,10 +1078,11 @@ fbFetchPixel_a1 (pixman_image_t *image,
 }
 
 static FASTCALL uint32_t
-fbFetchPixel_g1 (pixman_image_t *image,
-		 const uint32_t *bits, int offset, const pixman_indexed_t * indexed)
+fbFetchPixel_g1 (bits_image_t *pict, int offset, int line)
 {
+    uint32_t *bits = pict->bits + line*pict->rowstride;
     uint32_t pixel = READ(bits + (offset >> 5));
+    const pixman_indexed_t * indexed = pict->indexed;
     uint32_t a;
 #if BITMAP_BIT_ORDER == MSBFirst
     a = pixel >> (0x1f - (offset & 0x1f));
@@ -1152,6 +1145,7 @@ static fetchPixelProc fetchPixelProcForPicture (bits_image_t * pict)
     return NULL;
 }
 
+#undef image
 
 
 /*
@@ -2859,15 +2853,11 @@ static CombineFuncC pixman_fbCombineFuncC[] = {
 
 static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
-    uint32_t *bits;
     uint32_t color;
     uint32_t *end;
     fetchPixelProc fetch = fetchPixelProcForPicture(pict);
-    const pixman_indexed_t * indexed = pict->indexed;
-    
-    bits = pict->bits;
     
-    color = fetch((pixman_image_t *)pict, bits, 0, indexed);
+    color = fetch(pict, 0, 0);
     
     end = buffer + width;
     while (buffer < end)
@@ -2877,17 +2867,9 @@ static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t
 
 static void fbFetch(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
-    uint32_t *bits;
-    uint32_t stride;
     fetchProc fetch = fetchProcForPicture(pict);
-    const pixman_indexed_t * indexed = pict->indexed;
-    
-    bits = pict->bits;
-    stride = pict->rowstride;
-    
-    bits += y*stride;
     
-    fetch((pixman_image_t *)pict, bits, x, width, buffer, indexed);
+    fetch(pict, x, y, width, buffer);
 }
 
 #ifdef PIXMAN_FB_ACCESSORS
@@ -3564,7 +3546,6 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
     pixman_vector_t  unit;
     int         i;
     pixman_box16_t box;
-    const pixman_indexed_t * indexed = pict->indexed;
     pixman_bool_t affine = TRUE;
     
     fetch = fetchPixelProcForPicture(pict);
@@ -3614,7 +3595,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 				y = MOD(v.vector[1]>>16, pict->height);
 				x = MOD(v.vector[0]>>16, pict->width);
 			    }
-			    *(buffer + i) = fetch((pixman_image_t *)pict, bits + y * stride, x, indexed);
+			    *(buffer + i) = fetch(pict, x, y);
 			}
 		    }
 		    
@@ -3637,7 +3618,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 				x = MOD(v.vector[0]>>16, pict->width);
 			    }
 			    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box))
-				*(buffer + i) = fetch ((pixman_image_t *)pict, bits + y*stride, x, indexed);
+				*(buffer + i) = fetch (pict, x, y);
 			    else
 				*(buffer + i) = 0;
 			}
@@ -3665,7 +3646,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 				x = v.vector[0]>>16;
 			    }
 			    *(buffer + i) = ((x < box.x1) | (x >= box.x2) | (y < box.y1) | (y >= box.y2)) ?
-				0 : fetch((pixman_image_t *)pict, bits + (y)*stride, x, indexed);
+				0 : fetch(pict, x, y);
 			}
 		    }
                     v.vector[0] += unit.vector[0];
@@ -3687,7 +3668,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 				x = v.vector[0]>>16;
 			    }
 			    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box))
-				*(buffer + i) = fetch((pixman_image_t *)pict, bits + y*stride, x, indexed);
+				*(buffer + i) = fetch(pict, x, y);
 			    else
 				*(buffer + i) = 0;
 			}
@@ -3717,7 +3698,6 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    *(buffer + i) = 0;
 			} else {
 			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
-			    uint32_t *b;
 			    uint32_t tl, tr, bl, br, r;
 			    uint32_t ft, fb;
 			    
@@ -3746,13 +3726,10 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    y1 = MOD (y1, pict->height);
 			    y2 = MOD (y2, pict->height);
 			    
-			    b = bits + y1*stride;
-			    
-			    tl = fetch((pixman_image_t *)pict, b, x1, indexed);
-			    tr = fetch((pixman_image_t *)pict, b, x2, indexed);
-			    b = bits + y2*stride;
-			    bl = fetch((pixman_image_t *)pict, b, x1, indexed);
-			    br = fetch((pixman_image_t *)pict, b, x2, indexed);
+			    tl = fetch(pict, x1, y1);
+			    tr = fetch(pict, x2, y1);
+			    bl = fetch(pict, x1, y2);
+			    br = fetch(pict, x2, y2);
 			    
 			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
@@ -3781,7 +3758,6 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    *(buffer + i) = 0;
 			} else {
 			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
-			    uint32_t *b;
 			    uint32_t tl, tr, bl, br, r;
 			    uint32_t ft, fb;
 			    
@@ -3810,17 +3786,14 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    y1 = MOD (y1, pict->height);
 			    y2 = MOD (y2, pict->height);
 			    
-			    b = bits + y1*stride;
-			    
 			    tl = pixman_region_contains_point(pict->common.src_clip, x1, y1, &box)
-				? fetch((pixman_image_t *)pict, b, x1, indexed) : 0;
+				? fetch(pict, x1, y1) : 0;
 			    tr = pixman_region_contains_point(pict->common.src_clip, x2, y1, &box)
-				? fetch((pixman_image_t *)pict, b, x2, indexed) : 0;
-			    b = bits + (y2)*stride;
+				? fetch(pict, x2, y1) : 0;
 			    bl = pixman_region_contains_point(pict->common.src_clip, x1, y2, &box)
-				? fetch((pixman_image_t *)pict, b, x1, indexed) : 0;
+				? fetch(pict, x1, y2) : 0;
 			    br = pixman_region_contains_point(pict->common.src_clip, x2, y2, &box)
-				? fetch((pixman_image_t *)pict, b, x2, indexed) : 0;
+				? fetch(pict, x2, y2) : 0;
 			    
 			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
@@ -3852,8 +3825,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			if (!v.vector[2]) {
 			    *(buffer + i) = 0;
 			} else {
-			    int x1, x2, y1, y2, distx, idistx, disty, idisty, x_off;
-			    uint32_t *b;
+			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 			    uint32_t tl, tr, bl, br, r;
 			    pixman_bool_t x1_out, x2_out, y1_out, y2_out;
 			    uint32_t ft, fb;
@@ -3878,19 +3850,15 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    idistx = 256 - distx;
 			    idisty = 256 - disty;
 			    
-			    b = bits + (y1)*stride;
-			    x_off = x1;
-			    
 			    x1_out = (x1 < box.x1) | (x1 >= box.x2);
 			    x2_out = (x2 < box.x1) | (x2 >= box.x2);
 			    y1_out = (y1 < box.y1) | (y1 >= box.y2);
 			    y2_out = (y2 < box.y1) | (y2 >= box.y2);
 			    
-			    tl = x1_out|y1_out ? 0 : fetch((pixman_image_t *)pict, b, x_off, indexed);
-			    tr = x2_out|y1_out ? 0 : fetch((pixman_image_t *)pict, b, x_off + 1, indexed);
-			    b += stride;
-			    bl = x1_out|y2_out ? 0 : fetch((pixman_image_t *)pict, b, x_off, indexed);
-			    br = x2_out|y2_out ? 0 : fetch((pixman_image_t *)pict, b, x_off + 1, indexed);
+			    tl = x1_out|y1_out ? 0 : fetch(pict, x1, y1);
+			    tr = x2_out|y1_out ? 0 : fetch(pict, x2, y1);
+			    bl = x1_out|y2_out ? 0 : fetch(pict, x1, y2);
+			    br = x2_out|y2_out ? 0 : fetch(pict, x2, y2);
 			    
 			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
@@ -3919,8 +3887,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			if (!v.vector[2]) {
 			    *(buffer + i) = 0;
 			} else {
-			    int x1, x2, y1, y2, distx, idistx, disty, idisty, x_off;
-			    uint32_t *b;
+			    int x1, x2, y1, y2, distx, idistx, disty, idisty;
 			    uint32_t tl, tr, bl, br, r;
 			    uint32_t ft, fb;
 			    
@@ -3944,18 +3911,14 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    idistx = 256 - distx;
 			    idisty = 256 - disty;
 			    
-			    b = bits + (y1)*stride;
-			    x_off = x1;
-			    
 			    tl = pixman_region_contains_point(pict->common.src_clip, x1, y1, &box)
-				? fetch((pixman_image_t *)pict, b, x_off, indexed) : 0;
+				? fetch(pict, x1, y1) : 0;
 			    tr = pixman_region_contains_point(pict->common.src_clip, x2, y1, &box)
-				? fetch((pixman_image_t *)pict, b, x_off + 1, indexed) : 0;
-			    b += stride;
+				? fetch(pict, x2, y1) : 0;
 			    bl = pixman_region_contains_point(pict->common.src_clip, x1, y2, &box)
-				? fetch((pixman_image_t *)pict, b, x_off, indexed) : 0;
+				? fetch(pict, x1, y2) : 0;
 			    br = pixman_region_contains_point(pict->common.src_clip, x2, y2, &box)
-				? fetch((pixman_image_t *)pict, b, x_off + 1, indexed) : 0;
+				? fetch(pict, x2, y2) : 0;
 			    
 			    ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
 			    fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
@@ -4017,8 +3980,7 @@ static void fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uin
 			    if (*p) {
 				int tx = (pict->common.repeat == PIXMAN_REPEAT_NORMAL) ? MOD (x, pict->width) : x;
 				if (pixman_region_contains_point (pict->common.src_clip, tx, ty, &box)) {
-				    uint32_t *b = bits + (ty)*stride;
-				    uint32_t c = fetch((pixman_image_t *)pict, b, tx, indexed);
+				    uint32_t c = fetch(pict, tx, ty);
 				    
 				    srtot += Red(c) * *p;
 				    sgtot += Green(c) * *p;


More information about the xorg-commit mailing list