pixman: Branch 'master' - 10 commits

Thu Oct 4 00:47:05 PDT 2012

.gitignore                       |    4 
 configure.ac                     |    6 
 demos/radial-test.c              |    2 
 pixman/Makefile.am               |    4 
 pixman/Makefile.sources          |   20 
 pixman/make-combine.pl           |   86 -
 pixman/pixman-access.c           |  750 ++++++-----
 pixman/pixman-bits-image.c       |  121 -
 pixman/pixman-combine-float.c    | 1003 +++++++++++++++
 pixman/pixman-combine.c.template | 2437 --------------------------------------
 pixman/pixman-combine.h.template |  226 ---
 pixman/pixman-combine32.c        | 2460 +++++++++++++++++++++++++++++++++++++++
 pixman/pixman-combine32.h        |  225 +++
 pixman/pixman-conical-gradient.c |    3 
 pixman/pixman-general.c          |   12 
 pixman/pixman-implementation.c   |    4 
 pixman/pixman-linear-gradient.c  |    3 
 pixman/pixman-noop.c             |    6 
 pixman/pixman-private.h          |   79 -
 pixman/pixman-radial-gradient.c  |    3 
 pixman/pixman-solid-fill.c       |   25 
 pixman/pixman-utils.c            |  198 +--
 test/Makefile.sources            |    1 
 test/blitters-test.c             |    4 
 test/combiner-test.c             |  151 ++
 test/glyph-test.c                |    7 
 test/gradient-crash-test.c       |    2 
 test/pdf-op-test.c               |    2 
 test/stress-test.c               |    2 
 test/utils.c                     |   14 
 test/utils.h                     |    2 
 31 files changed, 4513 insertions(+), 3349 deletions(-)

New commits:
commit ec7aa11a6e4d0d02df9b339dfce9460dce954602
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Sun Sep 23 03:52:34 2012 -0400

    Speed up pixman_expand_to_float()
    
    GCC doesn't move the divisions out of the loop, so do it manually by
    looking up the four (1.0f / mask) values in a table. Table lookups are
    used under the theory that one L2 hit plus three L1 hits is preferable
    to four floating point divisions.

diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
index 551f3f9..b1e9fb6 100644
--- a/pixman/pixman-utils.c
+++ b/pixman/pixman-utils.c
@@ -111,8 +111,28 @@ pixman_expand_to_float (argb_t               *dst,
 			pixman_format_code_t  format,
 			int                   width)
 {
+    static const float multipliers[16] = {
+	0.0f,
+	1.0f / ((1 <<  1) - 1),
+	1.0f / ((1 <<  2) - 1),
+	1.0f / ((1 <<  3) - 1),
+	1.0f / ((1 <<  4) - 1),
+	1.0f / ((1 <<  5) - 1),
+	1.0f / ((1 <<  6) - 1),
+	1.0f / ((1 <<  7) - 1),
+	1.0f / ((1 <<  8) - 1),
+	1.0f / ((1 <<  9) - 1),
+	1.0f / ((1 << 10) - 1),
+	1.0f / ((1 << 11) - 1),
+	1.0f / ((1 << 12) - 1),
+	1.0f / ((1 << 13) - 1),
+	1.0f / ((1 << 14) - 1),
+	1.0f / ((1 << 15) - 1),
+    };
     int a_size, r_size, g_size, b_size;
     int a_shift, r_shift, g_shift, b_shift;
+    float a_mul, r_mul, g_mul, b_mul;
+    uint32_t a_mask, r_mask, g_mask, b_mask;
     int i;
 
     if (!PIXMAN_FORMAT_VIS (format))
@@ -132,6 +152,16 @@ pixman_expand_to_float (argb_t               *dst,
     g_shift = 16 - g_size;
     b_shift =  8 - b_size;
 
+    a_mask = ((1 << a_size) - 1);
+    r_mask = ((1 << r_size) - 1);
+    g_mask = ((1 << g_size) - 1);
+    b_mask = ((1 << b_size) - 1);
+
+    a_mul = multipliers[a_size];
+    r_mul = multipliers[r_size];
+    g_mul = multipliers[g_size];
+    b_mul = multipliers[b_size];
+
     /* Start at the end so that we can do the expansion in place
      * when src == dst
      */
@@ -139,10 +169,10 @@ pixman_expand_to_float (argb_t               *dst,
     {
 	const uint32_t pixel = src[i];
 
-	dst[i].a = a_size? unorm_to_float (pixel >> a_shift, a_size) : 1.0;
-	dst[i].r = r_size? unorm_to_float (pixel >> r_shift, r_size) : 0.0;
-	dst[i].g = g_size? unorm_to_float (pixel >> g_shift, g_size) : 0.0;
-	dst[i].b = b_size? unorm_to_float (pixel >> b_shift, b_size) : 0.0;
+	dst[i].a = a_mask? ((pixel >> a_shift) & a_mask) * a_mul : 1.0f;
+	dst[i].r = ((pixel >> r_shift) & r_mask) * r_mul;
+	dst[i].g = ((pixel >> g_shift) & g_mask) * g_mul;
+	dst[i].b = ((pixel >> b_shift) & b_mask) * b_mul;
     }
 }
 
commit 8ccda2be30adf9dfcc3087b38a5062258324dcce
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Fri Sep 21 18:36:16 2012 -0400

    Don't auto-generate pixman-combine32.[ch] anymore
    
    Since pixman-combine64.[ch] are not used anymore, there is no point
    generating these files from pixman-combine.[ch].template.
    
    Also get rid of dependency on perl in configure.ac.

diff --git a/.gitignore b/.gitignore
index a67da1d..2d089fc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,8 +39,6 @@ demos/screen-test
 demos/srgb-test
 demos/trap-test
 demos/tri-test
-pixman/pixman-combine32.c
-pixman/pixman-combine32.h
 pixman/pixman-srgb.c
 pixman/pixman-version.h
 test/a1-trap-test
diff --git a/configure.ac b/configure.ac
index 5fda547..c069b48 100644
--- a/configure.ac
+++ b/configure.ac
@@ -182,12 +182,6 @@ AC_SUBST(LT_VERSION_INFO)
 PIXMAN_CHECK_CFLAG([-Wall])
 PIXMAN_CHECK_CFLAG([-fno-strict-aliasing])
 
-AC_PATH_PROG(PERL, perl, no)
-if test "x$PERL" = xno; then
-    AC_MSG_ERROR([Perl is required to build pixman.])
-fi
-AC_SUBST(PERL)
-
 dnl =========================================================================
 dnl OpenMP for the test suite?
 dnl
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 3060569..df53a69 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -12,9 +12,6 @@ noinst_LTLIBRARIES =
 
 EXTRA_DIST =				\
 	Makefile.win32			\
-	make-combine.pl			\
-	pixman-combine.c.template	\
-	pixman-combine.h.template	\
 	pixman-region.c			\
 	solaris-hwcap.mapfile		\
 	$(NULL)
diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources
index 5be288d..852a007 100644
--- a/pixman/Makefile.sources
+++ b/pixman/Makefile.sources
@@ -44,8 +44,3 @@ BUILT_SOURCES =				\
 	pixman-combine32.c		\
 	pixman-combine32.h		\
 	$(NULL)
-
-pixman-combine32.c: pixman-combine.c.template make-combine.pl
-	$(PERL) $(lastword $+) 8 < $< > $@ || ($(RM) $@; exit 1)
-pixman-combine32.h: pixman-combine.h.template make-combine.pl
-	$(PERL) $(lastword $+) 8 < $< > $@ || ($(RM) $@; exit 1)
diff --git a/pixman/make-combine.pl b/pixman/make-combine.pl
deleted file mode 100644
index 210a5da..0000000
--- a/pixman/make-combine.pl
+++ /dev/null
@@ -1,86 +0,0 @@
-$usage = "Usage: combine.pl { 8 | 16 } < pixman-combine.c.template";
-
-$#ARGV == 0 or die $usage;
-
-# Get the component size.
-$size = int($ARGV[0]);
-$size == 8 or $size == 16 or die $usage;
-
-$pixel_size = $size * 4;
-$half_pixel_size = $size * 2;
-
-sub mask {
-    my $str = shift;
-    my $suffix;
-    $suffix = "ULL" if $size > 8;
-
-    return "0x" . $str . $suffix;
-}
-
-# Generate mask strings.
-$nibbles = $size / 4;
-$mask = "f" x $nibbles;
-$zero_mask = "0" x $nibbles;
-$one_half = "8" . "0" x ($nibbles - 1);
-
-print "/* WARNING: This file is generated by combine.pl from combine.inc.\n";
-print "   Please edit one of those files rather than this one. */\n";
-print "\n";
-
-print "#line 1 \"pixman-combine.c.template\"\n";
-
-$mask_ = mask($mask);
-$one_half_ = mask($one_half);
-$g_mask = mask($mask . $zero_mask);
-$b_mask = mask($mask . $zero_mask x 2);
-$a_mask = mask($mask . $zero_mask x 3);
-$rb_mask = mask($mask . $zero_mask . $mask);
-$ag_mask = mask($mask . $zero_mask . $mask . $zero_mask);
-$rb_one_half = mask($one_half . $zero_mask . $one_half);
-$rb_mask_plus_one = mask("1" . $zero_mask x 2 . "1" .  $zero_mask);
-
-while (<STDIN>) {
-    # Mask and 1/2 value for a single component.
-    s/#define COMPONENT_SIZE\b/$& $size/;
-    s/#define MASK\b/$& $mask_/;
-    s/#define ONE_HALF\b/$& $one_half_/;
-
-    # Shifts and masks for green, blue, and alpha.
-    s/#define G_SHIFT\b/$& $size/;
-    s/#define R_SHIFT\b/$& $size * 2/;
-    s/#define A_SHIFT\b/$& $size * 3/;
-    s/#define G_MASK\b/$& $g_mask/;
-    s/#define R_MASK\b/$& $b_mask/;
-    s/#define A_MASK\b/$& $a_mask/;
-
-    # Special values for dealing with red + blue at the same time.
-    s/#define RB_MASK\b/$& $rb_mask/;
-    s/#define AG_MASK\b/$& $ag_mask/;
-    s/#define RB_ONE_HALF\b/$& $rb_one_half/;
-    s/#define RB_MASK_PLUS_ONE\b/$& $rb_mask_plus_one/;
-
-    # Add 32/64 suffix to combining function types.
-    s/\bCombineFunc\b/CombineFunc$pixel_size/;
-    s/\bFbComposeFunctions\b/FbComposeFunctions$pixel_size/;
-    s/combine_width/combine_$pixel_size/;
-    s/_pixman_setup_combiner_functions_width/_pixman_setup_combiner_functions_$pixel_size/;
-    s/UNc/UN$size/g;
-    s/ALPHA_c/ALPHA_$size/g;
-    s/RED_c/RED_$size/g;
-    s/GREEN_c/GREEN_$size/g;
-    s/BLUE_c/BLUE_$size/g;
-
-    # Convert comp*_t values into the appropriate real types.
-    s/comp1_t/uint${size}_t/g;
-    s/comp2_t/uint${half_pixel_size}_t/g;
-    s/comp4_t/uint${pixel_size}_t/g;
-
-    # Change the function table name for the 64-bit version.
-    s/pixman_composeFunctions/pixman_composeFunctions64/ if $size == 16;
-
-    # Change the header for the 64-bit version
-    s/pixman-combine.h/pixman-combine64.h/ if $size == 16;
-    s/pixman-combine.h/pixman-combine32.h/ if $size == 8;
-
-    print;
-}
diff --git a/pixman/pixman-combine.c.template b/pixman/pixman-combine.c.template
deleted file mode 100644
index f405312..0000000
--- a/pixman/pixman-combine.c.template
+++ /dev/null
@@ -1,2437 +0,0 @@
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-#include <string.h>
-
-#include "pixman-private.h"
-#include "pixman-combine.h"
-
-/* component alpha helper functions */
-
-static void
-combine_mask_ca (comp4_t *src, comp4_t *mask)
-{
-    comp4_t a = *mask;
-
-    comp4_t x;
-    comp2_t xa;
-
-    if (!a)
-    {
-	*(src) = 0;
-	return;
-    }
-
-    x = *(src);
-    if (a == ~0)
-    {
-	x = x >> A_SHIFT;
-	x |= x << G_SHIFT;
-	x |= x << R_SHIFT;
-	*(mask) = x;
-	return;
-    }
-
-    xa = x >> A_SHIFT;
-    UNcx4_MUL_UNcx4 (x, a);
-    *(src) = x;
-    
-    UNcx4_MUL_UNc (a, xa);
-    *(mask) = a;
-}
-
-static void
-combine_mask_value_ca (comp4_t *src, const comp4_t *mask)
-{
-    comp4_t a = *mask;
-    comp4_t x;
-
-    if (!a)
-    {
-	*(src) = 0;
-	return;
-    }
-
-    if (a == ~0)
-	return;
-
-    x = *(src);
-    UNcx4_MUL_UNcx4 (x, a);
-    *(src) = x;
-}
-
-static void
-combine_mask_alpha_ca (const comp4_t *src, comp4_t *mask)
-{
-    comp4_t a = *(mask);
-    comp4_t x;
-
-    if (!a)
-	return;
-
-    x = *(src) >> A_SHIFT;
-    if (x == MASK)
-	return;
-
-    if (a == ~0)
-    {
-	x |= x << G_SHIFT;
-	x |= x << R_SHIFT;
-	*(mask) = x;
-	return;
-    }
-
-    UNcx4_MUL_UNc (a, x);
-    *(mask) = a;
-}
-
-/*
- * There are two ways of handling alpha -- either as a single unified value or
- * a separate value for each component, hence each macro must have two
- * versions.  The unified alpha version has a 'u' at the end of the name,
- * the component version has a 'ca'.  Similarly, functions which deal with
- * this difference will have two versions using the same convention.
- */
-
-static force_inline comp4_t
-combine_mask (const comp4_t *src, const comp4_t *mask, int i)
-{
-    comp4_t s, m;
-
-    if (mask)
-    {
-	m = *(mask + i) >> A_SHIFT;
-
-	if (!m)
-	    return 0;
-    }
-
-    s = *(src + i);
-
-    if (mask)
-	UNcx4_MUL_UNc (s, m);
-
-    return s;
-}
-
-static void
-combine_clear (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               comp4_t *                dest,
-               const comp4_t *          src,
-               const comp4_t *          mask,
-               int                      width)
-{
-    memset (dest, 0, width * sizeof(comp4_t));
-}
-
-static void
-combine_dst (pixman_implementation_t *imp,
-	     pixman_op_t	      op,
-	     comp4_t *		      dest,
-	     const comp4_t *	      src,
-	     const comp4_t *          mask,
-	     int		      width)
-{
-    return;
-}
-
-static void
-combine_src_u (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               comp4_t *                dest,
-               const comp4_t *          src,
-               const comp4_t *          mask,
-               int                      width)
-{
-    int i;
-
-    if (!mask)
-    {
-	memcpy (dest, src, width * sizeof (comp4_t));
-    }
-    else
-    {
-	for (i = 0; i < width; ++i)
-	{
-	    comp4_t s = combine_mask (src, mask, i);
-
-	    *(dest + i) = s;
-	}
-    }
-}
-
-static void
-combine_over_u (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                comp4_t *                dest,
-                const comp4_t *          src,
-                const comp4_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t d = *(dest + i);
-	comp4_t ia = ALPHA_c (~s);
-
-	UNcx4_MUL_UNc_ADD_UNcx4 (d, ia, s);
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_over_reverse_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        comp4_t *                dest,
-                        const comp4_t *          src,
-                        const comp4_t *          mask,
-                        int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t d = *(dest + i);
-	comp4_t ia = ALPHA_c (~*(dest + i));
-	UNcx4_MUL_UNc_ADD_UNcx4 (s, ia, d);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_in_u (pixman_implementation_t *imp,
-              pixman_op_t              op,
-              comp4_t *                dest,
-              const comp4_t *          src,
-              const comp4_t *          mask,
-              int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t a = ALPHA_c (*(dest + i));
-	UNcx4_MUL_UNc (s, a);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_in_reverse_u (pixman_implementation_t *imp,
-                      pixman_op_t              op,
-                      comp4_t *                dest,
-                      const comp4_t *          src,
-                      const comp4_t *          mask,
-                      int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t d = *(dest + i);
-	comp4_t a = ALPHA_c (s);
-	UNcx4_MUL_UNc (d, a);
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_out_u (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               comp4_t *                dest,
-               const comp4_t *          src,
-               const comp4_t *          mask,
-               int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t a = ALPHA_c (~*(dest + i));
-	UNcx4_MUL_UNc (s, a);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_out_reverse_u (pixman_implementation_t *imp,
-                       pixman_op_t              op,
-                       comp4_t *                dest,
-                       const comp4_t *          src,
-                       const comp4_t *          mask,
-                       int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t d = *(dest + i);
-	comp4_t a = ALPHA_c (~s);
-	UNcx4_MUL_UNc (d, a);
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_atop_u (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                comp4_t *                dest,
-                const comp4_t *          src,
-                const comp4_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t d = *(dest + i);
-	comp4_t dest_a = ALPHA_c (d);
-	comp4_t src_ia = ALPHA_c (~s);
-
-	UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_a, d, src_ia);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_atop_reverse_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        comp4_t *                dest,
-                        const comp4_t *          src,
-                        const comp4_t *          mask,
-                        int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t d = *(dest + i);
-	comp4_t src_a = ALPHA_c (s);
-	comp4_t dest_ia = ALPHA_c (~d);
-
-	UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_ia, d, src_a);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_xor_u (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               comp4_t *                dest,
-               const comp4_t *          src,
-               const comp4_t *          mask,
-               int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t d = *(dest + i);
-	comp4_t src_ia = ALPHA_c (~s);
-	comp4_t dest_ia = ALPHA_c (~d);
-
-	UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_ia, d, src_ia);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_add_u (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               comp4_t *                dest,
-               const comp4_t *          src,
-               const comp4_t *          mask,
-               int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t d = *(dest + i);
-	UNcx4_ADD_UNcx4 (d, s);
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_saturate_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    comp4_t *                dest,
-                    const comp4_t *          src,
-                    const comp4_t *          mask,
-                    int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t d = *(dest + i);
-	comp2_t sa, da;
-
-	sa = s >> A_SHIFT;
-	da = ~d >> A_SHIFT;
-	if (sa > da)
-	{
-	    sa = DIV_UNc (da, sa);
-	    UNcx4_MUL_UNc (s, sa);
-	}
-	;
-	UNcx4_ADD_UNcx4 (d, s);
-	*(dest + i) = d;
-    }
-}
-
-/*
- * PDF blend modes:
- * The following blend modes have been taken from the PDF ISO 32000
- * specification, which at this point in time is available from
- * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
- * The relevant chapters are 11.3.5 and 11.3.6.
- * The formula for computing the final pixel color given in 11.3.6 is:
- * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
- * with B() being the blend function.
- * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
- *
- * These blend modes should match the SVG filter draft specification, as
- * it has been designed to mirror ISO 32000. Note that at the current point
- * no released draft exists that shows this, as the formulas have not been
- * updated yet after the release of ISO 32000.
- *
- * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
- * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
- * argument. Note that this implementation operates on premultiplied colors,
- * while the PDF specification does not. Therefore the code uses the formula
- * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
- */
-
-/*
- * Multiply
- * B(Dca, ad, Sca, as) = Dca.Sca
- */
-static void
-combine_multiply_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    comp4_t *                dest,
-                    const comp4_t *          src,
-                    const comp4_t *          mask,
-                    int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t d = *(dest + i);
-	comp4_t ss = s;
-	comp4_t src_ia = ALPHA_c (~s);
-	comp4_t dest_ia = ALPHA_c (~d);
-
-	UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (ss, dest_ia, d, src_ia);
-	UNcx4_MUL_UNcx4 (d, s);
-	UNcx4_ADD_UNcx4 (d, ss);
-
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_multiply_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     comp4_t *                dest,
-                     const comp4_t *          src,
-                     const comp4_t *          mask,
-                     int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t m = *(mask + i);
-	comp4_t s = *(src + i);
-	comp4_t d = *(dest + i);
-	comp4_t r = d;
-	comp4_t dest_ia = ALPHA_c (~d);
-
-	combine_mask_ca (&s, &m);
-
-	UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (r, ~m, s, dest_ia);
-	UNcx4_MUL_UNcx4 (d, s);
-	UNcx4_ADD_UNcx4 (r, d);
-
-	*(dest + i) = r;
-    }
-}
-
-#define PDF_SEPARABLE_BLEND_MODE(name)					\
-    static void								\
-    combine_ ## name ## _u (pixman_implementation_t *imp,		\
-			    pixman_op_t              op,		\
-                            comp4_t *                dest,		\
-			    const comp4_t *          src,		\
-			    const comp4_t *          mask,		\
-			    int                      width)		\
-    {									\
-	int i;								\
-	for (i = 0; i < width; ++i) {					\
-	    comp4_t s = combine_mask (src, mask, i);			\
-	    comp4_t d = *(dest + i);					\
-	    comp1_t sa = ALPHA_c (s);					\
-	    comp1_t isa = ~sa;						\
-	    comp1_t da = ALPHA_c (d);					\
-	    comp1_t ida = ~da;						\
-	    comp4_t result;						\
-									\
-	    result = d;							\
-	    UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (result, isa, s, ida);	\
-	    								\
-	    *(dest + i) = result +					\
-		(DIV_ONE_UNc (sa * (comp4_t)da) << A_SHIFT) +		\
-		(blend_ ## name (RED_c (d), da, RED_c (s), sa) << R_SHIFT) + \
-		(blend_ ## name (GREEN_c (d), da, GREEN_c (s), sa) << G_SHIFT) + \
-		(blend_ ## name (BLUE_c (d), da, BLUE_c (s), sa));	\
-	}								\
-    }									\
-    									\
-    static void								\
-    combine_ ## name ## _ca (pixman_implementation_t *imp,		\
-			     pixman_op_t              op,		\
-                             comp4_t *                dest,		\
-			     const comp4_t *          src,		\
-			     const comp4_t *          mask,		\
-			     int                     width)		\
-    {									\
-	int i;								\
-	for (i = 0; i < width; ++i) {					\
-	    comp4_t m = *(mask + i);					\
-	    comp4_t s = *(src + i);					\
-	    comp4_t d = *(dest + i);					\
-	    comp1_t da = ALPHA_c (d);					\
-	    comp1_t ida = ~da;						\
-	    comp4_t result;						\
-            								\
-	    combine_mask_ca (&s, &m);					\
-            								\
-	    result = d;							\
-	    UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (result, ~m, s, ida);     \
-            								\
-	    result +=							\
-	        (DIV_ONE_UNc (ALPHA_c (m) * (comp4_t)da) << A_SHIFT) +	\
-	        (blend_ ## name (RED_c (d), da, RED_c (s), RED_c (m)) << R_SHIFT) + \
-	        (blend_ ## name (GREEN_c (d), da, GREEN_c (s), GREEN_c (m)) << G_SHIFT) + \
-	        (blend_ ## name (BLUE_c (d), da, BLUE_c (s), BLUE_c (m))); \
-	    								\
-	    *(dest + i) = result;					\
-	}								\
-    }
-
-/*
- * Screen
- * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca
- */
-static inline comp4_t
-blend_screen (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
-    return DIV_ONE_UNc (sca * da + dca * sa - sca * dca);
-}
-
-PDF_SEPARABLE_BLEND_MODE (screen)
-
-/*
- * Overlay
- * B(Dca, Da, Sca, Sa) =
- *   if 2.Dca < Da
- *     2.Sca.Dca
- *   otherwise
- *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
- */
-static inline comp4_t
-blend_overlay (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
-    comp4_t rca;
-
-    if (2 * dca < da)
-	rca = 2 * sca * dca;
-    else
-	rca = sa * da - 2 * (da - dca) * (sa - sca);
-    return DIV_ONE_UNc (rca);
-}
-
-PDF_SEPARABLE_BLEND_MODE (overlay)
-
-/*
- * Darken
- * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa)
- */
-static inline comp4_t
-blend_darken (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
-    comp4_t s, d;
-
-    s = sca * da;
-    d = dca * sa;
-    return DIV_ONE_UNc (s > d ? d : s);
-}
-
-PDF_SEPARABLE_BLEND_MODE (darken)
-
-/*
- * Lighten
- * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa)
- */
-static inline comp4_t
-blend_lighten (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
-    comp4_t s, d;
-
-    s = sca * da;
-    d = dca * sa;
-    return DIV_ONE_UNc (s > d ? s : d);
-}
-
-PDF_SEPARABLE_BLEND_MODE (lighten)
-
-/*
- * Color dodge
- * B(Dca, Da, Sca, Sa) =
- *   if Dca == 0
- *     0
- *   if Sca == Sa
- *     Sa.Da
- *   otherwise
- *     Sa.Da. min (1, Dca / Da / (1 - Sca/Sa))
- */
-static inline comp4_t
-blend_color_dodge (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
-    if (sca >= sa)
-    {
-	return dca == 0 ? 0 : DIV_ONE_UNc (sa * da);
-    }
-    else
-    {
-	comp4_t rca = dca * sa / (sa - sca);
-	return DIV_ONE_UNc (sa * MIN (rca, da));
-    }
-}
-
-PDF_SEPARABLE_BLEND_MODE (color_dodge)
-
-/*
- * Color burn
- * B(Dca, Da, Sca, Sa) =
- *   if Dca == Da
- *     Sa.Da
- *   if Sca == 0
- *     0
- *   otherwise
- *     Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca))
- */
-static inline comp4_t
-blend_color_burn (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
-    if (sca == 0)
-    {
-	return dca < da ? 0 : DIV_ONE_UNc (sa * da);
-    }
-    else
-    {
-	comp4_t rca = (da - dca) * sa / sca;
-	return DIV_ONE_UNc (sa * (MAX (rca, da) - rca));
-    }
-}
-
-PDF_SEPARABLE_BLEND_MODE (color_burn)
-
-/*
- * Hard light
- * B(Dca, Da, Sca, Sa) =
- *   if 2.Sca < Sa
- *     2.Sca.Dca
- *   otherwise
- *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
- */
-static inline comp4_t
-blend_hard_light (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
-    if (2 * sca < sa)
-	return DIV_ONE_UNc (2 * sca * dca);
-    else
-	return DIV_ONE_UNc (sa * da - 2 * (da - dca) * (sa - sca));
-}
-
-PDF_SEPARABLE_BLEND_MODE (hard_light)
-
-/*
- * Soft light
- * B(Dca, Da, Sca, Sa) =
- *   if (2.Sca <= Sa)
- *     Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa))
- *   otherwise if Dca.4 <= Da
- *     Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3)
- *   otherwise
- *     (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa))
- */
-static inline comp4_t
-blend_soft_light (comp4_t dca_org,
-		  comp4_t da_org,
-		  comp4_t sca_org,
-		  comp4_t sa_org)
-{
-    double dca = dca_org * (1.0 / MASK);
-    double da = da_org * (1.0 / MASK);
-    double sca = sca_org * (1.0 / MASK);
-    double sa = sa_org * (1.0 / MASK);
-    double rca;
-
-    if (2 * sca < sa)
-    {
-	if (da == 0)
-	    rca = dca * sa;
-	else
-	    rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da;
-    }
-    else if (da == 0)
-    {
-	rca = 0;
-    }
-    else if (4 * dca <= da)
-    {
-	rca = dca * sa +
-	    (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
-    }
-    else
-    {
-	rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa);
-    }
-    return rca * MASK + 0.5;
-}
-
-PDF_SEPARABLE_BLEND_MODE (soft_light)
-
-/*
- * Difference
- * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da)
- */
-static inline comp4_t
-blend_difference (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
-    comp4_t dcasa = dca * sa;
-    comp4_t scada = sca * da;
-
-    if (scada < dcasa)
-	return DIV_ONE_UNc (dcasa - scada);
-    else
-	return DIV_ONE_UNc (scada - dcasa);
-}
-
-PDF_SEPARABLE_BLEND_MODE (difference)
-
-/*
- * Exclusion
- * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca)
- */
-
-/* This can be made faster by writing it directly and not using
- * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */
-
-static inline comp4_t
-blend_exclusion (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
-    return DIV_ONE_UNc (sca * da + dca * sa - 2 * dca * sca);
-}
-
-PDF_SEPARABLE_BLEND_MODE (exclusion)
-
-#undef PDF_SEPARABLE_BLEND_MODE
-
-/*
- * PDF nonseperable blend modes are implemented using the following functions
- * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid
- * and min value of the red, green and blue components.
- *
- * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
- *
- * clip_color (C):
- *   l = LUM (C)
- *   min = Cmin
- *   max = Cmax
- *   if n < 0.0
- *     C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) )
- *   if x > 1.0
- *     C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) )
- *   return C
- *
- * set_lum (C, l):
- *   d = l – LUM (C)
- *   C += d
- *   return clip_color (C)
- *
- * SAT (C) = CH_MAX (C) - CH_MIN (C)
- *
- * set_sat (C, s):
- *  if Cmax > Cmin
- *    Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
- *    Cmax = s
- *  else
- *    Cmid = Cmax = 0.0
- *  Cmin = 0.0
- *  return C
- */
-
-/* For premultiplied colors, we need to know what happens when C is
- * multiplied by a real number. LUM and SAT are linear:
- *
- *    LUM (r × C) = r × LUM (C)		SAT (r * C) = r * SAT (C)
- *
- * If we extend clip_color with an extra argument a and change
- *
- *        if x >= 1.0
- *
- * into
- *
- *        if x >= a
- *
- * then clip_color is also linear:
- *
- *    r * clip_color (C, a) = clip_color (r_c, ra);
- *
- * for positive r.
- *
- * Similarly, we can extend set_lum with an extra argument that is just passed
- * on to clip_color:
- *
- *   r * set_lum ( C, l, a)
- *
- *   = r × clip_color ( C + l - LUM (C), a)
- *
- *   = clip_color ( r * C + r × l - r * LUM (C), r * a)
- *
- *   = set_lum ( r * C, r * l, r * a)
- *
- * Finally, set_sat:
- *
- *    r * set_sat (C, s) = set_sat (x * C, r * s)
- *
- * The above holds for all non-zero x, because the x'es in the fraction for
- * C_mid cancel out. Specifically, it holds for x = r:
- *
- *    r * set_sat (C, s) = set_sat (r_c, rs)
- *
- */
-
-/* So, for the non-separable PDF blend modes, we have (using s, d for
- * non-premultiplied colors, and S, D for premultiplied:
- *
- *   Color:
- *
- *     a_s * a_d * B(s, d)
- *   = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
- *   = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
- *
- *
- *   Luminosity:
- *
- *     a_s * a_d * B(s, d)
- *   = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
- *   = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
- *
- *
- *   Saturation:
- *
- *     a_s * a_d * B(s, d)
- *   = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
- *   = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
- *                                        a_s * LUM (D), a_s * a_d)
- *   = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d))
- *
- *   Hue:
- *
- *     a_s * a_d * B(s, d)
- *   = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
- *   = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
- *
- */
-
-#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2]))
-#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? c[1] : c[2]))
-#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100)
-#define SAT(c) (CH_MAX (c) - CH_MIN (c))
-
-/*
- * Generates the combiner combine_<name>_u for a non-separable blend mode
- * whose per-pixel work is done by blend_<name> ().
- *
- * For each of the "width" pixels the masked source s and the destination d
- * are split into an alpha value (sa, da) and three color channels (sc[],
- * dc[], in red, green, blue order).  blend_<name> () fills c[] with the
- * blended channels, and the pixel written back is
- *
- *     result
- *       + (DIV_ONE_UNc (sa * da) << A_SHIFT)
- *       + (DIV_ONE_UNc (c[0]) << R_SHIFT)
- *       + (DIV_ONE_UNc (c[1]) << G_SHIFT)
- *       +  DIV_ONE_UNc (c[2])
- *
- * where "result" starts as d and is updated by
- * UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (result, isa, s, ida) with isa = ~sa and
- * ida = ~da.
- *
- * NOTE(review): that macro and DIV_ONE_UNc are defined elsewhere;
- * presumably the former computes result * isa + s * ida per channel and
- * the latter divides by MASK -- confirm against their definitions.
- */
-#define PDF_NON_SEPARABLE_BLEND_MODE(name)				\
-    static void								\
-    combine_ ## name ## _u (pixman_implementation_t *imp,		\
-			    pixman_op_t op,				\
-                            comp4_t *dest,				\
-			    const comp4_t *src,				\
-			    const comp4_t *mask,			\
-			    int width)					\
-    {									\
-	int i;								\
-	for (i = 0; i < width; ++i)					\
-	{								\
-	    comp4_t s = combine_mask (src, mask, i);			\
-	    comp4_t d = *(dest + i);					\
-	    comp1_t sa = ALPHA_c (s);					\
-	    comp1_t isa = ~sa;						\
-	    comp1_t da = ALPHA_c (d);					\
-	    comp1_t ida = ~da;						\
-	    comp4_t result;						\
-	    comp4_t sc[3], dc[3], c[3];					\
-            								\
-	    result = d;							\
-	    UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (result, isa, s, ida);	\
-	    dc[0] = RED_c (d);						\
-	    sc[0] = RED_c (s);						\
-	    dc[1] = GREEN_c (d);					\
-	    sc[1] = GREEN_c (s);					\
-	    dc[2] = BLUE_c (d);						\
-	    sc[2] = BLUE_c (s);						\
-	    blend_ ## name (c, dc, da, sc, sa);				\
-            								\
-	    *(dest + i) = result +					\
-		(DIV_ONE_UNc (sa * (comp4_t)da) << A_SHIFT) +		\
-		(DIV_ONE_UNc (c[0]) << R_SHIFT) +			\
-		(DIV_ONE_UNc (c[1]) << G_SHIFT) +			\
-		(DIV_ONE_UNc (c[2]));					\
-	}								\
-    }
-
-/*
- * set_lum:
- * @dest: receives the three resulting channels
- * @src:  the three input channels
- * @sa:   upper bound for the clipped channels
- * @lum:  luminosity the result should have
- *
- * Shifts the channels of @src by a common offset so that their LUM ()
- * equals @lum, then clips them into [0, @sa] while keeping the
- * luminosity, and stores the result in @dest.
- *
- * All inputs are multiplied by 1.0 / MASK first and the computation is
- * done in double precision; the results are multiplied by MASK again on
- * the way out.  @src is read completely before @dest is written, so the
- * two may be the same array (as they are for every caller below).
- */
-static void
-set_lum (comp4_t dest[3], comp4_t src[3], comp4_t sa, comp4_t lum)
-{
-    double a, l, min, max;
-    double tmp[3];
-
-    a = sa * (1.0 / MASK);
-
-    l = lum * (1.0 / MASK);
-    tmp[0] = src[0] * (1.0 / MASK);
-    tmp[1] = src[1] * (1.0 / MASK);
-    tmp[2] = src[2] * (1.0 / MASK);
-
-    /* Offset every channel by the difference between the wanted and the
-     * current luminosity.
-     */
-    l = l - LUM (tmp);
-    tmp[0] += l;
-    tmp[1] += l;
-    tmp[2] += l;
-
-    /* clip_color */
-    l = LUM (tmp);
-    min = CH_MIN (tmp);
-    max = CH_MAX (tmp);
-
-    /* A channel dropped below 0: scale the distances from l so that the
-     * smallest channel lands on 0.
-     */
-    if (min < 0)
-    {
-	/* Guard against dividing by zero below */
-	if (l - min == 0.0)
-	{
-	    tmp[0] = 0;
-	    tmp[1] = 0;
-	    tmp[2] = 0;
-	}
-	else
-	{
-	    tmp[0] = l + (tmp[0] - l) * l / (l - min);
-	    tmp[1] = l + (tmp[1] - l) * l / (l - min);
-	    tmp[2] = l + (tmp[2] - l) * l / (l - min);
-	}
-    }
-    /* A channel exceeds a: scale the distances from l so that the largest
-     * channel lands on a.  Note that min and max are the values computed
-     * before the previous adjustment.
-     */
-    if (max > a)
-    {
-	/* Guard against dividing by zero below */
-	if (max - l == 0.0)
-	{
-	    tmp[0] = a;
-	    tmp[1] = a;
-	    tmp[2] = a;
-	}
-	else
-	{
-	    tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
-	    tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
-	    tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
-	}
-    }
-
-    /* Scale back up by MASK; the added 0.5 rounds to nearest when the
-     * double is converted (assuming comp4_t is an integer type).
-     */
-    dest[0] = tmp[0] * MASK + 0.5;
-    dest[1] = tmp[1] * MASK + 0.5;
-    dest[2] = tmp[2] * MASK + 0.5;
-}
-
-/*
- * set_sat:
- * @dest: channels that are rescaled in place
- * @src:  channels used to determine which is largest, middle, smallest
- * @sat:  value the largest channel is set to
- *
- * Sets the largest channel to @sat, the smallest to 0 and scales the
- * middle one proportionally.  If the largest channel is not greater than
- * the smallest, all three channels are set to 0.
- *
- * NOTE(review): the ordering is computed from @src, but the values are
- * read from and written to @dest.  That is only consistent when both
- * arguments are the same array, which holds for every caller below
- * (set_sat (c, c, ...)); check before calling with distinct arrays.
- */
-static void
-set_sat (comp4_t dest[3], comp4_t src[3], comp4_t sat)
-{
-    /* id[0], id[1], id[2]: index of the largest, middle, smallest channel */
-    int id[3];
-    comp4_t min, max;
-
-    if (src[0] > src[1])
-    {
-	if (src[0] > src[2])
-	{
-	    id[0] = 0;
-	    if (src[1] > src[2])
-	    {
-		id[1] = 1;
-		id[2] = 2;
-	    }
-	    else
-	    {
-		id[1] = 2;
-		id[2] = 1;
-	    }
-	}
-	else
-	{
-	    id[0] = 2;
-	    id[1] = 0;
-	    id[2] = 1;
-	}
-    }
-    else
-    {
-	if (src[0] > src[2])
-	{
-	    id[0] = 1;
-	    id[1] = 0;
-	    id[2] = 2;
-	}
-	else
-	{
-	    id[2] = 0;
-	    if (src[1] > src[2])
-	    {
-		id[0] = 1;
-		id[1] = 2;
-	    }
-	    else
-	    {
-		id[0] = 2;
-		id[1] = 1;
-	    }
-	}
-    }
-
-    max = dest[id[0]];
-    min = dest[id[2]];
-    if (max > min)
-    {
-	/* The middle channel must be updated first: it needs the old
-	 * max and min, which are cached above.
-	 */
-	dest[id[1]] = (dest[id[1]] - min) * sat / (max - min);
-	dest[id[0]] = sat;
-	dest[id[2]] = 0;
-    }
-    else
-    {
-	dest[0] = dest[1] = dest[2] = 0;
-    }
-}
-
-/*
- * Hue:
- * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
- *
- * Computed here as
- *   set_lum (set_sat (sc * da, SAT (dc) * sa), sa * da, LUM (dc) * sa)
- * with the result stored in c[].  dc/da are the destination channels and
- * alpha, sc/sa the source channels and alpha.
- */
-static inline void
-blend_hsl_hue (comp4_t c[3],
-               comp4_t dc[3],
-               comp4_t da,
-               comp4_t sc[3],
-               comp4_t sa)
-{
-    c[0] = sc[0] * da;
-    c[1] = sc[1] * da;
-    c[2] = sc[2] * da;
-    set_sat (c, c, SAT (dc) * sa);
-    set_lum (c, c, sa * da, LUM (dc) * sa);
-}
-
-/* Instantiates combine_hsl_hue_u () */
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue)
-
-/*
- * Saturation:
- * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
- *
- * Computed here as
- *   set_lum (set_sat (dc * sa, SAT (sc) * da), sa * da, LUM (dc) * sa)
- * with the result stored in c[].  dc/da are the destination channels and
- * alpha, sc/sa the source channels and alpha.
- */
-static inline void
-blend_hsl_saturation (comp4_t c[3],
-                      comp4_t dc[3],
-                      comp4_t da,
-                      comp4_t sc[3],
-                      comp4_t sa)
-{
-    c[0] = dc[0] * sa;
-    c[1] = dc[1] * sa;
-    c[2] = dc[2] * sa;
-    set_sat (c, c, SAT (sc) * da);
-    set_lum (c, c, sa * da, LUM (dc) * sa);
-}
-
-/* Instantiates combine_hsl_saturation_u () */
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation)
-
-/*
- * Color:
- * B(Cb, Cs) = set_lum (Cs, LUM (Cb))
- *
- * Computed here as
- *   set_lum (sc * da, sa * da, LUM (dc) * sa)
- * with the result stored in c[].  dc/da are the destination channels and
- * alpha, sc/sa the source channels and alpha.
- */
-static inline void
-blend_hsl_color (comp4_t c[3],
-                 comp4_t dc[3],
-                 comp4_t da,
-                 comp4_t sc[3],
-                 comp4_t sa)
-{
-    c[0] = sc[0] * da;
-    c[1] = sc[1] * da;
-    c[2] = sc[2] * da;
-    set_lum (c, c, sa * da, LUM (dc) * sa);
-}
-
-/* Instantiates combine_hsl_color_u () */
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_color)
-
-/*
- * Luminosity:
- * B(Cb, Cs) = set_lum (Cb, LUM (Cs))
- *
- * Computed here as
- *   set_lum (dc * sa, sa * da, LUM (sc) * da)
- * with the result stored in c[].  dc/da are the destination channels and
- * alpha, sc/sa the source channels and alpha.
- */
-static inline void
-blend_hsl_luminosity (comp4_t c[3],
-                      comp4_t dc[3],
-                      comp4_t da,
-                      comp4_t sc[3],
-                      comp4_t sa)
-{
-    c[0] = dc[0] * sa;
-    c[1] = dc[1] * sa;
-    c[2] = dc[2] * sa;
-    set_lum (c, c, sa * da, LUM (sc) * da);
-}
-
-/* Instantiates combine_hsl_luminosity_u () */
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity)
-
-/* The helper macros are only meant for the blend modes above */
-#undef SAT
-#undef LUM
-#undef CH_MAX
-#undef CH_MIN
-#undef PDF_NON_SEPARABLE_BLEND_MODE
-
-/* All of the disjoint/conjoint composing functions
- *
- * The four entries in the first column indicate what source contributions
- * come from each of the four areas of the picture -- areas covered by neither
- * A nor B, areas covered only by A, areas covered only by B and finally
- * areas covered by both A and B.
- * 
- * Disjoint			Conjoint
- * Fa		Fb		Fa		Fb
- * (0,0,0,0)	0		0		0		0
- * (0,A,0,A)	1		0		1		0
- * (0,0,B,B)	0		1		0		1
- * (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
- * (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
- * (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
- * (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
- * (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
- * (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
- * (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
- * (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
- * (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
- *
- * See  http://marc.info/?l=xfree-render&m=99792000027857&w=2  for more
- * information about these operators.
- */
-
-/*
- * Bit flags selecting how the factors Fa and Fb are computed by the
- * combine_{disjoint,conjoint}_general_* functions.  The "A" bits select
- * Fa, which is applied to the source, and the "B" bits select Fb, which
- * is applied to the destination.  For each of the two, having neither bit
- * set means a factor of 0 and having both set means a factor of 1 (MASK).
- */
-#define COMBINE_A_OUT 1
-#define COMBINE_A_IN  2
-#define COMBINE_B_OUT 4
-#define COMBINE_B_IN  8
-
-/* Flag combinations for the individual operators */
-#define COMBINE_CLEAR   0
-#define COMBINE_A       (COMBINE_A_OUT | COMBINE_A_IN)
-#define COMBINE_B       (COMBINE_B_OUT | COMBINE_B_IN)
-#define COMBINE_A_OVER  (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN)
-#define COMBINE_B_OVER  (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN)
-#define COMBINE_A_ATOP  (COMBINE_B_OUT | COMBINE_A_IN)
-#define COMBINE_B_ATOP  (COMBINE_A_OUT | COMBINE_B_IN)
-#define COMBINE_XOR     (COMBINE_A_OUT | COMBINE_B_OUT)
-
-/* portion covered by a but not b
- *
- * Returns min (1, (1-b) / a), where 1 is represented by MASK.
- * The early return also covers a == 0 (assuming comp1_t is unsigned),
- * so DIV_UNc is never reached with a zero divisor.
- */
-static comp1_t
-combine_disjoint_out_part (comp1_t a, comp1_t b)
-{
-    /* min (1, (1-b) / a) */
-
-    b = ~b;                 /* 1 - b */
-    if (b >= a)             /* 1 - b >= a -> (1-b)/a >= 1 */
-	return MASK;        /* 1 */
-    return DIV_UNc (b, a);     /* (1-b) / a */
-}
-
-/* portion covered by both a and b
- *
- * Returns max (1 - (1-b) / a, 0), i.e. the complement of
- * combine_disjoint_out_part (a, b).
- * The early return also covers a == 0 (assuming comp1_t is unsigned),
- * so DIV_UNc is never reached with a zero divisor.
- */
-static comp1_t
-combine_disjoint_in_part (comp1_t a, comp1_t b)
-{
-    /* max (1-(1-b)/a,0) */
-    /*  = - min ((1-b)/a - 1, 0) */
-    /*  = 1 - min (1, (1-b)/a) */
-
-    b = ~b;                 /* 1 - b */
-    if (b >= a)             /* 1 - b >= a -> (1-b)/a >= 1 */
-	return 0;           /* 1 - 1 */
-    return ~DIV_UNc(b, a);    /* 1 - (1-b) / a */
-}
-
-/* portion covered by a but not b
- *
- * Returns max (1 - b/a, 0), where 1 is represented by MASK.
- * The early return also covers a == 0 (assuming comp1_t is unsigned),
- * so DIV_UNc is never reached with a zero divisor.
- */
-static comp1_t
-combine_conjoint_out_part (comp1_t a, comp1_t b)
-{
-    /* max (1-b/a,0) */
-    /* = 1-min(b/a,1) */
-
-    if (b >= a)             /* b >= a -> b/a >= 1 */
-	return 0x00;        /* 0 */
-    return ~DIV_UNc(b, a);    /* 1 - b/a */
-}
-
-/* portion covered by both a and b
- *
- * Returns min (1, b/a), where 1 is represented by MASK.
- * The early return also covers a == 0 (assuming comp1_t is unsigned),
- * so DIV_UNc is never reached with a zero divisor.
- */
-static comp1_t
-combine_conjoint_in_part (comp1_t a, comp1_t b)
-{
-    /* min (1,b/a) */
-
-    if (b >= a)             /* b >= a -> b/a >= 1 */
-	return MASK;        /* 1 */
-    return DIV_UNc (b, a);     /* b/a */
-}
-
-/* Extracts the component of v that starts at bit i, widened to comp2_t */
-#define GET_COMP(v, i)   ((comp2_t) (comp1_t) ((v) >> i))
-
-/*
- * Adds component i of x and y using t as scratch storage and yields the
- * sum, truncated to comp1_t and shifted back to bit position i.
- * The "(t) | (0 - ((t) >> G_SHIFT))" term ORs in an all-ones pattern when
- * t has bits at or above G_SHIFT, which saturates the sum -- this assumes
- * G_SHIFT equals the width of one component; confirm.
- */
-#define ADD(x, y, i, t)							\
-    ((t) = GET_COMP (x, i) + GET_COMP (y, i),				\
-     (comp4_t) ((comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
-
-/*
- * Like ADD, but component i of y is first multiplied by ay and component
- * i of x by ax (via MUL_UNc, which uses u and v as scratch storage).
- * The sum is saturated and positioned the same way as in ADD.
- */
-#define GENERIC(x, y, i, ax, ay, t, u, v)				\
-    ((t) = (MUL_UNc (GET_COMP (y, i), ay, (u)) +			\
-            MUL_UNc (GET_COMP (x, i), ax, (v))),			\
-     (comp4_t) ((comp1_t) ((t) |					\
-                           (0 - ((t) >> G_SHIFT)))) << (i))
-
-/*
- * Shared implementation of the disjoint operators for the _u combiners.
- *
- * For each of the "width" pixels, the masked source s and destination d
- * are combined as s * Fa + d * Fb per component (saturating, see GENERIC).
- * @combine is a set of COMBINE_* flags: its A bits select how Fa is
- * derived from the source and destination alpha, its B bits select Fb.
- * The result is written back to @dest.
- */
-static void
-combine_disjoint_general_u (comp4_t *      dest,
-                            const comp4_t *src,
-                            const comp4_t *mask,
-                            int            width,
-                            comp1_t        combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t d = *(dest + i);
-	comp4_t m, n, o, p;
-	comp2_t Fa, Fb, t, u, v;
-	comp1_t sa = s >> A_SHIFT;
-	comp1_t da = d >> A_SHIFT;
-
-	/* Factor for the source */
-	switch (combine & COMBINE_A)
-	{
-	default:
-	    Fa = 0;
-	    break;
-
-	case COMBINE_A_OUT:
-	    Fa = combine_disjoint_out_part (sa, da);
-	    break;
-
-	case COMBINE_A_IN:
-	    Fa = combine_disjoint_in_part (sa, da);
-	    break;
-
-	case COMBINE_A:
-	    Fa = MASK;
-	    break;
-	}
-
-	/* Factor for the destination; note the swapped arguments */
-	switch (combine & COMBINE_B)
-	{
-	default:
-	    Fb = 0;
-	    break;
-
-	case COMBINE_B_OUT:
-	    Fb = combine_disjoint_out_part (da, sa);
-	    break;
-
-	case COMBINE_B_IN:
-	    Fb = combine_disjoint_in_part (da, sa);
-	    break;
-
-	case COMBINE_B:
-	    Fb = MASK;
-	    break;
-	}
-	/* One GENERIC per component, each already shifted into place */
-	m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
-	n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
-	o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
-	p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
-	s = m | n | o | p;
-	*(dest + i) = s;
-    }
-}
-
-/*
- * Disjoint OVER, _u variant.  Unlike the other disjoint _u operators this
- * one does not go through combine_disjoint_general_u ().
- *
- * Pixels whose masked source is 0 leave the destination untouched.  For
- * the others, the destination factor is
- * combine_disjoint_out_part (dest alpha, source alpha) and the pixel is
- * updated with UNcx4_MUL_UNc_ADD_UNcx4 (d, a, s).
- * NOTE(review): that macro is defined elsewhere; presumably it computes
- * d = d * a + s per component -- confirm.
- * @imp and @op are not used.
- */
-static void
-combine_disjoint_over_u (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         comp4_t *                dest,
-                         const comp4_t *          src,
-                         const comp4_t *          mask,
-                         int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp2_t a = s >> A_SHIFT;
-
-	if (s != 0x00)
-	{
-	    comp4_t d = *(dest + i);
-	    a = combine_disjoint_out_part (d >> A_SHIFT, a);
-	    UNcx4_MUL_UNc_ADD_UNcx4 (d, a, s);
-
-	    *(dest + i) = d;
-	}
-    }
-}
-
-/* Disjoint IN, _u variant: combine_disjoint_general_u () with
- * COMBINE_A_IN.  @imp and @op are not used.
- */
-static void
-combine_disjoint_in_u (pixman_implementation_t *imp,
-                       pixman_op_t              op,
-                       comp4_t *                dest,
-                       const comp4_t *          src,
-                       const comp4_t *          mask,
-                       int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
-}
-
-/* Disjoint IN_REVERSE, _u variant: combine_disjoint_general_u () with
- * COMBINE_B_IN.  @imp and @op are not used.
- */
-static void
-combine_disjoint_in_reverse_u (pixman_implementation_t *imp,
-                               pixman_op_t              op,
-                               comp4_t *                dest,
-                               const comp4_t *          src,
-                               const comp4_t *          mask,
-                               int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
-}
-
-/* Disjoint OUT, _u variant: combine_disjoint_general_u () with
- * COMBINE_A_OUT.  @imp and @op are not used.
- */
-static void
-combine_disjoint_out_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        comp4_t *                dest,
-                        const comp4_t *          src,
-                        const comp4_t *          mask,
-                        int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-/* Disjoint OUT_REVERSE, _u variant: combine_disjoint_general_u () with
- * COMBINE_B_OUT.  @imp and @op are not used.
- */
-static void
-combine_disjoint_out_reverse_u (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                comp4_t *                dest,
-                                const comp4_t *          src,
-                                const comp4_t *          mask,
-                                int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-/* Disjoint ATOP, _u variant: combine_disjoint_general_u () with
- * COMBINE_A_ATOP.  @imp and @op are not used.
- */
-static void
-combine_disjoint_atop_u (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         comp4_t *                dest,
-                         const comp4_t *          src,
-                         const comp4_t *          mask,
-                         int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-/* Disjoint ATOP_REVERSE, _u variant: combine_disjoint_general_u () with
- * COMBINE_B_ATOP.  @imp and @op are not used.
- */
-static void
-combine_disjoint_atop_reverse_u (pixman_implementation_t *imp,
-                                 pixman_op_t              op,
-                                 comp4_t *                dest,
-                                 const comp4_t *          src,
-                                 const comp4_t *          mask,
-                                 int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-/* Disjoint XOR, _u variant: combine_disjoint_general_u () with
- * COMBINE_XOR.  @imp and @op are not used.
- */
-static void
-combine_disjoint_xor_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        comp4_t *                dest,
-                        const comp4_t *          src,
-                        const comp4_t *          mask,
-                        int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR);
-}
-
-/*
- * Shared implementation of the conjoint operators for the _u combiners.
- *
- * For each of the "width" pixels, the masked source s and destination d
- * are combined as s * Fa + d * Fb per component (saturating, see GENERIC).
- * @combine is a set of COMBINE_* flags: its A bits select how Fa is
- * derived from the source and destination alpha, its B bits select Fb.
- * The result is written back to @dest.
- */
-static void
-combine_conjoint_general_u (comp4_t *      dest,
-                            const comp4_t *src,
-                            const comp4_t *mask,
-                            int            width,
-                            comp1_t        combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = combine_mask (src, mask, i);
-	comp4_t d = *(dest + i);
-	comp4_t m, n, o, p;
-	comp2_t Fa, Fb, t, u, v;
-	comp1_t sa = s >> A_SHIFT;
-	comp1_t da = d >> A_SHIFT;
-
-	/* Factor for the source */
-	switch (combine & COMBINE_A)
-	{
-	default:
-	    Fa = 0;
-	    break;
-
-	case COMBINE_A_OUT:
-	    Fa = combine_conjoint_out_part (sa, da);
-	    break;
-
-	case COMBINE_A_IN:
-	    Fa = combine_conjoint_in_part (sa, da);
-	    break;
-
-	case COMBINE_A:
-	    Fa = MASK;
-	    break;
-	}
-
-	/* Factor for the destination; note the swapped arguments */
-	switch (combine & COMBINE_B)
-	{
-	default:
-	    Fb = 0;
-	    break;
-
-	case COMBINE_B_OUT:
-	    Fb = combine_conjoint_out_part (da, sa);
-	    break;
-
-	case COMBINE_B_IN:
-	    Fb = combine_conjoint_in_part (da, sa);
-	    break;
-
-	case COMBINE_B:
-	    Fb = MASK;
-	    break;
-	}
-
-	/* One GENERIC per component, each already shifted into place */
-	m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
-	n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
-	o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
-	p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
-
-	s = m | n | o | p;
-
-	*(dest + i) = s;
-    }
-}
-
-/* Conjoint OVER, _u variant: combine_conjoint_general_u () with
- * COMBINE_A_OVER.  @imp and @op are not used.
- */
-static void
-combine_conjoint_over_u (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         comp4_t *                dest,
-                         const comp4_t *          src,
-                         const comp4_t *          mask,
-                         int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-/* Conjoint OVER_REVERSE, _u variant: combine_conjoint_general_u () with
- * COMBINE_B_OVER.  @imp and @op are not used.
- */
-static void
-combine_conjoint_over_reverse_u (pixman_implementation_t *imp,
-                                 pixman_op_t              op,
-                                 comp4_t *                dest,
-                                 const comp4_t *          src,
-                                 const comp4_t *          mask,
-                                 int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER);
-}
-
-/* Conjoint IN, _u variant: combine_conjoint_general_u () with
- * COMBINE_A_IN.  @imp and @op are not used.
- */
-static void
-combine_conjoint_in_u (pixman_implementation_t *imp,
-                       pixman_op_t              op,
-                       comp4_t *                dest,
-                       const comp4_t *          src,
-                       const comp4_t *          mask,
-                       int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
-}
-
-/* Conjoint IN_REVERSE, _u variant: combine_conjoint_general_u () with
- * COMBINE_B_IN.  @imp and @op are not used.
- */
-static void
-combine_conjoint_in_reverse_u (pixman_implementation_t *imp,
-                               pixman_op_t              op,
-                               comp4_t *                dest,
-                               const comp4_t *          src,
-                               const comp4_t *          mask,
-                               int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
-}
-
-/* Conjoint OUT, _u variant: combine_conjoint_general_u () with
- * COMBINE_A_OUT.  @imp and @op are not used.
- */
-static void
-combine_conjoint_out_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        comp4_t *                dest,
-                        const comp4_t *          src,
-                        const comp4_t *          mask,
-                        int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-/* Conjoint OUT_REVERSE, _u variant: combine_conjoint_general_u () with
- * COMBINE_B_OUT.  @imp and @op are not used.
- */
-static void
-combine_conjoint_out_reverse_u (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                comp4_t *                dest,
-                                const comp4_t *          src,
-                                const comp4_t *          mask,
-                                int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-/* Conjoint ATOP, _u variant: combine_conjoint_general_u () with
- * COMBINE_A_ATOP.  @imp and @op are not used.
- */
-static void
-combine_conjoint_atop_u (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         comp4_t *                dest,
-                         const comp4_t *          src,
-                         const comp4_t *          mask,
-                         int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-/* Conjoint ATOP_REVERSE, _u variant: combine_conjoint_general_u () with
- * COMBINE_B_ATOP.  @imp and @op are not used.
- */
-static void
-combine_conjoint_atop_reverse_u (pixman_implementation_t *imp,
-                                 pixman_op_t              op,
-                                 comp4_t *                dest,
-                                 const comp4_t *          src,
-                                 const comp4_t *          mask,
-                                 int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-/* Conjoint XOR, _u variant: combine_conjoint_general_u () with
- * COMBINE_XOR.  @imp and @op are not used.
- */
-static void
-combine_conjoint_xor_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        comp4_t *                dest,
-                        const comp4_t *          src,
-                        const comp4_t *          mask,
-                        int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR);
-}
-
-
-/* Component alpha combiners */
-
-/* CLEAR, component alpha: zeroes "width" pixels of @dest.
- * @imp, @op, @src and @mask are not used.
- */
-static void
-combine_clear_ca (pixman_implementation_t *imp,
-                  pixman_op_t              op,
-                  comp4_t *                dest,
-                  const comp4_t *          src,
-                  const comp4_t *          mask,
-                  int                      width)
-{
-    memset (dest, 0, width * sizeof(comp4_t));
-}
-
-/* SRC, component alpha: each destination pixel is replaced by the source
- * pixel after combine_mask_value_ca () has been applied to it.
- * NOTE(review): combine_mask_value_ca () is defined elsewhere; presumably
- * it multiplies s by m component-wise -- confirm.
- * @imp and @op are not used.
- */
-static void
-combine_src_ca (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                comp4_t *                dest,
-                const comp4_t *          src,
-                const comp4_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = *(src + i);
-	comp4_t m = *(mask + i);
-
-	combine_mask_value_ca (&s, &m);
-
-	*(dest + i) = s;
-    }
-}
-
-/* OVER, component alpha.
- *
- * After combine_mask_ca (&s, &m), the destination is scaled by ~m per
- * component and s is added (UNcx4_MUL_UNcx4_ADD_UNcx4).  When ~m is 0
- * the destination is not read and s is stored directly.
- * NOTE(review): combine_mask_ca () is defined elsewhere; presumably it
- * leaves the masked source in s and the per-component source alpha in m
- * -- confirm.
- * @imp and @op are not used.
- */
-static void
-combine_over_ca (pixman_implementation_t *imp,
-                 pixman_op_t              op,
-                 comp4_t *                dest,
-                 const comp4_t *          src,
-                 const comp4_t *          mask,
-                 int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = *(src + i);
-	comp4_t m = *(mask + i);
-	comp4_t a;
-
-	combine_mask_ca (&s, &m);
-
-	a = ~m;
-	if (a)
-	{
-	    comp4_t d = *(dest + i);
-	    UNcx4_MUL_UNcx4_ADD_UNcx4 (d, a, s);
-	    s = d;
-	}
-
-	*(dest + i) = s;
-    }
-}
-
-/* OVER_REVERSE, component alpha.
- *
- * a is the inverted destination alpha.  When it is 0 the destination is
- * left untouched; otherwise the source is multiplied by the mask
- * (UNcx4_MUL_UNcx4), then scaled by a and added to the destination
- * (UNcx4_MUL_UNc_ADD_UNcx4), and the result is stored.
- * @imp and @op are not used.
- */
-static void
-combine_over_reverse_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         comp4_t *                dest,
-                         const comp4_t *          src,
-                         const comp4_t *          mask,
-                         int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t d = *(dest + i);
-	comp4_t a = ~d >> A_SHIFT;
-
-	if (a)
-	{
-	    comp4_t s = *(src + i);
-	    comp4_t m = *(mask + i);
-
-	    UNcx4_MUL_UNcx4 (s, m);
-	    UNcx4_MUL_UNc_ADD_UNcx4 (s, a, d);
-
-	    *(dest + i) = s;
-	}
-    }
-}
-
-/* IN, component alpha.
- *
- * The result is the masked source (combine_mask_value_ca) multiplied by
- * the destination alpha a.  A destination alpha of 0 gives 0 without
- * reading source or mask, and the multiplication is skipped when a equals
- * MASK.
- * @imp and @op are not used.
- */
-static void
-combine_in_ca (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               comp4_t *                dest,
-               const comp4_t *          src,
-               const comp4_t *          mask,
-               int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t d = *(dest + i);
-	comp2_t a = d >> A_SHIFT;
-	comp4_t s = 0;
-
-	if (a)
-	{
-	    comp4_t m = *(mask + i);
-
-	    s = *(src + i);
-	    combine_mask_value_ca (&s, &m);
-
-	    if (a != MASK)
-		UNcx4_MUL_UNc (s, a);
-	}
-
-	*(dest + i) = s;
-    }
-}
-
-/* IN_REVERSE, component alpha.
- *
- * After combine_mask_alpha_ca (&s, &m), the destination is multiplied by
- * m per component.  When m has all bits set the destination is left
- * untouched; when m is 0 the destination becomes 0 without being read.
- * NOTE(review): combine_mask_alpha_ca () is defined elsewhere; presumably
- * it leaves the per-component source alpha in m -- confirm.
- * @imp and @op are not used.
- */
-static void
-combine_in_reverse_ca (pixman_implementation_t *imp,
-                       pixman_op_t              op,
-                       comp4_t *                dest,
-                       const comp4_t *          src,
-                       const comp4_t *          mask,
-                       int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = *(src + i);
-	comp4_t m = *(mask + i);
-	comp4_t a;
-
-	combine_mask_alpha_ca (&s, &m);
-
-	a = m;
-	if (a != ~0)
-	{
-	    comp4_t d = 0;
-
-	    if (a)
-	    {
-		d = *(dest + i);
-		UNcx4_MUL_UNcx4 (d, a);
-	    }
-
-	    *(dest + i) = d;
-	}
-    }
-}
-
-/* OUT, component alpha.
- *
- * The result is the masked source (combine_mask_value_ca) multiplied by
- * the inverted destination alpha a.  When a is 0 the result is 0 without
- * reading source or mask, and the multiplication is skipped when a equals
- * MASK.
- * @imp and @op are not used.
- */
-static void
-combine_out_ca (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                comp4_t *                dest,
-                const comp4_t *          src,
-                const comp4_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t d = *(dest + i);
-	comp2_t a = ~d >> A_SHIFT;
-	comp4_t s = 0;
-
-	if (a)
-	{
-	    comp4_t m = *(mask + i);
-
-	    s = *(src + i);
-	    combine_mask_value_ca (&s, &m);
-
-	    if (a != MASK)
-		UNcx4_MUL_UNc (s, a);
-	}
-
-	*(dest + i) = s;
-    }
-}
-
-/* OUT_REVERSE, component alpha.
- *
- * After combine_mask_alpha_ca (&s, &m), the destination is multiplied by
- * ~m per component.  When ~m has all bits set the destination is left
- * untouched; when ~m is 0 the destination becomes 0 without being read.
- * @imp and @op are not used.
- */
-static void
-combine_out_reverse_ca (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        comp4_t *                dest,
-                        const comp4_t *          src,
-                        const comp4_t *          mask,
-                        int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = *(src + i);
-	comp4_t m = *(mask + i);
-	comp4_t a;
-
-	combine_mask_alpha_ca (&s, &m);
-
-	a = ~m;
-	if (a != ~0)
-	{
-	    comp4_t d = 0;
-
-	    if (a)
-	    {
-		d = *(dest + i);
-		UNcx4_MUL_UNcx4 (d, a);
-	    }
-
-	    *(dest + i) = d;
-	}
-    }
-}
-
-/* ATOP, component alpha.
- *
- * After combine_mask_ca (&s, &m), the pixel is computed by
- * UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as) with ad = ~m and
- * as = destination alpha.
- * NOTE(review): that macro is defined elsewhere; presumably it computes
- * d = d * ad + s * as per component -- confirm.
- * @imp and @op are not used.
- */
-static void
-combine_atop_ca (pixman_implementation_t *imp,
-                 pixman_op_t              op,
-                 comp4_t *                dest,
-                 const comp4_t *          src,
-                 const comp4_t *          mask,
-                 int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t d = *(dest + i);
-	comp4_t s = *(src + i);
-	comp4_t m = *(mask + i);
-	comp4_t ad;
-	comp2_t as = d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	ad = ~m;
-
-	UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as);
-
-	*(dest + i) = d;
-    }
-}
-
-/* ATOP_REVERSE, component alpha.
- *
- * After combine_mask_ca (&s, &m), the pixel is computed by
- * UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as) with ad = m and
- * as = inverted destination alpha.
- * NOTE(review): that macro is defined elsewhere; presumably it computes
- * d = d * ad + s * as per component -- confirm.
- * @imp and @op are not used.
- */
-static void
-combine_atop_reverse_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         comp4_t *                dest,
-                         const comp4_t *          src,
-                         const comp4_t *          mask,
-                         int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t d = *(dest + i);
-	comp4_t s = *(src + i);
-	comp4_t m = *(mask + i);
-	comp4_t ad;
-	comp2_t as = ~d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	ad = m;
-
-	UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as);
-
-	*(dest + i) = d;
-    }
-}
-
-/* XOR, component alpha.
- *
- * After combine_mask_ca (&s, &m), the pixel is computed by
- * UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as) with ad = ~m and
- * as = inverted destination alpha.
- * NOTE(review): that macro is defined elsewhere; presumably it computes
- * d = d * ad + s * as per component -- confirm.
- * @imp and @op are not used.
- */
-static void
-combine_xor_ca (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                comp4_t *                dest,
-                const comp4_t *          src,
-                const comp4_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t d = *(dest + i);
-	comp4_t s = *(src + i);
-	comp4_t m = *(mask + i);
-	comp4_t ad;
-	comp2_t as = ~d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	ad = ~m;
-
-	UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as);
-
-	*(dest + i) = d;
-    }
-}
-
-/* ADD, component alpha: the masked source (combine_mask_value_ca) is
- * added to the destination with UNcx4_ADD_UNcx4 (d, s).
- * NOTE(review): UNcx4_ADD_UNcx4 is defined elsewhere; presumably the
- * addition saturates per component -- confirm.
- * @imp and @op are not used.
- */
-static void
-combine_add_ca (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                comp4_t *                dest,
-                const comp4_t *          src,
-                const comp4_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s = *(src + i);
-	comp4_t m = *(mask + i);
-	comp4_t d = *(dest + i);
-
-	combine_mask_value_ca (&s, &m);
-
-	UNcx4_ADD_UNcx4 (d, s);
-
-	*(dest + i) = d;
-    }
-}
-
-/* SATURATE, component alpha.
- *
- * After combine_mask_ca (&s, &m), sa/sr/sg/sb are the four components of
- * m and da is the inverted destination alpha.  Each component is handled
- * on its own: if the corresponding component of m is not larger than da,
- * source and destination are simply added (ADD); otherwise the source
- * component is first scaled by (da << G_SHIFT) / <component of m> while
- * the destination is kept at full weight (GENERIC with MASK).
- *
- * The divisions cannot divide by zero: they are only reached when the
- * divisor is greater than da, which is never negative.
- * @imp and @op are not used.
- */
-static void
-combine_saturate_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     comp4_t *                dest,
-                     const comp4_t *          src,
-                     const comp4_t *          mask,
-                     int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s, d;
-	comp2_t sa, sr, sg, sb, da;
-	comp2_t t, u, v;
-	comp4_t m, n, o, p;
-
-	d = *(dest + i);
-	s = *(src + i);
-	m = *(mask + i);
-
-	combine_mask_ca (&s, &m);
-
-	sa = (m >> A_SHIFT);
-	sr = (m >> R_SHIFT) & MASK;
-	sg = (m >> G_SHIFT) & MASK;
-	sb =  m             & MASK;
-	da = ~d >> A_SHIFT;
-
-	/* From here on m, n, o, p hold the four result components */
-	if (sb <= da)
-	    m = ADD (s, d, 0, t);
-	else
-	    m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
-
-	if (sg <= da)
-	    n = ADD (s, d, G_SHIFT, t);
-	else
-	    n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
-
-	if (sr <= da)
-	    o = ADD (s, d, R_SHIFT, t);
-	else
-	    o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
-
-	if (sa <= da)
-	    p = ADD (s, d, A_SHIFT, t);
-	else
-	    p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
-
-	*(dest + i) = m | n | o | p;
-    }
-}
-
-/*
- * Shared implementation of the disjoint operators for the component
- * alpha combiners.
- *
- * Works like the _u variant, except that the factors Fa and Fb are
- * computed separately for each of the four components: after
- * combine_mask_ca (&s, &m), every component of m (kept in sa) is paired
- * with the destination alpha da.  Each result component is then
- * s * Fa + d * Fb for that component (saturating, see GENERIC).
- * @combine is a set of COMBINE_* flags selecting how Fa and Fb are formed.
- */
-static void
-combine_disjoint_general_ca (comp4_t *      dest,
-                             const comp4_t *src,
-                             const comp4_t *mask,
-                             int            width,
-                             comp1_t        combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s, d;
-	comp4_t m, n, o, p;
-	comp4_t Fa, Fb;
-	comp2_t t, u, v;
-	comp4_t sa;
-	comp1_t da;
-
-	s = *(src + i);
-	m = *(mask + i);
-	d = *(dest + i);
-	da = d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	/* m is reused as scratch below, so keep the mask result in sa */
-	sa = m;
-
-	/* Per-component factors for the source */
-	switch (combine & COMBINE_A)
-	{
-	default:
-	    Fa = 0;
-	    break;
-
-	case COMBINE_A_OUT:
-	    m = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> 0), da);
-	    n = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
-	    o = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
-	    p = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
-	    Fa = m | n | o | p;
-	    break;
-
-	case COMBINE_A_IN:
-	    m = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> 0), da);
-	    n = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
-	    o = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
-	    p = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
-	    Fa = m | n | o | p;
-	    break;
-
-	case COMBINE_A:
-	    Fa = ~0;
-	    break;
-	}
-
-	/* Per-component factors for the destination; note the swapped
-	 * arguments
-	 */
-	switch (combine & COMBINE_B)
-	{
-	default:
-	    Fb = 0;
-	    break;
-
-	case COMBINE_B_OUT:
-	    m = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> 0));
-	    n = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
-	    o = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
-	    p = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
-	    Fb = m | n | o | p;
-	    break;
-
-	case COMBINE_B_IN:
-	    m = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> 0));
-	    n = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
-	    o = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
-	    p = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
-	    Fb = m | n | o | p;
-	    break;
-
-	case COMBINE_B:
-	    Fb = ~0;
-	    break;
-	}
-	/* One GENERIC per component, each with its own pair of factors */
-	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
-	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
-	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
-	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
-
-	s = m | n | o | p;
-
-	*(dest + i) = s;
-    }
-}
-
-/* Disjoint OVER, component alpha: combine_disjoint_general_ca () with
- * COMBINE_A_OVER.  @imp and @op are not used.
- */
-static void
-combine_disjoint_over_ca (pixman_implementation_t *imp,
-                          pixman_op_t              op,
-                          comp4_t *                dest,
-                          const comp4_t *          src,
-                          const comp4_t *          mask,
-                          int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-/* Disjoint IN, component alpha: combine_disjoint_general_ca () with
- * COMBINE_A_IN.  @imp and @op are not used.
- */
-static void
-combine_disjoint_in_ca (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        comp4_t *                dest,
-                        const comp4_t *          src,
-                        const comp4_t *          mask,
-                        int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
-}
-
-/* Disjoint IN_REVERSE, component alpha: combine_disjoint_general_ca ()
- * with COMBINE_B_IN.  @imp and @op are not used.
- */
-static void
-combine_disjoint_in_reverse_ca (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                comp4_t *                dest,
-                                const comp4_t *          src,
-                                const comp4_t *          mask,
-                                int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
-}
-
-/* Disjoint OUT, component alpha: combine_disjoint_general_ca () with
- * COMBINE_A_OUT.  @imp and @op are not used.
- */
-static void
-combine_disjoint_out_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         comp4_t *                dest,
-                         const comp4_t *          src,
-                         const comp4_t *          mask,
-                         int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-/* Disjoint OUT_REVERSE, component alpha: combine_disjoint_general_ca ()
- * with COMBINE_B_OUT.  @imp and @op are not used.
- */
-static void
-combine_disjoint_out_reverse_ca (pixman_implementation_t *imp,
-                                 pixman_op_t              op,
-                                 comp4_t *                dest,
-                                 const comp4_t *          src,
-                                 const comp4_t *          mask,
-                                 int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-/* Disjoint ATOP, component alpha: combine_disjoint_general_ca () with
- * COMBINE_A_ATOP.  @imp and @op are not used.
- */
-static void
-combine_disjoint_atop_ca (pixman_implementation_t *imp,
-                          pixman_op_t              op,
-                          comp4_t *                dest,
-                          const comp4_t *          src,
-                          const comp4_t *          mask,
-                          int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp,
-                                  pixman_op_t              op,
-                                  comp4_t *                dest,
-                                  const comp4_t *          src,
-                                  const comp4_t *          mask,
-                                  int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_disjoint_xor_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         comp4_t *                dest,
-                         const comp4_t *          src,
-                         const comp4_t *          mask,
-                         int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
-}
-
-static void
-combine_conjoint_general_ca (comp4_t *      dest,
-                             const comp4_t *src,
-                             const comp4_t *mask,
-                             int            width,
-                             comp1_t        combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	comp4_t s, d;
-	comp4_t m, n, o, p;
-	comp4_t Fa, Fb;
-	comp2_t t, u, v;
-	comp4_t sa;
-	comp1_t da;
-
-	s = *(src + i);
-	m = *(mask + i);
-	d = *(dest + i);
-	da = d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	sa = m;
-
-	switch (combine & COMBINE_A)
-	{
-	default:
-	    Fa = 0;
-	    break;
-
-	case COMBINE_A_OUT:
-	    m = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> 0), da);
-	    n = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
-	    o = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
-	    p = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
-	    Fa = m | n | o | p;
-	    break;
-
-	case COMBINE_A_IN:
-	    m = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> 0), da);
-	    n = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
-	    o = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
-	    p = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
-	    Fa = m | n | o | p;
-	    break;
-
-	case COMBINE_A:
-	    Fa = ~0;
-	    break;
-	}
-
-	switch (combine & COMBINE_B)
-	{
-	default:
-	    Fb = 0;
-	    break;
-
-	case COMBINE_B_OUT:
-	    m = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> 0));
-	    n = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
-	    o = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
-	    p = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
-	    Fb = m | n | o | p;
-	    break;
-
-	case COMBINE_B_IN:
-	    m = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> 0));
-	    n = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
-	    o = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
-	    p = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
-	    Fb = m | n | o | p;
-	    break;
-
-	case COMBINE_B:
-	    Fb = ~0;
-	    break;
-	}
-	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
-	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
-	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
-	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
-
-	s = m | n | o | p;
-
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_conjoint_over_ca (pixman_implementation_t *imp,
-                          pixman_op_t              op,
-                          comp4_t *                dest,
-                          const comp4_t *          src,
-                          const comp4_t *          mask,
-                          int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_conjoint_over_reverse_ca (pixman_implementation_t *imp,
-                                  pixman_op_t              op,
-                                  comp4_t *                dest,
-                                  const comp4_t *          src,
-                                  const comp4_t *          mask,
-                                  int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER);
-}
-
-static void
-combine_conjoint_in_ca (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        comp4_t *                dest,
-                        const comp4_t *          src,
-                        const comp4_t *          mask,
-                        int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_conjoint_in_reverse_ca (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                comp4_t *                dest,
-                                const comp4_t *          src,
-                                const comp4_t *          mask,
-                                int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_conjoint_out_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         comp4_t *                dest,
-                         const comp4_t *          src,
-                         const comp4_t *          mask,
-                         int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_conjoint_out_reverse_ca (pixman_implementation_t *imp,
-                                 pixman_op_t              op,
-                                 comp4_t *                dest,
-                                 const comp4_t *          src,
-                                 const comp4_t *          mask,
-                                 int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_conjoint_atop_ca (pixman_implementation_t *imp,
-                          pixman_op_t              op,
-                          comp4_t *                dest,
-                          const comp4_t *          src,
-                          const comp4_t *          mask,
-                          int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp,
-                                  pixman_op_t              op,
-                                  comp4_t *                dest,
-                                  const comp4_t *          src,
-                                  const comp4_t *          mask,
-                                  int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_conjoint_xor_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         comp4_t *                dest,
-                         const comp4_t *          src,
-                         const comp4_t *          mask,
-                         int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
-}
-
-void
-_pixman_setup_combiner_functions_width (pixman_implementation_t *imp)
-{
-    /* Unified alpha */
-    imp->combine_width[PIXMAN_OP_CLEAR] = combine_clear;
-    imp->combine_width[PIXMAN_OP_SRC] = combine_src_u;
-    imp->combine_width[PIXMAN_OP_DST] = combine_dst;
-    imp->combine_width[PIXMAN_OP_OVER] = combine_over_u;
-    imp->combine_width[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
-    imp->combine_width[PIXMAN_OP_IN] = combine_in_u;
-    imp->combine_width[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u;
-    imp->combine_width[PIXMAN_OP_OUT] = combine_out_u;
-    imp->combine_width[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u;
-    imp->combine_width[PIXMAN_OP_ATOP] = combine_atop_u;
-    imp->combine_width[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
-    imp->combine_width[PIXMAN_OP_XOR] = combine_xor_u;
-    imp->combine_width[PIXMAN_OP_ADD] = combine_add_u;
-    imp->combine_width[PIXMAN_OP_SATURATE] = combine_saturate_u;
-
-    /* Disjoint, unified */
-    imp->combine_width[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
-    imp->combine_width[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
-    imp->combine_width[PIXMAN_OP_DISJOINT_DST] = combine_dst;
-    imp->combine_width[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
-    imp->combine_width[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u;
-    imp->combine_width[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
-    imp->combine_width[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u;
-    imp->combine_width[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u;
-    imp->combine_width[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u;
-    imp->combine_width[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u;
-    imp->combine_width[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u;
-    imp->combine_width[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u;
-
-    /* Conjoint, unified */
-    imp->combine_width[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
-    imp->combine_width[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
-    imp->combine_width[PIXMAN_OP_CONJOINT_DST] = combine_dst;
-    imp->combine_width[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
-    imp->combine_width[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
-    imp->combine_width[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
-    imp->combine_width[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u;
-    imp->combine_width[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u;
-    imp->combine_width[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u;
-    imp->combine_width[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u;
-    imp->combine_width[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u;
-    imp->combine_width[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u;
-
-    imp->combine_width[PIXMAN_OP_MULTIPLY] = combine_multiply_u;
-    imp->combine_width[PIXMAN_OP_SCREEN] = combine_screen_u;
-    imp->combine_width[PIXMAN_OP_OVERLAY] = combine_overlay_u;
-    imp->combine_width[PIXMAN_OP_DARKEN] = combine_darken_u;
-    imp->combine_width[PIXMAN_OP_LIGHTEN] = combine_lighten_u;
-    imp->combine_width[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u;
-    imp->combine_width[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u;
-    imp->combine_width[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u;
-    imp->combine_width[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u;
-    imp->combine_width[PIXMAN_OP_DIFFERENCE] = combine_difference_u;
-    imp->combine_width[PIXMAN_OP_EXCLUSION] = combine_exclusion_u;
-    imp->combine_width[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u;
-    imp->combine_width[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u;
-    imp->combine_width[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u;
-    imp->combine_width[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u;
-
-    /* Component alpha combiners */
-    imp->combine_width_ca[PIXMAN_OP_CLEAR] = combine_clear_ca;
-    imp->combine_width_ca[PIXMAN_OP_SRC] = combine_src_ca;
-    /* dest */
-    imp->combine_width_ca[PIXMAN_OP_OVER] = combine_over_ca;
-    imp->combine_width_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca;
-    imp->combine_width_ca[PIXMAN_OP_IN] = combine_in_ca;
-    imp->combine_width_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca;
-    imp->combine_width_ca[PIXMAN_OP_OUT] = combine_out_ca;
-    imp->combine_width_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca;
-    imp->combine_width_ca[PIXMAN_OP_ATOP] = combine_atop_ca;
-    imp->combine_width_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
-    imp->combine_width_ca[PIXMAN_OP_XOR] = combine_xor_ca;
-    imp->combine_width_ca[PIXMAN_OP_ADD] = combine_add_ca;
-    imp->combine_width_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca;
-
-    /* Disjoint CA */
-    imp->combine_width_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
-    imp->combine_width_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
-    imp->combine_width_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst;
-    imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
-    imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca;
-    imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
-    imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca;
-    imp->combine_width_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca;
-    imp->combine_width_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca;
-    imp->combine_width_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca;
-    imp->combine_width_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca;
-    imp->combine_width_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca;
-
-    /* Conjoint CA */
-    imp->combine_width_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
-    imp->combine_width_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
-    imp->combine_width_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst;
-    imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
-    imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
-    imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
-    imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca;
-    imp->combine_width_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca;
-    imp->combine_width_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca;
-    imp->combine_width_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca;
-    imp->combine_width_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca;
-    imp->combine_width_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca;
-
-    imp->combine_width_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca;
-    imp->combine_width_ca[PIXMAN_OP_SCREEN] = combine_screen_ca;
-    imp->combine_width_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca;
-    imp->combine_width_ca[PIXMAN_OP_DARKEN] = combine_darken_ca;
-    imp->combine_width_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
-    imp->combine_width_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca;
-    imp->combine_width_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca;
-    imp->combine_width_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
-    imp->combine_width_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca;
-    imp->combine_width_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
-    imp->combine_width_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
-
-    /* It is not clear that these make sense, so make them noops for now */
-    imp->combine_width_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
-    imp->combine_width_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
-    imp->combine_width_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
-    imp->combine_width_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
-}
diff --git a/pixman/pixman-combine.h.template b/pixman/pixman-combine.h.template
deleted file mode 100644
index 20f784b..0000000
--- a/pixman/pixman-combine.h.template
+++ /dev/null
@@ -1,226 +0,0 @@
-
-#define COMPONENT_SIZE
-#define MASK
-#define ONE_HALF
-
-#define A_SHIFT
-#define R_SHIFT
-#define G_SHIFT
-#define A_MASK
-#define R_MASK
-#define G_MASK
-
-#define RB_MASK
-#define AG_MASK
-#define RB_ONE_HALF
-#define RB_MASK_PLUS_ONE
-
-#define ALPHA_c(x) ((x) >> A_SHIFT)
-#define RED_c(x) (((x) >> R_SHIFT) & MASK)
-#define GREEN_c(x) (((x) >> G_SHIFT) & MASK)
-#define BLUE_c(x) ((x) & MASK)
-
-/*
- * Helper macros.
- */
-
-#define MUL_UNc(a, b, t)						\
-    ((t) = (a) * (comp2_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))
-
-#define DIV_UNc(a, b)							\
-    (((comp2_t) (a) * MASK + ((b) / 2)) / (b))
-
-#define ADD_UNc(x, y, t)				     \
-    ((t) = (x) + (y),					     \
-     (comp4_t) (comp1_t) ((t) | (0 - ((t) >> G_SHIFT))))
-
-#define DIV_ONE_UNc(x)							\
-    (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
-
-/*
- * The methods below use some tricks to be able to do two color
- * components at the same time.
- */
-
-/*
- * x_rb = (x_rb * a) / 255
- */
-#define UNc_rb_MUL_UNc(x, a, t)						\
-    do									\
-    {									\
-	t  = ((x) & RB_MASK) * (a);					\
-	t += RB_ONE_HALF;						\
-	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;		\
-	x &= RB_MASK;							\
-    } while (0)
-
-/*
- * x_rb = min (x_rb + y_rb, 255)
- */
-#define UNc_rb_ADD_UNc_rb(x, y, t)					\
-    do									\
-    {									\
-	t = ((x) + (y));						\
-	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);		\
-	x = (t & RB_MASK);						\
-    } while (0)
-
-/*
- * x_rb = (x_rb * a_rb) / 255
- */
-#define UNc_rb_MUL_UNc_rb(x, a, t)					\
-    do									\
-    {									\
-	t  = (x & MASK) * (a & MASK);					\
-	t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK);			\
-	t += RB_ONE_HALF;						\
-	t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;		\
-	x = t & RB_MASK;						\
-    } while (0)
-
-/*
- * x_c = (x_c * a) / 255
- */
-#define UNcx4_MUL_UNc(x, a)						\
-    do									\
-    {									\
-	comp4_t r1__, r2__, t__;					\
-									\
-	r1__ = (x);							\
-	UNc_rb_MUL_UNc (r1__, (a), t__);				\
-									\
-	r2__ = (x) >> G_SHIFT;						\
-	UNc_rb_MUL_UNc (r2__, (a), t__);				\
-									\
-	(x) = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-
-/*
- * x_c = (x_c * a) / 255 + y_c
- */
-#define UNcx4_MUL_UNc_ADD_UNcx4(x, a, y)				\
-    do									\
-    {									\
-	comp4_t r1__, r2__, r3__, t__;					\
-									\
-	r1__ = (x);							\
-	r2__ = (y) & RB_MASK;						\
-	UNc_rb_MUL_UNc (r1__, (a), t__);				\
-	UNc_rb_ADD_UNc_rb (r1__, r2__, t__);				\
-									\
-	r2__ = (x) >> G_SHIFT;						\
-	r3__ = ((y) >> G_SHIFT) & RB_MASK;				\
-	UNc_rb_MUL_UNc (r2__, (a), t__);				\
-	UNc_rb_ADD_UNc_rb (r2__, r3__, t__);				\
-									\
-	(x) = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-
-/*
- * x_c = (x_c * a + y_c * b) / 255
- */
-#define UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc(x, a, y, b)			\
-    do									\
-    {									\
-	comp4_t r1__, r2__, r3__, t__;					\
-									\
-	r1__ = (x);							\
-	r2__ = (y);							\
-	UNc_rb_MUL_UNc (r1__, (a), t__);				\
-	UNc_rb_MUL_UNc (r2__, (b), t__);				\
-	UNc_rb_ADD_UNc_rb (r1__, r2__, t__);				\
-									\
-	r2__ = ((x) >> G_SHIFT);					\
-	r3__ = ((y) >> G_SHIFT);					\
-	UNc_rb_MUL_UNc (r2__, (a), t__);				\
-	UNc_rb_MUL_UNc (r3__, (b), t__);				\
-	UNc_rb_ADD_UNc_rb (r2__, r3__, t__);				\
-									\
-	(x) = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-
-/*
- * x_c = (x_c * a_c) / 255
- */
-#define UNcx4_MUL_UNcx4(x, a)						\
-    do									\
-    {									\
-	comp4_t r1__, r2__, r3__, t__;					\
-									\
-	r1__ = (x);							\
-	r2__ = (a);							\
-	UNc_rb_MUL_UNc_rb (r1__, r2__, t__);				\
-									\
-	r2__ = (x) >> G_SHIFT;						\
-	r3__ = (a) >> G_SHIFT;						\
-	UNc_rb_MUL_UNc_rb (r2__, r3__, t__);				\
-									\
-	(x) = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-
-/*
- * x_c = (x_c * a_c) / 255 + y_c
- */
-#define UNcx4_MUL_UNcx4_ADD_UNcx4(x, a, y)				\
-    do									\
-    {									\
-	comp4_t r1__, r2__, r3__, t__;					\
-									\
-	r1__ = (x);							\
-	r2__ = (a);							\
-	UNc_rb_MUL_UNc_rb (r1__, r2__, t__);				\
-	r2__ = (y) & RB_MASK;						\
-	UNc_rb_ADD_UNc_rb (r1__, r2__, t__);				\
-									\
-	r2__ = ((x) >> G_SHIFT);					\
-	r3__ = ((a) >> G_SHIFT);					\
-	UNc_rb_MUL_UNc_rb (r2__, r3__, t__);				\
-	r3__ = ((y) >> G_SHIFT) & RB_MASK;				\
-	UNc_rb_ADD_UNc_rb (r2__, r3__, t__);				\
-									\
-	(x) = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-
-/*
- * x_c = (x_c * a_c + y_c * b) / 255
- */
-#define UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc(x, a, y, b)			\
-    do									\
-    {									\
-	comp4_t r1__, r2__, r3__, t__;					\
-									\
-	r1__ = (x);							\
-	r2__ = (a);							\
-	UNc_rb_MUL_UNc_rb (r1__, r2__, t__);				\
-	r2__ = (y);							\
-	UNc_rb_MUL_UNc (r2__, (b), t__);				\
-	UNc_rb_ADD_UNc_rb (r1__, r2__, t__);				\
-									\
-	r2__ = (x) >> G_SHIFT;						\
-	r3__ = (a) >> G_SHIFT;						\
-	UNc_rb_MUL_UNc_rb (r2__, r3__, t__);				\
-	r3__ = (y) >> G_SHIFT;						\
-	UNc_rb_MUL_UNc (r3__, (b), t__);				\
-	UNc_rb_ADD_UNc_rb (r2__, r3__, t__);				\
-									\
-	x = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-
-/*
-  x_c = min(x_c + y_c, 255)
-*/
-#define UNcx4_ADD_UNcx4(x, y)						\
-    do									\
-    {									\
-	comp4_t r1__, r2__, r3__, t__;					\
-									\
-	r1__ = (x) & RB_MASK;						\
-	r2__ = (y) & RB_MASK;						\
-	UNc_rb_ADD_UNc_rb (r1__, r2__, t__);				\
-									\
-	r2__ = ((x) >> G_SHIFT) & RB_MASK;				\
-	r3__ = ((y) >> G_SHIFT) & RB_MASK;				\
-	UNc_rb_ADD_UNc_rb (r2__, r3__, t__);				\
-									\
-	x = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c
new file mode 100644
index 0000000..54cc877
--- /dev/null
+++ b/pixman/pixman-combine32.c
@@ -0,0 +1,2460 @@
+/*
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ *             2005 Lars Knoll & Zack Rusin, Trolltech
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include <string.h>
+
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+
+/* component alpha helper functions */
+
+static void
+combine_mask_ca (uint32_t *src, uint32_t *mask)
+{
+    uint32_t a = *mask;
+
+    uint32_t x;
+    uint16_t xa;
+
+    if (!a)
+    {
+	*(src) = 0;
+	return;
+    }
+
+    x = *(src);
+    if (a == ~0)
+    {
+	x = x >> A_SHIFT;
+	x |= x << G_SHIFT;
+	x |= x << R_SHIFT;
+	*(mask) = x;
+	return;
+    }
+
+    xa = x >> A_SHIFT;
+    UN8x4_MUL_UN8x4 (x, a);
+    *(src) = x;
+    
+    UN8x4_MUL_UN8 (a, xa);
+    *(mask) = a;
+}
+
+static void
+combine_mask_value_ca (uint32_t *src, const uint32_t *mask)
+{
+    uint32_t a = *mask;
+    uint32_t x;
+
+    if (!a)
+    {
+	*(src) = 0;
+	return;
+    }
+
+    if (a == ~0)
+	return;
+
+    x = *(src);
+    UN8x4_MUL_UN8x4 (x, a);
+    *(src) = x;
+}
+
+static void
+combine_mask_alpha_ca (const uint32_t *src, uint32_t *mask)
+{
+    uint32_t a = *(mask);
+    uint32_t x;
+
+    if (!a)
+	return;
+
+    x = *(src) >> A_SHIFT;
+    if (x == MASK)
+	return;
+
+    if (a == ~0)
+    {
+	x |= x << G_SHIFT;
+	x |= x << R_SHIFT;
+	*(mask) = x;
+	return;
+    }
+
+    UN8x4_MUL_UN8 (a, x);
+    *(mask) = a;
+}
+
+/*
+ * There are two ways of handling alpha -- either as a single unified value or
+ * a separate value for each component, hence each macro must have two
+ * versions.  The unified alpha version has a 'u' at the end of the name,
+ * the component version has a 'ca'.  Similarly, functions which deal with
+ * this difference will have two versions using the same convention.
+ */
+
+static force_inline uint32_t
+combine_mask (const uint32_t *src, const uint32_t *mask, int i)
+{
+    uint32_t s, m;
+
+    if (mask)
+    {
+	m = *(mask + i) >> A_SHIFT;
+
+	if (!m)
+	    return 0;
+    }
+
+    s = *(src + i);
+
+    if (mask)
+	UN8x4_MUL_UN8 (s, m);
+
+    return s;
+}
+
+static void
+combine_clear (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               uint32_t *                dest,
+               const uint32_t *          src,
+               const uint32_t *          mask,
+               int                      width)
+{
+    memset (dest, 0, width * sizeof(uint32_t));
+}
+
+static void
+combine_dst (pixman_implementation_t *imp,
+	     pixman_op_t	      op,
+	     uint32_t *		      dest,
+	     const uint32_t *	      src,
+	     const uint32_t *          mask,
+	     int		      width)
+{
+    return;
+}
+
+static void
+combine_src_u (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               uint32_t *                dest,
+               const uint32_t *          src,
+               const uint32_t *          mask,
+               int                      width)
+{
+    int i;
+
+    if (!mask)
+    {
+	memcpy (dest, src, width * sizeof (uint32_t));
+    }
+    else
+    {
+	for (i = 0; i < width; ++i)
+	{
+	    uint32_t s = combine_mask (src, mask, i);
+
+	    *(dest + i) = s;
+	}
+    }
+}
+
+static void
+combine_over_u (pixman_implementation_t *imp,
+                pixman_op_t              op,
+                uint32_t *                dest,
+                const uint32_t *          src,
+                const uint32_t *          mask,
+                int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s = combine_mask (src, mask, i);
+	uint32_t d = *(dest + i);
+	uint32_t ia = ALPHA_8 (~s);
+
+	UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+	*(dest + i) = d;
+    }
+}
+
+static void
+combine_over_reverse_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        uint32_t *                dest,
+                        const uint32_t *          src,
+                        const uint32_t *          mask,
+                        int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s = combine_mask (src, mask, i);
+	uint32_t d = *(dest + i);
+	uint32_t ia = ALPHA_8 (~*(dest + i));
+	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
+	*(dest + i) = s;
+    }
+}
+
+static void
+combine_in_u (pixman_implementation_t *imp,
+              pixman_op_t              op,
+              uint32_t *                dest,
+              const uint32_t *          src,
+              const uint32_t *          mask,
+              int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s = combine_mask (src, mask, i);
+	uint32_t a = ALPHA_8 (*(dest + i));
+	UN8x4_MUL_UN8 (s, a);
+	*(dest + i) = s;
+    }
+}
+
+static void
+combine_in_reverse_u (pixman_implementation_t *imp,
+                      pixman_op_t              op,
+                      uint32_t *                dest,
+                      const uint32_t *          src,
+                      const uint32_t *          mask,
+                      int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s = combine_mask (src, mask, i);
+	uint32_t d = *(dest + i);
+	uint32_t a = ALPHA_8 (s);
+	UN8x4_MUL_UN8 (d, a);
+	*(dest + i) = d;
+    }
+}
+
+static void
+combine_out_u (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               uint32_t *                dest,
+               const uint32_t *          src,
+               const uint32_t *          mask,
+               int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s = combine_mask (src, mask, i);
+	uint32_t a = ALPHA_8 (~*(dest + i));
+	UN8x4_MUL_UN8 (s, a);
+	*(dest + i) = s;
+    }
+}
+
+static void
+combine_out_reverse_u (pixman_implementation_t *imp,
+                       pixman_op_t              op,
+                       uint32_t *                dest,
+                       const uint32_t *          src,
+                       const uint32_t *          mask,
+                       int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s = combine_mask (src, mask, i);
+	uint32_t d = *(dest + i);
+	uint32_t a = ALPHA_8 (~s);
+	UN8x4_MUL_UN8 (d, a);
+	*(dest + i) = d;
+    }
+}
+
+static void
+combine_atop_u (pixman_implementation_t *imp,
+                pixman_op_t              op,
+                uint32_t *                dest,
+                const uint32_t *          src,
+                const uint32_t *          mask,
+                int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s = combine_mask (src, mask, i);
+	uint32_t d = *(dest + i);
+	uint32_t dest_a = ALPHA_8 (d);
+	uint32_t src_ia = ALPHA_8 (~s);
+
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+	*(dest + i) = s;
+    }
+}
+
+static void
+combine_atop_reverse_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        uint32_t *                dest,
+                        const uint32_t *          src,
+                        const uint32_t *          mask,
+                        int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s = combine_mask (src, mask, i);
+	uint32_t d = *(dest + i);
+	uint32_t src_a = ALPHA_8 (s);
+	uint32_t dest_ia = ALPHA_8 (~d);
+
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+	*(dest + i) = s;
+    }
+}
+
+static void
+combine_xor_u (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               uint32_t *                dest,
+               const uint32_t *          src,
+               const uint32_t *          mask,
+               int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s = combine_mask (src, mask, i);
+	uint32_t d = *(dest + i);
+	uint32_t src_ia = ALPHA_8 (~s);
+	uint32_t dest_ia = ALPHA_8 (~d);
+
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+	*(dest + i) = s;
+    }
+}
+
+static void
+combine_add_u (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               uint32_t *                dest,
+               const uint32_t *          src,
+               const uint32_t *          mask,
+               int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s = combine_mask (src, mask, i);
+	uint32_t d = *(dest + i);
+	UN8x4_ADD_UN8x4 (d, s);
+	*(dest + i) = d;
+    }
+}
+
+/*
+ * SATURATE, unified alpha.  When the source alpha exceeds the inverted
+ * destination alpha, the source pixel is first scaled by
+ * DIV_UN8 (inverted destination alpha, source alpha); the (possibly scaled)
+ * source is then added to the destination with UN8x4_ADD_UN8x4.
+ * imp and op are not used.
+ */
+static void
+combine_saturate_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t                *dest,
+                    const uint32_t          *src,
+                    const uint32_t          *mask,
+                    int                      width)
+{
+    int px;
+
+    for (px = 0; px < width; px++)
+    {
+	uint32_t pixel = combine_mask (src, mask, px);
+	uint32_t dst = dest[px];
+	uint16_t src_alpha = pixel >> A_SHIFT;
+	uint16_t room = ~dst >> A_SHIFT;	/* inverted destination alpha */
+
+	if (src_alpha > room)
+	{
+	    /* src_alpha > room >= 0 here, so the divisor is never zero. */
+	    src_alpha = DIV_UN8 (room, src_alpha);
+	    UN8x4_MUL_UN8 (pixel, src_alpha);
+	}
+
+	UN8x4_ADD_UN8x4 (dst, pixel);
+	dest[px] = dst;
+    }
+}
+
+/*
+ * PDF blend modes:
+ * The following blend modes have been taken from the PDF ISO 32000
+ * specification, which at this point in time is available from
+ * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
+ * The relevant chapters are 11.3.5 and 11.3.6.
+ * The formula for computing the final pixel color given in 11.3.6 is:
+ * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
+ * with B() being the blend function.
+ * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
+ *
+ * These blend modes should match the SVG filter draft specification, as
+ * it has been designed to mirror ISO 32000. Note that at the current point
+ * no released draft exists that shows this, as the formulas have not been
+ * updated yet after the release of ISO 32000.
+ *
+ * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
+ * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
+ * argument. Note that this implementation operates on premultiplied colors,
+ * while the PDF specification does not. Therefore the code uses the formula
+ * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
+ */
+
+/*
+ * Multiply
+ * B(Dca, ad, Sca, as) = Dca.Sca
+ *
+ * Unified-alpha combiner.  A copy of the masked source is weighted by the
+ * inverted destination alpha and the destination by the inverted source
+ * alpha; the product of destination and source (UN8x4_MUL_UN8x4) is then
+ * added to that weighted sum.  imp and op are not used.
+ */
+static void
+combine_multiply_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t                *dest,
+                    const uint32_t          *src,
+                    const uint32_t          *mask,
+                    int                      width)
+{
+    int px;
+
+    for (px = 0; px < width; px++)
+    {
+	uint32_t pixel = combine_mask (src, mask, px);
+	uint32_t dst = dest[px];
+	uint32_t outside = pixel;
+	uint32_t src_inv_alpha = ALPHA_8 (~pixel);
+	uint32_t dst_inv_alpha = ALPHA_8 (~dst);
+
+	/* Weighted sum first: it needs the unmodified destination. */
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (outside, dst_inv_alpha, dst, src_inv_alpha);
+	UN8x4_MUL_UN8x4 (dst, pixel);
+	UN8x4_ADD_UN8x4 (dst, outside);
+
+	dest[px] = dst;
+    }
+}
+
+/*
+ * Multiply, component alpha.  The source and mask are first run through
+ * combine_mask_ca.  The result accumulator starts as the destination and is
+ * combined with the source through UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 using
+ * the inverted mask and the inverted destination alpha; the product of
+ * destination and source is then added on top.  imp and op are not used.
+ */
+static void
+combine_multiply_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t                *dest,
+                     const uint32_t          *src,
+                     const uint32_t          *mask,
+                     int                      width)
+{
+    int px;
+
+    for (px = 0; px < width; px++)
+    {
+	uint32_t m = mask[px];
+	uint32_t pixel = src[px];
+	uint32_t dst = dest[px];
+	uint32_t acc = dst;
+	uint32_t dst_inv_alpha = ALPHA_8 (~dst);
+
+	combine_mask_ca (&pixel, &m);
+
+	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (acc, ~m, pixel, dst_inv_alpha);
+	UN8x4_MUL_UN8x4 (dst, pixel);
+	UN8x4_ADD_UN8x4 (acc, dst);
+
+	dest[px] = acc;
+    }
+}
+
+/*
+ * PDF_SEPARABLE_BLEND_MODE (name) expands to two combiners built around a
+ * per-channel helper blend_<name> (dca, da, sca, sa) that must already be
+ * declared when the macro is invoked:
+ *
+ *   combine_<name>_u   Unified alpha.  The source pixel comes from
+ *                      combine_mask ().  The result starts as
+ *                      UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (d, isa, s, ida)
+ *                      (inverted source alpha applied to the destination,
+ *                      inverted destination alpha applied to the source).
+ *                      DIV_ONE_UN8 (sa * da) is then added in the alpha
+ *                      channel and blend_<name> () in each color channel.
+ *
+ *   combine_<name>_ca  Component alpha.  Source and mask go through
+ *                      combine_mask_ca () first; the inverted mask replaces
+ *                      the inverted source alpha, and each channel of the
+ *                      mask is passed to blend_<name> () as its `sa`.
+ *
+ * The per-channel terms are added with plain `+`; nothing in this macro
+ * clamps the sums.
+ */
+#define PDF_SEPARABLE_BLEND_MODE(name)					\
+    static void								\
+    combine_ ## name ## _u (pixman_implementation_t *imp,		\
+			    pixman_op_t              op,		\
+                            uint32_t *                dest,		\
+			    const uint32_t *          src,		\
+			    const uint32_t *          mask,		\
+			    int                      width)		\
+    {									\
+	int i;								\
+	for (i = 0; i < width; ++i) {					\
+	    uint32_t s = combine_mask (src, mask, i);			\
+	    uint32_t d = *(dest + i);					\
+	    uint8_t sa = ALPHA_8 (s);					\
+	    uint8_t isa = ~sa;						\
+	    uint8_t da = ALPHA_8 (d);					\
+	    uint8_t ida = ~da;						\
+	    uint32_t result;						\
+									\
+	    result = d;							\
+	    UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida);	\
+	    								\
+	    *(dest + i) = result +					\
+		(DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) +		\
+		(blend_ ## name (RED_8 (d), da, RED_8 (s), sa) << R_SHIFT) + \
+		(blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa) << G_SHIFT) + \
+		(blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa));	\
+	}								\
+    }									\
+    									\
+    static void								\
+    combine_ ## name ## _ca (pixman_implementation_t *imp,		\
+			     pixman_op_t              op,		\
+                             uint32_t *                dest,		\
+			     const uint32_t *          src,		\
+			     const uint32_t *          mask,		\
+			     int                     width)		\
+    {									\
+	int i;								\
+	for (i = 0; i < width; ++i) {					\
+	    uint32_t m = *(mask + i);					\
+	    uint32_t s = *(src + i);					\
+	    uint32_t d = *(dest + i);					\
+	    uint8_t da = ALPHA_8 (d);					\
+	    uint8_t ida = ~da;						\
+	    uint32_t result;						\
+            								\
+	    combine_mask_ca (&s, &m);					\
+            								\
+	    result = d;							\
+	    UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (result, ~m, s, ida);     \
+            								\
+	    result +=							\
+	        (DIV_ONE_UN8 (ALPHA_8 (m) * (uint32_t)da) << A_SHIFT) +	\
+	        (blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m)) << R_SHIFT) + \
+	        (blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)) << G_SHIFT) + \
+	        (blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m))); \
+	    								\
+	    *(dest + i) = result;					\
+	}								\
+    }
+
+/*
+ * Screen
+ * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca
+ *
+ * All arithmetic is unsigned 32-bit; the result is scaled back with
+ * DIV_ONE_UN8.
+ */
+static inline uint32_t
+blend_screen (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+{
+    uint32_t rca = sca * da + dca * sa - sca * dca;
+
+    return DIV_ONE_UN8 (rca);
+}
+
+PDF_SEPARABLE_BLEND_MODE (screen)
+
+/*
+ * Overlay
+ * B(Dca, Da, Sca, Sa) =
+ *   if 2.Dca < Da
+ *     2.Sca.Dca
+ *   otherwise
+ *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
+ *
+ * The selected product is scaled back with DIV_ONE_UN8.
+ */
+static inline uint32_t
+blend_overlay (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+{
+    if (2 * dca < da)
+	return DIV_ONE_UN8 (2 * sca * dca);
+
+    return DIV_ONE_UN8 (sa * da - 2 * (da - dca) * (sa - sca));
+}
+
+PDF_SEPARABLE_BLEND_MODE (overlay)
+
+/*
+ * Darken
+ * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa)
+ *
+ * The smaller of the two cross products is scaled back with DIV_ONE_UN8.
+ */
+static inline uint32_t
+blend_darken (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+{
+    uint32_t src_term = sca * da;
+    uint32_t dst_term = dca * sa;
+
+    if (src_term > dst_term)
+	return DIV_ONE_UN8 (dst_term);
+
+    return DIV_ONE_UN8 (src_term);
+}
+
+PDF_SEPARABLE_BLEND_MODE (darken)
+
+/*
+ * Lighten
+ * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa)
+ *
+ * The larger of the two cross products is scaled back with DIV_ONE_UN8.
+ */
+static inline uint32_t
+blend_lighten (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+{
+    uint32_t src_term = sca * da;
+    uint32_t dst_term = dca * sa;
+
+    if (src_term > dst_term)
+	return DIV_ONE_UN8 (src_term);
+
+    return DIV_ONE_UN8 (dst_term);
+}
+
+PDF_SEPARABLE_BLEND_MODE (lighten)
+
+/*
+ * Color dodge
+ * B(Dca, Da, Sca, Sa) =
+ *   if Dca == 0
+ *     0
+ *   if Sca == Sa
+ *     Sa.Da
+ *   otherwise
+ *     Sa.Da. min (1, Dca / Da / (1 - Sca/Sa))
+ *
+ * The division is only performed when Sca < Sa, so its divisor (Sa - Sca)
+ * is never zero.
+ */
+static inline uint32_t
+blend_color_dodge (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+{
+    if (sca < sa)
+    {
+	uint32_t rca = dca * sa / (sa - sca);
+
+	/* min (rca, da) */
+	if (rca >= da)
+	    rca = da;
+
+	return DIV_ONE_UN8 (sa * rca);
+    }
+
+    if (dca == 0)
+	return 0;
+
+    return DIV_ONE_UN8 (sa * da);
+}
+
+PDF_SEPARABLE_BLEND_MODE (color_dodge)
+
+/*
+ * Color burn
+ * B(Dca, Da, Sca, Sa) =
+ *   if Dca == Da
+ *     Sa.Da
+ *   if Sca == 0
+ *     0
+ *   otherwise
+ *     Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca))
+ *
+ * The division is only performed when Sca is non-zero.
+ */
+static inline uint32_t
+blend_color_burn (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+{
+    if (sca != 0)
+    {
+	uint32_t rca = (da - dca) * sa / sca;
+	/* max (rca, da) - rca: what is left of da after removing rca */
+	uint32_t left = rca < da ? da - rca : 0;
+
+	return DIV_ONE_UN8 (sa * left);
+    }
+
+    if (dca < da)
+	return 0;
+
+    return DIV_ONE_UN8 (sa * da);
+}
+
+PDF_SEPARABLE_BLEND_MODE (color_burn)
+
+/*
+ * Hard light
+ * B(Dca, Da, Sca, Sa) =
+ *   if 2.Sca < Sa
+ *     2.Sca.Dca
+ *   otherwise
+ *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
+ *
+ * Same two formulas as overlay, but selected on the source instead of the
+ * destination.
+ */
+static inline uint32_t
+blend_hard_light (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+{
+    uint32_t rca;
+
+    if (2 * sca < sa)
+	rca = 2 * sca * dca;
+    else
+	rca = sa * da - 2 * (da - dca) * (sa - sca);
+
+    return DIV_ONE_UN8 (rca);
+}
+
+PDF_SEPARABLE_BLEND_MODE (hard_light)
+
+/*
+ * Soft light
+ * B(Dca, Da, Sca, Sa) =
+ *   if 2.Sca < Sa
+ *     Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa))
+ *   otherwise if Dca.4 <= Da
+ *     Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3))
+ *   otherwise
+ *     (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa))
+ *
+ * The four integer inputs are scaled by 1.0 / MASK and the formula is
+ * evaluated in double precision.  Every path that would divide by Da is
+ * guarded by an explicit Da == 0 check.  The result is scaled back by MASK
+ * and rounded by adding 0.5 before the conversion to uint32_t.
+ */
+static inline uint32_t
+blend_soft_light (uint32_t dca_org,
+		  uint32_t da_org,
+		  uint32_t sca_org,
+		  uint32_t sa_org)
+{
+    double dca = dca_org * (1.0 / MASK);
+    double da = da_org * (1.0 / MASK);
+    double sca = sca_org * (1.0 / MASK);
+    double sa = sa_org * (1.0 / MASK);
+    double rca;
+
+    if (2 * sca < sa)
+    {
+	if (da == 0)
+	    rca = dca * sa;
+	else
+	    rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da;
+    }
+    else if (da == 0)
+    {
+	rca = 0;
+    }
+    else if (4 * dca <= da)
+    {
+	rca = dca * sa +
+	    (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
+    }
+    else
+    {
+	/* sqrt (dca * da) is the SQRT (Dca/Da).Da term of the formula */
+	rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa);
+    }
+    return rca * MASK + 0.5;
+}
+
+PDF_SEPARABLE_BLEND_MODE (soft_light)
+
+/*
+ * Difference
+ * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da)
+ *
+ * The subtraction is ordered so that the unsigned difference never wraps.
+ */
+static inline uint32_t
+blend_difference (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+{
+    uint32_t dst_term = dca * sa;
+    uint32_t src_term = sca * da;
+    uint32_t delta = src_term < dst_term ? dst_term - src_term
+					 : src_term - dst_term;
+
+    return DIV_ONE_UN8 (delta);
+}
+
+PDF_SEPARABLE_BLEND_MODE (difference)
+
+/*
+ * Exclusion
+ * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca)
+ */
+
+/* This can be made faster by writing it directly and not using
+ * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */
+
+static inline uint32_t
+blend_exclusion (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+{
+    uint32_t rca = sca * da + dca * sa - 2 * dca * sca;
+
+    return DIV_ONE_UN8 (rca);
+}
+
+PDF_SEPARABLE_BLEND_MODE (exclusion)
+
+#undef PDF_SEPARABLE_BLEND_MODE
+
+/*
+ * PDF nonseparable blend modes are implemented using the following functions
+ * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid
+ * and min value of the red, green and blue components.
+ *
+ * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
+ *
+ * clip_color (C):
+ *   l = LUM (C)
+ *   min = Cmin
+ *   max = Cmax
+ *   if min < 0.0
+ *     C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) )
+ *   if max > 1.0
+ *     C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) )
+ *   return C
+ *
+ * set_lum (C, l):
+ *   d = l – LUM (C)
+ *   C += d
+ *   return clip_color (C)
+ *
+ * SAT (C) = CH_MAX (C) - CH_MIN (C)
+ *
+ * set_sat (C, s):
+ *  if Cmax > Cmin
+ *    Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
+ *    Cmax = s
+ *  else
+ *    Cmid = Cmax = 0.0
+ *  Cmin = 0.0
+ *  return C
+ */
+
+/* For premultiplied colors, we need to know what happens when C is
+ * multiplied by a real number. LUM and SAT are linear:
+ *
+ *    LUM (r × C) = r × LUM (C)		SAT (r × C) = r × SAT (C)
+ *
+ * If we extend clip_color with an extra argument a and change
+ *
+ *        if max > 1.0
+ *
+ * into
+ *
+ *        if max > a
+ *
+ * then clip_color is also linear:
+ *
+ *    r × clip_color (C, a) = clip_color (r × C, r × a);
+ *
+ * for positive r.
+ *
+ * Similarly, we can extend set_lum with an extra argument that is just passed
+ * on to clip_color:
+ *
+ *   r × set_lum (C, l, a)
+ *
+ *   = r × clip_color (C + l - LUM (C), a)
+ *
+ *   = clip_color (r × C + r × l - r × LUM (C), r × a)
+ *
+ *   = set_lum (r × C, r × l, r × a)
+ *
+ * Finally, set_sat:
+ *
+ *    r × set_sat (C, s) = set_sat (x × C, r × s)
+ *
+ * The above holds for all non-zero x, because the x'es in the fraction for
+ * C_mid cancel out. Specifically, it holds for x = r:
+ *
+ *    r × set_sat (C, s) = set_sat (r × C, r × s)
+ *
+ */
+
+/* So, for the non-separable PDF blend modes, we have (using s, d for
+ * non-premultiplied colors, and S, D for premultiplied):
+ *
+ *   Color:
+ *
+ *     a_s * a_d * B(s, d)
+ *   = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
+ *   = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
+ *
+ *
+ *   Luminosity:
+ *
+ *     a_s * a_d * B(s, d)
+ *   = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
+ *   = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
+ *
+ *
+ *   Saturation:
+ *
+ *     a_s * a_d * B(s, d)
+ *   = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
+ *   = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
+ *                                        a_s * LUM (D), a_s * a_d)
+ *   = set_lum (set_sat (a_s * D, a_d * SAT (S)), a_s * LUM (D), a_s * a_d)
+ *
+ *   Hue:
+ *
+ *     a_s * a_d * B(s, d)
+ *   = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
+ *   = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
+ *
+ */
+
+/*
+ * Helpers for a three-element channel array c (any arithmetic element type;
+ * they are used on both uint32_t and double arrays in this file):
+ *
+ *   CH_MIN (c) / CH_MAX (c)  smallest / largest of c[0], c[1], c[2]
+ *   LUM (c)                  (30 * c[0] + 59 * c[1] + 11 * c[2]) / 100;
+ *                            the division truncates for integer arrays
+ *   SAT (c)                  CH_MAX (c) - CH_MIN (c)
+ *
+ * The argument is parenthesized so that an expression such as `p + 0` can be
+ * passed safely.  It is still evaluated several times, so it must not have
+ * side effects.
+ */
+#define CH_MIN(c) ((c)[0] < (c)[1] ? ((c)[0] < (c)[2] ? (c)[0] : (c)[2]) : ((c)[1] < (c)[2] ? (c)[1] : (c)[2]))
+#define CH_MAX(c) ((c)[0] > (c)[1] ? ((c)[0] > (c)[2] ? (c)[0] : (c)[2]) : ((c)[1] > (c)[2] ? (c)[1] : (c)[2]))
+#define LUM(c) (((c)[0] * 30 + (c)[1] * 59 + (c)[2] * 11) / 100)
+#define SAT(c) (CH_MAX (c) - CH_MIN (c))
+
+/*
+ * PDF_NON_SEPARABLE_BLEND_MODE (name) expands to the unified-alpha combiner
+ * combine_<name>_u built around blend_<name> (c, dc, da, sc, sa), which must
+ * already be declared when the macro is invoked.
+ *
+ * For every pixel the red, green and blue channels of the destination and of
+ * the masked source are unpacked into dc[] and sc[] (index 0 = red,
+ * 1 = green, 2 = blue) and handed to blend_<name> (), which fills c[].
+ * The stored pixel is the sum of
+ *   - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (d, isa, s, ida),
+ *   - DIV_ONE_UN8 (sa * da) in the alpha channel, and
+ *   - DIV_ONE_UN8 (c[k]) in each color channel.
+ * The terms are added with plain `+`; nothing in this macro clamps the sums.
+ * No component-alpha variant is generated.
+ */
+#define PDF_NON_SEPARABLE_BLEND_MODE(name)				\
+    static void								\
+    combine_ ## name ## _u (pixman_implementation_t *imp,		\
+			    pixman_op_t op,				\
+                            uint32_t *dest,				\
+			    const uint32_t *src,				\
+			    const uint32_t *mask,			\
+			    int width)					\
+    {									\
+	int i;								\
+	for (i = 0; i < width; ++i)					\
+	{								\
+	    uint32_t s = combine_mask (src, mask, i);			\
+	    uint32_t d = *(dest + i);					\
+	    uint8_t sa = ALPHA_8 (s);					\
+	    uint8_t isa = ~sa;						\
+	    uint8_t da = ALPHA_8 (d);					\
+	    uint8_t ida = ~da;						\
+	    uint32_t result;						\
+	    uint32_t sc[3], dc[3], c[3];					\
+            								\
+	    result = d;							\
+	    UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida);	\
+	    dc[0] = RED_8 (d);						\
+	    sc[0] = RED_8 (s);						\
+	    dc[1] = GREEN_8 (d);					\
+	    sc[1] = GREEN_8 (s);					\
+	    dc[2] = BLUE_8 (d);						\
+	    sc[2] = BLUE_8 (s);						\
+	    blend_ ## name (c, dc, da, sc, sa);				\
+            								\
+	    *(dest + i) = result +					\
+		(DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) +		\
+		(DIV_ONE_UN8 (c[0]) << R_SHIFT) +			\
+		(DIV_ONE_UN8 (c[1]) << G_SHIFT) +			\
+		(DIV_ONE_UN8 (c[2]));					\
+	}								\
+    }
+
+/*
+ * set_lum with the extra clipping bound described in the comments above.
+ *
+ * src[] holds three channel values, lum the target luminosity and sa the
+ * upper clipping bound; all are integers that get scaled by 1.0 / MASK and
+ * processed in double precision.  The channels are shifted so that their LUM
+ * equals the target, clipped into [0, bound] around that luminosity, then
+ * scaled back by MASK, rounded by adding 0.5 and written to dest[].
+ *
+ * The callers in this file pass the same array as dest and src, and pass
+ * sa * da as the `sa` argument.
+ */
+static void
+set_lum (uint32_t dest[3], uint32_t src[3], uint32_t sa, uint32_t lum)
+{
+    double a, l, min, max;
+    double tmp[3];
+
+    a = sa * (1.0 / MASK);
+
+    l = lum * (1.0 / MASK);
+    tmp[0] = src[0] * (1.0 / MASK);
+    tmp[1] = src[1] * (1.0 / MASK);
+    tmp[2] = src[2] * (1.0 / MASK);
+
+    /* Shift every channel by the luminosity difference. */
+    l = l - LUM (tmp);
+    tmp[0] += l;
+    tmp[1] += l;
+    tmp[2] += l;
+
+    /* clip_color */
+    l = LUM (tmp);
+    min = CH_MIN (tmp);
+    max = CH_MAX (tmp);
+
+    if (min < 0)
+    {
+	/* Explicit guard against dividing by zero below. */
+	if (l - min == 0.0)
+	{
+	    tmp[0] = 0;
+	    tmp[1] = 0;
+	    tmp[2] = 0;
+	}
+	else
+	{
+	    tmp[0] = l + (tmp[0] - l) * l / (l - min);
+	    tmp[1] = l + (tmp[1] - l) * l / (l - min);
+	    tmp[2] = l + (tmp[2] - l) * l / (l - min);
+	}
+    }
+    /* max and l are the values computed before the lower clip above. */
+    if (max > a)
+    {
+	/* Explicit guard against dividing by zero below. */
+	if (max - l == 0.0)
+	{
+	    tmp[0] = a;
+	    tmp[1] = a;
+	    tmp[2] = a;
+	}
+	else
+	{
+	    tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
+	    tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
+	    tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
+	}
+    }
+
+    /* Back to integers, rounding to nearest. */
+    dest[0] = tmp[0] * MASK + 0.5;
+    dest[1] = tmp[1] * MASK + 0.5;
+    dest[2] = tmp[2] * MASK + 0.5;
+}
+
+/*
+ * set_sat: rescale the three channels in dest[] so that max - min == sat.
+ *
+ * The ordering of the channels (which one is largest, middle, smallest) is
+ * taken from src[]; the values that are rescaled are read from and written
+ * to dest[].  When the largest and smallest values in dest[] do not differ
+ * (max <= min), all three channels are set to 0.
+ */
+static void
+set_sat (uint32_t dest[3], uint32_t src[3], uint32_t sat)
+{
+    int hi, mid, lo;
+    uint32_t lowest, highest;
+
+    /* Rank the channels of src; ties resolve towards the `else` branches. */
+    if (src[0] > src[1])
+    {
+	if (src[0] > src[2])
+	{
+	    hi = 0;
+	    mid = src[1] > src[2] ? 1 : 2;
+	    lo = 3 - mid;
+	}
+	else
+	{
+	    hi = 2;
+	    mid = 0;
+	    lo = 1;
+	}
+    }
+    else if (src[0] > src[2])
+    {
+	hi = 1;
+	mid = 0;
+	lo = 2;
+    }
+    else
+    {
+	lo = 0;
+	hi = src[1] > src[2] ? 1 : 2;
+	mid = 3 - hi;
+    }
+
+    highest = dest[hi];
+    lowest = dest[lo];
+
+    if (highest <= lowest)
+    {
+	dest[0] = dest[1] = dest[2] = 0;
+	return;
+    }
+
+    dest[mid] = (dest[mid] - lowest) * sat / (highest - lowest);
+    dest[hi] = sat;
+    dest[lo] = 0;
+}
+
+/*
+ * Hue:
+ * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
+ *
+ * Premultiplied form: c = sc * da, then set_sat (c, SAT (dc) * sa) and
+ * set_lum (c, sa * da, LUM (dc) * sa).  The result is left in c[].
+ */
+static inline void
+blend_hsl_hue (uint32_t c[3],
+               uint32_t dc[3],
+               uint32_t da,
+               uint32_t sc[3],
+               uint32_t sa)
+{
+    int ch;
+
+    for (ch = 0; ch < 3; ch++)
+	c[ch] = sc[ch] * da;
+
+    set_sat (c, c, SAT (dc) * sa);
+    set_lum (c, c, sa * da, LUM (dc) * sa);
+}
+
+PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue)
+
+/*
+ * Saturation:
+ * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
+ *
+ * Premultiplied form: c = dc * sa, then set_sat (c, SAT (sc) * da) and
+ * set_lum (c, sa * da, LUM (dc) * sa).  The result is left in c[].
+ */
+static inline void
+blend_hsl_saturation (uint32_t c[3],
+                      uint32_t dc[3],
+                      uint32_t da,
+                      uint32_t sc[3],
+                      uint32_t sa)
+{
+    int ch;
+
+    for (ch = 0; ch < 3; ch++)
+	c[ch] = dc[ch] * sa;
+
+    set_sat (c, c, SAT (sc) * da);
+    set_lum (c, c, sa * da, LUM (dc) * sa);
+}
+
+PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation)
+
+/*
+ * Color:
+ * B(Cb, Cs) = set_lum (Cs, LUM (Cb))
+ *
+ * Premultiplied form: c = sc * da, then
+ * set_lum (c, sa * da, LUM (dc) * sa).  The result is left in c[].
+ */
+static inline void
+blend_hsl_color (uint32_t c[3],
+                 uint32_t dc[3],
+                 uint32_t da,
+                 uint32_t sc[3],
+                 uint32_t sa)
+{
+    int ch;
+
+    for (ch = 0; ch < 3; ch++)
+	c[ch] = sc[ch] * da;
+
+    set_lum (c, c, sa * da, LUM (dc) * sa);
+}
+
+PDF_NON_SEPARABLE_BLEND_MODE (hsl_color)
+
+/*
+ * Luminosity:
+ * B(Cb, Cs) = set_lum (Cb, LUM (Cs))
+ *
+ * Premultiplied form: c = dc * sa, then
+ * set_lum (c, sa * da, LUM (sc) * da).  The result is left in c[].
+ */
+static inline void
+blend_hsl_luminosity (uint32_t c[3],
+                      uint32_t dc[3],
+                      uint32_t da,
+                      uint32_t sc[3],
+                      uint32_t sa)
+{
+    int ch;
+
+    for (ch = 0; ch < 3; ch++)
+	c[ch] = dc[ch] * sa;
+
+    set_lum (c, c, sa * da, LUM (sc) * da);
+}
+
+PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity)
+
+#undef SAT
+#undef LUM
+#undef CH_MAX
+#undef CH_MIN
+#undef PDF_NON_SEPARABLE_BLEND_MODE
+
+/* All of the disjoint/conjoint composing functions
+ *
+ * The four entries in the first column indicate what source contributions
+ * come from each of the four areas of the picture -- areas covered by neither
+ * A nor B, areas covered only by A, areas covered only by B and finally
+ * areas covered by both A and B.
+ * 
+ * Disjoint			Conjoint
+ * Fa		Fb		Fa		Fb
+ * (0,0,0,0)	0		0		0		0
+ * (0,A,0,A)	1		0		1		0
+ * (0,0,B,B)	0		1		0		1
+ * (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
+ * (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
+ * (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
+ * (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
+ * (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
+ * (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
+ * (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
+ * (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
+ * (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
+ *
+ * See  http://marc.info/?l=xfree-render&m=99792000027857&w=2  for more
+ * information about these operators.
+ */
+
+/*
+ * Bit flags selecting how the factors Fa (applied to the source, "A") and
+ * Fb (applied to the destination, "B") are computed by the general
+ * disjoint/conjoint combiners below.  The two low bits choose Fa, the next
+ * two bits choose Fb: neither bit -> 0, OUT only -> the "out part",
+ * IN only -> the "in part", both -> MASK.
+ */
+#define COMBINE_A_OUT 1
+#define COMBINE_A_IN  2
+#define COMBINE_B_OUT 4
+#define COMBINE_B_IN  8
+
+/* Operator names expressed as combinations of the flags above. */
+#define COMBINE_CLEAR   0
+#define COMBINE_A       (COMBINE_A_OUT | COMBINE_A_IN)
+#define COMBINE_B       (COMBINE_B_OUT | COMBINE_B_IN)
+#define COMBINE_A_OVER  (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN)
+#define COMBINE_B_OVER  (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN)
+#define COMBINE_A_ATOP  (COMBINE_B_OUT | COMBINE_A_IN)
+#define COMBINE_B_ATOP  (COMBINE_A_OUT | COMBINE_B_IN)
+#define COMBINE_XOR     (COMBINE_A_OUT | COMBINE_B_OUT)
+
+/*
+ * Disjoint case, portion covered by a but not b:  min (1, (1-b) / a).
+ * Returns MASK when (1-b) >= a, which also covers a == 0, so the division
+ * never sees a zero divisor.
+ */
+static uint8_t
+combine_disjoint_out_part (uint8_t a, uint8_t b)
+{
+    uint8_t room = ~b;		/* 1 - b */
+
+    if (room < a)
+	return DIV_UN8 (room, a);	/* (1-b) / a */
+
+    return MASK;			/* 1 */
+}
+
+/*
+ * Disjoint case, portion covered by both a and b:
+ *
+ *   max (1 - (1-b)/a, 0)  =  1 - min (1, (1-b)/a)
+ *
+ * Returns 0 when (1-b) >= a, which also covers a == 0, so the division
+ * never sees a zero divisor.
+ */
+static uint8_t
+combine_disjoint_in_part (uint8_t a, uint8_t b)
+{
+    uint8_t room = ~b;		/* 1 - b */
+
+    if (room < a)
+	return ~DIV_UN8 (room, a);	/* 1 - (1-b) / a */
+
+    return 0;			/* 1 - 1 */
+}
+
+/*
+ * Conjoint case, portion covered by a but not b:
+ *
+ *   max (1 - b/a, 0)  =  1 - min (b/a, 1)
+ *
+ * Returns 0 when b >= a, which also covers a == 0, so the division never
+ * sees a zero divisor.
+ */
+static uint8_t
+combine_conjoint_out_part (uint8_t a, uint8_t b)
+{
+    if (b < a)
+	return ~DIV_UN8 (b, a);	/* 1 - b/a */
+
+    return 0x00;		/* b/a >= 1, so nothing is left */
+}
+
+/*
+ * Conjoint case, portion covered by both a and b:  min (1, b/a).
+ * Returns MASK when b >= a, which also covers a == 0, so the division never
+ * sees a zero divisor.
+ */
+static uint8_t
+combine_conjoint_in_part (uint8_t a, uint8_t b)
+{
+    if (b < a)
+	return DIV_UN8 (b, a);	/* b/a */
+
+    return MASK;		/* 1 */
+}
+
+/*
+ * GET_COMP (v, i): the 8-bit component of v that starts at bit i, widened
+ * to uint16_t.  The shift count is parenthesized so that an expression
+ * argument (for example a conditional) is shifted as a whole.
+ */
+#define GET_COMP(v, i)   ((uint16_t) (uint8_t) ((v) >> (i)))
+
+/*
+ * ADD (x, y, i, t): sum of component i of x and y, with any carry above the
+ * low eight bits turned into an all-ones byte (saturation), shifted back to
+ * bit position i.  t is a scratch lvalue.
+ */
+#define ADD(x, y, i, t)							\
+    ((t) = GET_COMP (x, i) + GET_COMP (y, i),				\
+     (uint32_t) ((uint8_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
+
+/*
+ * GENERIC (x, y, i, ax, ay, t, u, v): component i of y scaled by ay plus
+ * component i of x scaled by ax (both through MUL_UN8), saturated the same
+ * way as ADD and shifted back to bit position i.  t, u and v are scratch
+ * lvalues.
+ */
+#define GENERIC(x, y, i, ax, ay, t, u, v)				\
+    ((t) = (MUL_UN8 (GET_COMP (y, i), ay, (u)) +			\
+            MUL_UN8 (GET_COMP (x, i), ax, (v))),			\
+     (uint32_t) ((uint8_t) ((t) |					\
+                           (0 - ((t) >> G_SHIFT)))) << (i))
+
+/*
+ * General disjoint combiner, unified alpha.
+ *
+ * `combine` is a mask of COMBINE_* flags.  Its COMBINE_A bits select the
+ * source factor Fa and its COMBINE_B bits the destination factor Fb:
+ * 0 when neither bit is set, the disjoint "out part" or "in part" when only
+ * the OUT or IN bit is set, MASK when both are set.  Every channel of the
+ * result is then built with GENERIC from the masked source, the destination
+ * and the two factors.
+ */
+static void
+combine_disjoint_general_u (uint32_t       *dest,
+                            const uint32_t *src,
+                            const uint32_t *mask,
+                            int             width,
+                            uint8_t         combine)
+{
+    const uint8_t a_bits = combine & COMBINE_A;
+    const uint8_t b_bits = combine & COMBINE_B;
+    int px;
+
+    for (px = 0; px < width; px++)
+    {
+	uint32_t pixel = combine_mask (src, mask, px);
+	uint32_t dst = dest[px];
+	uint8_t sa = pixel >> A_SHIFT;
+	uint8_t da = dst >> A_SHIFT;
+	uint16_t Fa, Fb, t, u, v;
+	uint32_t ch0, ch_g, ch_r, ch_a;
+
+	if (a_bits == COMBINE_A_OUT)
+	    Fa = combine_disjoint_out_part (sa, da);
+	else if (a_bits == COMBINE_A_IN)
+	    Fa = combine_disjoint_in_part (sa, da);
+	else if (a_bits == COMBINE_A)
+	    Fa = MASK;
+	else
+	    Fa = 0;
+
+	if (b_bits == COMBINE_B_OUT)
+	    Fb = combine_disjoint_out_part (da, sa);
+	else if (b_bits == COMBINE_B_IN)
+	    Fb = combine_disjoint_in_part (da, sa);
+	else if (b_bits == COMBINE_B)
+	    Fb = MASK;
+	else
+	    Fb = 0;
+
+	ch0  = GENERIC (pixel, dst, 0, Fa, Fb, t, u, v);
+	ch_g = GENERIC (pixel, dst, G_SHIFT, Fa, Fb, t, u, v);
+	ch_r = GENERIC (pixel, dst, R_SHIFT, Fa, Fb, t, u, v);
+	ch_a = GENERIC (pixel, dst, A_SHIFT, Fa, Fb, t, u, v);
+
+	dest[px] = ch0 | ch_g | ch_r | ch_a;
+    }
+}
+
+/*
+ * Disjoint OVER, unified alpha.  Pixels whose masked source is 0 are left
+ * untouched.  Otherwise the destination is scaled by
+ * combine_disjoint_out_part (destination alpha, source alpha) and the
+ * source is added, both in one UN8x4_MUL_UN8_ADD_UN8x4 step.
+ * imp and op are not used.
+ */
+static void
+combine_disjoint_over_u (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         uint32_t                *dest,
+                         const uint32_t          *src,
+                         const uint32_t          *mask,
+                         int                      width)
+{
+    int px;
+
+    for (px = 0; px < width; px++)
+    {
+	uint32_t pixel = combine_mask (src, mask, px);
+	uint16_t factor = pixel >> A_SHIFT;
+	uint32_t dst;
+
+	if (pixel == 0x00)
+	    continue;
+
+	dst = dest[px];
+	factor = combine_disjoint_out_part (dst >> A_SHIFT, factor);
+	UN8x4_MUL_UN8_ADD_UN8x4 (dst, factor, pixel);
+
+	dest[px] = dst;
+    }
+}
+
+/*
+ * Disjoint IN, unified alpha: forwards to combine_disjoint_general_u with
+ * COMBINE_A_IN (Fa = disjoint in part of (sa, da), Fb = 0).
+ * imp and op are not used.
+ */
+static void
+combine_disjoint_in_u (pixman_implementation_t *imp,
+                       pixman_op_t              op,
+                       uint32_t *                dest,
+                       const uint32_t *          src,
+                       const uint32_t *          mask,
+                       int                      width)
+{
+    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
+}
+
+/*
+ * Disjoint IN_REVERSE, unified alpha: forwards to
+ * combine_disjoint_general_u with COMBINE_B_IN (Fa = 0, Fb = disjoint in
+ * part of (da, sa)).  imp and op are not used.
+ */
+static void
+combine_disjoint_in_reverse_u (pixman_implementation_t *imp,
+                               pixman_op_t              op,
+                               uint32_t *                dest,
+                               const uint32_t *          src,
+                               const uint32_t *          mask,
+                               int                      width)
+{
+    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
+}
+
+/*
+ * Disjoint OUT, unified alpha: forwards to combine_disjoint_general_u with
+ * COMBINE_A_OUT (Fa = disjoint out part of (sa, da), Fb = 0).
+ * imp and op are not used.
+ */
+static void
+combine_disjoint_out_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        uint32_t *                dest,
+                        const uint32_t *          src,
+                        const uint32_t *          mask,
+                        int                      width)
+{
+    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
+}
+
+/*
+ * Disjoint OUT_REVERSE, unified alpha: forwards to
+ * combine_disjoint_general_u with COMBINE_B_OUT (Fa = 0, Fb = disjoint out
+ * part of (da, sa)).  imp and op are not used.
+ */
+static void
+combine_disjoint_out_reverse_u (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                uint32_t *                dest,
+                                const uint32_t *          src,
+                                const uint32_t *          mask,
+                                int                      width)
+{
+    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
+}
+
+/*
+ * Disjoint ATOP, unified alpha: forwards to combine_disjoint_general_u with
+ * COMBINE_A_ATOP (Fa = disjoint in part of (sa, da), Fb = disjoint out part
+ * of (da, sa)).  imp and op are not used.
+ */
+static void
+combine_disjoint_atop_u (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         uint32_t *                dest,
+                         const uint32_t *          src,
+                         const uint32_t *          mask,
+                         int                      width)
+{
+    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
+}
+
+/*
+ * Disjoint ATOP_REVERSE, unified alpha: forwards to
+ * combine_disjoint_general_u with COMBINE_B_ATOP (Fa = disjoint out part of
+ * (sa, da), Fb = disjoint in part of (da, sa)).  imp and op are not used.
+ */
+static void
+combine_disjoint_atop_reverse_u (pixman_implementation_t *imp,
+                                 pixman_op_t              op,
+                                 uint32_t *                dest,
+                                 const uint32_t *          src,
+                                 const uint32_t *          mask,
+                                 int                      width)
+{
+    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
+}
+
+/*
+ * Disjoint XOR, unified alpha: forwards to combine_disjoint_general_u with
+ * COMBINE_XOR (Fa = disjoint out part of (sa, da), Fb = disjoint out part
+ * of (da, sa)).  imp and op are not used.
+ */
+static void
+combine_disjoint_xor_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        uint32_t *                dest,
+                        const uint32_t *          src,
+                        const uint32_t *          mask,
+                        int                      width)
+{
+    combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR);
+}
+
+/*
+ * General conjoint combiner, unified alpha.
+ *
+ * `combine` is a mask of COMBINE_* flags.  Its COMBINE_A bits select the
+ * source factor Fa and its COMBINE_B bits the destination factor Fb:
+ * 0 when neither bit is set, the conjoint "out part" or "in part" when only
+ * the OUT or IN bit is set, MASK when both are set.  Every channel of the
+ * result is then built with GENERIC from the masked source, the destination
+ * and the two factors.
+ */
+static void
+combine_conjoint_general_u (uint32_t       *dest,
+                            const uint32_t *src,
+                            const uint32_t *mask,
+                            int             width,
+                            uint8_t         combine)
+{
+    const uint8_t a_bits = combine & COMBINE_A;
+    const uint8_t b_bits = combine & COMBINE_B;
+    int px;
+
+    for (px = 0; px < width; px++)
+    {
+	uint32_t pixel = combine_mask (src, mask, px);
+	uint32_t dst = dest[px];
+	uint8_t sa = pixel >> A_SHIFT;
+	uint8_t da = dst >> A_SHIFT;
+	uint16_t Fa, Fb, t, u, v;
+	uint32_t ch0, ch_g, ch_r, ch_a;
+
+	if (a_bits == COMBINE_A_OUT)
+	    Fa = combine_conjoint_out_part (sa, da);
+	else if (a_bits == COMBINE_A_IN)
+	    Fa = combine_conjoint_in_part (sa, da);
+	else if (a_bits == COMBINE_A)
+	    Fa = MASK;
+	else
+	    Fa = 0;
+
+	if (b_bits == COMBINE_B_OUT)
+	    Fb = combine_conjoint_out_part (da, sa);
+	else if (b_bits == COMBINE_B_IN)
+	    Fb = combine_conjoint_in_part (da, sa);
+	else if (b_bits == COMBINE_B)
+	    Fb = MASK;
+	else
+	    Fb = 0;
+
+	ch0  = GENERIC (pixel, dst, 0, Fa, Fb, t, u, v);
+	ch_g = GENERIC (pixel, dst, G_SHIFT, Fa, Fb, t, u, v);
+	ch_r = GENERIC (pixel, dst, R_SHIFT, Fa, Fb, t, u, v);
+	ch_a = GENERIC (pixel, dst, A_SHIFT, Fa, Fb, t, u, v);
+
+	dest[px] = ch0 | ch_g | ch_r | ch_a;
+    }
+}
+
+/*
+ * Conjoint OVER, unified alpha: forwards to combine_conjoint_general_u with
+ * COMBINE_A_OVER (Fa = MASK, Fb = conjoint out part of (da, sa)).
+ * imp and op are not used.
+ */
+static void
+combine_conjoint_over_u (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         uint32_t *                dest,
+                         const uint32_t *          src,
+                         const uint32_t *          mask,
+                         int                      width)
+{
+    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER);
+}
+
+/*
+ * Conjoint OVER_REVERSE, unified alpha: forwards to
+ * combine_conjoint_general_u with COMBINE_B_OVER (Fa = conjoint out part of
+ * (sa, da), Fb = MASK).  imp and op are not used.
+ */
+static void
+combine_conjoint_over_reverse_u (pixman_implementation_t *imp,
+                                 pixman_op_t              op,
+                                 uint32_t *                dest,
+                                 const uint32_t *          src,
+                                 const uint32_t *          mask,
+                                 int                      width)
+{
+    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER);
+}
+
+/*
+ * Conjoint IN, unified alpha: forwards to combine_conjoint_general_u with
+ * COMBINE_A_IN (Fa = conjoint in part of (sa, da), Fb = 0).
+ * imp and op are not used.
+ */
+static void
+combine_conjoint_in_u (pixman_implementation_t *imp,
+                       pixman_op_t              op,
+                       uint32_t *                dest,
+                       const uint32_t *          src,
+                       const uint32_t *          mask,
+                       int                      width)
+{
+    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
+}
+
+/*
+ * Conjoint IN_REVERSE, unified alpha: forwards to
+ * combine_conjoint_general_u with COMBINE_B_IN (Fa = 0, Fb = conjoint in
+ * part of (da, sa)).  imp and op are not used.
+ */
+static void
+combine_conjoint_in_reverse_u (pixman_implementation_t *imp,
+                               pixman_op_t              op,
+                               uint32_t *                dest,
+                               const uint32_t *          src,
+                               const uint32_t *          mask,
+                               int                      width)
+{
+    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
+}
+
+/*
+ * Conjoint OUT, unified alpha: forwards to combine_conjoint_general_u with
+ * COMBINE_A_OUT (Fa = conjoint out part of (sa, da), Fb = 0).
+ * imp and op are not used.
+ */
+static void
+combine_conjoint_out_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        uint32_t *                dest,
+                        const uint32_t *          src,
+                        const uint32_t *          mask,
+                        int                      width)
+{
+    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
+}
+
+/*
+ * Conjoint OUT_REVERSE, unified alpha: forwards to
+ * combine_conjoint_general_u with COMBINE_B_OUT (Fa = 0, Fb = conjoint out
+ * part of (da, sa)).  imp and op are not used.
+ */
+static void
+combine_conjoint_out_reverse_u (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                uint32_t *                dest,
+                                const uint32_t *          src,
+                                const uint32_t *          mask,
+                                int                      width)
+{
+    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
+}
+
+/*
+ * Conjoint ATOP, unified alpha: forwards to combine_conjoint_general_u with
+ * COMBINE_A_ATOP (Fa = conjoint in part of (sa, da), Fb = conjoint out part
+ * of (da, sa)).  imp and op are not used.
+ */
+static void
+combine_conjoint_atop_u (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         uint32_t *                dest,
+                         const uint32_t *          src,
+                         const uint32_t *          mask,
+                         int                      width)
+{
+    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
+}
+
+/*
+ * Conjoint ATOP_REVERSE, unified alpha: forwards to
+ * combine_conjoint_general_u with COMBINE_B_ATOP (Fa = conjoint out part of
+ * (sa, da), Fb = conjoint in part of (da, sa)).  imp and op are not used.
+ */
+static void
+combine_conjoint_atop_reverse_u (pixman_implementation_t *imp,
+                                 pixman_op_t              op,
+                                 uint32_t *                dest,
+                                 const uint32_t *          src,
+                                 const uint32_t *          mask,
+                                 int                      width)
+{
+    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
+}
+
+static void /* Unified-alpha conjoint XOR: thin wrapper around combine_conjoint_general_u() */
+combine_conjoint_xor_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        uint32_t                *dest,
+                        const uint32_t          *src,
+                        const uint32_t          *mask,
+                        int                      width)
+{   /* imp and op are not used; COMBINE_XOR is passed as the selector */
+    combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR);
+}
+
+
+/* Component alpha combiners */
+
+static void /* Component-alpha CLEAR: zero-fills width pixels of dest */
+combine_clear_ca (pixman_implementation_t *imp,
+                  pixman_op_t              op,
+                  uint32_t                *dest,
+                  const uint32_t          *src,
+                  const uint32_t          *mask,
+                  int                      width)
+{   /* imp, op, src and mask are not used */
+    memset (dest, 0, width * sizeof (*dest));
+}
+
+static void /* Component-alpha SRC: stores the source pixel as left by combine_mask_value_ca() */
+combine_src_ca (pixman_implementation_t *imp,
+                pixman_op_t              op,
+                uint32_t                *dest,
+                const uint32_t          *src,
+                const uint32_t          *mask,
+                int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	uint32_t spix = src[i];
+	uint32_t mpix = mask[i];
+
+	combine_mask_value_ca (&spix, &mpix); /* receives both locals by pointer */
+
+	dest[i] = spix; /* previous dest contents are never read */
+    }
+}
+
+static void /* Component-alpha OVER: dest is only blended in when the complemented mask is non-zero */
+combine_over_ca (pixman_implementation_t *imp,
+                 pixman_op_t              op,
+                 uint32_t                *dest,
+                 const uint32_t          *src,
+                 const uint32_t          *mask,
+                 int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	uint32_t spix = src[i];
+	uint32_t mpix = mask[i];
+	uint32_t inv;
+
+	combine_mask_ca (&spix, &mpix); /* receives both locals by pointer */
+
+	inv = ~mpix; /* bitwise complement of all four mask channels */
+	if (inv != 0)
+	{
+	    uint32_t dpix = dest[i];
+	    UN8x4_MUL_UN8x4_ADD_UN8x4 (dpix, inv, spix); /* by its name: dpix = dpix * inv + spix */
+	    spix = dpix;
+	}
+
+	dest[i] = spix; /* inv == 0: the masked source is stored unchanged */
+    }
+}
+
+static void /* Component-alpha OVER_REVERSE: pixels whose dest alpha byte is 0xff are left untouched */
+combine_over_reverse_ca (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         uint32_t                *dest,
+                         const uint32_t          *src,
+                         const uint32_t          *mask,
+                         int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	uint32_t dpix = dest[i];
+	uint32_t inv_da = ~dpix >> A_SHIFT; /* complement of the dest alpha byte */
+
+	if (inv_da != 0)
+	{
+	    uint32_t spix = src[i];
+	    uint32_t mpix = mask[i];
+
+	    UN8x4_MUL_UN8x4 (spix, mpix); /* source times mask, channel by channel */
+	    UN8x4_MUL_UN8_ADD_UN8x4 (spix, inv_da, dpix); /* by its name: spix = spix * inv_da + dpix */
+
+	    dest[i] = spix;
+	}
+    }
+}
+
+static void /* Component-alpha IN: masked source scaled by the dest alpha byte; 0 when that alpha is 0 */
+combine_in_ca (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               uint32_t                *dest,
+               const uint32_t          *src,
+               const uint32_t          *mask,
+               int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	uint32_t dpix = dest[i];
+	uint16_t da = dpix >> A_SHIFT; /* dest alpha byte */
+	uint32_t out = 0;
+
+	if (da != 0)
+	{
+	    uint32_t mpix = mask[i];
+
+	    out = src[i];
+	    combine_mask_value_ca (&out, &mpix);
+
+	    if (da != MASK) /* a factor of 0xff needs no multiply */
+		UN8x4_MUL_UN8 (out, da);
+	}
+
+	dest[i] = out;
+    }
+}
+
+static void /* Component-alpha IN_REVERSE: dest scaled per channel by the mask left by combine_mask_alpha_ca() */
+combine_in_reverse_ca (pixman_implementation_t *imp,
+                       pixman_op_t              op,
+                       uint32_t                *dest,
+                       const uint32_t          *src,
+                       const uint32_t          *mask,
+                       int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	uint32_t spix = src[i];
+	uint32_t mpix = mask[i];
+	uint32_t fac;
+
+	combine_mask_alpha_ca (&spix, &mpix); /* receives both locals by pointer */
+
+	fac = mpix;
+	if (fac != ~0) /* an all-ones factor leaves dest as it is */
+	{
+	    uint32_t dpix = 0; /* result when the factor is 0 */
+
+	    if (fac != 0)
+	    {
+		dpix = dest[i];
+		UN8x4_MUL_UN8x4 (dpix, fac);
+	    }
+
+	    dest[i] = dpix;
+	}
+    }
+}
+
+static void /* Component-alpha OUT: masked source scaled by the complement of the dest alpha byte */
+combine_out_ca (pixman_implementation_t *imp,
+                pixman_op_t              op,
+                uint32_t                *dest,
+                const uint32_t          *src,
+                const uint32_t          *mask,
+                int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	uint32_t dpix = dest[i];
+	uint16_t inv_da = ~dpix >> A_SHIFT; /* complement of the dest alpha byte */
+	uint32_t out = 0;
+
+	if (inv_da != 0)
+	{
+	    uint32_t mpix = mask[i];
+
+	    out = src[i];
+	    combine_mask_value_ca (&out, &mpix);
+
+	    if (inv_da != MASK) /* a factor of 0xff needs no multiply */
+		UN8x4_MUL_UN8 (out, inv_da);
+	}
+
+	dest[i] = out;
+    }
+}
+
+static void /* Component-alpha OUT_REVERSE: dest scaled per channel by the complemented mask */
+combine_out_reverse_ca (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        uint32_t                *dest,
+                        const uint32_t          *src,
+                        const uint32_t          *mask,
+                        int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	uint32_t spix = src[i];
+	uint32_t mpix = mask[i];
+	uint32_t fac;
+
+	combine_mask_alpha_ca (&spix, &mpix); /* receives both locals by pointer */
+
+	fac = ~mpix;
+	if (fac != ~0) /* an all-ones factor leaves dest as it is */
+	{
+	    uint32_t dpix = 0; /* result when the factor is 0 */
+
+	    if (fac != 0)
+	    {
+		dpix = dest[i];
+		UN8x4_MUL_UN8x4 (dpix, fac);
+	    }
+
+	    dest[i] = dpix;
+	}
+    }
+}
+
+static void /* Component-alpha ATOP: dest weighted by ~mask, source weighted by the dest alpha byte */
+combine_atop_ca (pixman_implementation_t *imp,
+                 pixman_op_t              op,
+                 uint32_t                *dest,
+                 const uint32_t          *src,
+                 const uint32_t          *mask,
+                 int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	uint32_t dpix = dest[i];
+	uint32_t spix = src[i];
+	uint32_t mpix = mask[i];
+	uint32_t dst_fac;
+	uint16_t src_fac = dpix >> A_SHIFT; /* dest alpha byte */
+
+	combine_mask_ca (&spix, &mpix);
+
+	dst_fac = ~mpix; /* per-channel complement of the mask */
+
+	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (dpix, dst_fac, spix, src_fac);
+
+	dest[i] = dpix;
+    }
+}
+
+static void /* Component-alpha ATOP_REVERSE: dest weighted by mask, source by the complemented dest alpha */
+combine_atop_reverse_ca (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         uint32_t                *dest,
+                         const uint32_t          *src,
+                         const uint32_t          *mask,
+                         int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	uint32_t dpix = dest[i];
+	uint32_t spix = src[i];
+	uint32_t mpix = mask[i];
+	uint32_t dst_fac;
+	uint16_t src_fac = ~dpix >> A_SHIFT; /* complement of the dest alpha byte */
+
+	combine_mask_ca (&spix, &mpix);
+
+	dst_fac = mpix; /* mask as left by combine_mask_ca() */
+
+	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (dpix, dst_fac, spix, src_fac);
+
+	dest[i] = dpix;
+    }
+}
+
+static void /* Component-alpha XOR: dest weighted by ~mask, source by the complemented dest alpha */
+combine_xor_ca (pixman_implementation_t *imp,
+                pixman_op_t              op,
+                uint32_t                *dest,
+                const uint32_t          *src,
+                const uint32_t          *mask,
+                int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	uint32_t dpix = dest[i];
+	uint32_t spix = src[i];
+	uint32_t mpix = mask[i];
+	uint32_t dst_fac;
+	uint16_t src_fac = ~dpix >> A_SHIFT; /* complement of the dest alpha byte */
+
+	combine_mask_ca (&spix, &mpix);
+
+	dst_fac = ~mpix; /* per-channel complement of the mask */
+
+	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (dpix, dst_fac, spix, src_fac);
+
+	dest[i] = dpix;
+    }
+}
+
+static void /* Component-alpha ADD: adds the masked source onto dest via UN8x4_ADD_UN8x4 */
+combine_add_ca (pixman_implementation_t *imp,
+                pixman_op_t              op,
+                uint32_t                *dest,
+                const uint32_t          *src,
+                const uint32_t          *mask,
+                int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	uint32_t spix = src[i];
+	uint32_t mpix = mask[i];
+	uint32_t dpix = dest[i];
+
+	combine_mask_value_ca (&spix, &mpix); /* receives both locals by pointer */
+
+	UN8x4_ADD_UN8x4 (dpix, spix); /* accumulates into dpix */
+
+	dest[i] = dpix;
+    }
+}
+
+static void /* Component-alpha SATURATE: per channel, plain ADD while the mask alpha fits in ~dest alpha, else a scaled GENERIC blend */
+combine_saturate_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *                dest,
+                     const uint32_t *          src,
+                     const uint32_t *          mask,
+                     int                      width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s, d;
+	uint16_t sa, sr, sg, sb, da;
+	uint16_t t, u, v; /* temporaries handed to the ADD and GENERIC macros */
+	uint32_t m, n, o, p; /* m starts as the mask, then m/n/o/p hold the four result channels */
+
+	d = *(dest + i);
+	s = *(src + i);
+	m = *(mask + i);
+
+	combine_mask_ca (&s, &m);
+
+	sa = (m >> A_SHIFT); /* the four bytes of m, read before m is reused below */
+	sr = (m >> R_SHIFT) & MASK;
+	sg = (m >> G_SHIFT) & MASK;
+	sb =  m             & MASK;
+	da = ~d >> A_SHIFT; /* complement of the dest alpha byte */
+
+	if (sb <= da)
+	    m = ADD (s, d, 0, t);
+	else /* sb > da >= 0 here, so the divisor below is never 0 */
+	    m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
+
+	if (sg <= da)
+	    n = ADD (s, d, G_SHIFT, t);
+	else /* sg > da, divisor non-zero */
+	    n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
+
+	if (sr <= da)
+	    o = ADD (s, d, R_SHIFT, t);
+	else /* sr > da, divisor non-zero */
+	    o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
+
+	if (sa <= da)
+	    p = ADD (s, d, A_SHIFT, t);
+	else /* sa > da, divisor non-zero */
+	    p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
+
+	*(dest + i) = m | n | o | p; /* reassemble the four channels */
+    }
+}
+
+static void /* Worker behind the disjoint component-alpha wrappers; 'combine' selects how Fa and Fb are built */
+combine_disjoint_general_ca (uint32_t *      dest,
+                             const uint32_t *src,
+                             const uint32_t *mask,
+                             int            width,
+                             uint8_t        combine)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s, d;
+	uint32_t m, n, o, p; /* scratch: one word per channel before they are OR-ed together */
+	uint32_t Fa, Fb; /* packed per-channel factors passed to GENERIC below */
+	uint16_t t, u, v; /* temporaries handed to the GENERIC macro */
+	uint32_t sa;
+	uint8_t da;
+
+	s = *(src + i);
+	m = *(mask + i);
+	d = *(dest + i);
+	da = d >> A_SHIFT; /* dest alpha byte */
+
+	combine_mask_ca (&s, &m);
+
+	sa = m; /* kept separately because m is reused as scratch below */
+
+	switch (combine & COMBINE_A) /* build Fa */
+	{
+	default: /* none of the cases below matched */
+	    Fa = 0;
+	    break;
+
+	case COMBINE_A_OUT: /* each byte of sa paired with da */
+	    m = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> 0), da);
+	    n = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
+	    o = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
+	    p = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
+	    Fa = m | n | o | p;
+	    break;
+
+	case COMBINE_A_IN:
+	    m = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> 0), da);
+	    n = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
+	    o = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
+	    p = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
+	    Fa = m | n | o | p;
+	    break;
+
+	case COMBINE_A:
+	    Fa = ~0; /* all four channels 0xff */
+	    break;
+	}
+
+	switch (combine & COMBINE_B) /* build Fb; helper arguments are swapped relative to Fa */
+	{
+	default: /* none of the cases below matched */
+	    Fb = 0;
+	    break;
+
+	case COMBINE_B_OUT:
+	    m = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> 0));
+	    n = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
+	    o = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
+	    p = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
+	    Fb = m | n | o | p;
+	    break;
+
+	case COMBINE_B_IN:
+	    m = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> 0));
+	    n = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
+	    o = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
+	    p = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
+	    Fb = m | n | o | p;
+	    break;
+
+	case COMBINE_B:
+	    Fb = ~0; /* all four channels 0xff */
+	    break;
+	}
+	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); /* channel at bit 0 */
+	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
+	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
+	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
+
+	s = m | n | o | p; /* reassemble the four channels */
+
+	*(dest + i) = s;
+    }
+}
+
+static void /* Component-alpha disjoint OVER: thin wrapper around combine_disjoint_general_ca() */
+combine_disjoint_over_ca (pixman_implementation_t *imp,
+                          pixman_op_t              op,
+                          uint32_t                *dest,
+                          const uint32_t          *src,
+                          const uint32_t          *mask,
+                          int                      width)
+{   /* imp and op are not used; COMBINE_A_OVER is passed as the selector */
+    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
+}
+
+static void /* Component-alpha disjoint IN: thin wrapper around combine_disjoint_general_ca() */
+combine_disjoint_in_ca (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        uint32_t                *dest,
+                        const uint32_t          *src,
+                        const uint32_t          *mask,
+                        int                      width)
+{   /* imp and op are not used; COMBINE_A_IN is passed as the selector */
+    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
+}
+
+static void /* Component-alpha disjoint IN_REVERSE: thin wrapper around combine_disjoint_general_ca() */
+combine_disjoint_in_reverse_ca (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                uint32_t                *dest,
+                                const uint32_t          *src,
+                                const uint32_t          *mask,
+                                int                      width)
+{   /* imp and op are not used; COMBINE_B_IN is passed as the selector */
+    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
+}
+
+static void /* Component-alpha disjoint OUT: thin wrapper around combine_disjoint_general_ca() */
+combine_disjoint_out_ca (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         uint32_t                *dest,
+                         const uint32_t          *src,
+                         const uint32_t          *mask,
+                         int                      width)
+{   /* imp and op are not used; COMBINE_A_OUT is passed as the selector */
+    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
+}
+
+static void /* Component-alpha disjoint OUT_REVERSE: thin wrapper around combine_disjoint_general_ca() */
+combine_disjoint_out_reverse_ca (pixman_implementation_t *imp,
+                                 pixman_op_t              op,
+                                 uint32_t                *dest,
+                                 const uint32_t          *src,
+                                 const uint32_t          *mask,
+                                 int                      width)
+{   /* imp and op are not used; COMBINE_B_OUT is passed as the selector */
+    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
+}
+
+static void /* Component-alpha disjoint ATOP: thin wrapper around combine_disjoint_general_ca() */
+combine_disjoint_atop_ca (pixman_implementation_t *imp,
+                          pixman_op_t              op,
+                          uint32_t                *dest,
+                          const uint32_t          *src,
+                          const uint32_t          *mask,
+                          int                      width)
+{   /* imp and op are not used; COMBINE_A_ATOP is passed as the selector */
+    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
+}
+
+static void /* Component-alpha disjoint ATOP_REVERSE: thin wrapper around combine_disjoint_general_ca() */
+combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp,
+                                  pixman_op_t              op,
+                                  uint32_t                *dest,
+                                  const uint32_t          *src,
+                                  const uint32_t          *mask,
+                                  int                      width)
+{   /* imp and op are not used; COMBINE_B_ATOP is passed as the selector */
+    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
+}
+
+static void /* Component-alpha disjoint XOR: thin wrapper around combine_disjoint_general_ca() */
+combine_disjoint_xor_ca (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         uint32_t                *dest,
+                         const uint32_t          *src,
+                         const uint32_t          *mask,
+                         int                      width)
+{   /* imp and op are not used; COMBINE_XOR is passed as the selector */
+    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
+}
+
+static void /* Worker behind the conjoint component-alpha wrappers; 'combine' selects how Fa and Fb are built */
+combine_conjoint_general_ca (uint32_t *      dest,
+                             const uint32_t *src,
+                             const uint32_t *mask,
+                             int            width,
+                             uint8_t        combine)
+{
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t s, d;
+	uint32_t m, n, o, p; /* scratch: one word per channel before they are OR-ed together */
+	uint32_t Fa, Fb; /* packed per-channel factors passed to GENERIC below */
+	uint16_t t, u, v; /* temporaries handed to the GENERIC macro */
+	uint32_t sa;
+	uint8_t da;
+
+	s = *(src + i);
+	m = *(mask + i);
+	d = *(dest + i);
+	da = d >> A_SHIFT; /* dest alpha byte */
+
+	combine_mask_ca (&s, &m);
+
+	sa = m; /* kept separately because m is reused as scratch below */
+
+	switch (combine & COMBINE_A) /* build Fa */
+	{
+	default: /* none of the cases below matched */
+	    Fa = 0;
+	    break;
+
+	case COMBINE_A_OUT: /* each byte of sa paired with da */
+	    m = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> 0), da);
+	    n = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
+	    o = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
+	    p = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
+	    Fa = m | n | o | p;
+	    break;
+
+	case COMBINE_A_IN:
+	    m = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> 0), da);
+	    n = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
+	    o = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
+	    p = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
+	    Fa = m | n | o | p;
+	    break;
+
+	case COMBINE_A:
+	    Fa = ~0; /* all four channels 0xff */
+	    break;
+	}
+
+	switch (combine & COMBINE_B) /* build Fb; helper arguments are swapped relative to Fa */
+	{
+	default: /* none of the cases below matched */
+	    Fb = 0;
+	    break;
+
+	case COMBINE_B_OUT:
+	    m = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> 0));
+	    n = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
+	    o = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
+	    p = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
+	    Fb = m | n | o | p;
+	    break;
+
+	case COMBINE_B_IN:
+	    m = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> 0));
+	    n = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
+	    o = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
+	    p = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
+	    Fb = m | n | o | p;
+	    break;
+
+	case COMBINE_B:
+	    Fb = ~0; /* all four channels 0xff */
+	    break;
+	}
+	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); /* channel at bit 0 */
+	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
+	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
+	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
+
+	s = m | n | o | p; /* reassemble the four channels */
+
+	*(dest + i) = s;
+    }
+}
+
+static void /* Component-alpha conjoint OVER: thin wrapper around combine_conjoint_general_ca() */
+combine_conjoint_over_ca (pixman_implementation_t *imp,
+                          pixman_op_t              op,
+                          uint32_t                *dest,
+                          const uint32_t          *src,
+                          const uint32_t          *mask,
+                          int                      width)
+{   /* imp and op are not used; COMBINE_A_OVER is passed as the selector */
+    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
+}
+
+static void /* Component-alpha conjoint OVER_REVERSE: thin wrapper around combine_conjoint_general_ca() */
+combine_conjoint_over_reverse_ca (pixman_implementation_t *imp,
+                                  pixman_op_t              op,
+                                  uint32_t                *dest,
+                                  const uint32_t          *src,
+                                  const uint32_t          *mask,
+                                  int                      width)
+{   /* imp and op are not used; COMBINE_B_OVER is passed as the selector */
+    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER);
+}
+
+static void /* Component-alpha conjoint IN: thin wrapper around combine_conjoint_general_ca() */
+combine_conjoint_in_ca (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        uint32_t                *dest,
+                        const uint32_t          *src,
+                        const uint32_t          *mask,
+                        int                      width)
+{   /* imp and op are not used; COMBINE_A_IN is passed as the selector */
+    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
+}
+
+static void /* Component-alpha conjoint IN_REVERSE: thin wrapper around combine_conjoint_general_ca() */
+combine_conjoint_in_reverse_ca (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                uint32_t                *dest,
+                                const uint32_t          *src,
+                                const uint32_t          *mask,
+                                int                      width)
+{   /* imp and op are not used; COMBINE_B_IN is passed as the selector */
+    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
+}
+
+static void /* Component-alpha conjoint OUT: thin wrapper around combine_conjoint_general_ca() */
+combine_conjoint_out_ca (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         uint32_t                *dest,
+                         const uint32_t          *src,
+                         const uint32_t          *mask,
+                         int                      width)
+{   /* imp and op are not used; COMBINE_A_OUT is passed as the selector */
+    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
+}
+
+static void /* Component-alpha conjoint OUT_REVERSE: thin wrapper around combine_conjoint_general_ca() */
+combine_conjoint_out_reverse_ca (pixman_implementation_t *imp,
+                                 pixman_op_t              op,
+                                 uint32_t                *dest,
+                                 const uint32_t          *src,
+                                 const uint32_t          *mask,
+                                 int                      width)
+{   /* imp and op are not used; COMBINE_B_OUT is passed as the selector */
+    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
+}
+
+static void /* Component-alpha conjoint ATOP: thin wrapper around combine_conjoint_general_ca() */
+combine_conjoint_atop_ca (pixman_implementation_t *imp,
+                          pixman_op_t              op,
+                          uint32_t                *dest,
+                          const uint32_t          *src,
+                          const uint32_t          *mask,
+                          int                      width)
+{   /* imp and op are not used; COMBINE_A_ATOP is passed as the selector */
+    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
+}
+
+static void /* Component-alpha conjoint ATOP_REVERSE: thin wrapper around combine_conjoint_general_ca() */
+combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp,
+                                  pixman_op_t              op,
+                                  uint32_t                *dest,
+                                  const uint32_t          *src,
+                                  const uint32_t          *mask,
+                                  int                      width)
+{   /* imp and op are not used; COMBINE_B_ATOP is passed as the selector */
+    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
+}
+
+static void /* Component-alpha conjoint XOR: thin wrapper around combine_conjoint_general_ca() */
+combine_conjoint_xor_ca (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         uint32_t                *dest,
+                         const uint32_t          *src,
+                         const uint32_t          *mask,
+                         int                      width)
+{   /* imp and op are not used; COMBINE_XOR is passed as the selector */
+    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
+}
+
+void /* Fills imp->combine_32[] and imp->combine_32_ca[], indexed by PIXMAN_OP_*, with the 32-bit combiners of this file */
+_pixman_setup_combiner_functions_32 (pixman_implementation_t *imp)
+{
+    /* Unified alpha */
+    imp->combine_32[PIXMAN_OP_CLEAR] = combine_clear;
+    imp->combine_32[PIXMAN_OP_SRC] = combine_src_u;
+    imp->combine_32[PIXMAN_OP_DST] = combine_dst;
+    imp->combine_32[PIXMAN_OP_OVER] = combine_over_u;
+    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
+    imp->combine_32[PIXMAN_OP_IN] = combine_in_u;
+    imp->combine_32[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u;
+    imp->combine_32[PIXMAN_OP_OUT] = combine_out_u;
+    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u;
+    imp->combine_32[PIXMAN_OP_ATOP] = combine_atop_u;
+    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
+    imp->combine_32[PIXMAN_OP_XOR] = combine_xor_u;
+    imp->combine_32[PIXMAN_OP_ADD] = combine_add_u;
+    imp->combine_32[PIXMAN_OP_SATURATE] = combine_saturate_u;
+
+    /* Disjoint, unified (CLEAR, SRC and DST share the plain routines) */
+    imp->combine_32[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
+    imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
+    imp->combine_32[PIXMAN_OP_DISJOINT_DST] = combine_dst;
+    imp->combine_32[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
+    imp->combine_32[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u; /* same routine as PIXMAN_OP_SATURATE */
+    imp->combine_32[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
+    imp->combine_32[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u;
+    imp->combine_32[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u;
+    imp->combine_32[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u;
+    imp->combine_32[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u;
+    imp->combine_32[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u;
+    imp->combine_32[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u;
+
+    /* Conjoint, unified (CLEAR, SRC and DST share the plain routines) */
+    imp->combine_32[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
+    imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
+    imp->combine_32[PIXMAN_OP_CONJOINT_DST] = combine_dst;
+    imp->combine_32[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
+    imp->combine_32[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
+    imp->combine_32[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
+    imp->combine_32[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u;
+    imp->combine_32[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u;
+    imp->combine_32[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u;
+    imp->combine_32[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u;
+    imp->combine_32[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u;
+    imp->combine_32[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u;
+
+    imp->combine_32[PIXMAN_OP_MULTIPLY] = combine_multiply_u; /* MULTIPLY .. HSL_LUMINOSITY, unified */
+    imp->combine_32[PIXMAN_OP_SCREEN] = combine_screen_u;
+    imp->combine_32[PIXMAN_OP_OVERLAY] = combine_overlay_u;
+    imp->combine_32[PIXMAN_OP_DARKEN] = combine_darken_u;
+    imp->combine_32[PIXMAN_OP_LIGHTEN] = combine_lighten_u;
+    imp->combine_32[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u;
+    imp->combine_32[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u;
+    imp->combine_32[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u;
+    imp->combine_32[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u;
+    imp->combine_32[PIXMAN_OP_DIFFERENCE] = combine_difference_u;
+    imp->combine_32[PIXMAN_OP_EXCLUSION] = combine_exclusion_u;
+    imp->combine_32[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u;
+    imp->combine_32[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u;
+    imp->combine_32[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u;
+    imp->combine_32[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u;
+
+    /* Component alpha combiners */
+    imp->combine_32_ca[PIXMAN_OP_CLEAR] = combine_clear_ca;
+    imp->combine_32_ca[PIXMAN_OP_SRC] = combine_src_ca;
+    /* dest: this function installs no combine_32_ca entry for PIXMAN_OP_DST */
+    imp->combine_32_ca[PIXMAN_OP_OVER] = combine_over_ca;
+    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_IN] = combine_in_ca;
+    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_OUT] = combine_out_ca;
+    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_ATOP] = combine_atop_ca;
+    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_XOR] = combine_xor_ca;
+    imp->combine_32_ca[PIXMAN_OP_ADD] = combine_add_ca;
+    imp->combine_32_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca;
+
+    /* Disjoint CA */
+    imp->combine_32_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
+    imp->combine_32_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
+    imp->combine_32_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst; /* the unified routine is reused */
+    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
+    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca; /* same routine as PIXMAN_OP_SATURATE */
+    imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
+    imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca;
+    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca;
+    imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca;
+
+    /* Conjoint CA */
+    imp->combine_32_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
+    imp->combine_32_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
+    imp->combine_32_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst; /* the unified routine is reused */
+    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
+    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
+    imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca;
+    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca;
+    imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca;
+
+    imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca; /* MULTIPLY .. EXCLUSION, component alpha */
+    imp->combine_32_ca[PIXMAN_OP_SCREEN] = combine_screen_ca;
+    imp->combine_32_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca;
+    imp->combine_32_ca[PIXMAN_OP_DARKEN] = combine_darken_ca;
+    imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
+    imp->combine_32_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca;
+    imp->combine_32_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca;
+    imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
+    imp->combine_32_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca;
+    imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
+    imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
+
+    /* It is not clear that the HSL operators make sense with component alpha, so map them to combine_dst for now */
+    imp->combine_32_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
+    imp->combine_32_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
+    imp->combine_32_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
+    imp->combine_32_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
+}
diff --git a/pixman/pixman-combine32.h b/pixman/pixman-combine32.h
new file mode 100644
index 0000000..875dde3
--- /dev/null
+++ b/pixman/pixman-combine32.h
@@ -0,0 +1,225 @@
+#define COMPONENT_SIZE 8
+#define MASK 0xff
+#define ONE_HALF 0x80
+
+#define A_SHIFT 8 * 3
+#define R_SHIFT 8 * 2
+#define G_SHIFT 8
+#define A_MASK 0xff000000
+#define R_MASK 0xff0000
+#define G_MASK 0xff00
+
+#define RB_MASK 0xff00ff
+#define AG_MASK 0xff00ff00
+#define RB_ONE_HALF 0x800080
+#define RB_MASK_PLUS_ONE 0x10000100
+
+#define ALPHA_8(x) ((x) >> A_SHIFT)
+#define RED_8(x) (((x) >> R_SHIFT) & MASK)
+#define GREEN_8(x) (((x) >> G_SHIFT) & MASK)
+#define BLUE_8(x) ((x) & MASK)
+
+/*
+ * Helper macros.
+ */
+
+#define MUL_UN8(a, b, t)						\
+    ((t) = (a) * (uint16_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))
+
+#define DIV_UN8(a, b)							\
+    (((uint16_t) (a) * MASK + ((b) / 2)) / (b))
+
+#define ADD_UN8(x, y, t)				     \
+    ((t) = (x) + (y),					     \
+     (uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT))))
+
+#define DIV_ONE_UN8(x)							\
+    (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
+
+/*
+ * The methods below use some tricks to be able to do two color
+ * components at the same time.
+ */
+
+/*
+ * x_rb = (x_rb * a) / 255
+ */
+#define UN8_rb_MUL_UN8(x, a, t)						\
+    do									\
+    {									\
+	t  = ((x) & RB_MASK) * (a);					\
+	t += RB_ONE_HALF;						\
+	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;		\
+	x &= RB_MASK;							\
+    } while (0)
+
+/*
+ * x_rb = min (x_rb + y_rb, 255)
+ */
+#define UN8_rb_ADD_UN8_rb(x, y, t)					\
+    do									\
+    {									\
+	t = ((x) + (y));						\
+	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);		\
+	x = (t & RB_MASK);						\
+    } while (0)
+
+/*
+ * x_rb = (x_rb * a_rb) / 255
+ */
+#define UN8_rb_MUL_UN8_rb(x, a, t)					\
+    do									\
+    {									\
+	t  = (x & MASK) * (a & MASK);					\
+	t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK);			\
+	t += RB_ONE_HALF;						\
+	t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;		\
+	x = t & RB_MASK;						\
+    } while (0)
+
+/*
+ * x_c = (x_c * a) / 255
+ */
+#define UN8x4_MUL_UN8(x, a)						\
+    do									\
+    {									\
+	uint32_t r1__, r2__, t__;					\
+									\
+	r1__ = (x);							\
+	UN8_rb_MUL_UN8 (r1__, (a), t__);				\
+									\
+	r2__ = (x) >> G_SHIFT;						\
+	UN8_rb_MUL_UN8 (r2__, (a), t__);				\
+									\
+	(x) = r1__ | (r2__ << G_SHIFT);					\
+    } while (0)
+
+/*
+ * x_c = (x_c * a) / 255 + y_c
+ */
+#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y)				\
+    do									\
+    {									\
+	uint32_t r1__, r2__, r3__, t__;					\
+									\
+	r1__ = (x);							\
+	r2__ = (y) & RB_MASK;						\
+	UN8_rb_MUL_UN8 (r1__, (a), t__);				\
+	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
+									\
+	r2__ = (x) >> G_SHIFT;						\
+	r3__ = ((y) >> G_SHIFT) & RB_MASK;				\
+	UN8_rb_MUL_UN8 (r2__, (a), t__);				\
+	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
+									\
+	(x) = r1__ | (r2__ << G_SHIFT);					\
+    } while (0)
+
+/*
+ * x_c = (x_c * a + y_c * b) / 255
+ */
+#define UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(x, a, y, b)			\
+    do									\
+    {									\
+	uint32_t r1__, r2__, r3__, t__;					\
+									\
+	r1__ = (x);							\
+	r2__ = (y);							\
+	UN8_rb_MUL_UN8 (r1__, (a), t__);				\
+	UN8_rb_MUL_UN8 (r2__, (b), t__);				\
+	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
+									\
+	r2__ = ((x) >> G_SHIFT);					\
+	r3__ = ((y) >> G_SHIFT);					\
+	UN8_rb_MUL_UN8 (r2__, (a), t__);				\
+	UN8_rb_MUL_UN8 (r3__, (b), t__);				\
+	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
+									\
+	(x) = r1__ | (r2__ << G_SHIFT);					\
+    } while (0)
+
+/*
+ * x_c = (x_c * a_c) / 255
+ */
+#define UN8x4_MUL_UN8x4(x, a)						\
+    do									\
+    {									\
+	uint32_t r1__, r2__, r3__, t__;					\
+									\
+	r1__ = (x);							\
+	r2__ = (a);							\
+	UN8_rb_MUL_UN8_rb (r1__, r2__, t__);				\
+									\
+	r2__ = (x) >> G_SHIFT;						\
+	r3__ = (a) >> G_SHIFT;						\
+	UN8_rb_MUL_UN8_rb (r2__, r3__, t__);				\
+									\
+	(x) = r1__ | (r2__ << G_SHIFT);					\
+    } while (0)
+
+/*
+ * x_c = (x_c * a_c) / 255 + y_c
+ */
+#define UN8x4_MUL_UN8x4_ADD_UN8x4(x, a, y)				\
+    do									\
+    {									\
+	uint32_t r1__, r2__, r3__, t__;					\
+									\
+	r1__ = (x);							\
+	r2__ = (a);							\
+	UN8_rb_MUL_UN8_rb (r1__, r2__, t__);				\
+	r2__ = (y) & RB_MASK;						\
+	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
+									\
+	r2__ = ((x) >> G_SHIFT);					\
+	r3__ = ((a) >> G_SHIFT);					\
+	UN8_rb_MUL_UN8_rb (r2__, r3__, t__);				\
+	r3__ = ((y) >> G_SHIFT) & RB_MASK;				\
+	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
+									\
+	(x) = r1__ | (r2__ << G_SHIFT);					\
+    } while (0)
+
+/*
+ * x_c = (x_c * a_c + y_c * b) / 255
+ */
+#define UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8(x, a, y, b)			\
+    do									\
+    {									\
+	uint32_t r1__, r2__, r3__, t__;					\
+									\
+	r1__ = (x);							\
+	r2__ = (a);							\
+	UN8_rb_MUL_UN8_rb (r1__, r2__, t__);				\
+	r2__ = (y);							\
+	UN8_rb_MUL_UN8 (r2__, (b), t__);				\
+	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
+									\
+	r2__ = (x) >> G_SHIFT;						\
+	r3__ = (a) >> G_SHIFT;						\
+	UN8_rb_MUL_UN8_rb (r2__, r3__, t__);				\
+	r3__ = (y) >> G_SHIFT;						\
+	UN8_rb_MUL_UN8 (r3__, (b), t__);				\
+	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
+									\
+	x = r1__ | (r2__ << G_SHIFT);					\
+    } while (0)
+
+/*
+  x_c = min(x_c + y_c, 255)
+*/
+#define UN8x4_ADD_UN8x4(x, y)						\
+    do									\
+    {									\
+	uint32_t r1__, r2__, r3__, t__;					\
+									\
+	r1__ = (x) & RB_MASK;						\
+	r2__ = (y) & RB_MASK;						\
+	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
+									\
+	r2__ = ((x) >> G_SHIFT) & RB_MASK;				\
+	r3__ = ((y) >> G_SHIFT) & RB_MASK;				\
+	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
+									\
+	x = r1__ | (r2__ << G_SHIFT);					\
+    } while (0)
commit 4afd20cc71ba75190ebcead774b946157d0995a6
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Sat Aug 18 14:39:29 2012 -0400

    Remove 64 bit pipeline
    
    The 64 bit pipeline is not used anymore, so it can now be removed.
    
    Don't generate pixman-combine64.[ch] anymore. Don't generate the
    pixman-srgb.c anymore. Delete all the 64 bit fetchers in
    pixman-access.c, all the 64 bit iterator functions in
    pixman-bits-image.c and all the functions that expand from 8 to 16
    bits.

diff --git a/.gitignore b/.gitignore
index a4d9f99..a67da1d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,8 +41,6 @@ demos/trap-test
 demos/tri-test
 pixman/pixman-combine32.c
 pixman/pixman-combine32.h
-pixman/pixman-combine64.c
-pixman/pixman-combine64.h
 pixman/pixman-srgb.c
 pixman/pixman-version.h
 test/a1-trap-test
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 270d65e..3060569 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -13,7 +13,6 @@ noinst_LTLIBRARIES =
 EXTRA_DIST =				\
 	Makefile.win32			\
 	make-combine.pl			\
-	make-srgb.pl			\
 	pixman-combine.c.template	\
 	pixman-combine.h.template	\
 	pixman-region.c			\
diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources
index 96540ec..5be288d 100644
--- a/pixman/Makefile.sources
+++ b/pixman/Makefile.sources
@@ -4,7 +4,6 @@ libpixman_sources =			\
 	pixman-access-accessors.c	\
 	pixman-bits-image.c		\
 	pixman-combine32.c		\
-	pixman-combine64.c		\
 	pixman-combine-float.c		\
 	pixman-conical-gradient.c	\
 	pixman-x86.c			\
@@ -26,7 +25,6 @@ libpixman_sources =			\
 	pixman-region16.c		\
 	pixman-region32.c		\
 	pixman-solid-fill.c		\
-	pixman-srgb.c			\
 	pixman-timer.c			\
 	pixman-trap.c			\
 	pixman-utils.c			\
@@ -36,7 +34,6 @@ libpixman_headers =			\
 	pixman.h			\
 	pixman-accessor.h		\
 	pixman-combine32.h		\
-	pixman-combine64.h		\
 	pixman-compiler.h		\
 	pixman-edge-imp.h		\
 	pixman-inlines.h		\
@@ -46,20 +43,9 @@ libpixman_headers =			\
 BUILT_SOURCES =				\
 	pixman-combine32.c		\
 	pixman-combine32.h		\
-	pixman-combine64.c		\
-	pixman-combine64.h		\
-	pixman-srgb.c			\
 	$(NULL)
 
-pixman-srgb.c: make-srgb.pl
-	$(PERL) $< > $@ || ($(RM) $@; exit 1)
-
 pixman-combine32.c: pixman-combine.c.template make-combine.pl
 	$(PERL) $(lastword $+) 8 < $< > $@ || ($(RM) $@; exit 1)
 pixman-combine32.h: pixman-combine.h.template make-combine.pl
 	$(PERL) $(lastword $+) 8 < $< > $@ || ($(RM) $@; exit 1)
-
-pixman-combine64.c: pixman-combine.c.template make-combine.pl
-	$(PERL) $(lastword $+) 16 < $< > $@ || ($(RM) $@; exit 1)
-pixman-combine64.h: pixman-combine.h.template make-combine.pl
-	$(PERL) $(lastword $+) 16 < $< > $@ || ($(RM) $@; exit 1)
diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
index 1eef621..b5c8e40 100644
--- a/pixman/pixman-access.c
+++ b/pixman/pixman-access.c
@@ -507,135 +507,6 @@ MAKE_ACCESSORS(a1);
 MAKE_ACCESSORS(g1);
 
 /********************************** Fetch ************************************/
-
-/* Expects a uint64_t buffer */
-static void
-fetch_scanline_a2r10g10b10 (pixman_image_t *image,
-                            int             x,
-                            int             y,
-                            int             width,
-                            uint32_t *      b,
-                            const uint32_t *mask)
-{
-    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
-    const uint32_t *pixel = bits + x;
-    const uint32_t *end = pixel + width;
-    uint64_t *buffer = (uint64_t *)b;
-
-    while (pixel < end)
-    {
-	uint32_t p = READ (image, pixel++);
-	uint64_t a = p >> 30;
-	uint64_t r = (p >> 20) & 0x3ff;
-	uint64_t g = (p >> 10) & 0x3ff;
-	uint64_t b = p & 0x3ff;
-
-	r = r << 6 | r >> 4;
-	g = g << 6 | g >> 4;
-	b = b << 6 | b >> 4;
-
-	a <<= 14;
-	a |= a >> 2;
-	a |= a >> 4;
-	a |= a >> 8;
-
-	*buffer++ = a << 48 | r << 32 | g << 16 | b;
-    }
-}
-
-/* Expects a uint64_t buffer */
-static void
-fetch_scanline_x2r10g10b10 (pixman_image_t *image,
-                            int             x,
-                            int             y,
-                            int             width,
-                            uint32_t *      b,
-                            const uint32_t *mask)
-{
-    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
-    const uint32_t *pixel = (uint32_t *)bits + x;
-    const uint32_t *end = pixel + width;
-    uint64_t *buffer = (uint64_t *)b;
-    
-    while (pixel < end)
-    {
-	uint32_t p = READ (image, pixel++);
-	uint64_t r = (p >> 20) & 0x3ff;
-	uint64_t g = (p >> 10) & 0x3ff;
-	uint64_t b = p & 0x3ff;
-	
-	r = r << 6 | r >> 4;
-	g = g << 6 | g >> 4;
-	b = b << 6 | b >> 4;
-	
-	*buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b;
-    }
-}
-
-/* Expects a uint64_t buffer */
-static void
-fetch_scanline_a2b10g10r10 (pixman_image_t *image,
-                            int             x,
-                            int             y,
-                            int             width,
-                            uint32_t *      b,
-                            const uint32_t *mask)
-{
-    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
-    const uint32_t *pixel = bits + x;
-    const uint32_t *end = pixel + width;
-    uint64_t *buffer = (uint64_t *)b;
-    
-    while (pixel < end)
-    {
-	uint32_t p = READ (image, pixel++);
-	uint64_t a = p >> 30;
-	uint64_t b = (p >> 20) & 0x3ff;
-	uint64_t g = (p >> 10) & 0x3ff;
-	uint64_t r = p & 0x3ff;
-	
-	r = r << 6 | r >> 4;
-	g = g << 6 | g >> 4;
-	b = b << 6 | b >> 4;
-	
-	a <<= 14;
-	a |= a >> 2;
-	a |= a >> 4;
-	a |= a >> 8;
-
-	*buffer++ = a << 48 | r << 32 | g << 16 | b;
-    }
-}
-
-/* Expects a uint64_t buffer */
-static void
-fetch_scanline_x2b10g10r10 (pixman_image_t *image,
-                            int             x,
-                            int             y,
-                            int             width,
-                            uint32_t *      b,
-                            const uint32_t *mask)
-{
-    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
-    const uint32_t *pixel = (uint32_t *)bits + x;
-    const uint32_t *end = pixel + width;
-    uint64_t *buffer = (uint64_t *)b;
-    
-    while (pixel < end)
-    {
-	uint32_t p = READ (image, pixel++);
-	uint64_t b = (p >> 20) & 0x3ff;
-	uint64_t g = (p >> 10) & 0x3ff;
-	uint64_t r = p & 0x3ff;
-	
-	r = r << 6 | r >> 4;
-	g = g << 6 | g >> 4;
-	b = b << 6 | b >> 4;
-	
-	*buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b;
-    }
-}
-
 /* Table mapping sRGB-encoded 8 bit numbers to linearly encoded
  * floating point numbers. We assume that single precision
  * floating point follows the IEEE 754 format.
@@ -934,94 +805,6 @@ fetch_scanline_yv12 (pixman_image_t *image,
 
 /**************************** Pixel wise fetching *****************************/
 
-/* Despite the type, expects a uint64_t buffer */
-static uint64_t
-fetch_pixel_a2r10g10b10 (bits_image_t *image,
-			 int		  offset,
-			 int           line)
-{
-    uint32_t *bits = image->bits + line * image->rowstride;
-    uint32_t p = READ (image, bits + offset);
-    uint64_t a = p >> 30;
-    uint64_t r = (p >> 20) & 0x3ff;
-    uint64_t g = (p >> 10) & 0x3ff;
-    uint64_t b = p & 0x3ff;
-
-    r = r << 6 | r >> 4;
-    g = g << 6 | g >> 4;
-    b = b << 6 | b >> 4;
-
-    a <<= 14;
-    a |= a >> 2;
-    a |= a >> 4;
-    a |= a >> 8;
-
-    return a << 48 | r << 32 | g << 16 | b;
-}
-
-/* Despite the type, this function expects a uint64_t buffer */
-static uint64_t
-fetch_pixel_x2r10g10b10 (bits_image_t *image,
-			 int	   offset,
-			 int           line)
-{
-    uint32_t *bits = image->bits + line * image->rowstride;
-    uint32_t p = READ (image, bits + offset);
-    uint64_t r = (p >> 20) & 0x3ff;
-    uint64_t g = (p >> 10) & 0x3ff;
-    uint64_t b = p & 0x3ff;
-    
-    r = r << 6 | r >> 4;
-    g = g << 6 | g >> 4;
-    b = b << 6 | b >> 4;
-    
-    return 0xffffULL << 48 | r << 32 | g << 16 | b;
-}
-
-/* Despite the type, expects a uint64_t buffer */
-static uint64_t
-fetch_pixel_a2b10g10r10 (bits_image_t *image,
-			 int           offset,
-			 int           line)
-{
-    uint32_t *bits = image->bits + line * image->rowstride;
-    uint32_t p = READ (image, bits + offset);
-    uint64_t a = p >> 30;
-    uint64_t b = (p >> 20) & 0x3ff;
-    uint64_t g = (p >> 10) & 0x3ff;
-    uint64_t r = p & 0x3ff;
-    
-    r = r << 6 | r >> 4;
-    g = g << 6 | g >> 4;
-    b = b << 6 | b >> 4;
-    
-    a <<= 14;
-    a |= a >> 2;
-    a |= a >> 4;
-    a |= a >> 8;
-    
-    return a << 48 | r << 32 | g << 16 | b;
-}
-
-/* Despite the type, this function expects a uint64_t buffer */
-static uint64_t
-fetch_pixel_x2b10g10r10 (bits_image_t *image,
-			 int           offset,
-			 int           line)
-{
-    uint32_t *bits = image->bits + line * image->rowstride;
-    uint32_t p = READ (image, bits + offset);
-    uint64_t b = (p >> 20) & 0x3ff;
-    uint64_t g = (p >> 10) & 0x3ff;
-    uint64_t r = p & 0x3ff;
-    
-    r = r << 6 | r >> 4;
-    g = g << 6 | g >> 4;
-    b = b << 6 | b >> 4;
-    
-    return 0xffffULL << 48 | r << 32 | g << 16 | b;
-}
-
 static argb_t
 fetch_pixel_x2r10g10b10_float (bits_image_t *image,
 			       int	   offset,
@@ -1180,93 +963,6 @@ fetch_pixel_yv12 (bits_image_t *image,
 /*********************************** Store ************************************/
 
 static void
-store_scanline_a2r10g10b10 (bits_image_t *  image,
-                            int             x,
-                            int             y,
-                            int             width,
-                            const uint32_t *v)
-{
-    uint32_t *bits = image->bits + image->rowstride * y;
-    uint32_t *pixel = bits + x;
-    uint64_t *values = (uint64_t *)v;
-    int i;
-    
-    for (i = 0; i < width; ++i)
-    {
-	WRITE (image, pixel++,
-	       ((values[i] >> 32) & 0xc0000000) |
-	       ((values[i] >> 18) & 0x3ff00000) |
-	       ((values[i] >> 12) & 0xffc00) | 
-	       ((values[i] >> 6) & 0x3ff));    
-    }
-}
-
-static void
-store_scanline_x2r10g10b10 (bits_image_t *  image,
-                            int             x,
-                            int             y,
-                            int             width,
-                            const uint32_t *v)
-{
-    uint32_t *bits = image->bits + image->rowstride * y;
-    uint64_t *values = (uint64_t *)v;
-    uint32_t *pixel = bits + x;
-    int i;
-    
-    for (i = 0; i < width; ++i)
-    {
-	WRITE (image, pixel++,
-	       ((values[i] >> 18) & 0x3ff00000) | 
-	       ((values[i] >> 12) & 0xffc00) |
-	       ((values[i] >> 6) & 0x3ff));
-    }
-}
-
-static void
-store_scanline_a2b10g10r10 (bits_image_t *  image,
-                            int             x,
-                            int             y,
-                            int             width,
-                            const uint32_t *v)
-{
-    uint32_t *bits = image->bits + image->rowstride * y;
-    uint32_t *pixel = bits + x;
-    uint64_t *values = (uint64_t *)v;
-    int i;
-    
-    for (i = 0; i < width; ++i)
-    {
-	WRITE (image, pixel++,
-	       ((values[i] >> 32) & 0xc0000000) |
-	       ((values[i] >> 38) & 0x3ff) |
-	       ((values[i] >> 12) & 0xffc00) |
-	       ((values[i] << 14) & 0x3ff00000));
-    }
-}
-
-static void
-store_scanline_x2b10g10r10 (bits_image_t *  image,
-                            int             x,
-                            int             y,
-                            int             width,
-                            const uint32_t *v)
-{
-    uint32_t *bits = image->bits + image->rowstride * y;
-    uint64_t *values = (uint64_t *)v;
-    uint32_t *pixel = bits + x;
-    int i;
-    
-    for (i = 0; i < width; ++i)
-    {
-	WRITE (image, pixel++,
-	       ((values[i] >> 38) & 0x3ff) |
-	       ((values[i] >> 12) & 0xffc00) |
-	       ((values[i] << 14) & 0x3ff00000));
-    }
-}
-
-
-static void
 store_scanline_a2r10g10b10_float (bits_image_t *  image,
 				  int             x,
 				  int             y,
@@ -1395,35 +1091,6 @@ store_scanline_a8r8g8b8_sRGB_float (bits_image_t *  image,
 }
 
 /*
- * Contracts a 64bpp image to 32bpp and then stores it using a regular 32-bit
- * store proc. Despite the type, this function expects a uint64_t buffer.
- */
-static void
-store_scanline_generic_64 (bits_image_t *  image,
-                           int             x,
-                           int             y,
-                           int             width,
-                           const uint32_t *values)
-{
-    uint32_t *argb8_pixels;
-    
-    assert (image->common.type == BITS);
-    
-    argb8_pixels = pixman_malloc_ab (width, sizeof(uint32_t));
-    if (!argb8_pixels)
-	return;
-    
-    /* Contract the scanline.  We could do this in place if values weren't
-     * const.
-     */
-    pixman_contract (argb8_pixels, (uint64_t *)values, width);
-    
-    image->store_scanline_32 (image, x, y, width, argb8_pixels);
-    
-    free (argb8_pixels);
-}
-
-/*
  * Contracts a floating point image to 32bpp and then stores it using a
  * regular 32-bit store proc. Despite the type, this function expects an
  * argb_t buffer.
@@ -1453,39 +1120,6 @@ store_scanline_generic_float (bits_image_t *  image,
     free (argb8_pixels);
 }
 
-/* Despite the type, this function expects both buffer
- * and mask to be uint64_t
- */
-static void
-fetch_scanline_generic_64 (pixman_image_t *image,
-                           int             x,
-                           int             y,
-                           int             width,
-                           uint32_t *      buffer,
-                           const uint32_t *mask)
-{
-    pixman_format_code_t format;
-
-    /* Fetch the pixels into the first half of buffer and then expand them in
-     * place.
-     */
-    image->bits.fetch_scanline_32 (image, x, y, width, buffer, NULL);
-
-    format = image->bits.format;
-    if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR	||
-	PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY)
-    {
-	/* Indexed formats are mapped to a8r8g8b8 with full
-	 * precision, so when expanding we shouldn't correct
-	 * for the width of the channels
-	 */
-
-	format = PIXMAN_a8r8g8b8;
-    }
-
-    pixman_expand ((uint64_t *)buffer, buffer, format, width);
-}
-
 static void
 fetch_scanline_generic_float (pixman_image_t *image,
 			      int	      x,
@@ -1499,33 +1133,6 @@ fetch_scanline_generic_float (pixman_image_t *image,
     pixman_expand_to_float ((argb_t *)buffer, buffer, image->bits.format, width);
 }
 
-/* Despite the type, this function expects a uint64_t *buffer */
-static uint64_t
-fetch_pixel_generic_64 (bits_image_t *image,
-			int	      offset,
-			int           line)
-{
-    uint32_t pixel32 = image->fetch_pixel_32 (image, offset, line);
-    uint64_t result;
-    pixman_format_code_t format;
-
-    format = image->format;
-    if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR	||
-	PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY)
-    {
-	/* Indexed formats are mapped to a8r8g8b8 with full
-	 * precision, so when expanding we shouldn't correct
-	 * for the width of the channels
-	 */
-
-	format = PIXMAN_a8r8g8b8;
-    }
-
-    pixman_expand ((uint64_t *)&result, &pixel32, format, 1);
-
-    return result;
-}
-
 /* The 32_sRGB paths should be deleted after narrow processing
  * is no longer invoked for formats that are considered wide.
  * (Also see fetch_pixel_generic_lossy_32) */
@@ -1561,30 +1168,6 @@ fetch_scanline_a8r8g8b8_32_sRGB (pixman_image_t *image,
     }
 }
 
-static void
-fetch_scanline_a8r8g8b8_64_sRGB (pixman_image_t *image,
-                                 int             x,
-                                 int             y,
-                                 int             width,
-                                 uint32_t       *b,
-                                 const uint32_t *mask)
-{
-    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
-    const uint32_t *pixel = (uint32_t *)bits + x;
-    const uint32_t *end = pixel + width;
-    uint64_t *buffer = (uint64_t *)b;
-    uint32_t tmp;
-    
-    while (pixel < end)
-    {
-	tmp = READ (image, pixel++);
-	*buffer++ = (uint64_t)               ((tmp >> 24) * 257)  << 48
-		  | (uint64_t) srgb_to_linear[(tmp >> 16) & 0xff] << 32
-		  | (uint64_t) srgb_to_linear[(tmp >>  8) & 0xff] << 16
-		  | (uint64_t) srgb_to_linear[(tmp >>  0) & 0xff] <<  0;
-    }
-}
-
 static uint32_t
 fetch_pixel_a8r8g8b8_32_sRGB (bits_image_t *image,
 			      int           offset,
@@ -1606,19 +1189,6 @@ fetch_pixel_a8r8g8b8_32_sRGB (bits_image_t *image,
     return (a << 24) | (r << 16) | (g << 8) | (b << 0);
 }
 
-static uint64_t
-fetch_pixel_a8r8g8b8_64_sRGB (bits_image_t *image,
-			      int           offset,
-			      int	    line)
-{
-    uint32_t *bits = image->bits + line * image->rowstride;
-    uint32_t tmp = READ (image, bits + offset);
-    return (uint64_t)               ((tmp >> 24) * 257)  << 48
-	 | (uint64_t) srgb_to_linear[(tmp >> 16) & 0xff] << 32
-	 | (uint64_t) srgb_to_linear[(tmp >>  8) & 0xff] << 16
-	 | (uint64_t) srgb_to_linear[(tmp >>  0) & 0xff] <<  0;
-}
-
 static void
 store_scanline_a8r8g8b8_32_sRGB (bits_image_t   *image,
                                  int             x,
@@ -1651,30 +1221,6 @@ store_scanline_a8r8g8b8_32_sRGB (bits_image_t   *image,
     }
 }
 
-static void
-store_scanline_a8r8g8b8_64_sRGB (bits_image_t  *image,
-                                int             x,
-                                int             y,
-                                int             width,
-                                const uint32_t *v)
-{
-    uint32_t *bits = image->bits + image->rowstride * y;
-    uint64_t *values = (uint64_t *)v;
-    uint32_t *pixel = bits + x;
-    uint64_t tmp;
-    int i;
-    
-    for (i = 0; i < width; ++i)
-    {
-	tmp = values[i];
-	WRITE (image, pixel++,
-		  ((uint32_t)     (tmp >> 56)          << 24)
-		| (linear_to_srgb[(tmp >> 36) & 0xfff] << 16)
-		| (linear_to_srgb[(tmp >> 20) & 0xfff] <<  8)
-		| (linear_to_srgb[(tmp >>  4) & 0xfff] <<  0));
-    }
-}
-
 static argb_t
 fetch_pixel_generic_float (bits_image_t *image,
 			   int		 offset,
@@ -1699,10 +1245,10 @@ fetch_pixel_generic_lossy_32 (bits_image_t *image,
 			      int           offset,
 			      int           line)
 {
-    uint64_t pixel64 = image->fetch_pixel_64 (image, offset, line);
+    argb_t pixel64 = image->fetch_pixel_float (image, offset, line);
     uint32_t result;
 
-    pixman_contract (&result, &pixel64, 1);
+    pixman_contract_from_float (&result, &pixel64, 1);
 
     return result;
 }
@@ -1711,13 +1257,10 @@ typedef struct
 {
     pixman_format_code_t	format;
     fetch_scanline_t		fetch_scanline_32;
-    fetch_scanline_t		fetch_scanline_64;
     fetch_scanline_t		fetch_scanline_float;
     fetch_pixel_32_t		fetch_pixel_32;
-    fetch_pixel_64_t		fetch_pixel_64;
     fetch_pixel_float_t		fetch_pixel_float;
     store_scanline_t		store_scanline_32;
-    store_scanline_t		store_scanline_64;
     store_scanline_t		store_scanline_float;
 } format_info_t;
 
@@ -1725,13 +1268,10 @@ typedef struct
     {									\
 	PIXMAN_ ## format,						\
 	    fetch_scanline_ ## format,					\
-	    fetch_scanline_generic_64,					\
 	    fetch_scanline_generic_float,				\
 	    fetch_pixel_ ## format,					\
-	    fetch_pixel_generic_64,					\
 	    fetch_pixel_generic_float,					\
 	    store_scanline_ ## format,					\
-	    store_scanline_generic_64,					\
 	    store_scanline_generic_float				\
     }
 
@@ -1750,9 +1290,9 @@ static const format_info_t accessors[] =
 
 /* sRGB formats */
   { PIXMAN_a8r8g8b8_sRGB,
-    fetch_scanline_a8r8g8b8_32_sRGB, fetch_scanline_a8r8g8b8_64_sRGB, fetch_scanline_a8r8g8b8_sRGB_float,
-    fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_64_sRGB, fetch_pixel_a8r8g8b8_sRGB_float,
-    store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_64_sRGB, store_scanline_a8r8g8b8_sRGB_float,
+    fetch_scanline_a8r8g8b8_32_sRGB, fetch_scanline_a8r8g8b8_sRGB_float,
+    fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_sRGB_float,
+    store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_sRGB_float,
   },
 
 /* 24bpp formats */
@@ -1813,34 +1353,34 @@ static const format_info_t accessors[] =
 /* Wide formats */
     
     { PIXMAN_a2r10g10b10,
-      NULL, fetch_scanline_a2r10g10b10, fetch_scanline_a2r10g10b10_float,
-      fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10, fetch_pixel_a2r10g10b10_float,
-      NULL, store_scanline_a2r10g10b10, store_scanline_a2r10g10b10_float },
-    
+      NULL, fetch_scanline_a2r10g10b10_float,
+      fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10_float,
+      NULL, store_scanline_a2r10g10b10_float },
+
     { PIXMAN_x2r10g10b10,
-      NULL, fetch_scanline_x2r10g10b10, fetch_scanline_x2r10g10b10_float,
-      fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10, fetch_pixel_x2r10g10b10_float,
-      NULL, store_scanline_x2r10g10b10, store_scanline_x2r10g10b10_float },
-    
+      NULL, fetch_scanline_x2r10g10b10_float,
+      fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10_float,
+      NULL, store_scanline_x2r10g10b10_float },
+
     { PIXMAN_a2b10g10r10,
-      NULL, fetch_scanline_a2b10g10r10, fetch_scanline_a2b10g10r10_float,
-      fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10, fetch_pixel_a2b10g10r10_float,
-      NULL, store_scanline_a2b10g10r10, store_scanline_a2b10g10r10_float },
-    
+      NULL, fetch_scanline_a2b10g10r10_float,
+      fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10_float,
+      NULL, store_scanline_a2b10g10r10_float },
+
     { PIXMAN_x2b10g10r10,
-      NULL, fetch_scanline_x2b10g10r10, fetch_scanline_x2b10g10r10_float,
-      fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10, fetch_pixel_x2b10g10r10_float,
-      NULL, store_scanline_x2b10g10r10, store_scanline_x2b10g10r10_float },
-    
+      NULL, fetch_scanline_x2b10g10r10_float,
+      fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10_float,
+      NULL, store_scanline_x2b10g10r10_float },
+
 /* YUV formats */
     { PIXMAN_yuy2,
-      fetch_scanline_yuy2, fetch_scanline_generic_64, fetch_scanline_generic_float,
-      fetch_pixel_yuy2, fetch_pixel_generic_64, fetch_pixel_generic_float,
-      NULL, NULL, NULL },
-    
+      fetch_scanline_yuy2, fetch_scanline_generic_float,
+      fetch_pixel_yuy2, fetch_pixel_generic_float,
+      NULL, NULL },
+
     { PIXMAN_yv12,
-      fetch_scanline_yv12, fetch_scanline_generic_64, fetch_scanline_generic_float,
-      fetch_pixel_yv12, fetch_pixel_generic_64, fetch_pixel_generic_float,
+      fetch_scanline_yv12, fetch_scanline_generic_float,
+      fetch_pixel_yv12, fetch_pixel_generic_float,
       NULL, NULL },
     
     { PIXMAN_null },
@@ -1856,13 +1396,10 @@ setup_accessors (bits_image_t *image)
 	if (info->format == image->format)
 	{
 	    image->fetch_scanline_32 = info->fetch_scanline_32;
-	    image->fetch_scanline_64 = info->fetch_scanline_64;
 	    image->fetch_scanline_float = info->fetch_scanline_float;
 	    image->fetch_pixel_32 = info->fetch_pixel_32;
-	    image->fetch_pixel_64 = info->fetch_pixel_64;
 	    image->fetch_pixel_float = info->fetch_pixel_float;
 	    image->store_scanline_32 = info->store_scanline_32;
-	    image->store_scanline_64 = info->store_scanline_64;
 	    image->store_scanline_float = info->store_scanline_float;
 	    
 	    return;
diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index abb4cc4..029093d 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -50,45 +50,6 @@ _pixman_image_get_scanline_generic_float (pixman_iter_t * iter,
     return iter->buffer;
 }
 
-/*
- * By default, just evaluate the image at 32bpp and expand.  Individual image
- * types can plug in a better scanline getter if they want to. For example
- * we  could produce smoother gradients by evaluating them at higher color
- * depth, but that's a project for the future.
- */
-static uint32_t *
-_pixman_image_get_scanline_generic_64 (pixman_iter_t * iter,
-                                       const uint32_t *mask)
-{
-    int             width  = iter->width;
-    uint32_t *      buffer = iter->buffer;
-
-    pixman_iter_get_scanline_t fetch_32 = iter->data;
-    uint32_t *mask8 = NULL;
-
-    /* Contract the mask image, if one exists, so that the 32-bit fetch
-     * function can use it.
-     */
-    if (mask)
-    {
-	mask8 = pixman_malloc_ab (width, sizeof(uint32_t));
-	if (!mask8)
-	    return buffer;
-
-	pixman_contract (mask8, (uint64_t *)mask, width);
-    }
-
-    /* Fetch the source image into the first half of buffer. */
-    fetch_32 (iter, mask8);
-
-    /* Expand from 32bpp to 64bpp in place. */
-    pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, width);
-
-    free (mask8);
-
-    return buffer;
-}
-
 /* Fetch functions */
 
 static force_inline uint32_t
@@ -1141,7 +1102,6 @@ typedef struct
     pixman_format_code_t	format;
     uint32_t			flags;
     pixman_iter_get_scanline_t	get_scanline_32;
-    pixman_iter_get_scanline_t	get_scanline_64;
     pixman_iter_get_scanline_t  get_scanline_float;
 } fetcher_info_t;
 
@@ -1154,7 +1114,6 @@ static const fetcher_info_t fetcher_info[] =
        FAST_PATH_NO_PAD_REPEAT			|
        FAST_PATH_NO_REFLECT_REPEAT),
       bits_image_fetch_untransformed_32,
-      NULL,
       bits_image_fetch_untransformed_float
     },
 
@@ -1171,14 +1130,12 @@ static const fetcher_info_t fetcher_info[] =
     { PIXMAN_a8r8g8b8,
       FAST_BILINEAR_FLAGS,
       bits_image_fetch_bilinear_no_repeat_8888,
-      _pixman_image_get_scanline_generic_64,
       _pixman_image_get_scanline_generic_float
     },
 
     { PIXMAN_x8r8g8b8,
       FAST_BILINEAR_FLAGS,
       bits_image_fetch_bilinear_no_repeat_8888,
-      _pixman_image_get_scanline_generic_64,
       _pixman_image_get_scanline_generic_float
     },
 
@@ -1200,15 +1157,13 @@ static const fetcher_info_t fetcher_info[] =
     { PIXMAN_ ## format,						\
       GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
       bits_image_fetch_bilinear_affine_ ## name,			\
-      _pixman_image_get_scanline_generic_64,				\
       _pixman_image_get_scanline_generic_float				\
     },
 
 #define NEAREST_AFFINE_FAST_PATH(name, format, repeat)			\
     { PIXMAN_ ## format,						\
       GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
-      bits_image_fetch_nearest_affine_ ## name,			\
-      _pixman_image_get_scanline_generic_64,				\
+      bits_image_fetch_nearest_affine_ ## name,				\
       _pixman_image_get_scanline_generic_float				\
     },
 
@@ -1237,7 +1192,6 @@ static const fetcher_info_t fetcher_info[] =
     { PIXMAN_any,
       (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM),
       bits_image_fetch_affine_no_alpha,
-      _pixman_image_get_scanline_generic_64,
       _pixman_image_get_scanline_generic_float
     },
 
@@ -1245,7 +1199,6 @@ static const fetcher_info_t fetcher_info[] =
     { PIXMAN_any,
       0,
       bits_image_fetch_general,
-      _pixman_image_get_scanline_generic_64,
       _pixman_image_get_scanline_generic_float
     },
 
diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
index 12294e8..0bf91e4 100644
--- a/pixman/pixman-general.c
+++ b/pixman/pixman-general.c
@@ -220,7 +220,6 @@ _pixman_implementation_create_general (void)
     pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path);
 
     _pixman_setup_combiner_functions_32 (imp);
-    _pixman_setup_combiner_functions_64 (imp);
     _pixman_setup_combiner_functions_float (imp);
 
     imp->src_iter_init = general_src_iter_init;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index d41e8a1..dd03a93 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -66,10 +66,6 @@ typedef uint32_t (*fetch_pixel_32_t) (bits_image_t *image,
 				      int           x,
 				      int           y);
 
-typedef uint64_t (*fetch_pixel_64_t) (bits_image_t *image,
-				      int           x,
-				      int           y);
-
 typedef argb_t (*fetch_pixel_float_t) (bits_image_t *image,
 				       int           x,
 				       int           y);
@@ -128,7 +124,6 @@ struct solid_fill
     pixman_color_t color;
 
     uint32_t	   color_32;
-    uint64_t	   color_64;
     argb_t	   color_float;
 };
 
@@ -188,10 +183,6 @@ struct bits_image
     fetch_pixel_32_t	       fetch_pixel_32;
     store_scanline_t           store_scanline_32;
 
-    fetch_scanline_t           fetch_scanline_64;
-    fetch_pixel_64_t	       fetch_pixel_64;
-    store_scanline_t           store_scanline_64;
-
     fetch_scanline_t	       fetch_scanline_float;
     fetch_pixel_float_t	       fetch_pixel_float;
     store_scanline_t           store_scanline_float;
@@ -433,13 +424,6 @@ typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp,
 					  const uint32_t *         mask,
 					  int                      width);
 
-typedef void (*pixman_combine_64_func_t) (pixman_implementation_t *imp,
-					  pixman_op_t              op,
-					  uint64_t *               dest,
-					  const uint64_t *         src,
-					  const uint64_t *         mask,
-					  int                      width);
-
 typedef void (*pixman_combine_float_func_t) (pixman_implementation_t *imp,
 					     pixman_op_t	      op,
 					     float *		      dest,
@@ -475,7 +459,6 @@ typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp,
 						  pixman_iter_t           *iter);
 
 void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
-void _pixman_setup_combiner_functions_64 (pixman_implementation_t *imp);
 void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp);
 
 typedef struct
@@ -503,8 +486,6 @@ struct pixman_implementation_t
 
     pixman_combine_32_func_t	combine_32[PIXMAN_N_OPERATORS];
     pixman_combine_32_func_t	combine_32_ca[PIXMAN_N_OPERATORS];
-    pixman_combine_64_func_t	combine_64[PIXMAN_N_OPERATORS];
-    pixman_combine_64_func_t	combine_64_ca[PIXMAN_N_OPERATORS];
     pixman_combine_float_func_t	combine_float[PIXMAN_N_OPERATORS];
     pixman_combine_float_func_t	combine_float_ca[PIXMAN_N_OPERATORS];
 };
@@ -807,22 +788,12 @@ _pixman_addition_overflows_int (unsigned int a, unsigned int b);
 
 /* Compositing utilities */
 void
-pixman_expand (uint64_t *           dst,
-               const uint32_t *     src,
-               pixman_format_code_t format,
-               int                  width);
-void
 pixman_expand_to_float (argb_t               *dst,
 			const uint32_t       *src,
 			pixman_format_code_t  format,
 			int                   width);
 
 void
-pixman_contract (uint32_t *      dst,
-                 const uint64_t *src,
-                 int             width);
-
-void
 pixman_contract_from_float (uint32_t     *dst,
 			    const argb_t *src,
 			    int           width);
@@ -1137,18 +1108,6 @@ void pixman_timer_register (pixman_timer_t *timer);
 
 #endif /* PIXMAN_TIMERS */
 
-/* sRGB<->linear conversion tables. Linear color space is the same
- * as sRGB but the components are in linear light (gamma 1.0).
- *
- * linear_to_srgb maps linear value from 0 to 4095 ([0.0, 1.0])
- * and returns 8-bit sRGB value.
- *
- * srgb_to_linear maps 8-bit sRGB value to 16-bit linear value
- * with range 0 to 65535 ([0.0, 1.0]).
- */
-extern const uint8_t linear_to_srgb[4096];
-extern const uint16_t srgb_to_linear[256];
-
 #endif /* __ASSEMBLER__ */
 
 #endif /* PIXMAN_PRIVATE_H */
diff --git a/pixman/pixman-solid-fill.c b/pixman/pixman-solid-fill.c
index 3e63c33..60d56d5 100644
--- a/pixman/pixman-solid-fill.c
+++ b/pixman/pixman-solid-fill.c
@@ -61,14 +61,17 @@ color_to_uint32 (const pixman_color_t *color)
         (color->blue >> 8);
 }
 
-static uint64_t
-color_to_uint64 (const pixman_color_t *color)
+static argb_t
+color_to_float (const pixman_color_t *color)
 {
-    return
-        ((uint64_t)color->alpha << 48) |
-        ((uint64_t)color->red << 32) |
-        ((uint64_t)color->green << 16) |
-        ((uint64_t)color->blue);
+    argb_t result;
+
+    result.a = pixman_unorm_to_float (color->alpha, 16);
+    result.r = pixman_unorm_to_float (color->red, 16);
+    result.g = pixman_unorm_to_float (color->green, 16);
+    result.b = pixman_unorm_to_float (color->blue, 16);
+
+    return result;
 }
 
 PIXMAN_EXPORT pixman_image_t *
@@ -82,11 +85,7 @@ pixman_image_create_solid_fill (const pixman_color_t *color)
     img->type = SOLID;
     img->solid.color = *color;
     img->solid.color_32 = color_to_uint32 (color);
-    img->solid.color_64 = color_to_uint64 (color);
-    img->solid.color_float.a = pixman_unorm_to_float (color->alpha, 16);
-    img->solid.color_float.r = pixman_unorm_to_float (color->red, 16);
-    img->solid.color_float.g = pixman_unorm_to_float (color->green, 16);
-    img->solid.color_float.b = pixman_unorm_to_float (color->blue, 16);
+    img->solid.color_float = color_to_float (color);
 
     return img;
 }
diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
index 4f9db29..551f3f9 100644
--- a/pixman/pixman-utils.c
+++ b/pixman/pixman-utils.c
@@ -71,97 +71,6 @@ pixman_malloc_abc (unsigned int a,
 	return malloc (a * b * c);
 }
 
-static void
-unorm_to_unorm_params (int in_width, int out_width, uint32_t *factor, int *shift)
-{
-    int w = 0;
-
-    *factor = 0;
-    while (in_width != 0 && w < out_width)
-    {
-	*factor |= 1 << w;
-	w += in_width;
-    }
-
-    /* Did we generate too many bits? */
-    *shift = w - out_width;
-}
-
-/*
- * This function expands images from ARGB8 format to ARGB16.  To preserve
- * precision, it needs to know the original source format.  For example, if the
- * source was PIXMAN_x1r5g5b5 and the red component contained bits 12345, then
- * the expanded value is 12345123.  To correctly expand this to 16 bits, it
- * should be 1234512345123451 and not 1234512312345123.
- */
-void
-pixman_expand (uint64_t *           dst,
-               const uint32_t *     src,
-               pixman_format_code_t format,
-               int                  width)
-{
-    /*
-     * Determine the sizes of each component and the masks and shifts
-     * required to extract them from the source pixel.
-     */
-    const int a_size = PIXMAN_FORMAT_A (format),
-              r_size = PIXMAN_FORMAT_R (format),
-              g_size = PIXMAN_FORMAT_G (format),
-              b_size = PIXMAN_FORMAT_B (format);
-    const int a_shift = 32 - a_size,
-              r_shift = 24 - r_size,
-              g_shift = 16 - g_size,
-              b_shift =  8 - b_size;
-    const uint8_t a_mask = ~(~0 << a_size),
-                  r_mask = ~(~0 << r_size),
-                  g_mask = ~(~0 << g_size),
-                  b_mask = ~(~0 << b_size);
-    uint32_t au_factor, ru_factor, gu_factor, bu_factor;
-    int au_shift, ru_shift, gu_shift, bu_shift;
-    int i;
-
-    unorm_to_unorm_params (a_size, 16, &au_factor, &au_shift);
-    unorm_to_unorm_params (r_size, 16, &ru_factor, &ru_shift);
-    unorm_to_unorm_params (g_size, 16, &gu_factor, &gu_shift);
-    unorm_to_unorm_params (b_size, 16, &bu_factor, &bu_shift);
-
-    /* Start at the end so that we can do the expansion in place
-     * when src == dst
-     */
-    for (i = width - 1; i >= 0; i--)
-    {
-	const uint32_t pixel = src[i];
-	uint8_t a, r, g, b;
-	uint64_t a16, r16, g16, b16;
-
-	if (a_size)
-	{
-	    a = (pixel >> a_shift) & a_mask;
-            a16 = a * au_factor >> au_shift;
-	}
-	else
-	{
-	    a16 = 0xffff;
-	}
-
-	if (r_size)
-	{
-	    r = (pixel >> r_shift) & r_mask;
-	    g = (pixel >> g_shift) & g_mask;
-	    b = (pixel >> b_shift) & b_mask;
-            r16 = r * ru_factor >> ru_shift;
-            g16 = g * gu_factor >> gu_shift;
-            b16 = b * bu_factor >> bu_shift;
-	}
-	else
-	{
-	    r16 = g16 = b16 = 0;
-	}
-	
-	dst[i] = a16 << 48 | r16 << 32 | g16 << 16 | b16;
-    }
-}
-
 static force_inline uint16_t
 float_to_unorm (float f, int n_bits)
 {
@@ -269,31 +178,6 @@ pixman_contract_from_float (uint32_t     *dst,
     }
 }
 
-/*
- * Contracting is easier than expanding.  We just need to truncate the
- * components.
- */
-void
-pixman_contract (uint32_t *      dst,
-                 const uint64_t *src,
-                 int             width)
-{
-    int i;
-
-    /* Start at the beginning so that we can do the contraction in
-     * place when src == dst
-     */
-    for (i = 0; i < width; i++)
-    {
-	const uint8_t a = src[i] >> 56,
-	              r = src[i] >> 40,
-	              g = src[i] >> 24,
-	              b = src[i] >> 8;
-
-	dst[i] = a << 24 | r << 16 | g << 8 | b;
-    }
-}
-
 uint32_t *
 _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask)
 {
commit 5ff0bbd9721bb216a8332cbde18adc458af3cdec
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Sat Aug 18 14:11:07 2012 -0400

    Switch the wide pipeline over to using floating point
    
    In pixman-bits-image.c, remove bits_image_fetch_untransformed_64() and
    add bits_image_fetch_untransformed_float(); change
    dest_get_scanline_wide() to produce a floating point buffer.
    
    In the gradients, change *_get_scanline_wide() to call
    pixman_expand_to_float() instead of pixman_expand().
    
    In pixman-general.c change the wide Bpp to 16 instead of 8, and
    initialize the buffers to 0 to prevent NaNs from causing trouble.
    
    In pixman-noop.c make the wide solid iterator generate floating point
    pixels.
    
    In pixman-solid-fill.c, cache a floating point pixel, and make the
    wide iterator generate floating point pixels.
    
    Bug fix in bits_image_fetch_untransformed_repeat_normal

diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index 07353dc..abb4cc4 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -979,17 +979,17 @@ replicate_pixel_32 (bits_image_t *   bits,
 }
 
 static void
-replicate_pixel_64 (bits_image_t *   bits,
-		    int              x,
-		    int              y,
-		    int              width,
-		    uint32_t *       b)
+replicate_pixel_float (bits_image_t *   bits,
+		       int              x,
+		       int              y,
+		       int              width,
+		       uint32_t *       b)
 {
-    uint64_t color;
-    uint64_t *buffer = (uint64_t *)b;
-    uint64_t *end;
+    argb_t color;
+    argb_t *buffer = (argb_t *)b;
+    argb_t *end;
 
-    color = bits->fetch_pixel_64 (bits, x, y);
+    color = bits->fetch_pixel_float (bits, x, y);
 
     end = buffer + width;
     while (buffer < end)
@@ -1008,7 +1008,7 @@ bits_image_fetch_untransformed_repeat_none (bits_image_t *image,
 
     if (y < 0 || y >= image->height)
     {
-	memset (buffer, 0, width * (wide? 8 : 4));
+	memset (buffer, 0, width * (wide? sizeof (argb_t) : 4));
 	return;
     }
 
@@ -1016,10 +1016,10 @@ bits_image_fetch_untransformed_repeat_none (bits_image_t *image,
     {
 	w = MIN (width, -x);
 
-	memset (buffer, 0, w * (wide ? 8 : 4));
+	memset (buffer, 0, w * (wide ? sizeof (argb_t) : 4));
 
 	width -= w;
-	buffer += w * (wide? 2 : 1);
+	buffer += w * (wide? 4 : 1);
 	x += w;
     }
 
@@ -1028,16 +1028,16 @@ bits_image_fetch_untransformed_repeat_none (bits_image_t *image,
 	w = MIN (width, image->width - x);
 
 	if (wide)
-	    image->fetch_scanline_64 ((pixman_image_t *)image, x, y, w, buffer, NULL);
+	    image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL);
 	else
 	    image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL);
 
 	width -= w;
-	buffer += w * (wide? 2 : 1);
+	buffer += w * (wide? 4 : 1);
 	x += w;
     }
 
-    memset (buffer, 0, width * (wide ? 8 : 4));
+    memset (buffer, 0, width * (wide ? sizeof (argb_t) : 4));
 }
 
 static void
@@ -1059,7 +1059,7 @@ bits_image_fetch_untransformed_repeat_normal (bits_image_t *image,
     if (image->width == 1)
     {
 	if (wide)
-	    replicate_pixel_64 (image, 0, y, width, buffer);
+	    replicate_pixel_float (image, 0, y, width, buffer);
 	else
 	    replicate_pixel_32 (image, 0, y, width, buffer);
 
@@ -1076,11 +1076,11 @@ bits_image_fetch_untransformed_repeat_normal (bits_image_t *image,
 	w = MIN (width, image->width - x);
 
 	if (wide)
-	    image->fetch_scanline_64 ((pixman_image_t *)image, x, y, w, buffer, NULL);
+	    image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL);
 	else
 	    image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL);
 
-	buffer += w * (wide? 2 : 1);
+	buffer += w * (wide? 4 : 1);
 	x += w;
 	width -= w;
     }
@@ -1112,9 +1112,8 @@ bits_image_fetch_untransformed_32 (pixman_iter_t * iter,
 }
 
 static uint32_t *
-bits_image_fetch_untransformed_64 (pixman_iter_t * iter,
-                                   const uint32_t *mask)
-				   
+bits_image_fetch_untransformed_float (pixman_iter_t * iter,
+				      const uint32_t *mask)
 {
     pixman_image_t *image  = iter->image;
     int             x      = iter->x;
@@ -1155,8 +1154,8 @@ static const fetcher_info_t fetcher_info[] =
        FAST_PATH_NO_PAD_REPEAT			|
        FAST_PATH_NO_REFLECT_REPEAT),
       bits_image_fetch_untransformed_32,
-      bits_image_fetch_untransformed_64,
-      _pixman_image_get_scanline_generic_float
+      NULL,
+      bits_image_fetch_untransformed_float
     },
 
 #define FAST_BILINEAR_FLAGS						\
@@ -1278,7 +1277,7 @@ _pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter)
 	    else
 	    {
 		iter->data = info->get_scanline_32;
-		iter->get_scanline = info->get_scanline_64;
+		iter->get_scanline = info->get_scanline_float;
 	    }
 	    return;
 	}
@@ -1333,30 +1332,27 @@ dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
     int             x      = iter->x;
     int             y      = iter->y;
     int             width  = iter->width;
-    uint64_t *	    buffer = (uint64_t *)iter->buffer;
+    argb_t *	    buffer = (argb_t *)iter->buffer;
 
-    image->fetch_scanline_64 (
+    image->fetch_scanline_float (
 	(pixman_image_t *)image, x, y, width, (uint32_t *)buffer, mask);
     if (image->common.alpha_map)
     {
-	uint64_t *alpha;
+	argb_t *alpha;
 
-	if ((alpha = malloc (width * sizeof (uint64_t))))
+	if ((alpha = malloc (width * sizeof (argb_t))))
 	{
 	    int i;
 
 	    x -= image->common.alpha_origin_x;
 	    y -= image->common.alpha_origin_y;
 
-	    image->common.alpha_map->fetch_scanline_64 (
+	    image->common.alpha_map->fetch_scanline_float (
 		(pixman_image_t *)image->common.alpha_map,
 		x, y, width, (uint32_t *)alpha, mask);
 
 	    for (i = 0; i < width; ++i)
-	    {
-		buffer[i] &= ~0xffff000000000000ULL;
-		buffer[i] |= (alpha[i] & 0xffff000000000000ULL);
-	    }
+		buffer[i].a = alpha[i].a;
 
 	    free (alpha);
 	}
@@ -1397,14 +1393,14 @@ dest_write_back_wide (pixman_iter_t *iter)
     int             width  = iter->width;
     const uint32_t *buffer = iter->buffer;
 
-    image->store_scanline_64 (image, x, y, width, buffer);
+    image->store_scanline_float (image, x, y, width, buffer);
 
     if (image->common.alpha_map)
     {
 	x -= image->common.alpha_origin_x;
 	y -= image->common.alpha_origin_y;
 
-	image->common.alpha_map->store_scanline_64 (
+	image->common.alpha_map->store_scanline_float (
 	    image->common.alpha_map, x, y, width, buffer);
     }
 
diff --git a/pixman/pixman-conical-gradient.c b/pixman/pixman-conical-gradient.c
index 05d3595..8bb46ae 100644
--- a/pixman/pixman-conical-gradient.c
+++ b/pixman/pixman-conical-gradient.c
@@ -165,7 +165,8 @@ conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
 {
     uint32_t *buffer = conical_get_scanline_narrow (iter, NULL);
 
-    pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
+    pixman_expand_to_float (
+	(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
 
     return buffer;
 }
diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
index 2d92014..12294e8 100644
--- a/pixman/pixman-general.c
+++ b/pixman/pixman-general.c
@@ -130,7 +130,7 @@ general_composite_rect  (pixman_implementation_t *imp,
     else
     {
 	narrow = 0;
-	Bpp = 8;
+	Bpp = 16;
     }
 
     if (width * Bpp > SCANLINE_BUFFER_LENGTH)
@@ -145,6 +145,14 @@ general_composite_rect  (pixman_implementation_t *imp,
     mask_buffer = src_buffer + width * Bpp;
     dest_buffer = mask_buffer + width * Bpp;
 
+    if (!narrow)
+    {
+	/* To make sure there aren't any NANs in the buffers */
+	memset (src_buffer, 0, width * Bpp);
+	memset (mask_buffer, 0, width * Bpp);
+	memset (dest_buffer, 0, width * Bpp);
+    }
+    
     /* src iter */
     src_iter_flags = narrow | op_flags[op].src;
 
diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c
index 5dd0501..a70892c 100644
--- a/pixman/pixman-implementation.c
+++ b/pixman/pixman-implementation.c
@@ -177,11 +177,11 @@ _pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
 	switch ((narrow << 1) | component_alpha)
 	{
 	case 0: /* not narrow, not component alpha */
-	    f = (pixman_combine_32_func_t)imp->combine_64[op];
+	    f = (pixman_combine_32_func_t)imp->combine_float[op];
 	    break;
 	    
 	case 1: /* not narrow, component_alpha */
-	    f = (pixman_combine_32_func_t)imp->combine_64_ca[op];
+	    f = (pixman_combine_32_func_t)imp->combine_float_ca[op];
 	    break;
 
 	case 2: /* narrow, not component alpha */
diff --git a/pixman/pixman-linear-gradient.c b/pixman/pixman-linear-gradient.c
index e511368..40c8c9f 100644
--- a/pixman/pixman-linear-gradient.c
+++ b/pixman/pixman-linear-gradient.c
@@ -227,7 +227,8 @@ linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
 {
     uint32_t *buffer = linear_get_scanline_narrow (iter, NULL);
 
-    pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
+    pixman_expand_to_float (
+	(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
 
     return buffer;
 }
diff --git a/pixman/pixman-noop.c b/pixman/pixman-noop.c
index 7b9759f..850caa1 100644
--- a/pixman/pixman-noop.c
+++ b/pixman/pixman-noop.c
@@ -93,9 +93,9 @@ noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
 	}
 	else
 	{
-	    uint64_t color = bits->fetch_pixel_64 (bits, 0, 0);
-	    uint64_t *buffer = (uint64_t *)iter->buffer;
-	    uint64_t *end = buffer + iter->width;
+	    argb_t color = bits->fetch_pixel_float (bits, 0, 0);
+	    argb_t *buffer = (argb_t *)iter->buffer;
+	    argb_t *end = buffer + iter->width;
 
 	    while (buffer < end)
 		*(buffer++) = color;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 6a460e7..d41e8a1 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -126,9 +126,10 @@ struct solid_fill
 {
     image_common_t common;
     pixman_color_t color;
-    
+
     uint32_t	   color_32;
     uint64_t	   color_64;
+    argb_t	   color_float;
 };
 
 struct gradient
diff --git a/pixman/pixman-radial-gradient.c b/pixman/pixman-radial-gradient.c
index 41bb79d..8d56246 100644
--- a/pixman/pixman-radial-gradient.c
+++ b/pixman/pixman-radial-gradient.c
@@ -405,7 +405,8 @@ radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
 {
     uint32_t *buffer = radial_get_scanline_narrow (iter, NULL);
 
-    pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
+    pixman_expand_to_float (
+	(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
 
     return buffer;
 }
diff --git a/pixman/pixman-solid-fill.c b/pixman/pixman-solid-fill.c
index 26f85ce..3e63c33 100644
--- a/pixman/pixman-solid-fill.c
+++ b/pixman/pixman-solid-fill.c
@@ -40,9 +40,9 @@ _pixman_solid_fill_iter_init (pixman_image_t *image, pixman_iter_t  *iter)
     }
     else
     {
-	uint64_t *b = (uint64_t *)iter->buffer;
-	uint64_t *e = b + iter->width;
-	uint64_t color = image->solid.color_64;
+	argb_t *b = (argb_t *)iter->buffer;
+	argb_t *e = b + iter->width;
+	argb_t color = image->solid.color_float;
 
 	while (b < e)
 	    *(b++) = color;
@@ -83,6 +83,10 @@ pixman_image_create_solid_fill (const pixman_color_t *color)
     img->solid.color = *color;
     img->solid.color_32 = color_to_uint32 (color);
     img->solid.color_64 = color_to_uint64 (color);
+    img->solid.color_float.a = pixman_unorm_to_float (color->alpha, 16);
+    img->solid.color_float.r = pixman_unorm_to_float (color->red, 16);
+    img->solid.color_float.g = pixman_unorm_to_float (color->green, 16);
+    img->solid.color_float.b = pixman_unorm_to_float (color->blue, 16);
 
     return img;
 }
commit e75bacc5f9196c3980ce331c7d53de5b7e92d699
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Sat Aug 18 13:22:15 2012 -0400

    pixman-access.c: Add floating point accessor functions
    
    Three new function pointer fields are added to bits_image_t:
    
          fetch_scanline_float
          fetch_pixel_float
          store_scanline_float
    
    similar to the existing 32 and 64 bit accessors. The fetcher_info_t
    struct in pixman_access similarly gets a new get_scanline_float field.
    
    For most formats, the new get_scanline_float field is set to a new
    function fetch_scanline_generic_float() that first calls the 32 bit
    scanline fetcher and then expands these pixels to floating point.
    
    For the 10 bpc formats, new floating point accessors are added that
    use pixman_unorm_to_float() and pixman_float_to_unorm() to convert
    back and forth.
    
    The PIXMAN_a8r8g8b8_sRGB format is handled with a 256-entry table that
    maps 8 bit sRGB channels to linear single precision floating point
    numbers. The sRGB->linear direction can then be done with a simple
    table lookup.
    
    The other direction is currently done with a 4096-entry table which
    works fine for 16 bit integers, but not so great for floating
    point. So instead this patch uses a binary search in the sRGB->linear
    table. The existing 32 bit accessors for the sRGB format are also
    converted to use this method.

diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
index 9feafc4..1eef621 100644
--- a/pixman/pixman-access.c
+++ b/pixman/pixman-access.c
@@ -31,6 +31,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <assert.h>
+#include <math.h>
 
 #include "pixman-accessor.h"
 #include "pixman-private.h"
@@ -635,6 +636,231 @@ fetch_scanline_x2b10g10r10 (pixman_image_t *image,
     }
 }
 
+/* Table mapping sRGB-encoded 8 bit numbers to linearly encoded
+ * floating point numbers. We assume that single precision
+ * floating point follows the IEEE 754 format.
+ */
+static const uint32_t to_linear_u[256] =
+{
+    0x00000000, 0x399f22b4, 0x3a1f22b4, 0x3a6eb40e, 0x3a9f22b4, 0x3ac6eb61,
+    0x3aeeb40e, 0x3b0b3e5d, 0x3b1f22b4, 0x3b33070b, 0x3b46eb61, 0x3b5b518a,
+    0x3b70f18a, 0x3b83e1c5, 0x3b8fe614, 0x3b9c87fb, 0x3ba9c9b5, 0x3bb7ad6d,
+    0x3bc63547, 0x3bd5635f, 0x3be539bd, 0x3bf5ba70, 0x3c0373b5, 0x3c0c6152,
+    0x3c15a703, 0x3c1f45bc, 0x3c293e68, 0x3c3391f4, 0x3c3e4149, 0x3c494d43,
+    0x3c54b6c7, 0x3c607eb1, 0x3c6ca5df, 0x3c792d22, 0x3c830aa8, 0x3c89af9e,
+    0x3c9085db, 0x3c978dc5, 0x3c9ec7c0, 0x3ca63432, 0x3cadd37d, 0x3cb5a601,
+    0x3cbdac20, 0x3cc5e639, 0x3cce54ab, 0x3cd6f7d2, 0x3cdfd00e, 0x3ce8ddb9,
+    0x3cf2212c, 0x3cfb9ac1, 0x3d02a569, 0x3d0798dc, 0x3d0ca7e4, 0x3d11d2ae,
+    0x3d171963, 0x3d1c7c2e, 0x3d21fb3a, 0x3d2796af, 0x3d2d4ebb, 0x3d332380,
+    0x3d39152b, 0x3d3f23e3, 0x3d454fd0, 0x3d4b991c, 0x3d51ffeb, 0x3d588466,
+    0x3d5f26b7, 0x3d65e6fe, 0x3d6cc564, 0x3d73c210, 0x3d7add25, 0x3d810b65,
+    0x3d84b793, 0x3d88732e, 0x3d8c3e48, 0x3d9018f4, 0x3d940343, 0x3d97fd48,
+    0x3d9c0714, 0x3da020b9, 0x3da44a48, 0x3da883d6, 0x3daccd70, 0x3db12728,
+    0x3db59110, 0x3dba0b38, 0x3dbe95b2, 0x3dc3308f, 0x3dc7dbe0, 0x3dcc97b4,
+    0x3dd1641c, 0x3dd6412a, 0x3ddb2eec, 0x3de02d75, 0x3de53cd3, 0x3dea5d16,
+    0x3def8e52, 0x3df4d091, 0x3dfa23e5, 0x3dff885e, 0x3e027f06, 0x3e05427f,
+    0x3e080ea2, 0x3e0ae376, 0x3e0dc104, 0x3e10a752, 0x3e139669, 0x3e168e50,
+    0x3e198f0e, 0x3e1c98ab, 0x3e1fab2e, 0x3e22c6a0, 0x3e25eb08, 0x3e29186a,
+    0x3e2c4ed0, 0x3e2f8e42, 0x3e32d6c4, 0x3e362861, 0x3e39831e, 0x3e3ce702,
+    0x3e405416, 0x3e43ca5e, 0x3e4749e4, 0x3e4ad2ae, 0x3e4e64c2, 0x3e520027,
+    0x3e55a4e6, 0x3e595303, 0x3e5d0a8a, 0x3e60cb7c, 0x3e6495e0, 0x3e6869bf,
+    0x3e6c4720, 0x3e702e08, 0x3e741e7f, 0x3e78188c, 0x3e7c1c34, 0x3e8014c0,
+    0x3e822039, 0x3e84308b, 0x3e8645b8, 0x3e885fc3, 0x3e8a7eb0, 0x3e8ca281,
+    0x3e8ecb3a, 0x3e90f8df, 0x3e932b72, 0x3e9562f6, 0x3e979f6f, 0x3e99e0e0,
+    0x3e9c274e, 0x3e9e72b8, 0x3ea0c322, 0x3ea31892, 0x3ea57308, 0x3ea7d28a,
+    0x3eaa3718, 0x3eaca0b7, 0x3eaf0f69, 0x3eb18332, 0x3eb3fc16, 0x3eb67a15,
+    0x3eb8fd34, 0x3ebb8576, 0x3ebe12de, 0x3ec0a56e, 0x3ec33d2a, 0x3ec5da14,
+    0x3ec87c30, 0x3ecb2380, 0x3ecdd008, 0x3ed081ca, 0x3ed338c9, 0x3ed5f508,
+    0x3ed8b68a, 0x3edb7d52, 0x3ede4962, 0x3ee11abe, 0x3ee3f168, 0x3ee6cd64,
+    0x3ee9aeb6, 0x3eec955d, 0x3eef815d, 0x3ef272ba, 0x3ef56976, 0x3ef86594,
+    0x3efb6717, 0x3efe6e02, 0x3f00bd2b, 0x3f02460c, 0x3f03d1a5, 0x3f055ff8,
+    0x3f06f105, 0x3f0884ce, 0x3f0a1b54, 0x3f0bb499, 0x3f0d509f, 0x3f0eef65,
+    0x3f1090ef, 0x3f12353c, 0x3f13dc50, 0x3f15862a, 0x3f1732cc, 0x3f18e237,
+    0x3f1a946d, 0x3f1c4970, 0x3f1e013f, 0x3f1fbbde, 0x3f21794c, 0x3f23398c,
+    0x3f24fca0, 0x3f26c286, 0x3f288b42, 0x3f2a56d3, 0x3f2c253d, 0x3f2df680,
+    0x3f2fca9d, 0x3f31a195, 0x3f337b6a, 0x3f35581e, 0x3f3737b1, 0x3f391a24,
+    0x3f3aff7a, 0x3f3ce7b2, 0x3f3ed2d0, 0x3f40c0d2, 0x3f42b1bc, 0x3f44a58e,
+    0x3f469c49, 0x3f4895ee, 0x3f4a9280, 0x3f4c91ff, 0x3f4e946c, 0x3f5099c8,
+    0x3f52a216, 0x3f54ad55, 0x3f56bb88, 0x3f58ccae, 0x3f5ae0cb, 0x3f5cf7de,
+    0x3f5f11ec, 0x3f612ef0, 0x3f634eef, 0x3f6571ea, 0x3f6797e1, 0x3f69c0d6,
+    0x3f6beccb, 0x3f6e1bc0, 0x3f704db6, 0x3f7282af, 0x3f74baac, 0x3f76f5ae,
+    0x3f7933b6, 0x3f7b74c6, 0x3f7db8de, 0x3f800000
+};
+
+static const float * const to_linear = (const float *)to_linear_u;
+
+static uint8_t
+to_srgb (float f)
+{
+    uint8_t low = 0;
+    uint8_t high = 255;
+
+    while (high - low > 1)
+    {
+	uint8_t mid = (low + high) / 2;
+
+	if (to_linear[mid] > f)
+	    high = mid;
+	else
+	    low = mid;
+    }
+
+    if (to_linear[high] - f < f - to_linear[low])
+	return high;
+    else
+	return low;
+}
+
+static void
+fetch_scanline_a8r8g8b8_sRGB_float (pixman_image_t *image,
+				    int             x,
+				    int             y,
+				    int             width,
+				    uint32_t *      b,
+				    const uint32_t *mask)
+{
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+    const uint32_t *pixel = bits + x;
+    const uint32_t *end = pixel + width;
+    argb_t *buffer = (argb_t *)b;
+
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	argb_t *argb = buffer;
+
+	argb->a = pixman_unorm_to_float ((p >> 24) & 0xff, 8);
+
+	argb->r = to_linear [(p >> 16) & 0xff];
+	argb->g = to_linear [(p >>  8) & 0xff];
+	argb->b = to_linear [(p >>  0) & 0xff];
+
+	buffer++;
+    }
+}
+
+/* Expects a float buffer */
+static void
+fetch_scanline_a2r10g10b10_float (pixman_image_t *image,
+				  int             x,
+				  int             y,
+				  int             width,
+				  uint32_t *      b,
+				  const uint32_t *mask)
+{
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+    const uint32_t *pixel = bits + x;
+    const uint32_t *end = pixel + width;
+    argb_t *buffer = (argb_t *)b;
+
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint64_t a = p >> 30;
+	uint64_t r = (p >> 20) & 0x3ff;
+	uint64_t g = (p >> 10) & 0x3ff;
+	uint64_t b = p & 0x3ff;
+
+	buffer->a = pixman_unorm_to_float (a, 2);
+	buffer->r = pixman_unorm_to_float (r, 10);
+	buffer->g = pixman_unorm_to_float (g, 10);
+	buffer->b = pixman_unorm_to_float (b, 10);
+
+	buffer++;
+    }
+}
+
+/* Expects a float buffer */
+static void
+fetch_scanline_x2r10g10b10_float (pixman_image_t *image,
+				  int             x,
+				  int             y,
+				  int             width,
+				  uint32_t *      b,
+				  const uint32_t *mask)
+{
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+    const uint32_t *pixel = (uint32_t *)bits + x;
+    const uint32_t *end = pixel + width;
+    argb_t *buffer = (argb_t *)b;
+
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint64_t r = (p >> 20) & 0x3ff;
+	uint64_t g = (p >> 10) & 0x3ff;
+	uint64_t b = p & 0x3ff;
+
+	buffer->a = 1.0;
+	buffer->r = pixman_unorm_to_float (r, 10);
+	buffer->g = pixman_unorm_to_float (g, 10);
+	buffer->b = pixman_unorm_to_float (b, 10);
+
+	buffer++;
+    }
+}
+
+/* Expects a float buffer */
+static void
+fetch_scanline_a2b10g10r10_float (pixman_image_t *image,
+				  int             x,
+				  int             y,
+				  int             width,
+				  uint32_t *      b,
+				  const uint32_t *mask)
+{
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+    const uint32_t *pixel = bits + x;
+    const uint32_t *end = pixel + width;
+    argb_t *buffer = (argb_t *)b;
+
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint64_t a = p >> 30;
+	uint64_t b = (p >> 20) & 0x3ff;
+	uint64_t g = (p >> 10) & 0x3ff;
+	uint64_t r = p & 0x3ff;
+
+	buffer->a = pixman_unorm_to_float (a, 2);
+	buffer->r = pixman_unorm_to_float (r, 10);
+	buffer->g = pixman_unorm_to_float (g, 10);
+	buffer->b = pixman_unorm_to_float (b, 10);
+
+	buffer++;
+    }
+}
+
+/* Expects a float buffer */
+static void
+fetch_scanline_x2b10g10r10_float (pixman_image_t *image,
+				  int             x,
+				  int             y,
+				  int             width,
+				  uint32_t *      b,
+				  const uint32_t *mask)
+{
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+    const uint32_t *pixel = (uint32_t *)bits + x;
+    const uint32_t *end = pixel + width;
+    argb_t *buffer = (argb_t *)b;
+
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint64_t b = (p >> 20) & 0x3ff;
+	uint64_t g = (p >> 10) & 0x3ff;
+	uint64_t r = p & 0x3ff;
+
+	buffer->a = 1.0;
+	buffer->r = pixman_unorm_to_float (r, 10);
+	buffer->g = pixman_unorm_to_float (g, 10);
+	buffer->b = pixman_unorm_to_float (b, 10);
+
+	buffer++;
+    }
+}
+
 static void
 fetch_scanline_yuy2 (pixman_image_t *image,
                      int             x,
@@ -796,6 +1022,106 @@ fetch_pixel_x2b10g10r10 (bits_image_t *image,
     return 0xffffULL << 48 | r << 32 | g << 16 | b;
 }
 
+static argb_t
+fetch_pixel_x2r10g10b10_float (bits_image_t *image,
+			       int           offset,
+			       int           line)
+{
+    uint32_t *row = image->bits + line * image->rowstride;
+    uint32_t word = READ (image, row + offset);
+    argb_t result;
+
+    /* The top two bits are not read, so the pixel is always opaque */
+    result.a = 1.0;
+
+    /* Red, green and blue are 10-bit fields, from high bits to low */
+    result.r = pixman_unorm_to_float ((word >> 20) & 0x3ff, 10);
+    result.g = pixman_unorm_to_float ((word >> 10) & 0x3ff, 10);
+    result.b = pixman_unorm_to_float (word & 0x3ff, 10);
+
+    return result;
+}
+
+static argb_t
+fetch_pixel_a2r10g10b10_float (bits_image_t *image,
+			       int           offset,
+			       int           line)
+{
+    uint32_t *row = image->bits + line * image->rowstride;
+    uint32_t word = READ (image, row + offset);
+    uint32_t alpha = word >> 30;
+    argb_t result;
+
+    /* Alpha is the 2-bit field at the very top of the word */
+    result.a = pixman_unorm_to_float (alpha, 2);
+
+    /* Red, green and blue are 10-bit fields, from high bits to low */
+    result.r = pixman_unorm_to_float ((word >> 20) & 0x3ff, 10);
+    result.g = pixman_unorm_to_float ((word >> 10) & 0x3ff, 10);
+    result.b = pixman_unorm_to_float (word & 0x3ff, 10);
+
+    return result;
+}
+
+static argb_t
+fetch_pixel_a2b10g10r10_float (bits_image_t *image,
+			       int           offset,
+			       int           line)
+{
+    uint32_t *row = image->bits + line * image->rowstride;
+    uint32_t word = READ (image, row + offset);
+    uint32_t alpha = word >> 30;
+    argb_t result;
+
+    /* Alpha is the 2-bit field at the very top of the word */
+    result.a = pixman_unorm_to_float (alpha, 2);
+
+    /* Blue, green and red are 10-bit fields, from high bits to low */
+    result.r = pixman_unorm_to_float (word & 0x3ff, 10);
+    result.g = pixman_unorm_to_float ((word >> 10) & 0x3ff, 10);
+    result.b = pixman_unorm_to_float ((word >> 20) & 0x3ff, 10);
+
+    return result;
+}
+
+static argb_t
+fetch_pixel_x2b10g10r10_float (bits_image_t *image,
+			       int           offset,
+			       int           line)
+{
+    uint32_t *row = image->bits + line * image->rowstride;
+    uint32_t word = READ (image, row + offset);
+    argb_t result;
+
+    /* The top two bits are not read, so the pixel is always opaque */
+    result.a = 1.0;
+
+    /* Blue, green and red are 10-bit fields, from high bits to low */
+    result.r = pixman_unorm_to_float (word & 0x3ff, 10);
+    result.g = pixman_unorm_to_float ((word >> 10) & 0x3ff, 10);
+    result.b = pixman_unorm_to_float ((word >> 20) & 0x3ff, 10);
+
+    return result;
+}
+
+static argb_t
+fetch_pixel_a8r8g8b8_sRGB_float (bits_image_t *image,
+				 int           offset,
+				 int           line)
+{
+    uint32_t *row = image->bits + line * image->rowstride;
+    uint32_t word = READ (image, row + offset);
+    argb_t result;
+
+    /* Alpha is a plain 8-bit unorm; r, g, b are looked up in to_linear[] */
+    result.a = pixman_unorm_to_float ((word >> 24) & 0xff, 8);
+    result.r = to_linear [(word >> 16) & 0xff];
+    result.g = to_linear [(word >> 8) & 0xff];
+    result.b = to_linear [word & 0xff];
+
+    return result;
+}
+
 static uint32_t
 fetch_pixel_yuy2 (bits_image_t *image,
 		  int           offset,
@@ -939,6 +1265,135 @@ store_scanline_x2b10g10r10 (bits_image_t *  image,
     }
 }
 
+
+static void
+store_scanline_a2r10g10b10_float (bits_image_t *  image,
+				  int             x,
+				  int             y,
+				  int             width,
+				  const uint32_t *v)
+{
+    /* Packs the argb_t (float) pixels at 'v' into a2r10g10b10 words */
+    uint32_t *pixel = image->bits + image->rowstride * y + x;
+    const argb_t *values = (const argb_t *)v;
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	/* 32 bits wide: a uint16_t would promote to int and overflow in a << 30 */
+	uint32_t a, r, g, b;
+	a = pixman_float_to_unorm (values[i].a, 2);
+	r = pixman_float_to_unorm (values[i].r, 10);
+	g = pixman_float_to_unorm (values[i].g, 10);
+	b = pixman_float_to_unorm (values[i].b, 10);
+
+	WRITE (image, pixel++,
+	       (a << 30) | (r << 20) | (g << 10) | b);
+    }
+}
+
+static void
+store_scanline_x2r10g10b10_float (bits_image_t *  image,
+				  int             x,
+				  int             y,
+				  int             width,
+				  const uint32_t *v)
+{
+    /* Packs r, g, b of each argb_t at 'v' as x2r10g10b10; alpha is not stored */
+    uint32_t *dst = image->bits + image->rowstride * y + x;
+    argb_t *src = (argb_t *)v;
+    int remaining;
+
+    for (remaining = width; remaining > 0; --remaining, ++src)
+    {
+	uint16_t red, green, blue;
+
+	red = pixman_float_to_unorm (src->r, 10);
+	green = pixman_float_to_unorm (src->g, 10);
+	blue = pixman_float_to_unorm (src->b, 10);
+
+	WRITE (image, dst++,
+	       (red << 20) | (green << 10) | blue);
+    }
+}
+
+static void
+store_scanline_a2b10g10r10_float (bits_image_t *  image,
+				  int             x,
+				  int             y,
+				  int             width,
+				  const uint32_t *v)
+{
+    /* Packs the argb_t (float) pixels at 'v' into a2b10g10r10 words */
+    uint32_t *pixel = image->bits + image->rowstride * y + x;
+    const argb_t *values = (const argb_t *)v;
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	/* 32 bits wide: a uint16_t would promote to int and overflow in a << 30 */
+	uint32_t a, r, g, b;
+	a = pixman_float_to_unorm (values[i].a, 2);
+	r = pixman_float_to_unorm (values[i].r, 10);
+	g = pixman_float_to_unorm (values[i].g, 10);
+	b = pixman_float_to_unorm (values[i].b, 10);
+
+	WRITE (image, pixel++,
+	       (a << 30) | (b << 20) | (g << 10) | r);
+    }
+}
+
+static void
+store_scanline_x2b10g10r10_float (bits_image_t *  image,
+				  int             x,
+				  int             y,
+				  int             width,
+				  const uint32_t *v)
+{
+    /* Packs r, g, b of each argb_t at 'v' as x2b10g10r10; alpha is not stored */
+    uint32_t *dst = image->bits + image->rowstride * y + x;
+    argb_t *src = (argb_t *)v;
+    int remaining;
+
+    for (remaining = width; remaining > 0; --remaining, ++src)
+    {
+	uint16_t red, green, blue;
+
+	red = pixman_float_to_unorm (src->r, 10);
+	green = pixman_float_to_unorm (src->g, 10);
+	blue = pixman_float_to_unorm (src->b, 10);
+
+	WRITE (image, dst++,
+	       (blue << 20) | (green << 10) | red);
+    }
+}
+
+static void
+store_scanline_a8r8g8b8_sRGB_float (bits_image_t *  image,
+				    int             x,
+				    int             y,
+				    int             width,
+				    const uint32_t *v)
+{
+    /* Packs argb_t pixels at 'v' as a8r8g8b8, passing r, g, b through to_srgb() */
+    uint32_t *pixel = image->bits + image->rowstride * y + x;
+    const argb_t *values = (const argb_t *)v;
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	uint8_t a, r, g, b;
+
+	a = pixman_float_to_unorm (values[i].a, 8);
+	r = to_srgb (values[i].r);
+	g = to_srgb (values[i].g);
+	b = to_srgb (values[i].b);
+	/* Cast first: a uint8_t promotes to int, and a << 24 can overflow it */
+	WRITE (image, pixel++,
+	       ((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
+    }
+}
+
 /*
  * Contracts a 64bpp image to 32bpp and then stores it using a regular 32-bit
  * store proc. Despite the type, this function expects a uint64_t buffer.
@@ -968,6 +1423,36 @@ store_scanline_generic_64 (bits_image_t *  image,
     free (argb8_pixels);
 }
 
+/*
+ * Stores a scanline of argb_t pixels by first narrowing it to 8 bits per
+ * channel in a temporary buffer and then handing that to the image's
+ * 32-bit store proc. 'values' is typed uint32_t but must point at argb_t.
+ */
+static void
+store_scanline_generic_float (bits_image_t *  image,
+			      int             x,
+			      int             y,
+			      int             width,
+			      const uint32_t *values)
+{
+    uint32_t *scratch;
+
+    assert (image->common.type == BITS);
+
+    scratch = pixman_malloc_ab (width, sizeof(uint32_t));
+
+    /* 'values' is const, so the narrowing cannot be done in place; the
+     * scanline is silently dropped if the scratch buffer can't be had.
+     */
+    if (scratch)
+    {
+	pixman_contract_from_float (scratch, (argb_t *)values, width);
+	image->store_scanline_32 (image, x, y, width, scratch);
+
+	free (scratch);
+    }
+}
+
 /* Despite the type, this function expects both buffer
  * and mask to be uint64_t
  */
@@ -1001,6 +1486,19 @@ fetch_scanline_generic_64 (pixman_image_t *image,
     pixman_expand ((uint64_t *)buffer, buffer, format, width);
 }
 
+static void
+fetch_scanline_generic_float (pixman_image_t *image,
+			      int	      x,
+			      int	      y,
+			      int	      width,
+			      uint32_t *      buffer,
+			      const uint32_t *mask)
+{
+    image->bits.fetch_scanline_32 (image, x, y, width, buffer, NULL);
+    /* Widen to argb_t in place; pixman_expand_to_float() runs back to front for this */
+    pixman_expand_to_float ((argb_t *)buffer, buffer, image->bits.format, width);
+}
+
 /* Despite the type, this function expects a uint64_t *buffer */
 static uint64_t
 fetch_pixel_generic_64 (bits_image_t *image,
@@ -1046,11 +1544,20 @@ fetch_scanline_a8r8g8b8_32_sRGB (pixman_image_t *image,
     
     while (pixel < end)
     {
+	uint8_t a, r, g, b;
+
 	tmp = READ (image, pixel++);
-	*buffer++ =                 (tmp >> 24)               << 24
-		  | (srgb_to_linear[(tmp >> 16) & 0xff] >> 8) << 16
-		  | (srgb_to_linear[(tmp >>  8) & 0xff] >> 8) <<  8
-		  | (srgb_to_linear[(tmp >>  0) & 0xff] >> 8) <<  0;
+
+	a = (tmp >> 24) & 0xff;
+	r = (tmp >> 16) & 0xff;
+	g = (tmp >> 8) & 0xff;
+	b = (tmp >> 0) & 0xff;
+
+	r = to_linear[r] * 255.0f + 0.5f;
+	g = to_linear[g] * 255.0f + 0.5f;
+	b = to_linear[b] * 255.0f + 0.5f;
+	/* Cast first: a uint8_t promotes to int, and a << 24 can overflow it */
+	*buffer++ = ((uint32_t)a << 24) | (r << 16) | (g << 8) | (b << 0);
     }
 }
 
@@ -1085,10 +1592,18 @@ fetch_pixel_a8r8g8b8_32_sRGB (bits_image_t *image,
 {
     uint32_t *bits = image->bits + line * image->rowstride;
     uint32_t tmp = READ (image, bits + offset);
-    return                 (tmp >> 24)               << 24
-	 | (srgb_to_linear[(tmp >> 16) & 0xff] >> 8) << 16
-	 | (srgb_to_linear[(tmp >>  8) & 0xff] >> 8) <<  8
-	 | (srgb_to_linear[(tmp >>  0) & 0xff] >> 8) <<  0;
+    uint8_t a, r, g, b;
+
+    a = (tmp >> 24) & 0xff;
+    r = (tmp >> 16) & 0xff;
+    g = (tmp >> 8) & 0xff;
+    b = (tmp >> 0) & 0xff;
+
+    r = to_linear[r] * 255.0f + 0.5f;
+    g = to_linear[g] * 255.0f + 0.5f;
+    b = to_linear[b] * 255.0f + 0.5f;
+    /* Cast first: a uint8_t promotes to int, and a << 24 can overflow it */
+    return ((uint32_t)a << 24) | (r << 16) | (g << 8) | (b << 0);
 }
 
 static uint64_t
@@ -1119,12 +1634,20 @@ store_scanline_a8r8g8b8_32_sRGB (bits_image_t   *image,
     
     for (i = 0; i < width; ++i)
     {
+	uint8_t a, r, g, b;
+
 	tmp = values[i];
-	WRITE (image, pixel++,
-		  ((uint32_t)     (tmp >> 24     )          << 24)
-		| (linear_to_srgb[(tmp >> 16 << 4) & 0xfff] << 16)
-		| (linear_to_srgb[(tmp >>  8 << 4) & 0xfff] <<  8)
-		| (linear_to_srgb[(tmp >>  0 << 4) & 0xfff] <<  0));
+
+	a = (tmp >> 24) & 0xff;
+	r = (tmp >> 16) & 0xff;
+	g = (tmp >> 8) & 0xff;
+	b = (tmp >> 0) & 0xff;
+
+	r = to_srgb (r * (1/255.0f));
+	g = to_srgb (g * (1/255.0f));
+	b = to_srgb (b * (1/255.0f));
+	/* Alpha was extracted from bits 24-31 and must be shifted back there */
+	WRITE (image, pixel++, ((uint32_t)a << 24) | (r << 16) | (g << 8) | (b << 0));
     }
 }
 
@@ -1152,6 +1675,19 @@ store_scanline_a8r8g8b8_64_sRGB (bits_image_t  *image,
     }
 }
 
+static argb_t
+fetch_pixel_generic_float (bits_image_t *image,
+			   int		 offset,
+			   int           line)
+{
+    uint32_t pixel32 = image->fetch_pixel_32 (image, offset, line);
+    argb_t f;
+    /* Expand the one 32-bit pixel; image->format selects the channel widths */
+    pixman_expand_to_float (&f, &pixel32, image->format, 1);
+
+    return f;
+}
+
 /*
  * XXX: The transformed fetch path only works at 32-bpp so far.  When all
  * paths have wide versions, this can be removed.
@@ -1176,10 +1712,13 @@ typedef struct
     pixman_format_code_t	format;
     fetch_scanline_t		fetch_scanline_32;
     fetch_scanline_t		fetch_scanline_64;
+    fetch_scanline_t		fetch_scanline_float;
     fetch_pixel_32_t		fetch_pixel_32;
     fetch_pixel_64_t		fetch_pixel_64;
+    fetch_pixel_float_t		fetch_pixel_float;
     store_scanline_t		store_scanline_32;
     store_scanline_t		store_scanline_64;
+    store_scanline_t		store_scanline_float;
 } format_info_t;
 
 #define FORMAT_INFO(format) 						\
@@ -1187,8 +1726,13 @@ typedef struct
 	PIXMAN_ ## format,						\
 	    fetch_scanline_ ## format,					\
 	    fetch_scanline_generic_64,					\
-	    fetch_pixel_ ## format, fetch_pixel_generic_64,		\
-	    store_scanline_ ## format, store_scanline_generic_64	\
+	    fetch_scanline_generic_float,				\
+	    fetch_pixel_ ## format,					\
+	    fetch_pixel_generic_64,					\
+	    fetch_pixel_generic_float,					\
+	    store_scanline_ ## format,					\
+	    store_scanline_generic_64,					\
+	    store_scanline_generic_float				\
     }
 
 static const format_info_t accessors[] =
@@ -1206,10 +1750,10 @@ static const format_info_t accessors[] =
 
 /* sRGB formats */
   { PIXMAN_a8r8g8b8_sRGB,
-    fetch_scanline_a8r8g8b8_32_sRGB,
-    fetch_scanline_a8r8g8b8_64_sRGB,
-    fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_64_sRGB,
-    store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_64_sRGB },
+    fetch_scanline_a8r8g8b8_32_sRGB, fetch_scanline_a8r8g8b8_64_sRGB, fetch_scanline_a8r8g8b8_sRGB_float,
+    fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_64_sRGB, fetch_pixel_a8r8g8b8_sRGB_float,
+    store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_64_sRGB, store_scanline_a8r8g8b8_sRGB_float,
+  },
 
 /* 24bpp formats */
     FORMAT_INFO (r8g8b8),
@@ -1269,34 +1813,34 @@ static const format_info_t accessors[] =
 /* Wide formats */
     
     { PIXMAN_a2r10g10b10,
-      NULL, fetch_scanline_a2r10g10b10,
-      fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10,
-      NULL, store_scanline_a2r10g10b10 },
+      NULL, fetch_scanline_a2r10g10b10, fetch_scanline_a2r10g10b10_float,
+      fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10, fetch_pixel_a2r10g10b10_float,
+      NULL, store_scanline_a2r10g10b10, store_scanline_a2r10g10b10_float },
     
     { PIXMAN_x2r10g10b10,
-      NULL, fetch_scanline_x2r10g10b10,
-      fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10,
-      NULL, store_scanline_x2r10g10b10 },
+      NULL, fetch_scanline_x2r10g10b10, fetch_scanline_x2r10g10b10_float,
+      fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10, fetch_pixel_x2r10g10b10_float,
+      NULL, store_scanline_x2r10g10b10, store_scanline_x2r10g10b10_float },
     
     { PIXMAN_a2b10g10r10,
-      NULL, fetch_scanline_a2b10g10r10,
-      fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10,
-      NULL, store_scanline_a2b10g10r10 },
+      NULL, fetch_scanline_a2b10g10r10, fetch_scanline_a2b10g10r10_float,
+      fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10, fetch_pixel_a2b10g10r10_float,
+      NULL, store_scanline_a2b10g10r10, store_scanline_a2b10g10r10_float },
     
     { PIXMAN_x2b10g10r10,
-      NULL, fetch_scanline_x2b10g10r10,
-      fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10,
-      NULL, store_scanline_x2b10g10r10 },
+      NULL, fetch_scanline_x2b10g10r10, fetch_scanline_x2b10g10r10_float,
+      fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10, fetch_pixel_x2b10g10r10_float,
+      NULL, store_scanline_x2b10g10r10, store_scanline_x2b10g10r10_float },
     
 /* YUV formats */
     { PIXMAN_yuy2,
-      fetch_scanline_yuy2, fetch_scanline_generic_64,
-      fetch_pixel_yuy2, fetch_pixel_generic_64,
-      NULL, NULL },
+      fetch_scanline_yuy2, fetch_scanline_generic_64, fetch_scanline_generic_float,
+      fetch_pixel_yuy2, fetch_pixel_generic_64, fetch_pixel_generic_float,
+      NULL, NULL, NULL },
     
     { PIXMAN_yv12,
-      fetch_scanline_yv12, fetch_scanline_generic_64,
-      fetch_pixel_yv12, fetch_pixel_generic_64,
+      fetch_scanline_yv12, fetch_scanline_generic_64, fetch_scanline_generic_float,
+      fetch_pixel_yv12, fetch_pixel_generic_64, fetch_pixel_generic_float,
       NULL, NULL },
     
     { PIXMAN_null },
@@ -1313,10 +1857,13 @@ setup_accessors (bits_image_t *image)
 	{
 	    image->fetch_scanline_32 = info->fetch_scanline_32;
 	    image->fetch_scanline_64 = info->fetch_scanline_64;
+	    image->fetch_scanline_float = info->fetch_scanline_float;
 	    image->fetch_pixel_32 = info->fetch_pixel_32;
 	    image->fetch_pixel_64 = info->fetch_pixel_64;
+	    image->fetch_pixel_float = info->fetch_pixel_float;
 	    image->store_scanline_32 = info->store_scanline_32;
 	    image->store_scanline_64 = info->store_scanline_64;
+	    image->store_scanline_float = info->store_scanline_float;
 	    
 	    return;
 	}
diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index c69e151..07353dc 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -36,6 +36,20 @@
 #include "pixman-combine32.h"
 #include "pixman-inlines.h"
 
+static uint32_t *
+_pixman_image_get_scanline_generic_float (pixman_iter_t * iter,
+					  const uint32_t *mask)
+{
+    pixman_iter_get_scanline_t fetch_32 = iter->data;
+    uint32_t *buffer = iter->buffer;
+    /* iter->data is the scanline getter to run first; presumably it fills iter->buffer */
+    fetch_32 (iter, NULL);
+
+    pixman_expand_to_float ((argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
+
+    return iter->buffer;
+}
+
 /*
  * By default, just evaluate the image at 32bpp and expand.  Individual image
  * types can plug in a better scanline getter if they want to. For example
@@ -1129,6 +1143,7 @@ typedef struct
     uint32_t			flags;
     pixman_iter_get_scanline_t	get_scanline_32;
     pixman_iter_get_scanline_t	get_scanline_64;
+    pixman_iter_get_scanline_t  get_scanline_float;
 } fetcher_info_t;
 
 static const fetcher_info_t fetcher_info[] =
@@ -1140,7 +1155,8 @@ static const fetcher_info_t fetcher_info[] =
        FAST_PATH_NO_PAD_REPEAT			|
        FAST_PATH_NO_REFLECT_REPEAT),
       bits_image_fetch_untransformed_32,
-      bits_image_fetch_untransformed_64
+      bits_image_fetch_untransformed_64,
+      _pixman_image_get_scanline_generic_float
     },
 
 #define FAST_BILINEAR_FLAGS						\
@@ -1156,13 +1172,15 @@ static const fetcher_info_t fetcher_info[] =
     { PIXMAN_a8r8g8b8,
       FAST_BILINEAR_FLAGS,
       bits_image_fetch_bilinear_no_repeat_8888,
-      _pixman_image_get_scanline_generic_64
+      _pixman_image_get_scanline_generic_64,
+      _pixman_image_get_scanline_generic_float
     },
 
     { PIXMAN_x8r8g8b8,
       FAST_BILINEAR_FLAGS,
       bits_image_fetch_bilinear_no_repeat_8888,
-      _pixman_image_get_scanline_generic_64
+      _pixman_image_get_scanline_generic_64,
+      _pixman_image_get_scanline_generic_float
     },
 
 #define GENERAL_BILINEAR_FLAGS						\
@@ -1183,14 +1201,16 @@ static const fetcher_info_t fetcher_info[] =
     { PIXMAN_ ## format,						\
       GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
       bits_image_fetch_bilinear_affine_ ## name,			\
-      _pixman_image_get_scanline_generic_64				\
+      _pixman_image_get_scanline_generic_64,				\
+      _pixman_image_get_scanline_generic_float				\
     },
 
 #define NEAREST_AFFINE_FAST_PATH(name, format, repeat)			\
     { PIXMAN_ ## format,						\
       GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
       bits_image_fetch_nearest_affine_ ## name,			\
-      _pixman_image_get_scanline_generic_64				\
+      _pixman_image_get_scanline_generic_64,				\
+      _pixman_image_get_scanline_generic_float				\
     },
 
 #define AFFINE_FAST_PATHS(name, format, repeat)				\
@@ -1218,11 +1238,17 @@ static const fetcher_info_t fetcher_info[] =
     { PIXMAN_any,
       (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM),
       bits_image_fetch_affine_no_alpha,
-      _pixman_image_get_scanline_generic_64
+      _pixman_image_get_scanline_generic_64,
+      _pixman_image_get_scanline_generic_float
     },
 
     /* General */
-    { PIXMAN_any, 0, bits_image_fetch_general, _pixman_image_get_scanline_generic_64 },
+    { PIXMAN_any,
+      0,
+      bits_image_fetch_general,
+      _pixman_image_get_scanline_generic_64,
+      _pixman_image_get_scanline_generic_float
+    },
 
     { PIXMAN_null },
 };
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 379943e..6a460e7 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -70,6 +70,10 @@ typedef uint64_t (*fetch_pixel_64_t) (bits_image_t *image,
 				      int           x,
 				      int           y);
 
+typedef argb_t (*fetch_pixel_float_t) (bits_image_t *image,
+				       int           x,
+				       int           y);
+
 typedef void (*store_scanline_t) (bits_image_t *  image,
 				  int             x,
 				  int             y,
@@ -187,6 +191,10 @@ struct bits_image
     fetch_pixel_64_t	       fetch_pixel_64;
     store_scanline_t           store_scanline_64;
 
+    fetch_scanline_t	       fetch_scanline_float;
+    fetch_pixel_float_t	       fetch_pixel_float;
+    store_scanline_t           store_scanline_float;
+
     /* Used for indirect access to the bits */
     pixman_read_memory_func_t  read_func;
     pixman_write_memory_func_t write_func;
commit 23252393a2dcae4dc5a7d03727dd66cdd81286ba
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Sat Aug 18 13:02:31 2012 -0400

    pixman-utils.c, pixman-private.h: Add floating point conversion routines
    
    A new struct argb_t containing a floating point pixel is added to
    pixman-private.h and conversion routines are added to pixman-utils.c
    to convert normalized integers to and from that struct.
    
    New functions:
    
      - pixman_expand_to_float()
        Expands a buffer of integer pixels to a buffer of argb_t pixels
    
      - pixman_contract_from_float()
        Converts a buffer of argb_t pixels to a buffer of integer pixels
    
      - pixman_float_to_unorm()
        Converts a floating point number to an unsigned normalized integer
    
      - pixman_unorm_to_float()
        Converts an unsigned normalized integer to a floating point number

diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index c82316f..379943e 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -45,6 +45,16 @@ typedef struct radial_gradient radial_gradient_t;
 typedef struct bits_image bits_image_t;
 typedef struct circle circle_t;
 
+typedef struct argb_t argb_t;
+/* One pixel as four floats, laid out in a, r, g, b order */
+struct argb_t
+{
+    float a;
+    float r;
+    float g;
+    float b;
+};
+
 typedef void (*fetch_scanline_t) (pixman_image_t *image,
 				  int             x,
 				  int             y,
@@ -792,12 +802,22 @@ pixman_expand (uint64_t *           dst,
                const uint32_t *     src,
                pixman_format_code_t format,
                int                  width);
+void
+pixman_expand_to_float (argb_t               *dst,
+			const uint32_t       *src,
+			pixman_format_code_t  format,
+			int                   width);
 
 void
 pixman_contract (uint32_t *      dst,
                  const uint64_t *src,
                  int             width);
 
+void
+pixman_contract_from_float (uint32_t     *dst,
+			    const argb_t *src,
+			    int           width);
+
 /* Region Helpers */
 pixman_bool_t
 pixman_region32_copy_from_region16 (pixman_region32_t *dst,
@@ -957,6 +977,9 @@ unorm_to_unorm (uint32_t val, int from_bits, int to_bits)
     return result;
 }
 
+uint16_t pixman_float_to_unorm (float f, int n_bits);
+float pixman_unorm_to_float (uint16_t u, int n_bits);
+
 /*
  * Various debugging code
  */
diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
index e4a9730..4f9db29 100644
--- a/pixman/pixman-utils.c
+++ b/pixman/pixman-utils.c
@@ -162,6 +162,113 @@ pixman_expand (uint64_t *           dst,
     }
 }
 
+static force_inline uint16_t
+float_to_unorm (float f, int n_bits)
+{
+    uint32_t u;
+
+    if (f > 1.0)
+	f = 1.0;
+    if (!(f > 0.0))	/* also catches NaN, which must not reach the integer conversion */
+	f = 0.0;
+    /* Scale [0, 1] by 2^n_bits, then fold the lone 2^n_bits (f == 1) down to the maximum */
+    u = f * (1 << n_bits);
+    u -= (u >> n_bits);
+
+    return u;
+}
+
+static force_inline float
+unorm_to_float (uint16_t u, int n_bits)
+{
+    uint32_t m = ((1 << n_bits) - 1);
+    /* m is the all-ones n_bits value and maps to 1.0; n_bits == 0 would divide by zero */
+    return (u & m) * (1.f / (float)m);
+}
+
+/*
+ * This function expands images from a8r8g8b8 to argb_t.  To preserve
+ * precision, it needs to know from which source format the a8r8g8b8 pixels
+ * originally came.
+ *
+ * For example, if the source was PIXMAN_x1r5g5b5 and the red component
+ * contained bits 12345, then the 8-bit value is 12345123.  To correctly
+ * expand this to floating point, it should be 12345 / 31.0 and not
+ * 12345123 / 255.0.
+ */
+void
+pixman_expand_to_float (argb_t               *dst,
+			const uint32_t       *src,
+			pixman_format_code_t  format,
+			int                   width)
+{
+    int a_size, r_size, g_size, b_size;
+    int a_shift, r_shift, g_shift, b_shift;
+    int i;
+
+    if (!PIXMAN_FORMAT_VIS (format))
+	format = PIXMAN_a8r8g8b8;	/* formats with PIXMAN_FORMAT_VIS == 0 are read as a8r8g8b8 */
+
+    /*
+     * Determine the sizes of each component and the masks and shifts
+     * required to extract them from the source pixel.
+     */
+    a_size = PIXMAN_FORMAT_A (format);
+    r_size = PIXMAN_FORMAT_R (format);
+    g_size = PIXMAN_FORMAT_G (format);
+    b_size = PIXMAN_FORMAT_B (format);
+    /* Each shift moves the top *_size bits of that channel's byte down to bit 0 */
+    a_shift = 32 - a_size;
+    r_shift = 24 - r_size;
+    g_shift = 16 - g_size;
+    b_shift =  8 - b_size;
+
+    /* Start at the end so that we can do the expansion in place
+     * when src == dst
+     */
+    for (i = width - 1; i >= 0; i--)
+    {
+	const uint32_t pixel = src[i];
+
+	dst[i].a = a_size? unorm_to_float (pixel >> a_shift, a_size) : 1.0;
+	dst[i].r = r_size? unorm_to_float (pixel >> r_shift, r_size) : 0.0;
+	dst[i].g = g_size? unorm_to_float (pixel >> g_shift, g_size) : 0.0;
+	dst[i].b = b_size? unorm_to_float (pixel >> b_shift, b_size) : 0.0;
+    }
+}
+
+uint16_t
+pixman_float_to_unorm (float f, int n_bits)
+{
+    return float_to_unorm (f, n_bits);	/* externally visible wrapper for the static inline helper */
+}
+
+float
+pixman_unorm_to_float (uint16_t u, int n_bits)
+{
+    return unorm_to_float (u, n_bits);	/* externally visible wrapper for the static inline helper */
+}
+
+void
+pixman_contract_from_float (uint32_t     *dst,
+			    const argb_t *src,
+			    int           width)
+{
+    int i;
+    /* Narrow each argb_t pixel to one a8r8g8b8 word, 8 bits per channel */
+    for (i = 0; i < width; ++i)
+    {
+	/* 32 bits wide: a uint8_t would promote to int and overflow in a << 24 */
+	uint32_t a, r, g, b;
+	a = float_to_unorm (src[i].a, 8);
+	r = float_to_unorm (src[i].r, 8);
+	g = float_to_unorm (src[i].g, 8);
+	b = float_to_unorm (src[i].b, 8);
+
+	dst[i] = (a << 24) | (r << 16) | (g << 8) | (b << 0);
+    }
+}
+
 /*
  * Contracting is easier than expanding.  We just need to truncate the
  * components.
commit 4760599ff3008ab0f1e36a7d4d362362817fd930
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Sun Sep 9 17:56:53 2012 -0400

    Add combiner test
    
    This test runs the new floating point combiners on random input with
    divide-by-zero exceptions turned on.
    
    With the floating point combiners the only thing we guarantee is that
    divide-by-zero exceptions are not generated, so change
    enable_fp_exceptions() to only enable those, and rename accordingly.

diff --git a/demos/radial-test.c b/demos/radial-test.c
index 35e90d7..e64f357 100644
--- a/demos/radial-test.c
+++ b/demos/radial-test.c
@@ -133,7 +133,7 @@ main (int argc, char **argv)
     pixman_image_t *src_img, *dest_img;
     int i, j;
 
-    enable_fp_exceptions ();
+    enable_divbyzero_exceptions ();
 
     dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8,
 					 WIDTH, HEIGHT,
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 949d384..c82316f 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -764,6 +764,12 @@ get_implementation (void)
     return global_implementation;
 }
 
+/* This function is exported for the sake of the test suite and not part
+ * of the ABI.
+ */
+PIXMAN_EXPORT pixman_implementation_t *
+_pixman_internal_only_get_implementation (void);
+
 /* Memory allocation helpers */
 void *
 pixman_malloc_ab (unsigned int n, unsigned int b);
diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
index 5633f8f..e4a9730 100644
--- a/pixman/pixman-utils.c
+++ b/pixman/pixman-utils.c
@@ -262,6 +262,15 @@ pixman_region32_copy_from_region16 (pixman_region32_t *dst,
     return retval;
 }
 
+/* Returns get_implementation(). Exported only so that the test suite can
+ * obtain the pixman_implementation_t; it is not part of the ABI.
+ */
+PIXMAN_EXPORT pixman_implementation_t *
+_pixman_internal_only_get_implementation (void)
+{
+    return get_implementation ();
+}
+
 #ifdef DEBUG
 
 void
diff --git a/test/Makefile.sources b/test/Makefile.sources
index 0f34411..0778971 100644
--- a/test/Makefile.sources
+++ b/test/Makefile.sources
@@ -4,6 +4,7 @@ TESTPROGRAMS =			\
 	pdf-op-test		\
 	region-test		\
 	region-translate-test	\
+	combiner-test		\
 	fetch-test		\
 	rotate-test		\
 	oob-test		\
diff --git a/test/combiner-test.c b/test/combiner-test.c
new file mode 100644
index 0000000..c438ae6
--- /dev/null
+++ b/test/combiner-test.c
@@ -0,0 +1,151 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "utils.h"
+#include <sys/types.h>
+#include "pixman-private.h"
+
+static const pixman_op_t op_list[] =
+{
+    PIXMAN_OP_SRC,
+    PIXMAN_OP_OVER,
+    PIXMAN_OP_ADD,
+    PIXMAN_OP_CLEAR,
+    PIXMAN_OP_SRC,
+    PIXMAN_OP_DST,
+    PIXMAN_OP_OVER,
+    PIXMAN_OP_OVER_REVERSE,
+    PIXMAN_OP_IN,
+    PIXMAN_OP_IN_REVERSE,
+    PIXMAN_OP_OUT,
+    PIXMAN_OP_OUT_REVERSE,
+    PIXMAN_OP_ATOP,
+    PIXMAN_OP_ATOP_REVERSE,
+    PIXMAN_OP_XOR,
+    PIXMAN_OP_ADD,
+    PIXMAN_OP_SATURATE,
+    PIXMAN_OP_DISJOINT_CLEAR,
+    PIXMAN_OP_DISJOINT_SRC,
+    PIXMAN_OP_DISJOINT_DST,
+    PIXMAN_OP_DISJOINT_OVER,
+    PIXMAN_OP_DISJOINT_OVER_REVERSE,
+    PIXMAN_OP_DISJOINT_IN,
+    PIXMAN_OP_DISJOINT_IN_REVERSE,
+    PIXMAN_OP_DISJOINT_OUT,
+    PIXMAN_OP_DISJOINT_OUT_REVERSE,
+    PIXMAN_OP_DISJOINT_ATOP,
+    PIXMAN_OP_DISJOINT_ATOP_REVERSE,
+    PIXMAN_OP_DISJOINT_XOR,
+    PIXMAN_OP_CONJOINT_CLEAR,
+    PIXMAN_OP_CONJOINT_SRC,
+    PIXMAN_OP_CONJOINT_DST,
+    PIXMAN_OP_CONJOINT_OVER,
+    PIXMAN_OP_CONJOINT_OVER_REVERSE,
+    PIXMAN_OP_CONJOINT_IN,
+    PIXMAN_OP_CONJOINT_IN_REVERSE,
+    PIXMAN_OP_CONJOINT_OUT,
+    PIXMAN_OP_CONJOINT_OUT_REVERSE,
+    PIXMAN_OP_CONJOINT_ATOP,
+    PIXMAN_OP_CONJOINT_ATOP_REVERSE,
+    PIXMAN_OP_CONJOINT_XOR,
+    PIXMAN_OP_MULTIPLY,
+    PIXMAN_OP_SCREEN,
+    PIXMAN_OP_OVERLAY,
+    PIXMAN_OP_DARKEN,
+    PIXMAN_OP_LIGHTEN,
+    PIXMAN_OP_COLOR_DODGE,
+    PIXMAN_OP_COLOR_BURN,
+    PIXMAN_OP_HARD_LIGHT,
+    PIXMAN_OP_DIFFERENCE,
+    PIXMAN_OP_EXCLUSION,
+    PIXMAN_OP_SOFT_LIGHT,
+    PIXMAN_OP_HSL_HUE,
+    PIXMAN_OP_HSL_SATURATION,
+    PIXMAN_OP_HSL_COLOR,
+    PIXMAN_OP_HSL_LUMINOSITY,
+};
+
+static float
+rand_float (void)
+{
+    union { uint32_t u; float f; } bits;	/* *(float *)&u would break strict aliasing */
+    bits.u = lcg_rand_u32();	/* any bit pattern: may be NaN, infinite or denormal */
+    return bits.f;
+}
+
+static void
+random_floats (argb_t *argb, int width)
+{
+    int i;
+
+    /* Fill 'width' pixels; each channel takes one rand_float() value,
+     * drawn in a, r, g, b order. */
+    for (i = 0; i < width; ++i)
+    {
+	argb[i].a = rand_float();
+	argb[i].r = rand_float();
+	argb[i].g = rand_float();
+	argb[i].b = rand_float();
+    }
+}
+
+#define WIDTH	512
+
+static pixman_combine_float_func_t
+lookup_combiner (pixman_implementation_t *imp, pixman_op_t op,
+		 pixman_bool_t component_alpha)
+{
+    pixman_combine_float_func_t f;
+    /* Follow imp->fallback until a combiner is found; imp is never checked for NULL */
+    do
+    {
+	if (component_alpha)
+	    f = imp->combine_float_ca[op];
+	else
+	    f = imp->combine_float[op];
+	
+	imp = imp->fallback;
+    }
+    while (!f);
+
+    return f;
+}
+
+int
+main (void)
+{
+    /* Static scratch rows: nothing to allocate, so nothing can fail or leak */
+    static argb_t src_bytes[WIDTH];
+    static argb_t mask_bytes[WIDTH];
+    static argb_t dest_bytes[WIDTH];
+    pixman_implementation_t *impl;
+    int i;
+
+    enable_divbyzero_exceptions();
+    impl = _pixman_internal_only_get_implementation();
+    lcg_srand (0);
+
+    /* Run each operator, without and with component alpha, on fresh random input */
+    for (i = 0; i < ARRAY_LENGTH (op_list); ++i)
+    {
+	pixman_op_t op = op_list[i];
+	pixman_combine_float_func_t combiner;
+	int ca;
+
+	for (ca = 0; ca < 2; ++ca)
+	{
+	    combiner = lookup_combiner (impl, op, ca);
+
+	    random_floats (src_bytes, WIDTH);
+	    random_floats (mask_bytes, WIDTH);
+	    random_floats (dest_bytes, WIDTH);
+
+	    combiner (impl, op,
+		      (float *)dest_bytes,
+		      (float *)mask_bytes,
+		      (float *)src_bytes,
+		      WIDTH);
+	}
+    }
+
+    return 0;
+}
diff --git a/test/gradient-crash-test.c b/test/gradient-crash-test.c
index 73e5bbc..962d1cb 100644
--- a/test/gradient-crash-test.c
+++ b/test/gradient-crash-test.c
@@ -85,7 +85,7 @@ main (int argc, char **argv)
     pixman_fixed_t r_inner;
     pixman_fixed_t r_outer;
 
-    enable_fp_exceptions();
+    enable_divbyzero_exceptions();
     
     for (i = 0; i < WIDTH * HEIGHT; ++i)
 	dest[i] = 0x4f00004f; /* pale blue */
diff --git a/test/pdf-op-test.c b/test/pdf-op-test.c
index 99cb7df..dcb3a60 100644
--- a/test/pdf-op-test.c
+++ b/test/pdf-op-test.c
@@ -36,7 +36,7 @@ main ()
 {
     int o, s, m, d;
 
-    enable_fp_exceptions();
+    enable_divbyzero_exceptions();
 
     for (o = 0; o < ARRAY_LENGTH (pdf_ops); ++o)
     {
diff --git a/test/stress-test.c b/test/stress-test.c
index edcfe09..059250d 100644
--- a/test/stress-test.c
+++ b/test/stress-test.c
@@ -850,7 +850,7 @@ main (int argc, char **argv)
 
     pixman_disable_out_of_bounds_workaround ();
 
-    enable_fp_exceptions();
+    enable_divbyzero_exceptions();
 
     if (getenv ("VERBOSE") != NULL)
 	verbose = TRUE;
diff --git a/test/utils.c b/test/utils.c
index c922ae5..716bb75 100644
--- a/test/utils.c
+++ b/test/utils.c
@@ -723,21 +723,11 @@ fail_after (int seconds, const char *msg)
 }
 
 void
-enable_fp_exceptions (void)
+enable_divbyzero_exceptions (void)
 {
 #ifdef HAVE_FENV_H
 #ifdef HAVE_FEENABLEEXCEPT
-    /* Note: we don't enable the FE_INEXACT trap because
-     * that happens quite commonly. It is possible that
-     * over- and underflow should similarly be considered
-     * okay, but for now the test suite passes with them
-     * enabled, and it's useful to know if they start
-     * occuring.
-     */
-    feenableexcept (FE_DIVBYZERO	|
-		    FE_INVALID		|
-		    FE_OVERFLOW		|
-		    FE_UNDERFLOW);
+    feenableexcept (FE_DIVBYZERO);
 #endif
 #endif
 }
diff --git a/test/utils.h b/test/utils.h
index faf427f..f7ea34c 100644
--- a/test/utils.h
+++ b/test/utils.h
@@ -110,7 +110,7 @@ void
 fail_after (int seconds, const char *msg);
 
 /* If possible, enable traps for floating point exceptions */
-void enable_fp_exceptions(void);
+void enable_divbyzero_exceptions(void);
 
 /* Converts a8r8g8b8 pixels to pixels that
  *  - are not premultiplied,
commit a5b459114e35c7a946362f1e5857e8a87a403ec3
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Fri May 14 00:42:04 2010 -0400

    Add pixman-combine-float.c
    
    This file contains floating point implementations of combiners for all
    pixman operators. These combiners operate on buffers containing single
    precision floating point pixels stored in (a, r, g, b) order.
    
    The combiners are added to the pixman_implementation_t struct, but
    nothing uses them yet.
    
    This commit incorporates a number of bug fixes contributed by Andrea
    Canciani.
    
    Some notes:
    
    - The combiners are making sure to never divide by zero regardless of
      input, so an application could enable divide-by-zero exceptions and
      pixman wouldn't generate any.
    
    - The operators are implemented according to the Render spec. Ie.,
    
        - If the input pixels are between 0 and 1, then so is the output.
    
        - The source and destination coefficients for the conjoint and
          disjoint operators are clamped to [0, 1].
    
    - The PDF operators are not described in the render spec, and the
      implementation here doesn't do any clamping except in the final
      conversion from floating point to destination format.
    
    All of the above will need to be rethought if we add support for pixel
    formats that can support negative and greater-than-one pixels. It is
    in fact already the case in principle that convolution filters can
    produce pixels with negative values, but since these go through the
    broken "wide" path that narrows everything to 32 bits, these negative
    values don't currently survive to the combiners.

diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources
index cf7040f..96540ec 100644
--- a/pixman/Makefile.sources
+++ b/pixman/Makefile.sources
@@ -5,6 +5,7 @@ libpixman_sources =			\
 	pixman-bits-image.c		\
 	pixman-combine32.c		\
 	pixman-combine64.c		\
+	pixman-combine-float.c		\
 	pixman-conical-gradient.c	\
 	pixman-x86.c			\
 	pixman-mips.c			\
diff --git a/pixman/pixman-combine-float.c b/pixman/pixman-combine-float.c
new file mode 100644
index 0000000..7bf5b5a
--- /dev/null
+++ b/pixman/pixman-combine-float.c
@@ -0,0 +1,1003 @@
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
+/*
+ * Copyright © 2010, 2012 Soren Sandmann Pedersen
+ * Copyright © 2010, 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Soren Sandmann Pedersen (sandmann at cs.au.dk)
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include <string.h>
+#include <float.h>
+
+#include "pixman-private.h"
+
+typedef float (* combine_channel_t) (float sa, float s, float da, float d);
+
+static force_inline void
+combine_inner (pixman_bool_t component,
+	       float *dest, const float *src, const float *mask, int n_pixels,
+	       combine_channel_t combine_a, combine_channel_t combine_c)
+{
+    int i;
+
+    if (!mask)
+    {
+	for (i = 0; i < 4 * n_pixels; i += 4)
+	{
+	    float sa = src[i + 0];
+	    float sr = src[i + 1];
+	    float sg = src[i + 2];
+	    float sb = src[i + 3];
+	    
+	    float da = dest[i + 0];
+	    float dr = dest[i + 1];
+	    float dg = dest[i + 2];
+	    float db = dest[i + 3];					
+	    
+	    dest[i + 0] = combine_a (sa, sa, da, da);
+	    dest[i + 1] = combine_c (sa, sr, da, dr);
+	    dest[i + 2] = combine_c (sa, sg, da, dg);
+	    dest[i + 3] = combine_c (sa, sb, da, db);
+	}
+    }
+    else
+    {
+	for (i = 0; i < 4 * n_pixels; i += 4)
+	{
+	    float sa, sr, sg, sb;
+	    float ma, mr, mg, mb;
+	    float da, dr, dg, db;
+	    
+	    sa = src[i + 0];
+	    sr = src[i + 1];
+	    sg = src[i + 2];
+	    sb = src[i + 3];
+	    
+	    if (component)
+	    {
+		ma = mask[i + 0];
+		mr = mask[i + 1];
+		mg = mask[i + 2];
+		mb = mask[i + 3];
+
+		sr *= mr;
+		sg *= mg;
+		sb *= mb;
+
+		ma *= sa;
+		mr *= sa;
+		mg *= sa;
+		mb *= sa;
+		
+		sa = ma;
+	    }
+	    else
+	    {
+		ma = mask[i + 0];
+
+		sa *= ma;
+		sr *= ma;
+		sg *= ma;
+		sb *= ma;
+
+		ma = mr = mg = mb = sa;
+	    }
+	    
+	    da = dest[i + 0];
+	    dr = dest[i + 1];
+	    dg = dest[i + 2];
+	    db = dest[i + 3];
+	    
+	    dest[i + 0] = combine_a (ma, sa, da, da);
+	    dest[i + 1] = combine_c (mr, sr, da, dr);
+	    dest[i + 2] = combine_c (mg, sg, da, dg);
+	    dest[i + 3] = combine_c (mb, sb, da, db);
+	}
+    }
+}
+
+#define MAKE_COMBINER(name, component, combine_a, combine_c)		\
+    static void								\
+    combine_ ## name ## _float (pixman_implementation_t *imp,		\
+				pixman_op_t              op,		\
+				float                   *dest,		\
+				const float             *src,		\
+				const float             *mask,		\
+				int		         n_pixels)	\
+    {									\
+	combine_inner (component, dest, src, mask, n_pixels,		\
+		       combine_a, combine_c);				\
+    }
+
+#define MAKE_COMBINERS(name, combine_a, combine_c)			\
+    MAKE_COMBINER(name ## _ca, TRUE, combine_a, combine_c)		\
+    MAKE_COMBINER(name ## _u, FALSE, combine_a, combine_c)
+
+
+/*
+ * Porter/Duff operators
+ */
+typedef enum
+{
+    ZERO,
+    ONE,
+    SRC_ALPHA,
+    DEST_ALPHA,
+    INV_SA,
+    INV_DA,
+    SA_OVER_DA,
+    DA_OVER_SA,
+    INV_SA_OVER_DA,
+    INV_DA_OVER_SA,
+    ONE_MINUS_SA_OVER_DA,
+    ONE_MINUS_DA_OVER_SA,
+    ONE_MINUS_INV_DA_OVER_SA,
+    ONE_MINUS_INV_SA_OVER_DA
+} combine_factor_t;
+
+#define CLAMP(f)					\
+    (((f) < 0)? 0 : (((f) > 1.0) ? 1.0 : (f)))
+
+static force_inline float
+get_factor (combine_factor_t factor, float sa, float da)
+{
+    float f = -1;
+
+    switch (factor)
+    {
+    case ZERO:
+	f = 0.0f;
+	break;
+
+    case ONE:
+	f = 1.0f;
+	break;
+
+    case SRC_ALPHA:
+	f = sa;
+	break;
+
+    case DEST_ALPHA:
+	f = da;
+	break;
+
+    case INV_SA:
+	f = 1 - sa;
+	break;
+
+    case INV_DA:
+	f = 1 - da;
+	break;
+
+    case SA_OVER_DA:
+	if (da == 0.0f)
+	    f = 1.0f;
+	else
+	    f = CLAMP (sa / da);
+	break;
+
+    case DA_OVER_SA:
+	if (sa == 0.0f)
+	    f = 1.0f;
+	else
+	    f = CLAMP (da / sa);
+	break;
+
+    case INV_SA_OVER_DA:
+	if (da == 0.0f)
+	    f = 1.0f;
+	else
+	    f = CLAMP ((1.0f - sa) / da);
+	break;
+
+    case INV_DA_OVER_SA:
+	if (sa == 0.0f)
+	    f = 1.0f;
+	else
+	    f = CLAMP ((1.0f - da) / sa);
+	break;
+
+    case ONE_MINUS_SA_OVER_DA:
+	if (da == 0.0f)
+	    f = 0.0f;
+	else
+	    f = CLAMP (1.0f - sa / da);
+	break;
+
+    case ONE_MINUS_DA_OVER_SA:
+	if (sa == 0.0f)
+	    f = 0.0f;
+	else
+	    f = CLAMP (1.0f - da / sa);
+	break;
+
+    case ONE_MINUS_INV_DA_OVER_SA:
+	if (sa == 0.0f)
+	    f = 0.0f;
+	else
+	    f = CLAMP (1.0f - (1.0f - da) / sa);
+	break;
+
+    case ONE_MINUS_INV_SA_OVER_DA:
+	if (da == 0.0f)
+	    f = 0.0f;
+	else
+	    f = CLAMP (1.0f - (1.0f - sa) / da);
+	break;
+    }
+
+    return f;
+}
+
+#define MAKE_PD_COMBINERS(name, a, b)					\
+    static force_inline float						\
+    pd_combine_ ## name (float sa, float s, float da, float d)		\
+    {									\
+	const float fa = get_factor (a, sa, da);			\
+	const float fb = get_factor (b, sa, da);			\
+	/* s * fa + d * fb may exceed 1 (e.g. ADD); cap it at 1 */	\
+	return MIN (1.0f, s * fa + d * fb);				\
+    }									\
+    									\
+    MAKE_COMBINERS(name, pd_combine_ ## name, pd_combine_ ## name)
+
+MAKE_PD_COMBINERS (clear,			ZERO,				ZERO)
+MAKE_PD_COMBINERS (src,				ONE,				ZERO)
+MAKE_PD_COMBINERS (dst,				ZERO,				ONE)
+MAKE_PD_COMBINERS (over,			ONE,				INV_SA)
+MAKE_PD_COMBINERS (over_reverse,		INV_DA,				ONE)
+MAKE_PD_COMBINERS (in,				DEST_ALPHA,			ZERO)
+MAKE_PD_COMBINERS (in_reverse,			ZERO,				SRC_ALPHA)
+MAKE_PD_COMBINERS (out,				INV_DA,				ZERO)
+MAKE_PD_COMBINERS (out_reverse,			ZERO,				INV_SA)
+MAKE_PD_COMBINERS (atop,			DEST_ALPHA,			INV_SA)
+MAKE_PD_COMBINERS (atop_reverse,		INV_DA,				SRC_ALPHA)
+MAKE_PD_COMBINERS (xor,				INV_DA,				INV_SA)
+MAKE_PD_COMBINERS (add,				ONE,				ONE)
+
+MAKE_PD_COMBINERS (saturate,			INV_DA_OVER_SA,			ONE)
+
+MAKE_PD_COMBINERS (disjoint_clear,		ZERO,				ZERO)
+MAKE_PD_COMBINERS (disjoint_src,		ONE,				ZERO)
+MAKE_PD_COMBINERS (disjoint_dst,		ZERO,				ONE)
+MAKE_PD_COMBINERS (disjoint_over,		ONE,				INV_SA_OVER_DA)
+MAKE_PD_COMBINERS (disjoint_over_reverse,	INV_DA_OVER_SA,			ONE)
+MAKE_PD_COMBINERS (disjoint_in,			ONE_MINUS_INV_DA_OVER_SA,	ZERO)
+MAKE_PD_COMBINERS (disjoint_in_reverse,		ZERO,				ONE_MINUS_INV_SA_OVER_DA)
+MAKE_PD_COMBINERS (disjoint_out,		INV_DA_OVER_SA,			ZERO)
+MAKE_PD_COMBINERS (disjoint_out_reverse,	ZERO,				INV_SA_OVER_DA)
+MAKE_PD_COMBINERS (disjoint_atop,		ONE_MINUS_INV_DA_OVER_SA,	INV_SA_OVER_DA)
+MAKE_PD_COMBINERS (disjoint_atop_reverse,	INV_DA_OVER_SA,			ONE_MINUS_INV_SA_OVER_DA)
+MAKE_PD_COMBINERS (disjoint_xor,		INV_DA_OVER_SA,			INV_SA_OVER_DA)
+
+MAKE_PD_COMBINERS (conjoint_clear,		ZERO,				ZERO)
+MAKE_PD_COMBINERS (conjoint_src,		ONE,				ZERO)
+MAKE_PD_COMBINERS (conjoint_dst,		ZERO,				ONE)
+MAKE_PD_COMBINERS (conjoint_over,		ONE,				ONE_MINUS_SA_OVER_DA)
+MAKE_PD_COMBINERS (conjoint_over_reverse,	ONE_MINUS_DA_OVER_SA,		ONE)
+MAKE_PD_COMBINERS (conjoint_in,			DA_OVER_SA,			ZERO)
+MAKE_PD_COMBINERS (conjoint_in_reverse,		ZERO,				SA_OVER_DA)
+MAKE_PD_COMBINERS (conjoint_out,		ONE_MINUS_DA_OVER_SA,		ZERO)
+MAKE_PD_COMBINERS (conjoint_out_reverse,	ZERO,				ONE_MINUS_SA_OVER_DA)
+MAKE_PD_COMBINERS (conjoint_atop,		DA_OVER_SA,			ONE_MINUS_SA_OVER_DA)
+MAKE_PD_COMBINERS (conjoint_atop_reverse,	ONE_MINUS_DA_OVER_SA,		SA_OVER_DA)
+MAKE_PD_COMBINERS (conjoint_xor,		ONE_MINUS_DA_OVER_SA,		ONE_MINUS_SA_OVER_DA)
+
+/*
+ * PDF blend modes:
+ *
+ * The following blend modes have been taken from the PDF ISO 32000
+ * specification, which at this point in time is available from
+ * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
+ * The relevant chapters are 11.3.5 and 11.3.6.
+ * The formula for computing the final pixel color given in 11.3.6 is:
+ * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
+ * with B() being the blend function.
+ * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
+ *
+ * These blend modes should match the SVG filter draft specification, as
+ * it has been designed to mirror ISO 32000. Note that at the current point
+ * no released draft exists that shows this, as the formulas have not been
+ * updated yet after the release of ISO 32000.
+ *
+ * The default implementation here uses the MAKE_SEPARABLE_PDF_COMBINERS and
+ * MAKE_NON_SEPARABLE_PDF_COMBINERS macros, which take the blend mode name as
+ * an argument. Note that this implementation operates on premultiplied colors,
+ * while the PDF specification does not. Therefore the code uses the formula
+ * ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
+ */
+
+#define MAKE_SEPARABLE_PDF_COMBINERS(name)				\
+    static force_inline float						\
+    combine_ ## name ## _a (float sa, float s, float da, float d)	\
+    {									\
+	return da + sa - da * sa;					\
+    }									\
+    									\
+    static force_inline float						\
+    combine_ ## name ## _c (float sa, float s, float da, float d)	\
+    {									\
+	float f = (1 - sa) * d + (1 - da) * s;				\
+									\
+	return f + blend_ ## name (sa, s, da, d);			\
+    }									\
+    									\
+    MAKE_COMBINERS (name, combine_ ## name ## _a, combine_ ## name ## _c)
+
+static force_inline float
+blend_multiply (float sa, float s, float da, float d)
+{
+    return d * s;
+}
+
+static force_inline float
+blend_screen (float sa, float s, float da, float d)
+{
+    return d * sa + s * da - s * d;
+}
+
+static force_inline float
+blend_overlay (float sa, float s, float da, float d)
+{
+    if (2 * d < da)
+	return 2 * s * d;
+    else
+	return sa * da - 2 * (da - d) * (sa - s);
+}
+
+static force_inline float
+blend_darken (float sa, float s, float da, float d)
+{
+    s = s * da;
+    d = d * sa;
+
+    if (s > d)
+	return d;
+    else
+	return s;
+}
+
+static force_inline float
+blend_lighten (float sa, float s, float da, float d)
+{
+    s = s * da;
+    d = d * sa;
+
+    if (s > d)
+	return s;
+    else
+	return d;
+}
+
+static force_inline float
+blend_color_dodge (float sa, float s, float da, float d)
+{
+    if (d == 0.0f)
+	return 0.0f;
+    else if (d * sa >= sa * da - s * da)
+	return sa * da;
+    else if (sa - s == 0.0f)
+	return sa * da;
+    else
+	return sa * sa * d / (sa - s);
+}
+
+static force_inline float
+blend_color_burn (float sa, float s, float da, float d)
+{
+    if (d >= da)
+	return sa * da;
+    else if (sa * (da - d) >= s * da)
+	return 0.0f;
+    else if (s == 0.0f)
+	return 0.0f;
+    else
+	return sa * (da - sa * (da - d) / s);
+}
+
+static force_inline float
+blend_hard_light (float sa, float s, float da, float d)
+{
+    if (2 * s < sa)
+	return 2 * s * d;
+    else
+	return sa * da - 2 * (da - d) * (sa - s);
+}
+
+static force_inline float
+blend_soft_light (float sa, float s, float da, float d)
+{
+    if (2 * s < sa)
+    {
+	if (da == 0.0f)
+	    return d * sa;
+	else
+	    return d * sa - d * (da - d) * (sa - 2 * s) / da;
+    }
+    else
+    {
+	if (da == 0.0f)
+	{
+	    return 0.0f;
+	}
+	else
+	{
+	    if (4 * d <= da)
+		return d * sa + (2 * s - sa) * d * ((16 * d / da - 12) * d / da + 3);
+	    else
+		return d * sa + (sqrtf (d * da) - d) * (2 * s - sa);
+	}
+    }
+}
+
+static force_inline float
+blend_difference (float sa, float s, float da, float d)
+{
+    float dsa = d * sa;
+    float sda = s * da;
+
+    if (sda < dsa)
+	return dsa - sda;
+    else
+	return sda - dsa;
+}
+
+static force_inline float
+blend_exclusion (float sa, float s, float da, float d)
+{
+    return s * da + d * sa - 2 * d * s;
+}
+
+MAKE_SEPARABLE_PDF_COMBINERS (multiply)
+MAKE_SEPARABLE_PDF_COMBINERS (screen)
+MAKE_SEPARABLE_PDF_COMBINERS (overlay)
+MAKE_SEPARABLE_PDF_COMBINERS (darken)
+MAKE_SEPARABLE_PDF_COMBINERS (lighten)
+MAKE_SEPARABLE_PDF_COMBINERS (color_dodge)
+MAKE_SEPARABLE_PDF_COMBINERS (color_burn)
+MAKE_SEPARABLE_PDF_COMBINERS (hard_light)
+MAKE_SEPARABLE_PDF_COMBINERS (soft_light)
+MAKE_SEPARABLE_PDF_COMBINERS (difference)
+MAKE_SEPARABLE_PDF_COMBINERS (exclusion)
+
+/*
+ * PDF non-separable blend modes.
+ *
+ * These are implemented using the following functions to operate in Hsl
+ * space, with Cmax, Cmid, Cmin referring to the max, mid and min value
+ * of the red, green and blue components.
+ *
+ * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
+ *
+ * clip_color (C):
+ *   l = LUM (C)
+ *   n = Cmin
+ *   x = Cmax
+ *   if n < 0.0
+ *     C = l + (((C – l) × l) ⁄ (l – n))
+ *   if x > 1.0
+ *     C = l + (((C – l) × (1 – l)) ⁄ (x – l))
+ *   return C
+ *
+ * set_lum (C, l):
+ *   d = l – LUM (C)
+ *   C += d
+ *   return clip_color (C)
+ *
+ * SAT (C) = CH_MAX (C) - CH_MIN (C)
+ *
+ * set_sat (C, s):
+ *  if Cmax > Cmin
+ *    Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
+ *    Cmax = s
+ *  else
+ *    Cmid = Cmax = 0.0
+ *  Cmin = 0.0
+ *  return C
+ */
+
+/* For premultiplied colors, we need to know what happens when C is
+ * multiplied by a real number. LUM and SAT are linear:
+ *
+ *    LUM (r × C) = r × LUM (C)		SAT (r × C) = r × SAT (C)
+ *
+ * If we extend clip_color with an extra argument a and change
+ *
+ *        if x > 1.0
+ *
+ * into
+ *
+ *        if x > a
+ *
+ * then clip_color is also linear:
+ *
+ *     r * clip_color (C, a) = clip_color (r * C, r * a);
+ *
+ * for positive r.
+ *
+ * Similarly, we can extend set_lum with an extra argument that is just passed
+ * on to clip_color:
+ *
+ *     r × set_lum ( C, l, a)
+ *
+ *   = r × clip_color ( C + l - LUM (C), a)
+ *
+ *   = clip_color ( r * C + r × l - LUM (r × C), r * a)
+ *
+ *   = set_lum ( r * C, r * l, r * a)
+ *
+ * Finally, set_sat:
+ *
+ *     x * set_sat (C, s) = set_sat (x * C, x * s)
+ *
+ * The above holds for all positive x because the x'es in the fraction for
+ * C_mid cancel out. Specifically, it holds for x = r:
+ *
+ *     r * set_sat (C, s) = set_sat (r * C, r * s)
+ *
+ *
+ *
+ *
+ * So, for the non-separable PDF blend modes, we have (using s, d for
+ * non-premultiplied colors, and S, D for premultiplied):
+ *
+ *   Color:
+ *
+ *     a_s * a_d * B(s, d)
+ *   = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
+ *   = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
+ *
+ *
+ *   Luminosity:
+ *
+ *     a_s * a_d * B(s, d)
+ *   = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
+ *   = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
+ *
+ *
+ *   Saturation:
+ *
+ *     a_s * a_d * B(s, d)
+ *   = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
+ *   = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
+ *                                        a_s * LUM (D), a_s * a_d)
+ *   = set_lum (set_sat (a_s * D, a_d * SAT (S)), a_s * LUM (D), a_s * a_d)
+ *
+ *   Hue:
+ *
+ *     a_s * a_d * B(s, d)
+ *   = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
+ *   = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
+ *
+ */
+
+typedef struct
+{
+    float	r;
+    float	g;
+    float	b;
+} rgb_t;
+
+static force_inline float
+minf (float a, float b)
+{
+    return a < b? a : b;
+}
+
+static force_inline float
+maxf (float a, float b)
+{
+    return a > b? a : b;
+}
+
+static force_inline float
+channel_min (const rgb_t *c)
+{
+    return minf (minf (c->r, c->g), c->b);
+}
+
+static force_inline float
+channel_max (const rgb_t *c)
+{
+    return maxf (maxf (c->r, c->g), c->b);
+}
+
+static force_inline float
+get_lum (const rgb_t *c)
+{
+    return c->r * 0.3f + c->g * 0.59f + c->b * 0.11f;
+}
+
+static force_inline float
+get_sat (const rgb_t *c)
+{
+    return channel_max (c) - channel_min (c);
+}
+
+static void
+clip_color (rgb_t *color, float a)
+{
+    float l = get_lum (color);
+    float n = channel_min (color);
+    float x = channel_max (color);
+
+    if (n < 0.0f)
+    {
+	if ((l - n) < 4 * FLT_EPSILON)
+	{
+	    color->r = 0.0f;
+	    color->g = 0.0f;
+	    color->b = 0.0f;
+	}
+	else
+	{
+	    color->r = l + (((color->r - l) * l) / (l - n));
+	    color->g = l + (((color->g - l) * l) / (l - n));
+	    color->b = l + (((color->b - l) * l) / (l - n));
+	}
+    }
+    if (x > a)
+    {
+	if ((x - l) < 4 * FLT_EPSILON)
+	{
+	    color->r = a;
+	    color->g = a;
+	    color->b = a;
+	}
+	else
+	{
+	    color->r = l + (((color->r - l) * (a - l) / (x - l)));
+	    color->g = l + (((color->g - l) * (a - l) / (x - l)));
+	    color->b = l + (((color->b - l) * (a - l) / (x - l)));
+	}
+    }
+}
+
+static void
+set_lum (rgb_t *color, float sa, float l)
+{
+    float d = l - get_lum (color);
+
+    color->r = color->r + d;
+    color->g = color->g + d;
+    color->b = color->b + d;
+
+    clip_color (color, sa);
+}
+
+static void
+set_sat (rgb_t *src, float sat)
+{
+    float *max, *mid, *min;
+
+    if (src->r > src->g)
+    {
+	if (src->r > src->b)
+	{
+	    max = &(src->r);
+
+	    if (src->g > src->b)
+	    {
+		mid = &(src->g);
+		min = &(src->b);
+	    }
+	    else
+	    {
+		mid = &(src->b);
+		min = &(src->g);
+	    }
+	}
+	else
+	{
+	    max = &(src->b);
+	    mid = &(src->r);
+	    min = &(src->g);
+	}
+    }
+    else
+    {
+	if (src->r > src->b)
+	{
+	    max = &(src->g);
+	    mid = &(src->r);
+	    min = &(src->b);
+	}
+	else
+	{
+	    min = &(src->r);
+
+	    if (src->g > src->b)
+	    {
+		max = &(src->g);
+		mid = &(src->b);
+	    }
+	    else
+	    {
+		max = &(src->b);
+		mid = &(src->g);
+	    }
+	}
+    }
+
+    if (*max > *min)
+    {
+	*mid = (((*mid - *min) * sat) / (*max - *min));
+	*max = sat;
+    }
+    else
+    {
+	*mid = *max = 0.0f;
+    }
+
+    *min = 0.0f;
+}
+
+/*
+ * Hue:
+ * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
+ */
+static force_inline void
+blend_hsl_hue (rgb_t *res,
+	       const rgb_t *dest, float da,
+	       const rgb_t *src, float sa)
+{
+    res->r = src->r * da;
+    res->g = src->g * da;
+    res->b = src->b * da;
+
+    set_sat (res, get_sat (dest) * sa);
+    set_lum (res, sa * da, get_lum (dest) * sa);
+}
+
+/*
+ * Saturation:
+ * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
+ */
+static force_inline void
+blend_hsl_saturation (rgb_t *res,
+		      const rgb_t *dest, float da,
+		      const rgb_t *src, float sa)
+{
+    res->r = dest->r * sa;
+    res->g = dest->g * sa;
+    res->b = dest->b * sa;
+
+    set_sat (res, get_sat (src) * da);
+    set_lum (res, sa * da, get_lum (dest) * sa);
+}
+
+/*
+ * Color:
+ * B(Cb, Cs) = set_lum (Cs, LUM (Cb))
+ */
+static force_inline void
+blend_hsl_color (rgb_t *res,
+		 const rgb_t *dest, float da,
+		 const rgb_t *src, float sa)
+{
+    res->r = src->r * da;
+    res->g = src->g * da;
+    res->b = src->b * da;
+
+    set_lum (res, sa * da, get_lum (dest) * sa);
+}
+
+/*
+ * Luminosity:
+ * B(Cb, Cs) = set_lum (Cb, LUM (Cs))
+ */
+static force_inline void
+blend_hsl_luminosity (rgb_t *res,
+		      const rgb_t *dest, float da,
+		      const rgb_t *src, float sa)
+{
+    res->r = dest->r * sa;
+    res->g = dest->g * sa;
+    res->b = dest->b * sa;
+
+    set_lum (res, sa * da, get_lum (src) * da);
+}
+
+#define MAKE_NON_SEPARABLE_PDF_COMBINERS(name)				\
+    static void								\
+    combine_ ## name ## _u_float (pixman_implementation_t *imp,		\
+				  pixman_op_t              op,		\
+				  float                   *dest,	\
+				  const float             *src,		\
+				  const float             *mask,	\
+				  int		           n_pixels)	\
+    {									\
+    	int i;								\
+	/* Each pixel is four floats: alpha, then r, g, b */		\
+	for (i = 0; i < 4 * n_pixels; i += 4)				\
+	{								\
+	    float sa, da;						\
+	    rgb_t sc, dc, rc;						\
+									\
+	    sa = src[i + 0];						\
+	    sc.r = src[i + 1];						\
+	    sc.g = src[i + 2];						\
+	    sc.b = src[i + 3];						\
+									\
+	    da = dest[i + 0];						\
+	    dc.r = dest[i + 1];						\
+	    dc.g = dest[i + 2];						\
+	    dc.b = dest[i + 3];						\
+									\
+	    if (mask)							\
+	    {								\
+		float ma = mask[i + 0];					\
+		/* Component alpha is not supported for HSL modes, */	\
+		/* so every source channel is scaled by mask alpha */	\
+		sa *= ma;						\
+		sc.r *= ma;						\
+		sc.g *= ma;						\
+		sc.b *= ma;						\
+	    }								\
+									\
+	    blend_ ## name (&rc, &dc, da, &sc, sa);			\
+	    /* alpha: sa + da - sa*da; color: (1-sa)*d + (1-da)*s + B */\
+	    dest[i + 0] = sa + da - sa * da;				\
+	    dest[i + 1] = (1 - sa) * dc.r + (1 - da) * sc.r + rc.r;	\
+	    dest[i + 2] = (1 - sa) * dc.g + (1 - da) * sc.g + rc.g;	\
+	    dest[i + 3] = (1 - sa) * dc.b + (1 - da) * sc.b + rc.b;	\
+	}								\
+    }
+
+MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_hue)
+MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_saturation)
+MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_color)
+MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_luminosity)
+
+void
+_pixman_setup_combiner_functions_float (pixman_implementation_t *imp)
+{
+    /* Unified alpha */
+    imp->combine_float[PIXMAN_OP_CLEAR] = combine_clear_u_float;
+    imp->combine_float[PIXMAN_OP_SRC] = combine_src_u_float;
+    imp->combine_float[PIXMAN_OP_DST] = combine_dst_u_float;
+    imp->combine_float[PIXMAN_OP_OVER] = combine_over_u_float;
+    imp->combine_float[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u_float;
+    imp->combine_float[PIXMAN_OP_IN] = combine_in_u_float;
+    imp->combine_float[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u_float;
+    imp->combine_float[PIXMAN_OP_OUT] = combine_out_u_float;
+    imp->combine_float[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u_float;
+    imp->combine_float[PIXMAN_OP_ATOP] = combine_atop_u_float;
+    imp->combine_float[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u_float;
+    imp->combine_float[PIXMAN_OP_XOR] = combine_xor_u_float;
+    imp->combine_float[PIXMAN_OP_ADD] = combine_add_u_float;
+    imp->combine_float[PIXMAN_OP_SATURATE] = combine_saturate_u_float;
+
+    /* Disjoint, unified */
+    imp->combine_float[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_u_float;
+    imp->combine_float[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_u_float;
+    imp->combine_float[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_u_float;
+    imp->combine_float[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u_float;
+    imp->combine_float[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_u_float;
+    imp->combine_float[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u_float;
+    imp->combine_float[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u_float;
+    imp->combine_float[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u_float;
+    imp->combine_float[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u_float;
+    imp->combine_float[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u_float;
+    imp->combine_float[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u_float;
+    imp->combine_float[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u_float;
+
+    /* Conjoint, unified */
+    imp->combine_float[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_u_float;
+    imp->combine_float[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_u_float;
+    imp->combine_float[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_u_float;
+    imp->combine_float[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u_float;
+    imp->combine_float[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u_float;
+    imp->combine_float[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u_float;
+    imp->combine_float[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u_float;
+    imp->combine_float[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u_float;
+    imp->combine_float[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u_float;
+    imp->combine_float[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u_float;
+    imp->combine_float[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u_float;
+    imp->combine_float[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u_float;
+
+    /* PDF operators, unified */
+    imp->combine_float[PIXMAN_OP_MULTIPLY] = combine_multiply_u_float;
+    imp->combine_float[PIXMAN_OP_SCREEN] = combine_screen_u_float;
+    imp->combine_float[PIXMAN_OP_OVERLAY] = combine_overlay_u_float;
+    imp->combine_float[PIXMAN_OP_DARKEN] = combine_darken_u_float;
+    imp->combine_float[PIXMAN_OP_LIGHTEN] = combine_lighten_u_float;
+    imp->combine_float[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u_float;
+    imp->combine_float[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u_float;
+    imp->combine_float[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u_float;
+    imp->combine_float[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u_float;
+    imp->combine_float[PIXMAN_OP_DIFFERENCE] = combine_difference_u_float;
+    imp->combine_float[PIXMAN_OP_EXCLUSION] = combine_exclusion_u_float;
+
+    imp->combine_float[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u_float;
+    imp->combine_float[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u_float;
+    imp->combine_float[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u_float;
+    imp->combine_float[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u_float;
+
+    /* Component alpha combiners */
+    imp->combine_float_ca[PIXMAN_OP_CLEAR] = combine_clear_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_SRC] = combine_src_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DST] = combine_dst_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_OVER] = combine_over_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_IN] = combine_in_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_OUT] = combine_out_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_ATOP] = combine_atop_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_XOR] = combine_xor_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_ADD] = combine_add_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca_float;
+
+    /* Disjoint CA */
+    imp->combine_float_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca_float;
+
+    /* Conjoint CA */
+    imp->combine_float_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca_float;
+
+    /* PDF operators CA */
+    imp->combine_float_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_SCREEN] = combine_screen_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DARKEN] = combine_darken_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca_float;
+    imp->combine_float_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca_float;
+
+    /* It is not clear that these make sense, so make them noops for now */
+    imp->combine_float_ca[PIXMAN_OP_HSL_HUE] = combine_dst_u_float;
+    imp->combine_float_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst_u_float;
+    imp->combine_float_ca[PIXMAN_OP_HSL_COLOR] = combine_dst_u_float;
+    imp->combine_float_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst_u_float;
+}
diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
index 42a84a0..2d92014 100644
--- a/pixman/pixman-general.c
+++ b/pixman/pixman-general.c
@@ -213,6 +213,7 @@ _pixman_implementation_create_general (void)
 
     _pixman_setup_combiner_functions_32 (imp);
     _pixman_setup_combiner_functions_64 (imp);
+    _pixman_setup_combiner_functions_float (imp);
 
     imp->src_iter_init = general_src_iter_init;
     imp->dest_iter_init = general_dest_iter_init;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index b9c8319..949d384 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -421,6 +421,13 @@ typedef void (*pixman_combine_64_func_t) (pixman_implementation_t *imp,
 					  const uint64_t *         mask,
 					  int                      width);
 
+typedef void (*pixman_combine_float_func_t) (pixman_implementation_t *imp,
+					     pixman_op_t	      op,
+					     float *		      dest,
+					     const float *	      src,
+					     const float *	      mask,
+					     int		      n_pixels);
+
 typedef void (*pixman_composite_func_t) (pixman_implementation_t *imp,
 					 pixman_composite_info_t *info);
 typedef pixman_bool_t (*pixman_blt_func_t) (pixman_implementation_t *imp,
@@ -450,6 +457,7 @@ typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp,
 
 void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
 void _pixman_setup_combiner_functions_64 (pixman_implementation_t *imp);
+void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp);
 
 typedef struct
 {
@@ -478,6 +486,8 @@ struct pixman_implementation_t
     pixman_combine_32_func_t	combine_32_ca[PIXMAN_N_OPERATORS];
     pixman_combine_64_func_t	combine_64[PIXMAN_N_OPERATORS];
     pixman_combine_64_func_t	combine_64_ca[PIXMAN_N_OPERATORS];
+    pixman_combine_float_func_t	combine_float[PIXMAN_N_OPERATORS];
+    pixman_combine_float_func_t	combine_float_ca[PIXMAN_N_OPERATORS];
 };
 
 uint32_t
commit 7a9c2d586b2349b5e17966a96d7fe8c390abb75a
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Sat Jun 2 00:15:54 2012 -0400

    blitters-test: Prepare for floating point
    
    Comment out some formats in blitters-test that are going to rely on
    floating point in some upcoming patches.

diff --git a/test/blitters-test.c b/test/blitters-test.c
index 8c46cef..30d6912 100644
--- a/test/blitters-test.c
+++ b/test/blitters-test.c
@@ -172,10 +172,12 @@ static pixman_format_code_t img_fmt_list[] = {
     PIXMAN_x14r6g6b6,
     PIXMAN_r8g8b8,
     PIXMAN_b8g8r8,
+#if 0 /* These are going to use floating point in the near future */
     PIXMAN_x2r10g10b10,
     PIXMAN_a2r10g10b10,
     PIXMAN_x2b10g10r10,
     PIXMAN_a2b10g10r10,
+#endif
     PIXMAN_a1r5g5b5,
     PIXMAN_x1r5g5b5,
     PIXMAN_a1b5g5r5,
@@ -395,6 +397,6 @@ main (int argc, const char *argv[])
     }
 
     return fuzzer_test_main("blitters", 2000000,
-			    0x3E1DD2E8,
+			    0x46136E0A,
 			    test_composite, argc, argv);
 }
commit 600a06c81d3999bc6551c7e889726ed7b8bec84d
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Wed Jul 11 03:27:49 2012 -0400

    glyph-test: Prepare for floating point
    
    In preparation for an upcoming change of the wide pipe to use floating
    point, comment out some formats in glyph-test that are going to be
    using floating point and update the CRC32 value to match.

diff --git a/test/glyph-test.c b/test/glyph-test.c
index 84de5aa..9dd5b41 100644
--- a/test/glyph-test.c
+++ b/test/glyph-test.c
@@ -30,10 +30,13 @@ static const pixman_format_code_t formats[] =
     PIXMAN_x14r6g6b6,
     PIXMAN_r8g8b8,
     PIXMAN_b8g8r8,
+#if 0
+    /* These use floating point */
     PIXMAN_x2r10g10b10,
     PIXMAN_a2r10g10b10,
     PIXMAN_x2b10g10r10,
     PIXMAN_a2b10g10r10,
+#endif
     PIXMAN_a1r5g5b5,
     PIXMAN_x1r5g5b5,
     PIXMAN_a1b5g5r5,
@@ -329,7 +332,7 @@ test_glyphs (int testnum, int verbose)
 int
 main (int argc, const char *argv[])
 {
-    return fuzzer_test_main ("glyph", 30000,
-			     0x741CB2DB,
+    return fuzzer_test_main ("glyph", 30000,	
+			     0x79E74996,
 			     test_glyphs, argc, argv);
 }