[Pixman] [PATCH v11 13/14] pixman-image: Implement PIXMAN_FILTER_GOOD/BEST as separable convolutions

Fri Feb 5 08:49:07 CET 2016

From: Bill Spitzak <spitzak at gmail.com>

Detects and uses PIXMAN_FILTER_NEAREST for all 8 90-degree rotations and
reflections when the scale is 1.0 and integer translation.

GOOD uses:

 scale < 1/16 : BOX.BOX at size 16
 scale < 3/4 : BOX.BOX at size 1/scale
 larger : BOX.BOX at size 1

 If both directions have a scale >= 3/4 or a scale of 1/2 and an integer
 translation, the faster PIXMAN_FILTER_BILINEAR code is used. This is
 compatable at these scales with older versions of pixman where bilinear
 was always used for GOOD.

BEST uses:

 scale < 1/24 : BOX.BOX at size 24
 scale < 1/16 : BOX.BOX at size 1/scale
 scale < 1 : IMPULSE.LANCZOS2 at size 1/scale
 scale < 2.333 : IMPULSE.LANCZOS2 at size 1
 scale < 128 : BOX.LANCZOS2 at size 1/(scale-1) (antialiased square pixels)
 larger : BOX.LANCZOS2 at size 1/127 (antialias blur gets thicker)

v8: Cutoff in BEST between IMPULSE.LANCZOS2 and BOX.LANCZOS2 adjusted for
    a better match between the filters.

v9: Use the new negative subsample controls to scale the subsamples. These
    were chosen by finding the lowest number that did not add visible
    artifacts to the zone plate image.

    Scale demo altered to default to GOOD and locked-together x+y scale

    Fixed divide-by-zero from all-zero matrix found by stress-test

v11: Whitespace and formatting fixes
     Moved demo changes to a later patch

Signed-off-by: Bill Spitzak <spitzak at gmail.com>
---
 pixman/pixman-image.c | 323 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 241 insertions(+), 82 deletions(-)

diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 1ff1a49..92ed68b 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -28,6 +28,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <assert.h>
+#include <math.h>
 
 #include "pixman-private.h"
 
@@ -274,112 +275,270 @@ compute_image_info (pixman_image_t *image)
 		  FAST_PATH_X_UNIT_POSITIVE	|
 		  FAST_PATH_Y_UNIT_ZERO		|
 		  FAST_PATH_AFFINE_TRANSFORM);
+	switch (image->common.filter)
+	{
+	case PIXMAN_FILTER_CONVOLUTION:
+	    break;
+	case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
+	    flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
+	    break;
+	default:
+	    flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+	    break;
+	}
     }
     else
     {
+	pixman_fixed_t (*m)[3] = image->common.transform->matrix;
+	double dx, dy;
+	int nearest_ok, bilinear_ok;
+
 	flags |= FAST_PATH_HAS_TRANSFORM;
 
-	if (image->common.transform->matrix[2][0] == 0			&&
-	    image->common.transform->matrix[2][1] == 0			&&
-	    image->common.transform->matrix[2][2] == pixman_fixed_1)
-	{
+	nearest_ok = FALSE;
+	bilinear_ok = FALSE;
+
+	if (m[2][0] == 0			&&
+	    m[2][1] == 0			&&
+	    m[2][2] == pixman_fixed_1)
+	{ /* no perspective */
 	    flags |= FAST_PATH_AFFINE_TRANSFORM;
 
-	    if (image->common.transform->matrix[0][1] == 0 &&
-		image->common.transform->matrix[1][0] == 0)
-	    {
-		if (image->common.transform->matrix[0][0] == -pixman_fixed_1 &&
-		    image->common.transform->matrix[1][1] == -pixman_fixed_1)
-		{
-		    flags |= FAST_PATH_ROTATE_180_TRANSFORM;
-		}
+	    if (m[0][1] == 0 &&
+		m[1][0] == 0)
+	    { /* no tilt of either axis */
 		flags |= FAST_PATH_SCALE_TRANSFORM;
+		if (abs(m[0][0]) == pixman_fixed_1 &&
+		    abs(m[1][1]) == pixman_fixed_1)
+		{ /* no scaling */
+		    nearest_ok = TRUE;
+		    if (m[0][0] < 0 && m[1][1] < 0)
+			flags |= FAST_PATH_ROTATE_180_TRANSFORM;
+		}
 	    }
-	    else if (image->common.transform->matrix[0][0] == 0 &&
-	             image->common.transform->matrix[1][1] == 0)
-	    {
-		pixman_fixed_t m01 = image->common.transform->matrix[0][1];
-		pixman_fixed_t m10 = image->common.transform->matrix[1][0];
-
-		if (m01 == -pixman_fixed_1 && m10 == pixman_fixed_1)
-		    flags |= FAST_PATH_ROTATE_90_TRANSFORM;
-		else if (m01 == pixman_fixed_1 && m10 == -pixman_fixed_1)
-		    flags |= FAST_PATH_ROTATE_270_TRANSFORM;
+	    else if (m[0][0] == 0 &&
+		     m[1][1] == 0)
+	    { /* x/y axis are swapped, 90 degree rotation */
+		if (abs(m[0][1]) == pixman_fixed_1 &&
+		    abs(m[1][0]) == pixman_fixed_1)
+		{ /* no scaling */
+		    nearest_ok = TRUE;
+		    if (m[0][1] < 0 && m[1][0] > 0)
+			flags |= FAST_PATH_ROTATE_90_TRANSFORM;
+		    else if (m[0][1] > 0 && m[1][0] < 0)
+			flags |= FAST_PATH_ROTATE_270_TRANSFORM;
+		}
 	    }
 	}
 
-	if (image->common.transform->matrix[0][0] > 0)
+	if (nearest_ok)
+	{
+	    /* reject non-integer translation: */
+	    if (pixman_fixed_frac (m[0][2] | m[1][2]))
+		nearest_ok = FALSE;
+	    /* FIXME: there are some affine-test failures, showing
+	     * that handling of BILINEAR and NEAREST filter is not
+	     * quite equivalent when getting close to 32K for the
+	     * translation components of the matrix. That's likely
+	     * some bug, but for now just skip BILINEAR->NEAREST
+	     * optimization in this case.
+	     */
+	    else if (abs(m[0][2] | m[1][2]) > pixman_int_to_fixed (30000))
+		nearest_ok = FALSE;
+	}
+
+	if (m[0][0] > 0)
 	    flags |= FAST_PATH_X_UNIT_POSITIVE;
 
-	if (image->common.transform->matrix[1][0] == 0)
+	if (m[1][0] == 0)
 	    flags |= FAST_PATH_Y_UNIT_ZERO;
-    }
 
-    /* Filter */
-    switch (image->common.filter)
-    {
-    case PIXMAN_FILTER_NEAREST:
-    case PIXMAN_FILTER_FAST:
-	flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
-	break;
+	switch (image->common.filter)
+	{
+	case PIXMAN_FILTER_NEAREST:
+	case PIXMAN_FILTER_FAST:
+	    flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+	    break;
 
-    case PIXMAN_FILTER_BILINEAR:
-    case PIXMAN_FILTER_GOOD:
-    case PIXMAN_FILTER_BEST:
-	flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+	case PIXMAN_FILTER_BILINEAR:
+	    if (nearest_ok)
+		flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+	    else
+		flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+	    break;
 
-	/* Here we have a chance to optimize BILINEAR filter to NEAREST if
-	 * they are equivalent for the currently used transformation matrix.
-	 */
-	if (flags & FAST_PATH_ID_TRANSFORM)
-	{
-	    flags |= FAST_PATH_NEAREST_FILTER;
-	}
-	else if (
-	    /* affine and integer translation components in matrix ... */
-	    ((flags & FAST_PATH_AFFINE_TRANSFORM) &&
-	     !pixman_fixed_frac (image->common.transform->matrix[0][2] |
-				 image->common.transform->matrix[1][2])) &&
-	    (
-		/* ... combined with a simple rotation */
-		(flags & (FAST_PATH_ROTATE_90_TRANSFORM |
-			  FAST_PATH_ROTATE_180_TRANSFORM |
-			  FAST_PATH_ROTATE_270_TRANSFORM)) ||
-		/* ... or combined with a simple non-rotated translation */
-		(image->common.transform->matrix[0][0] == pixman_fixed_1 &&
-		 image->common.transform->matrix[1][1] == pixman_fixed_1 &&
-		 image->common.transform->matrix[0][1] == 0 &&
-		 image->common.transform->matrix[1][0] == 0)
-		)
-	    )
-	{
-	    /* FIXME: there are some affine-test failures, showing that
-	     * handling of BILINEAR and NEAREST filter is not quite
-	     * equivalent when getting close to 32K for the translation
-	     * components of the matrix. That's likely some bug, but for
-	     * now just skip BILINEAR->NEAREST optimization in this case.
+	case PIXMAN_FILTER_GOOD:
+	    if (nearest_ok)
+	    {
+		flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+		break;
+	    }
+
+	    /* Compute filter sizes. This is the bounding box of a
+	     * diameter=1 circle transformed by the matrix. Scaling
+	     * down produces values greater than 1. See comment in
+	     * ../demos/scale.c for proof hypot is correct.
+	     *
+	     * For non-affine the circle is centered on one of the 4
+	     * points 1,1 away from the origin. Which one depends on
+	     * the signs of the values in the last row of the matrix,
+	     * chosen to avoid dividing by zero.
+	     */
+	    /* This division factor both accounts for the w component
+	     * and converts from fixed to float.
+	     */
+	    dy = abs(m[2][0]) + abs(m[2][1]) + abs(m[2][2]);
+	    if (dy)
+		dy = 1.0 / dy;
+	    /* There are some signs that hypot is faster with numbers near 1
+	     * so the division is done first. Mathematically it should work
+	     * to divide afterwards.
+	     */
+	    dx = hypot (m[0][0] * dy, m[0][1] * dy);
+	    dy = hypot (m[1][0] * dy, m[1][1] * dy);
+
+	    /* scale < 1/16 : BOX.BOX at size 16
+	     * scale < 3/4 : BOX.BOX at size 1/scale
+	     * larger : BOX.BOX at size 1
+	     *
+	     * If both directions have a scale >= 3/4 or a scale of
+	     * 1/2 and an integer translation, the faster
+	     * PIXMAN_FILTER_BILINEAR code is used.
+	     *
+	     * Filter size is clamped to 16 to prevent extreme slowness.
 	     */
-	    pixman_fixed_t magic_limit = pixman_int_to_fixed (30000);
-	    if (image->common.transform->matrix[0][2] <= magic_limit  &&
-	        image->common.transform->matrix[1][2] <= magic_limit  &&
-	        image->common.transform->matrix[0][2] >= -magic_limit &&
-	        image->common.transform->matrix[1][2] >= -magic_limit)
+	    if (dx <= 4.0 / 3)
 	    {
-		flags |= FAST_PATH_NEAREST_FILTER;
+		dx = 1.0;
+		bilinear_ok = TRUE;
+	    }
+	    else if (dx > 16.0)
+	    {
+		dx = 16.0;
+	    }
+	    else if (dx > 1.999 && dx < 2.001 &&
+		     abs(m[0][0] * m[0][1]) < 4 &&
+		     abs(pixman_fixed_frac(m[0][2]) < 2))
+	    {
+		bilinear_ok = TRUE;
+	    }
+	    if (dy <= 4.0 / 3)
+	    {
+		dy = 1.0;
+	    }
+	    else if (dy > 16.0)
+	    {
+		dy = 16.0;
+		bilinear_ok = FALSE;
+	    }
+	    else if (bilinear_ok)
+	    {
+		bilinear_ok =
+		    (dy > 1.999 && dy < 2.001 &&
+		     abs(m[1][0] * m[1][1]) < 4 &&
+		     abs(pixman_fixed_frac(m[1][2]) < 2));
+	    }
+	    if (bilinear_ok)
+	    {
+		flags |= (FAST_PATH_BILINEAR_FILTER |
+			  FAST_PATH_NO_CONVOLUTION_FILTER);
+		break;
 	    }
-	}
-	break;
 
-    case PIXMAN_FILTER_CONVOLUTION:
-	break;
+	    if (image->common.filter_params)
+		free (image->common.filter_params);
+
+	    image->common.filter_params =
+		pixman_filter_create_separable_convolution
+		( & image->common.n_filter_params,
+		  pixman_double_to_fixed(dx),
+		  pixman_double_to_fixed(dy),
+		  PIXMAN_KERNEL_BOX,
+		  PIXMAN_KERNEL_BOX,
+		  PIXMAN_KERNEL_BOX,
+		  PIXMAN_KERNEL_BOX,
+		  -12, -12);
+
+	    flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
+	    break;
 
-    case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
-	flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
-	break;
+	case PIXMAN_FILTER_BEST:
+	    if (nearest_ok)
+	    {
+		flags |= (FAST_PATH_NEAREST_FILTER |
+			  FAST_PATH_NO_CONVOLUTION_FILTER);
+		break;
+	    }
+	    /* See notes above about filter sizes */
+	    dy = abs(m[2][0]) + abs(m[2][1]) + abs(m[2][2]);
+	    if (dy) dy = 1.0 / dy;
+	    dx = hypot (m[0][0] * dy, m[0][1] * dy);
+	    dy = hypot (m[1][0] * dy, m[1][1] * dy);
+
+	    /* scale < 1/24 : BOX.BOX at size 24
+	     * scale < 1/16 : BOX.BOX at size 1/scale
+	     * scale < 1 : IMPULSE.LANCZOS2 at size 1/scale
+	     * scale < 2.333 : IMPULSE.LANCZOS2 at size 1
+	     * scale < 128 : BOX.LANCZOS2 at size 1/(scale-1)
+	     * larger : BOX.LANCZOS2 at size 1/127
+	     *
+	     * Filter switches to box and then clamps at 24 to prevent
+	     * extreme slowness.
+	     *
+	     * When enlarging this produces square pixels with an
+	     * anti-aliased border between them.  At scales larger
+	     * than 128x the antialias blur is increased to avoid
+	     * making lots of subsamples.
+	     */
+	    if (dx > 24.0)
+		dx = 24.0;
+	    else if (dx < 1.0)
+	    {
+		if (dx >= 3.0/7)
+		    dx = 1.0;
+		else if (dx > 1.0/128)
+		    dx /= 1.0 - dx;
+		else
+		    dx = 1.0/127;
+	    }
 
-    default:
-	flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
-	break;
+	    if (dy > 24.0)
+		dy = 24.0;
+	    else if (dy < 1.0)
+	    {
+		if (dy >= 3.0/7)
+		    dy = 1.0;
+		else if (dy > 1.0/128)
+		    dy /= 1.0 - dy;
+		else
+		    dy = 1.0/127;
+	    }
+
+	    image->common.filter_params =
+		pixman_filter_create_separable_convolution
+		( & image->common.n_filter_params,
+		  pixman_double_to_fixed(dx),
+		  pixman_double_to_fixed(dy),
+		  dx >= 1.0 && dx < 16.0 ? PIXMAN_KERNEL_IMPULSE : PIXMAN_KERNEL_BOX,
+		  dy >= 1.0 && dy < 16.0 ? PIXMAN_KERNEL_IMPULSE : PIXMAN_KERNEL_BOX,
+		  dx < 16.0 ? PIXMAN_KERNEL_LANCZOS2 : PIXMAN_KERNEL_BOX,
+		  dy < 16.0 ? PIXMAN_KERNEL_LANCZOS2 : PIXMAN_KERNEL_BOX,
+		  -14, -14);
+
+	    flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
+	    break;
+
+	case PIXMAN_FILTER_CONVOLUTION:
+	    break;
+
+	case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
+	    flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
+	    break;
+
+	default:
+	    flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
+	    break;
+	}
     }
 
     /* Repeat mode */
-- 
1.9.1