[Pixman] [PATCH v8 14/14] pixman-image: Implement PIXMAN_FILTER_GOOD/BEST as separable convolutions

Mon Jan 4 21:49:01 PST 2016

From: Bill Spitzak <spitzak at gmail.com>

Detects and uses PIXMAN_FILTER_NEAREST for all 8 90-degree rotations and
reflections when the scale is 1.0 and integer translation.

GOOD uses:

 scale < 1/16 : BOX.BOX at size 16
 scale < 3/4 : BOX.BOX at size 1/scale
 larger : BOX.BOX at size 1

 If both directions have a scale >= 3/4 or a scale of 1/2 and an integer
 translation, the faster PIXMAN_FILTER_BILINEAR code is used.

BEST uses:

 scale < 1/24 : BOX.BOX at size 24
 scale < 1/16 : BOX.BOX at size 1/scale
 scale < 1 : IMPULSE.LANCZOS2 at size 1/scale
 scale < 2.333 : IMPULSE.LANCZOS2 at size 1
 scale < 128 : BOX.LANCZOS2 at size 1/(scale-1) (antialiased square pixels)
 larger : BOX.LANCZOS2 at size 1/127 (antialias blur gets thicker)

v8: Changed cutoff in BEST slightly to make filters match better.
    It was doing far too much subsampling, I may have changed if for debugging
    and not changed it back.

Signed-off-by: Bill Spitzak <spitzak at gmail.com>
---
 pixman/pixman-image.c | 299 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 220 insertions(+), 79 deletions(-)

diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 1ff1a49..64748f4 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -28,6 +28,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <assert.h>
+#include <math.h>
 
 #include "pixman-private.h"
 
@@ -274,112 +275,252 @@ compute_image_info (pixman_image_t *image)
 		  FAST_PATH_X_UNIT_POSITIVE	|
 		  FAST_PATH_Y_UNIT_ZERO		|
 		  FAST_PATH_AFFINE_TRANSFORM);
+	switch (image->common.filter)
+	{
+	case PIXMAN_FILTER_CONVOLUTION:
+	    break;
+	case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
+	    flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
+	    break;
+	default:
+	    flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+	    break;
+	}
     }
     else
     {
+	pixman_fixed_t (*m)[3] = image->common.transform->matrix;
+	double dx, dy;
+	int xsubsample, ysubsample;
+	int nearest_ok, bilinear_ok;
+
 	flags |= FAST_PATH_HAS_TRANSFORM;
 
-	if (image->common.transform->matrix[2][0] == 0			&&
-	    image->common.transform->matrix[2][1] == 0			&&
-	    image->common.transform->matrix[2][2] == pixman_fixed_1)
+	nearest_ok = FALSE;
+        bilinear_ok = FALSE;
+
+	if (m[2][0] == 0			&&
+	    m[2][1] == 0			&&
+	    m[2][2] == pixman_fixed_1)
 	{
 	    flags |= FAST_PATH_AFFINE_TRANSFORM;
 
-	    if (image->common.transform->matrix[0][1] == 0 &&
-		image->common.transform->matrix[1][0] == 0)
+	    if (m[0][1] == 0 && m[1][0] == 0)
 	    {
-		if (image->common.transform->matrix[0][0] == -pixman_fixed_1 &&
-		    image->common.transform->matrix[1][1] == -pixman_fixed_1)
+		flags |= FAST_PATH_SCALE_TRANSFORM;
+		if (abs(m[0][0]) == pixman_fixed_1 &&
+		    abs(m[1][1]) == pixman_fixed_1)
 		{
-		    flags |= FAST_PATH_ROTATE_180_TRANSFORM;
+		    nearest_ok = TRUE;
+		    if (m[0][0] < 0 && m[1][1] < 0)
+			flags |= FAST_PATH_ROTATE_180_TRANSFORM;
 		}
-		flags |= FAST_PATH_SCALE_TRANSFORM;
 	    }
-	    else if (image->common.transform->matrix[0][0] == 0 &&
-	             image->common.transform->matrix[1][1] == 0)
+	    else if (m[0][0] == 0 && m[1][1] == 0)
 	    {
-		pixman_fixed_t m01 = image->common.transform->matrix[0][1];
-		pixman_fixed_t m10 = image->common.transform->matrix[1][0];
-
-		if (m01 == -pixman_fixed_1 && m10 == pixman_fixed_1)
-		    flags |= FAST_PATH_ROTATE_90_TRANSFORM;
-		else if (m01 == pixman_fixed_1 && m10 == -pixman_fixed_1)
-		    flags |= FAST_PATH_ROTATE_270_TRANSFORM;
+		if (abs(m[0][1]) == pixman_fixed_1 &&
+		    abs(m[1][0]) == pixman_fixed_1)
+		{
+		    nearest_ok = TRUE;
+		    if (m[0][1] < 0 && m[1][0] > 0)
+			flags |= FAST_PATH_ROTATE_90_TRANSFORM;
+		    else if (m[0][1] > 0 && m[1][0] < 0)
+			flags |= FAST_PATH_ROTATE_270_TRANSFORM;
+		}
 	    }
 	}
 
-	if (image->common.transform->matrix[0][0] > 0)
+	if (nearest_ok)
+	{
+	    /* reject non-integer translation: */
+	    if (pixman_fixed_frac (m[0][2] | m[1][2]))
+		nearest_ok = FALSE;
+	    /* FIXME: there are some affine-test failures, showing
+	     * that handling of BILINEAR and NEAREST filter is not
+	     * quite equivalent when getting close to 32K for the
+	     * translation components of the matrix. That's likely
+	     * some bug, but for now just skip BILINEAR->NEAREST
+	     * optimization in this case.
+	     */
+	    else if (abs(m[0][2] | m[1][2]) > pixman_int_to_fixed (30000))
+		nearest_ok = FALSE;
+	}
+
+	if (m[0][0] > 0)
 	    flags |= FAST_PATH_X_UNIT_POSITIVE;
 
-	if (image->common.transform->matrix[1][0] == 0)
+	if (m[1][0] == 0)
 	    flags |= FAST_PATH_Y_UNIT_ZERO;
-    }
 
-    /* Filter */
-    switch (image->common.filter)
-    {
-    case PIXMAN_FILTER_NEAREST:
-    case PIXMAN_FILTER_FAST:
-	flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
-	break;
+	switch (image->common.filter)
+	{
+	case PIXMAN_FILTER_NEAREST:
+	case PIXMAN_FILTER_FAST:
+	    flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+	    break;
 
-    case PIXMAN_FILTER_BILINEAR:
-    case PIXMAN_FILTER_GOOD:
-    case PIXMAN_FILTER_BEST:
-	flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+	case PIXMAN_FILTER_BILINEAR:
+	    if (nearest_ok)
+		flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+	    else
+		flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+	    break;
 
-	/* Here we have a chance to optimize BILINEAR filter to NEAREST if
-	 * they are equivalent for the currently used transformation matrix.
-	 */
-	if (flags & FAST_PATH_ID_TRANSFORM)
-	{
-	    flags |= FAST_PATH_NEAREST_FILTER;
-	}
-	else if (
-	    /* affine and integer translation components in matrix ... */
-	    ((flags & FAST_PATH_AFFINE_TRANSFORM) &&
-	     !pixman_fixed_frac (image->common.transform->matrix[0][2] |
-				 image->common.transform->matrix[1][2])) &&
-	    (
-		/* ... combined with a simple rotation */
-		(flags & (FAST_PATH_ROTATE_90_TRANSFORM |
-			  FAST_PATH_ROTATE_180_TRANSFORM |
-			  FAST_PATH_ROTATE_270_TRANSFORM)) ||
-		/* ... or combined with a simple non-rotated translation */
-		(image->common.transform->matrix[0][0] == pixman_fixed_1 &&
-		 image->common.transform->matrix[1][1] == pixman_fixed_1 &&
-		 image->common.transform->matrix[0][1] == 0 &&
-		 image->common.transform->matrix[1][0] == 0)
-		)
-	    )
-	{
-	    /* FIXME: there are some affine-test failures, showing that
-	     * handling of BILINEAR and NEAREST filter is not quite
-	     * equivalent when getting close to 32K for the translation
-	     * components of the matrix. That's likely some bug, but for
-	     * now just skip BILINEAR->NEAREST optimization in this case.
+	case PIXMAN_FILTER_GOOD:
+	    if (nearest_ok) {
+		flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+		break;
+	    }
+
+	    /* Compute filter sizes. This is the bounding box of a
+	     * diameter=1 circle transformed by the matrix. Scaling
+	     * down produces values greater than 1. See comment in
+	     * ../demos/scale.c for proof hypot is correct.
+	     *
+	     * For non-affine the circle is centered on one of the 4
+	     * points 1,1 away from the origin. Which one depends on
+	     * the signs of the values in the last row of the matrix,
+	     * chosen to avoid dividing by zero.
 	     */
-	    pixman_fixed_t magic_limit = pixman_int_to_fixed (30000);
-	    if (image->common.transform->matrix[0][2] <= magic_limit  &&
-	        image->common.transform->matrix[1][2] <= magic_limit  &&
-	        image->common.transform->matrix[0][2] >= -magic_limit &&
-	        image->common.transform->matrix[1][2] >= -magic_limit)
-	    {
-		flags |= FAST_PATH_NEAREST_FILTER;
+	    /* This division factor both accounts for the w component
+	     * and converts from fixed to float.
+	     */
+	    dy = 1.0 / (abs(m[2][0]) + abs(m[2][1]) + abs(m[2][2]));
+	    /* There are some signs that hypot is faster with numbers near 1
+	     * so the division is done first. Mathematically it should work
+	     * to divide afterwards.
+	     */
+	    dx = hypot (m[0][0] * dy, m[0][1] * dy);
+	    dy = hypot (m[1][0] * dy, m[1][1] * dy);
+
+	    /* scale < 1/16 : BOX.BOX at size 16
+             * scale < 3/4 : BOX.BOX at size 1/scale
+             * larger : BOX.BOX at size 1      
+             *
+             * If both directions have a scale >= 3/4 or a scale of
+             * 1/2 and an integer translation, the faster
+             * PIXMAN_FILTER_BILINEAR code is used.
+	     *
+	     * Filter size is clamped to 16 to prevent extreme slowness.
+	     */
+	    if (dx <= 4.0 / 3) {
+		dx = 1.0;
+		bilinear_ok = TRUE;
+	    } else if (dx > 16.0) {
+		dx = 16.0;
+	    } else if (dx > 1.999 && dx < 2.001 &&
+		       abs(m[0][0] * m[0][1]) < 4 &&
+		       abs(pixman_fixed_frac(m[0][2]) < 2))
+		bilinear_ok = TRUE;
+	    if (dy <= 4.0 / 3)
+		dy = 1.0;
+	    else if (dy > 16.0) {
+		dy = 16.0;
+		bilinear_ok = FALSE;
+	    } else if (bilinear_ok) {
+		if (dy > 1.999 && dy < 2.001 &&
+		    abs(m[1][0] * m[1][1]) < 4 &&
+		    abs(pixman_fixed_frac(m[1][2]) < 2))
+		    ;
+		else bilinear_ok = FALSE;
+	    }
+	    if (bilinear_ok) {
+		flags |= (FAST_PATH_BILINEAR_FILTER |
+			  FAST_PATH_NO_CONVOLUTION_FILTER);
+		break;
 	    }
-	}
-	break;
 
-    case PIXMAN_FILTER_CONVOLUTION:
-	break;
+	    xsubsample = 0;
+	    while (dx * (1 << xsubsample) <= 16.0) xsubsample++;
+	    ysubsample = 0;
+	    while (dy * (1 << ysubsample) <= 16.0) ysubsample++;
+
+	    if (image->common.filter_params)
+		free (image->common.filter_params);
+
+	    image->common.filter_params =
+		pixman_filter_create_separable_convolution
+		( & image->common.n_filter_params,
+		  pixman_double_to_fixed(dx),
+		  pixman_double_to_fixed(dy),
+		  PIXMAN_KERNEL_BOX,
+		  PIXMAN_KERNEL_BOX,
+		  PIXMAN_KERNEL_BOX,
+		  PIXMAN_KERNEL_BOX,
+		  xsubsample, ysubsample);
+
+	    flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
+	    break;
 
-    case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
-	flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
-	break;
+	case PIXMAN_FILTER_BEST:
+	    if (nearest_ok) {
+		flags |= (FAST_PATH_NEAREST_FILTER |
+			  FAST_PATH_NO_CONVOLUTION_FILTER);
+		break;
+	    }
+	    /* See notes above about filter sizes */
+	    dy = 1.0 / (abs(m[2][0]) + abs(m[2][1]) + abs(m[2][2]));
+	    dx = hypot (m[0][0] * dy, m[0][1] * dy);
+	    dy = hypot (m[1][0] * dy, m[1][1] * dy);
+
+	    /* scale < 1/24 : BOX.BOX at size 24
+             * scale < 1/16 : BOX.BOX at size 1/scale
+             * scale < 1 : IMPULSE.LANCZOS2 at size 1/scale
+             * scale < 2.333 : IMPULSE.LANCZOS2 at size 1
+             * scale < 128 : BOX.LANCZOS2 at size 1/(scale-1)
+             * larger : BOX.LANCZOS2 at size 1/127
+	     *
+             * Filter switches to box and then clamps at 24 to prevent
+             * extreme slowness.
+             *
+	     * When enlarging this produces square pixels with an
+             * anti-aliased border between them.  At scales larger
+             * than 128x the antialias blur is increased to avoid
+             * making lots of subsamples.
+	     */
+	    if (dx > 24.0) dx = 24.0;
+	    else if (dx >= 1.0) ;
+            else if (dx >= 3.0/7) dx = 1.0;
+            else if (dx > 1.0/128) dx /= 1.0 - dx;
+            else dx = 1.0/127;
+
+	    xsubsample = 0;
+	    while (dx * (1 << xsubsample) <= 16.0) xsubsample++;
+
+	    if (dy > 24.0) dy = 24.0;
+	    else if (dy >= 1.0) ;
+            else if (dy >= 0.5) dy = 1.0;
+            else if (dy > 1.0/128) dy /= 1.0 - dy;
+            else dy = 1.0/127;
+
+	    ysubsample = 0;
+	    while (dy * (1 << ysubsample) <= 16.0) ysubsample++;
+
+	    image->common.filter_params =
+		pixman_filter_create_separable_convolution
+		( & image->common.n_filter_params,
+		  pixman_double_to_fixed(dx),
+		  pixman_double_to_fixed(dy),
+                  dx >= 1.0 && dx < 16.0 ? PIXMAN_KERNEL_IMPULSE : PIXMAN_KERNEL_BOX,
+                  dy >= 1.0 && dy < 16.0 ? PIXMAN_KERNEL_IMPULSE : PIXMAN_KERNEL_BOX,
+		  dx < 16.0 ? PIXMAN_KERNEL_LANCZOS2 : PIXMAN_KERNEL_BOX,
+		  dy < 16.0 ? PIXMAN_KERNEL_LANCZOS2 : PIXMAN_KERNEL_BOX,
+		  xsubsample, ysubsample);
+
+	    flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
+	    break;
 
-    default:
-	flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
-	break;
+	case PIXMAN_FILTER_CONVOLUTION:
+	    break;
+
+	case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
+	    flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
+	    break;
+
+	default:
+	    flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
+	    break;
+	}
     }
 
     /* Repeat mode */
-- 
1.9.1