[Pixman] [PATCH] Strength-reduce BILINEAR filter to NEAREST filter for identity transforms

Søren Sandmann sandmann at cs.au.dk
Mon Sep 5 18:27:15 PDT 2011


From: Søren Sandmann Pedersen <ssp at redhat.com>

An image with a bilinear filter and an identity transform is
equivalent to one with a nearest filter, so there is no reason the
standard fast paths shouldn't be usable.

But because a BILINEAR filter samples a 2x2 pixel block in the source
image, FAST_PATH_SAMPLES_COVER_CLIP can't be set in the case where the
source area is the entire image, because some compositing operations
might then read pixels outside the image.

This patch fixes the problem by splitting the
FAST_PATH_SAMPLES_COVER_CLIP flag into two separate flags
FAST_PATH_SAMPLES_COVER_CLIP_NEAREST and
FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR that indicate that the clip
covers the samples taking into account NEAREST/BILINEAR filters
respectively.

All the existing compositing operations that require
FAST_PATH_SAMPLES_COVER_CLIP then have their flags modified to pick
either COVER_CLIP_NEAREST or COVER_CLIP_BILINEAR depending on which
filter they depend on.

In compute_image_info() both COVER_CLIP_NEAREST and
COVER_CLIP_BILINEAR can be set depending on how much room there is
around the clip rectangle.

Finally, images with an identity transform and a bilinear filter get
FAST_PATH_NEAREST_FILTER set as well as FAST_PATH_BILINEAR_FILTER.

Performance measurements with render_bench against Xephyr:

Before:

*** ROUND 1 ***
---------------------------------------------------------------
Test: Test Xrender doing non-scaled Over blends
Time: 5.720 sec.
---------------------------------------------------------------
Test: Test Xrender (offscreen) doing non-scaled Over blends
Time: 5.149 sec.
---------------------------------------------------------------
Test: Test Imlib2 doing non-scaled Over blends
Time: 6.237 sec.

After:

*** ROUND 1 ***
---------------------------------------------------------------
Test: Test Xrender doing non-scaled Over blends
Time: 4.947 sec.
---------------------------------------------------------------
Test: Test Xrender (offscreen) doing non-scaled Over blends
Time: 4.487 sec.
---------------------------------------------------------------
Test: Test Imlib2 doing non-scaled Over blends
Time: 6.235 sec.
---
 pixman/pixman-fast-path.c |    2 +-
 pixman/pixman-image.c     |    4 ++
 pixman/pixman-inlines.h   |   12 ++++----
 pixman/pixman-private.h   |   19 ++++++------
 pixman/pixman.c           |   69 ++++++++++++++++++++++++++++----------------
 5 files changed, 65 insertions(+), 41 deletions(-)

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index bbdc8e8..033efd7 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1764,7 +1764,7 @@ static const pixman_fast_path_t c_fast_paths[] =
 #define SIMPLE_ROTATE_FLAGS(angle)					  \
     (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM	|			  \
      FAST_PATH_NEAREST_FILTER			|			  \
-     FAST_PATH_SAMPLES_COVER_CLIP		|			  \
+     FAST_PATH_SAMPLES_COVER_CLIP_NEAREST	|			  \
      FAST_PATH_STANDARD_FLAGS)
 
 #define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)				  \
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 84bacf8..88262f7 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -250,6 +250,10 @@ compute_image_info (pixman_image_t *image)
     case PIXMAN_FILTER_GOOD:
     case PIXMAN_FILTER_BEST:
 	flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+
+	/* Reduce BILINEAR to NEAREST for identity transforms */
+	if (flags & FAST_PATH_ID_TRANSFORM)
+	    flags |= FAST_PATH_NEAREST_FILTER;
 	break;
 
     case PIXMAN_FILTER_CONVOLUTION:
diff --git a/pixman/pixman-inlines.h b/pixman/pixman-inlines.h
index f1e0cbd..3532867 100644
--- a/pixman/pixman-inlines.h
+++ b/pixman/pixman-inlines.h
@@ -585,7 +585,7 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 #define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)			\
     {   PIXMAN_OP_ ## op,						\
 	PIXMAN_ ## s,							\
-	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
+	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,    \
 	PIXMAN_null, 0,							\
 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
 	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
@@ -627,7 +627,7 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
     {   PIXMAN_OP_ ## op,						\
 	PIXMAN_ ## s,							\
-	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
+	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,	\
 	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
 	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
@@ -669,7 +669,7 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
     {   PIXMAN_OP_ ## op,						\
 	PIXMAN_ ## s,							\
-	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
+	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,	\
 	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
 	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
@@ -1157,7 +1157,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
 #define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)			\
     {   PIXMAN_OP_ ## op,						\
 	PIXMAN_ ## s,							\
-	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
+	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
 	PIXMAN_null, 0,							\
 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
 	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
@@ -1199,7 +1199,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
     {   PIXMAN_OP_ ## op,						\
 	PIXMAN_ ## s,							\
-	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
+	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
 	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
 	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
@@ -1241,7 +1241,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
     {   PIXMAN_OP_ ## op,						\
 	PIXMAN_ ## s,							\
-	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
+	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
 	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
 	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index a25897d..9c5cab6 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -609,14 +609,15 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
 #define FAST_PATH_IS_OPAQUE			(1 << 13)
 #define FAST_PATH_NO_NORMAL_REPEAT		(1 << 14)
 #define FAST_PATH_NO_NONE_REPEAT		(1 << 15)
-#define FAST_PATH_SAMPLES_COVER_CLIP		(1 << 16)
-#define FAST_PATH_X_UNIT_POSITIVE		(1 << 17)
-#define FAST_PATH_AFFINE_TRANSFORM		(1 << 18)
-#define FAST_PATH_Y_UNIT_ZERO			(1 << 19)
-#define FAST_PATH_BILINEAR_FILTER		(1 << 20)
-#define FAST_PATH_ROTATE_90_TRANSFORM		(1 << 21)
-#define FAST_PATH_ROTATE_180_TRANSFORM		(1 << 22)
-#define FAST_PATH_ROTATE_270_TRANSFORM		(1 << 23)
+#define FAST_PATH_X_UNIT_POSITIVE		(1 << 16)
+#define FAST_PATH_AFFINE_TRANSFORM		(1 << 17)
+#define FAST_PATH_Y_UNIT_ZERO			(1 << 18)
+#define FAST_PATH_BILINEAR_FILTER		(1 << 19)
+#define FAST_PATH_ROTATE_90_TRANSFORM		(1 << 20)
+#define FAST_PATH_ROTATE_180_TRANSFORM		(1 << 21)
+#define FAST_PATH_ROTATE_270_TRANSFORM		(1 << 22)
+#define FAST_PATH_SAMPLES_COVER_CLIP_NEAREST	(1 << 23)
+#define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR	(1 << 24)
 
 #define FAST_PATH_PAD_REPEAT						\
     (FAST_PATH_NO_NONE_REPEAT		|				\
@@ -652,7 +653,7 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
 #define SOURCE_FLAGS(format)						\
     (FAST_PATH_STANDARD_FLAGS |						\
      ((PIXMAN_ ## format == PIXMAN_solid) ?				\
-      0 : (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_ID_TRANSFORM)))
+      0 : (FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | FAST_PATH_NEAREST_FILTER | FAST_PATH_ID_TRANSFORM)))
 
 #define MASK_FLAGS(format, extra)					\
     ((PIXMAN_ ## format == PIXMAN_null) ? 0 : (SOURCE_FLAGS (format) | extra))
diff --git a/pixman/pixman.c b/pixman/pixman.c
index 3ecd311..5372960 100644
--- a/pixman/pixman.c
+++ b/pixman/pixman.c
@@ -524,10 +524,10 @@ analyze_extent (pixman_image_t       *image,
 		uint32_t             *flags)
 {
     pixman_transform_t *transform;
-    pixman_fixed_t *params;
     pixman_fixed_t x_off, y_off;
     pixman_fixed_t width, height;
     pixman_fixed_t m00, m10;
+    pixman_fixed_t *params;
     box_48_16_t transformed;
 
     if (!image)
@@ -556,15 +556,13 @@ analyze_extent (pixman_image_t       *image,
 	if (image->bits.width >= 0x7fff	|| image->bits.height >= 0x7fff)
 	    return FALSE;
 
-#define ID_AND_NEAREST (FAST_PATH_ID_TRANSFORM | FAST_PATH_NEAREST_FILTER)
-
-	if ((image->common.flags & ID_AND_NEAREST) == ID_AND_NEAREST &&
+	if ((image->common.flags & FAST_PATH_ID_TRANSFORM) == FAST_PATH_ID_TRANSFORM &&
 	    extents->x1 >= 0 &&
 	    extents->y1 >= 0 &&
 	    extents->x2 <= image->bits.width &&
 	    extents->y2 <= image->bits.height)
 	{
-	    *flags |= FAST_PATH_SAMPLES_COVER_CLIP;
+	    *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
 	    return TRUE;
 	}
 
@@ -614,10 +612,10 @@ analyze_extent (pixman_image_t       *image,
      * may happen during sampling. Note that (8 * pixman_fixed_e) is very far from
      * 0.5 so this won't cause the area computed to be overly pessimistic.
      */
-    transformed.x1 += x_off - 8 * pixman_fixed_e;
-    transformed.y1 += y_off - 8 * pixman_fixed_e;
-    transformed.x2 += x_off + width + 8 * pixman_fixed_e;
-    transformed.y2 += y_off + height + 8 * pixman_fixed_e;
+    transformed.x1 -= 8 * pixman_fixed_e;
+    transformed.y1 -= 8 * pixman_fixed_e;
+    transformed.x2 += 8 * pixman_fixed_e;
+    transformed.y2 += 8 * pixman_fixed_e;
 
     /* Check we don't overflow when the destination extents are expanded by one.
      * This ensures that compositing functions can simply walk the source space
@@ -626,21 +624,31 @@ analyze_extent (pixman_image_t       *image,
     m00 = transform? ABS (transform->matrix[0][0]) : pixman_fixed_1;
     m10 = transform? ABS (transform->matrix[1][0]) : 0;
 
-    if (!IS_16_16 (transformed.x1 - m00)	||
-	!IS_16_16 (transformed.y1 - m10)	||
-	!IS_16_16 (transformed.x2 + m00)	||
-	!IS_16_16 (transformed.y2 + m10))
+    if (!IS_16_16 (transformed.x1 + x_off - m00)		||
+	!IS_16_16 (transformed.y1 + y_off - m10)		||
+	!IS_16_16 (transformed.x2 + x_off + width + m00)	||
+	!IS_16_16 (transformed.y2 + y_off + height + m10))
     {
 	return FALSE;
     }
 
-    if (image->common.type == BITS					&&
-	pixman_fixed_to_int (transformed.x1) >= 0			&&
-	pixman_fixed_to_int (transformed.y1) >= 0			&&
-	pixman_fixed_to_int (transformed.x2) < image->bits.width	&&
-	pixman_fixed_to_int (transformed.y2) < image->bits.height)
+    if (image->common.type == BITS)
     {
-	*flags |= FAST_PATH_SAMPLES_COVER_CLIP;
+	if (pixman_fixed_to_int (transformed.x1) >= 0			&&
+	    pixman_fixed_to_int (transformed.y1) >= 0			&&
+	    pixman_fixed_to_int (transformed.x2) < image->bits.width	&&
+	    pixman_fixed_to_int (transformed.y2) < image->bits.height)
+	{
+	    *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
+	}
+
+	if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2) >= 0		  &&
+	    pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2) >= 0		  &&
+	    pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2) < image->bits.width &&
+	    pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2) < image->bits.height)
+	{
+	    *flags |= FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR;
+	}
     }
 
     return TRUE;
@@ -747,16 +755,27 @@ pixman_image_composite32 (pixman_op_t      op,
     if (!analyze_extent (mask, &extents, &mask_flags))
 	goto out;
 
-    /* If the clip is within the source samples, and the samples are opaque,
-     * then the source is effectively opaque.
+    /* If the clip is within the source samples, and the samples are
+     * opaque, then the source is effectively opaque.
      */
-#define BOTH (FAST_PATH_SAMPLES_OPAQUE | FAST_PATH_SAMPLES_COVER_CLIP)
-
-    if ((src_flags & BOTH) == BOTH)
+#define NEAREST_OPAQUE	(FAST_PATH_SAMPLES_OPAQUE |			\
+			 FAST_PATH_NEAREST_FILTER |			\
+			 FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+#define BILINEAR_OPAQUE	(FAST_PATH_SAMPLES_OPAQUE |			\
+			 FAST_PATH_BILINEAR_FILTER |			\
+			 FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR)
+
+    if ((src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
+	(src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
+    {
 	src_flags |= FAST_PATH_IS_OPAQUE;
+    }
 
-    if ((mask_flags & BOTH) == BOTH)
+    if ((mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
+	(mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
+    {
 	mask_flags |= FAST_PATH_IS_OPAQUE;
+    }
 
     /*
      * Check if we can replace our operator by a simpler one
-- 
1.6.0.6



More information about the Pixman mailing list