[Pixman] [PATCH 4/4] pixman-fast-path: Make bilinear cover fetcher use COVER_CLIP_TIGHT flag

Thu Sep 3 19:09:23 PDT 2015

The bilinear cover fetcher was implemented with similar assumptions to
FAST_BILINEAR_MAINLOOP_INT - that for all transformed destination
coordinates, you could divide by 2^16, round down and add 1, and still be
within the source image.

This patch effectively reverses this - dividing by 2^16, rounding up and
subtracting 1. The big advantage of this is we need only test for this
subtracted number being out of bounds and skip the corresponding pixel
load just once per row, at the start before entering the loop, whereas
with the original scheme, you would need to check every pixel.

To make the rounding up a simple operation, all the X coordinates
(including increments) are negated. There is an additional offset of
(1 << (16 - BILINEAR_INTERPOLATION_BITS)) - 1, to allow for the fact
that coordinates are truncated with rounding towards -infinity during
generation of weighting factors. Because the weight is inverted along
with the coordinates, you will also see that the weight is now
considered as the fraction of the way from the right pixel to the left
pixel, rather than vice versa - none of this increases the
computational complexity per pixel.

Tested using cover-test on armv7 with PIXMAN_DISABLE="arm-neon arm-simd"
---
 pixman/pixman-fast-path.c |   37 +++++++++++++++++++++++++------------
 1 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 53d4a1f..a17ca26 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -2280,18 +2280,29 @@ fetch_horizontal (bits_image_t *image, line_t *line,
 		  int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
 {
     uint32_t *bits = image->bits + y * image->rowstride;
-    int i;
+    int i, x0, x1;
+    int32_t dist_x;
+    uint32_t left, right;
+
+    x = (1 << (16 - BILINEAR_INTERPOLATION_BITS)) - 1 - x;
+    ux = -ux;
+
+    x0 = pixman_fixed_to_int (x);
+    x1 = x0 + 1;
+    dist_x = pixman_fixed_to_bilinear_weight (x);
+    left = dist_x ? *(bits - x1) : 0;
 
     for (i = 0; i < n; ++i)
     {
-	int x0 = pixman_fixed_to_int (x);
-	int x1 = x0 + 1;
-	int32_t dist_x;
-
-	uint32_t left = *(bits + x0);
-	uint32_t right = *(bits + x1);
+	if (i > 0)
+	{
+	    x0 = pixman_fixed_to_int (x);
+	    x1 = x0 + 1;
+	    dist_x = pixman_fixed_to_bilinear_weight (x);
+	    left = *(bits - x1);
+	}
 
-	dist_x = pixman_fixed_to_bilinear_weight (x);
+	right = *(bits - x0);
 	dist_x <<= (8 - BILINEAR_INTERPOLATION_BITS);
 
 #if SIZEOF_LONG <= 4
@@ -2301,11 +2312,11 @@ fetch_horizontal (bits_image_t *image, line_t *line,
 
 	    lag = (left & 0xff00ff00) >> 8;
 	    rag = (right & 0xff00ff00) >> 8;
-	    ag = (lag << 8) + dist_x * (rag - lag);
+	    ag = (rag << 8) + dist_x * (lag - rag);
 
 	    lrb = (left & 0x00ff00ff);
 	    rrb = (right & 0x00ff00ff);
-	    rb = (lrb << 8) + dist_x * (rrb - lrb);
+	    rb = (rrb << 8) + dist_x * (lrb - rrb);
 
 	    *((uint32_t *)(line->buffer + i)) = ag;
 	    *((uint32_t *)(line->buffer + i) + 1) = rb;
@@ -2323,7 +2334,7 @@ fetch_horizontal (bits_image_t *image, line_t *line,
 	    lagrb = (((uint64_t)lag) << 24) | lrb;
 	    ragrb = (((uint64_t)rag) << 24) | rrb;
 
-	    line->buffer[i] = (lagrb << 8) + dist_x * (ragrb - lagrb);
+	    line->buffer[i] = (ragrb << 8) + dist_x * (lagrb - ragrb);
 	}
 #endif
 
@@ -2350,6 +2361,8 @@ fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
 
     y0 = pixman_fixed_to_int (info->y);
     y1 = y0 + 1;
+    if (y1 == iter->image->bits.height)
+	y1 = y0;
     dist_y = pixman_fixed_to_bilinear_weight (info->y);
     dist_y <<= (8 - BILINEAR_INTERPOLATION_BITS);
 
@@ -3187,7 +3200,7 @@ static const pixman_iter_info_t fast_iters[] =
       (FAST_PATH_STANDARD_FLAGS			|
        FAST_PATH_SCALE_TRANSFORM		|
        FAST_PATH_BILINEAR_FILTER		|
-       FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),
+       FAST_PATH_SAMPLES_COVER_CLIP_TIGHT_BILINEAR),
       ITER_NARROW | ITER_SRC,
       fast_bilinear_cover_iter_init,
       NULL, NULL
-- 
1.7.5.4