# [Pixman] [PATCH 5/5] Improve precision of calculations in pixman-gradient-walker.c

Søren Sandmann sandmann at cs.au.dk
Fri Mar 8 15:01:56 PST 2013

```From: Søren Sandmann Pedersen <ssp at redhat.com>

The computations in pixman-gradient-walker.c currently take place at
very limited 8 bit precision which results in quite visible artefacts
which currently looks like this:

http://i.imgur.com/kQbX8nd.png

With the changes in this commit, the gradient looks like this:

http://i.imgur.com/nUlyuKI.png

The images are also available here:

This patch computes pixels using floating point, but uses a faster
algorithm, which makes up for the loss of performance.

== Theory:

In both the new and the old algorithm, the various gradient
implementations compute a parameter x that indicates how far along the
gradient the current scanline is. The current algorithm has a cache of
the two color stops surrounding the last parameter; those are used in
a SIMD-within-register fashion in this way:

t1 = walker->left_rb * idist + walker->right_rb * dist;

where dist and idist are the distances to the left and right color
stops respectively normalized to the distance between the left and
right stops. The normalization (which involves a division) is captured
in another cached variable "stepper". The cached values are recomputed
whenever the parameter moves in between two different stops (called
"reset" in the implementation).

Because idist and dist are computed in 8 bits only, a lot of
information is lost, which is quite visible as the image linked above
shows.

interpolating between stops, the formula to be used is this:

t = ((x - left) / (right - left));

result = lc * (1 - t) + rc * t;

where

- x is the parameter as computed by the main gradient code,
- left is the position of the left color stop,
- right is the position of the right color stop
- lc is the color of the left color stop
- rc is the color of the right color stop

That formula can also be written like this:

result
= lc * (1 - t) + rc * t;
= lc + (rc - lc) * t
= lc + (rc - lc) * ((x - left) / (right - left))
= (rc - lc) / (right - left) * x +
lc - (left * (rc - lc)) / (right - left)
= s * x + b

where

s = (rc - lc) / (right - left)

and

b = lc - left * (rc - lc) / (right - left)
= (lc * (right - left) - left * (rc - lc)) / (right - left)
= (lc * right - rc * left) / (right - left)

To summarize, setting w = (right - left):

s = (rc - lc) / w
b = (lc * right - rc * left) / w

r = s * x + b

Since s and b only depend on the two active stops, both can be cached
so that the computation only needs to do one multiplication and one
addition per pixel (followed by premultiplication of the alpha
channel). That is, seven multiplications in total, which is the same
number as the old SIMD-within-register implementation had.

== Implementation notes:

The new formula described above is implemented in single precision
floating point, and the eight divisions necessary to compute the
cached values are done by multiplication with the reciprocal of the
distance between the color stops.

The alpha values used in the cached computation are scaled by 255.0,
whereas the RGB values are kept in the [0, 1] interval. The ensures
that after premultiplication, all values will be in the [0, 255]
interval.

== Performance impact (median of three runs of radial-perf-test):

== Intel Sandy Bridge, Core i5 @ 1.2GHz

Before: 0.014516
After:  0.014511
Change: 0.0%

== AMD Barcelona @ 1.2 GHz

Before: 0.021854 seconds
After:  0.022073 seconds
Change: 1.0% slowdown

Ie., little to no change, though conceivably the impact could be
bigger on machines with a bigger difference between integer and
floating point performance.
---
pixman/pixman-private.h         |    9 ++-
2 files changed, 80 insertions(+), 45 deletions(-)

index e7e724f..c2694c6 100644
@@ -27,6 +27,7 @@
#include <config.h>
#endif
#include "pixman-private.h"
+#include "pixman-combine32.h"

void
walker->left_x    = 0;
walker->right_x   = 0x10000;
walker->stepper   = 0;
-    walker->left_ag   = 0;
-    walker->left_rb   = 0;
-    walker->right_ag  = 0;
-    walker->right_rb  = 0;
+    walker->a_m       = 0;
+    walker->a_d       = 0;
+    walker->r_m       = 0;
+    walker->r_d       = 0;
+    walker->g_m       = 0;
+    walker->g_d       = 0;
+    walker->b_m       = 0;
+    walker->b_d       = 0;
+    walker->offset    = 0;
walker->repeat    = repeat;

walker->need_reset = TRUE;
right_x =  stops[n].x;
right_c = &stops[n].color;

+    walker->offset = 0;
if (walker->repeat == PIXMAN_REPEAT_NORMAL)
{
-	left_x  += (pos - x);
-	right_x += (pos - x);
+        walker->offset = x - pos;
}
else if (walker->repeat == PIXMAN_REPEAT_REFLECT)
{

x = 0x10000 - x;
}
-	left_x  += (pos - x);
-	right_x += (pos - x);
+        walker->offset = x - pos;
}
else if (walker->repeat == PIXMAN_REPEAT_NONE)
{
left_c = right_c;
}

-    walker->left_x   = left_x;
-    walker->right_x  = right_x;
-    walker->left_ag  = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8);
-    walker->left_rb  = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
-    walker->right_ag = ((right_c->alpha >> 8) << 16)  | (right_c->green >> 8);
-    walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
-
-    if (walker->left_x == walker->right_x                ||
-        (walker->left_ag == walker->right_ag &&
-	 walker->left_rb == walker->right_rb))
+    float la, lr, lg, lb;
+    float ra, rr, rg, rb;
+    float lx, rx;
+
+    la = (left_c->alpha * (1.0f/257.0f));
+    lr = (left_c->red * (1.0f/257.0f));
+    lg = (left_c->green * (1.0f/257.0f));
+    lb = (left_c->blue * (1.0f/257.0f));
+
+    ra = (right_c->alpha * (1.0f/257.0f));
+    rr = (right_c->red * (1.0f/257.0f));
+    rg = (right_c->green * (1.0f/257.0f));
+    rb = (right_c->blue * (1.0f/257.0f));
+
+    lx = left_x * (1.0f/65536.0f);
+    rx = right_x * (1.0f/65536.0f);
+
+    if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX)
{
-	walker->stepper = 0;
+	walker->a_m = walker->r_m = walker->g_m = walker->b_m = 0.0f;
+	walker->a_d = (la + ra) * (1.0 / 2.0f);
+	walker->r_d = (lr + rr) * (1.0 / 510.0f);
+	walker->g_d = (lg + rg) * (1.0 / 510.0f);
+	walker->b_d = (lb + rb) * (1.0 / 510.0f);
}
else
{
-	int32_t width = right_x - left_x;
-	walker->stepper = ((1 << 24) + width / 2) / width;
+	float w_rec = 1.0f / (rx - lx);
+
+	walker->a_d = (la * rx - ra * lx) * w_rec;
+	walker->r_d = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f);
+	walker->g_d = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f);
+	walker->b_d = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f);
+
+	walker->a_m = (ra - la) * w_rec;
+	walker->r_m = (rr - lr) * w_rec * (1.0f/255.0f);
+	walker->g_m = (rg - lg) * w_rec * (1.0f/255.0f);
+	walker->b_m = (rb - lb) * w_rec * (1.0f/255.0f);
}
+
+    walker->left_x   = left_x;
+    walker->right_x  = right_x;

walker->need_reset = FALSE;
}
@@ -142,31 +171,36 @@ uint32_t
pixman_fixed_48_16_t      x)
{
-    int dist, idist;
-    uint32_t t1, t2, a, color;
-
-    if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
+    float a, r, g, b;
+    uint8_t a8, r8, g8, b8;
+    uint32_t v;
+    float y;

-    dist  = ((int)(x - walker->left_x) * walker->stepper) >> 16;
-    idist = 256 - dist;
+    x += walker->offset;

-    /* combined INTERPOLATE and premultiply */
-    t1 = walker->left_rb * idist + walker->right_rb * dist;
-    t1 = (t1 >> 8) & 0xff00ff;
+    if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
+    {
+        x -= walker->offset;
+        x += walker->offset;
+    }

-    t2  = walker->left_ag * idist + walker->right_ag * dist;
-    t2 &= 0xff00ff00;
+    y = x * (1.0f / 65536.0f);

-    color = t2 & 0xff000000;
-    a     = t2 >> 24;
+    a = walker->a_m * y + walker->a_d;
+    r = a * (walker->r_m * y + walker->r_d);
+    g = a * (walker->g_m * y + walker->g_d);
+    b = a * (walker->b_m * y + walker->b_d);

-    t1  = t1 * a + 0x800080;
-    t1  = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
+    a8 = a + 0.5f;
+    r8 = r + 0.5f;
+    g8 = g + 0.5f;
+    b8 = b + 0.5f;

-    t2  = (t2 >> 8) * a + 0x800080;
-    t2  = (t2 + ((t2 >> 8) & 0xff00ff));
+    v = ((a8 << 24) & 0xff000000) |
+        ((r8 << 16) & 0x00ff0000) |
+        ((g8 <<  8) & 0x0000ff00) |
+        ((b8 >>  0) & 0x000000ff);

-    return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
+    return v;
}
-
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 91e329f..8c2c7f5 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -319,10 +319,11 @@ _pixman_image_validate (pixman_image_t *image);
*/
typedef struct
{
-    uint32_t                left_ag;
-    uint32_t                left_rb;
-    uint32_t                right_ag;
-    uint32_t                right_rb;
+    float		    a_m, a_d;
+    float		    r_m, r_d;
+    float		    g_m, g_d;
+    float		    b_m, b_d;
+    int32_t                 offset;
pixman_fixed_t	    left_x;
pixman_fixed_t          right_x;
pixman_fixed_t          stepper;
--
1.7.4

```