pixman: Branch 'master'

Sun Jun 10 11:43:29 PDT 2012

pixman/pixman-utils.c |   31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

New commits:
commit fd175f9d02f36b1e91973e4264519228547f5dc7
Author: Antti S. Lankila <alankila at bel.fi>
Date:   Sun Jun 10 19:22:56 2012 +0300

    Faster unorm_to_unorm for wide processing.
    
    Optimizing the unorm_to_unorm functions allows a speedup from:
    
    src_8888_2x10 =  L1:  62.08  L2:  60.73  M: 59.61 (  4.30%)  HT: 46.81
    	VT: 42.17  R: 43.18  RT: 26.01 (325Kops/s)
    
    to:
    
    src_8888_2x10 =  L1:  76.94  L2:  78.43  M: 75.87 (  5.59%)  HT: 56.73
    	VT: 52.39  R: 53.00  RT: 29.29 (363Kops/s)
    
    on a i7 Q720 -based laptop.
    
    The key of the patch is the observation that unorm_to_unorm's work can
    more easily be done with a simple multiplication and shift, when the
    function is applied repeatedly and the parameters are not compile-time
    constants. For instance, converting from 0xfe to 0xfefe (expanding
    from 8 bits to 16 bits) can be done by calculating
    
    c = c * 0x101
    
    However, sometimes the result is not a neat replication of all the
    bits. For instance, going from 10 bits to 16 bits can be done by
    calculating
    
    c = c * 0x401UL >> 4
    
    where the intermediate result is 20 bit wide repetition of the 10-bit
    pattern followed by shifting off the unnecessary lowest bits.
    
    The patch has the algorithm to calculate the factor and the shift, and
    converts the code to use it.

diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
index 2ec2594..93c061a 100644
--- a/pixman/pixman-utils.c
+++ b/pixman/pixman-utils.c
@@ -183,6 +183,22 @@ pixman_malloc_abc (unsigned int a,
 	return malloc (a * b * c);
 }
 
+static void
+unorm_to_unorm_params (int in_width, int out_width, uint32_t *factor, int *shift)
+{
+    int w = 0;
+
+    *factor = 0;
+    while (in_width != 0 && w < out_width)
+    {
+	*factor |= 1 << w;
+	w += in_width;
+    }
+
+    /* Did we generate too many bits? */
+    *shift = w - out_width;
+}
+
 /*
  * This function expands images from ARGB8 format to ARGB16.  To preserve
  * precision, it needs to know the original source format.  For example, if the
@@ -212,8 +228,15 @@ pixman_expand (uint64_t *           dst,
                   r_mask = ~(~0 << r_size),
                   g_mask = ~(~0 << g_size),
                   b_mask = ~(~0 << b_size);
+    uint32_t au_factor, ru_factor, gu_factor, bu_factor;
+    int au_shift, ru_shift, gu_shift, bu_shift;
     int i;
 
+    unorm_to_unorm_params (a_size, 16, &au_factor, &au_shift);
+    unorm_to_unorm_params (r_size, 16, &ru_factor, &ru_shift);
+    unorm_to_unorm_params (g_size, 16, &gu_factor, &gu_shift);
+    unorm_to_unorm_params (b_size, 16, &bu_factor, &bu_shift);
+
     /* Start at the end so that we can do the expansion in place
      * when src == dst
      */
@@ -226,7 +249,7 @@ pixman_expand (uint64_t *           dst,
 	if (a_size)
 	{
 	    a = (pixel >> a_shift) & a_mask;
-	    a16 = unorm_to_unorm (a, a_size, 16);
+            a16 = a * au_factor >> au_shift;
 	}
 	else
 	{
@@ -238,9 +261,9 @@ pixman_expand (uint64_t *           dst,
 	    r = (pixel >> r_shift) & r_mask;
 	    g = (pixel >> g_shift) & g_mask;
 	    b = (pixel >> b_shift) & b_mask;
-	    r16 = unorm_to_unorm (r, r_size, 16);
-	    g16 = unorm_to_unorm (g, g_size, 16);
-	    b16 = unorm_to_unorm (b, b_size, 16);
+            r16 = r * ru_factor >> ru_shift;
+            g16 = g * gu_factor >> gu_shift;
+            b16 = b * bu_factor >> bu_shift;
 	}
 	else
 	{