[Pixman] [PATCH 05/10] pixman-utils.c, pixman-private.h: Add floating point conversion routines

Wed Sep 26 14:40:31 PDT 2012

On Wed, Sep 26, 2012 at 1:43 PM, Søren Sandmann <sandmann at cs.au.dk> wrote:
> From: Søren Sandmann Pedersen <ssp at redhat.com>
>
> A new struct argb_t containing a floating point pixel is added to
> pixman-private.h, and conversion routines are added to pixman-utils.c
> to convert normalized integers to and from that struct.
>
> New functions:
>
>   - pixman_expand_to_float()
>     Expands a buffer of integer pixels to a buffer of argb_t pixels
>
>   - pixman_contract_from_float()
>     Converts a buffer of argb_t pixels to a buffer of integer pixels
>
>   - pixman_float_to_unorm()
>     Converts a floating point number to an unsigned normalized integer
>
>   - pixman_unorm_to_float()
>     Converts an unsigned normalized integer to a floating point number
> ---
>  pixman/pixman-private.h |   35 +++++++++++++++
>  pixman/pixman-utils.c   |  107 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 142 insertions(+), 0 deletions(-)
>
> diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
> index c82316f..91f35ed 100644
> --- a/pixman/pixman-private.h
> +++ b/pixman/pixman-private.h
> @@ -45,6 +45,16 @@ typedef struct radial_gradient radial_gradient_t;
>  typedef struct bits_image bits_image_t;
>  typedef struct circle circle_t;
>
> +typedef struct argb_t argb_t;
> +
> +struct argb_t
> +{
> +    float a;
> +    float r;
> +    float g;
> +    float b;
> +};
> +
>  typedef void (*fetch_scanline_t) (pixman_image_t *image,
>                                   int             x,
>                                   int             y,
> @@ -792,12 +802,34 @@ pixman_expand (uint64_t *           dst,
>                 const uint32_t *     src,
>                 pixman_format_code_t format,
>                 int                  width);
> +void
> +pixman_expand_to_float (argb_t               *dst,
> +                       const uint32_t       *src,
> +                       pixman_format_code_t  format,
> +                       int                   width);
>
>  void
>  pixman_contract (uint32_t *      dst,
>                   const uint64_t *src,
>                   int             width);
>
> +void
> +pixman_contract_from_float (uint32_t     *dst,
> +                           const argb_t *src,
> +                           int           width);
> +
> +pixman_bool_t
> +_pixman_lookup_composite_function (pixman_implementation_t     *toplevel,
> +                                  pixman_op_t                  op,
> +                                  pixman_format_code_t         src_format,
> +                                  uint32_t                     src_flags,
> +                                  pixman_format_code_t         mask_format,
> +                                  uint32_t                     mask_flags,
> +                                  pixman_format_code_t         dest_format,
> +                                  uint32_t                     dest_flags,
> +                                  pixman_implementation_t    **out_imp,
> +                                  pixman_composite_func_t     *out_func);
> +
>  /* Region Helpers */
>  pixman_bool_t
>  pixman_region32_copy_from_region16 (pixman_region32_t *dst,
> @@ -957,6 +989,9 @@ unorm_to_unorm (uint32_t val, int from_bits, int to_bits)
>      return result;
>  }
>
> +uint16_t pixman_float_to_unorm (float f, int n_bits);
> +float pixman_unorm_to_float (uint16_t u, int n_bits);
> +
>  /*
>   * Various debugging code
>   */
> diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
> index e4a9730..4f9db29 100644
> --- a/pixman/pixman-utils.c
> +++ b/pixman/pixman-utils.c
> @@ -162,6 +162,113 @@ pixman_expand (uint64_t *           dst,
>      }
>  }
>
> +static force_inline uint16_t
> +float_to_unorm (float f, int n_bits)
> +{
> +    uint32_t u;
> +
> +    if (f > 1.0)
> +       f = 1.0;
> +    if (f < 0.0)
> +       f = 0.0;
> +
> +    u = f * (1 << n_bits);
> +    u -= (u >> n_bits);
> +
> +    return u;
> +}
> +
> +static force_inline float
> +unorm_to_float (uint16_t u, int n_bits)
> +{
> +    uint32_t m = ((1 << n_bits) - 1);
> +
> +    return (u & m) * (1.f / (float)m);
> +}
> +
> +/*
> + * This function expands images from a8r8g8b8 to argb_t.  To preserve
> + * precision, it needs to know from which source format the a8r8g8b8 pixels
> + * originally came.
> + *
> + * For example, if the source was PIXMAN_x1r5g5b5 and the red component
> + * contained bits 12345, then the 8-bit value is 12345123.  To correctly
> + * expand this to floating point, it should be 12345 / 31.0 and not
> + * 12345123 / 255.0.
> + */
> +void
> +pixman_expand_to_float (argb_t               *dst,
> +                       const uint32_t       *src,
> +                       pixman_format_code_t  format,
> +                       int                   width)
> +{
> +    int a_size, r_size, g_size, b_size;
> +    int a_shift, r_shift, g_shift, b_shift;
> +    int i;
> +
> +    if (!PIXMAN_FORMAT_VIS (format))
> +       format = PIXMAN_a8r8g8b8;
> +
> +    /*
> +     * Determine the sizes of each component and the masks and shifts
> +     * required to extract them from the source pixel.
> +     */
> +    a_size = PIXMAN_FORMAT_A (format);
> +    r_size = PIXMAN_FORMAT_R (format);
> +    g_size = PIXMAN_FORMAT_G (format);
> +    b_size = PIXMAN_FORMAT_B (format);
> +
> +    a_shift = 32 - a_size;
> +    r_shift = 24 - r_size;
> +    g_shift = 16 - g_size;
> +    b_shift =  8 - b_size;
> +
> +    /* Start at the end so that we can do the expansion in place
> +     * when src == dst
> +     */
> +    for (i = width - 1; i >= 0; i--)
> +    {
> +       const uint32_t pixel = src[i];
> +
> +       dst[i].a = a_size? unorm_to_float (pixel >> a_shift, a_size) : 1.0;
> +       dst[i].r = r_size? unorm_to_float (pixel >> r_shift, r_size) : 0.0;
> +       dst[i].g = g_size? unorm_to_float (pixel >> g_shift, g_size) : 0.0;
> +       dst[i].b = b_size? unorm_to_float (pixel >> b_shift, b_size) : 0.0;
> +    }
> +}
> +
> +uint16_t
> +pixman_float_to_unorm (float f, int n_bits)
> +{
> +    return float_to_unorm (f, n_bits);
> +}
> +
> +float
> +pixman_unorm_to_float (uint16_t u, int n_bits)
> +{
> +    return unorm_to_float (u, n_bits);
> +}
> +
> +void
> +pixman_contract_from_float (uint32_t     *dst,
> +                           const argb_t *src,
> +                           int           width)
> +{
> +    int i;
> +
> +    for (i = 0; i < width; ++i)
> +    {
> +       uint8_t a, r, g, b;
> +
> +       a = float_to_unorm (src[i].a, 8);
> +       r = float_to_unorm (src[i].r, 8);
> +       g = float_to_unorm (src[i].g, 8);
> +       b = float_to_unorm (src[i].b, 8);
> +
> +       dst[i] = (a << 24) | (r << 16) | (g << 8) | (b << 0);
> +    }
> +}
> +
>  /*
>   * Contracting is easier than expanding.  We just need to truncate the
>   * components.
> --
> 1.7.4
>
> _______________________________________________
> Pixman mailing list
> Pixman at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/pixman

As I'm sure you know, these functions can be implemented with SSE2 or
SSE4.1 if we convert 4 pixels at once. How can we override their
implementations with optimized ones?

SSE2 expand to float could be something like

__m128i vsrc = _mm_loadu_si128 (src);
/* vsrc = r g b a r g b a r g b a r g b a */

Get individual components of vsrc, such that

pix1 = 0 0 0 r 0 0 0 r 0 0 0 r 0 0 0 r
pix2 = 0 0 0 g 0 0 0 g 0 0 0 g 0 0 0 g
pix3 = 0 0 0 b 0 0 0 b 0 0 0 b 0 0 0 b
pix4 = 0 0 0 a 0 0 0 a 0 0 0 a 0 0 0 a

Convert to floats with _mm_cvtepi32_ps:

__m128 R = _mm_cvtepi32_ps(pix1);       /* - R 1 -:- R 2 -:- R 3 -:- R 4 - */
__m128 G = _mm_cvtepi32_ps(pix2);       /* - G 1 -:- G 2 -:- G 3 -:- G 4 - */
__m128 B = _mm_cvtepi32_ps(pix3);       /* - B 1 -:- B 2 -:- B 3 -:- B 4 - */
__m128 A = _mm_cvtepi32_ps(pix4);       /* - A 1 -:- A 2 -:- A 3 -:- A 4 - */

And finally transpose before storing.

_MM_TRANSPOSE4_PS (R, G, B, A);

Or, with SSE4 we can use _mm_cvtepu8_epi32:

__m128i vsrc = _mm_loadu_si128 (src);
/* vsrc = r g b a r g b a r g b a r g b a */

__m128i pix1 = _mm_cvtepu8_epi32 (vsrc);
__m128i pix2 = _mm_cvtepu8_epi32 (_mm_srli_si128 (vsrc, 4));  /* shift count is in bytes */
__m128i pix3 = _mm_cvtepu8_epi32 (_mm_srli_si128 (vsrc, 8));
__m128i pix4 = _mm_cvtepu8_epi32 (_mm_srli_si128 (vsrc, 12));

__m128 fpix1 = _mm_cvtepi32_ps (pix1);
__m128 fpix2 = _mm_cvtepi32_ps (pix2);
__m128 fpix3 = _mm_cvtepi32_ps (pix3);
__m128 fpix4 = _mm_cvtepi32_ps (pix4);

Totally untested. Probably has bugs. Waiting for Siarhei to show me
how to make this better. :)