[Pixman] [PATCH 05/10] pixman-utils.c, pixman-private.h: Add floating point conversion routines
Matt Turner
mattst88 at gmail.com
Wed Sep 26 14:40:31 PDT 2012
On Wed, Sep 26, 2012 at 1:43 PM, Søren Sandmann <sandmann at cs.au.dk> wrote:
> From: Søren Sandmann Pedersen <ssp at redhat.com>
>
> A new struct argb_t containing a floating point pixel is added to
> pixman-private.h, and conversion routines are added to pixman-utils.c
> to convert normalized integers to and from that struct.
>
> New functions:
>
> - pixman_expand_to_float()
> Expands a buffer of integer pixels to a buffer of argb_t pixels
>
> - pixman_contract_from_float()
> Converts a buffer of argb_t pixels to a buffer of integer pixels
>
> - pixman_float_to_unorm()
> Converts a floating point number to an unsigned normalized integer
>
> - pixman_unorm_to_float()
> Converts an unsigned normalized integer to a floating point number
> ---
> pixman/pixman-private.h | 35 +++++++++++++++
> pixman/pixman-utils.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 142 insertions(+), 0 deletions(-)
>
> diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
> index c82316f..91f35ed 100644
> --- a/pixman/pixman-private.h
> +++ b/pixman/pixman-private.h
> @@ -45,6 +45,16 @@ typedef struct radial_gradient radial_gradient_t;
> typedef struct bits_image bits_image_t;
> typedef struct circle circle_t;
>
> +typedef struct argb_t argb_t;
> +
> +struct argb_t
> +{
> + float a;
> + float r;
> + float g;
> + float b;
> +};
> +
> typedef void (*fetch_scanline_t) (pixman_image_t *image,
> int x,
> int y,
> @@ -792,12 +802,34 @@ pixman_expand (uint64_t * dst,
> const uint32_t * src,
> pixman_format_code_t format,
> int width);
> +void
> +pixman_expand_to_float (argb_t *dst,
> + const uint32_t *src,
> + pixman_format_code_t format,
> + int width);
>
> void
> pixman_contract (uint32_t * dst,
> const uint64_t *src,
> int width);
>
> +void
> +pixman_contract_from_float (uint32_t *dst,
> + const argb_t *src,
> + int width);
> +
> +pixman_bool_t
> +_pixman_lookup_composite_function (pixman_implementation_t *toplevel,
> + pixman_op_t op,
> + pixman_format_code_t src_format,
> + uint32_t src_flags,
> + pixman_format_code_t mask_format,
> + uint32_t mask_flags,
> + pixman_format_code_t dest_format,
> + uint32_t dest_flags,
> + pixman_implementation_t **out_imp,
> + pixman_composite_func_t *out_func);
> +
> /* Region Helpers */
> pixman_bool_t
> pixman_region32_copy_from_region16 (pixman_region32_t *dst,
> @@ -957,6 +989,9 @@ unorm_to_unorm (uint32_t val, int from_bits, int to_bits)
> return result;
> }
>
> +uint16_t pixman_float_to_unorm (float f, int n_bits);
> +float pixman_unorm_to_float (uint16_t u, int n_bits);
> +
> /*
> * Various debugging code
> */
> diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
> index e4a9730..4f9db29 100644
> --- a/pixman/pixman-utils.c
> +++ b/pixman/pixman-utils.c
> @@ -162,6 +162,113 @@ pixman_expand (uint64_t * dst,
> }
> }
>
> +static force_inline uint16_t
> +float_to_unorm (float f, int n_bits)
> +{
> + uint32_t u;
> +
> + if (f > 1.0)
> + f = 1.0;
> + if (f < 0.0)
> + f = 0.0;
> +
> + u = f * (1 << n_bits);
> + u -= (u >> n_bits);
> +
> + return u;
> +}
> +
> +static force_inline float
> +unorm_to_float (uint16_t u, int n_bits)
> +{
> + uint32_t m = ((1 << n_bits) - 1);
> +
> + return (u & m) * (1.f / (float)m);
> +}
> +
> +/*
> + * This function expands images from a8r8g8b8 to argb_t. To preserve
> + * precision, it needs to know from which source format the a8r8g8b8 pixels
> + * originally came.
> + *
> + * For example, if the source was PIXMAN_x1r5g5b5 and the red component
> + * contained bits 12345, then the 8-bit value is 12345123. To correctly
> + * expand this to floating point, it should be 12345 / 31.0 and not
> + * 12345123 / 255.0.
> + */
> +void
> +pixman_expand_to_float (argb_t *dst,
> + const uint32_t *src,
> + pixman_format_code_t format,
> + int width)
> +{
> + int a_size, r_size, g_size, b_size;
> + int a_shift, r_shift, g_shift, b_shift;
> + int i;
> +
> + if (!PIXMAN_FORMAT_VIS (format))
> + format = PIXMAN_a8r8g8b8;
> +
> + /*
> + * Determine the sizes of each component and the masks and shifts
> + * required to extract them from the source pixel.
> + */
> + a_size = PIXMAN_FORMAT_A (format);
> + r_size = PIXMAN_FORMAT_R (format);
> + g_size = PIXMAN_FORMAT_G (format);
> + b_size = PIXMAN_FORMAT_B (format);
> +
> + a_shift = 32 - a_size;
> + r_shift = 24 - r_size;
> + g_shift = 16 - g_size;
> + b_shift = 8 - b_size;
> +
> + /* Start at the end so that we can do the expansion in place
> + * when src == dst
> + */
> + for (i = width - 1; i >= 0; i--)
> + {
> + const uint32_t pixel = src[i];
> +
> + dst[i].a = a_size? unorm_to_float (pixel >> a_shift, a_size) : 1.0;
> + dst[i].r = r_size? unorm_to_float (pixel >> r_shift, r_size) : 0.0;
> + dst[i].g = g_size? unorm_to_float (pixel >> g_shift, g_size) : 0.0;
> + dst[i].b = b_size? unorm_to_float (pixel >> b_shift, b_size) : 0.0;
> + }
> +}
> +
> +uint16_t
> +pixman_float_to_unorm (float f, int n_bits)
> +{
> + return float_to_unorm (f, n_bits);
> +}
> +
> +float
> +pixman_unorm_to_float (uint16_t u, int n_bits)
> +{
> + return unorm_to_float (u, n_bits);
> +}
> +
> +void
> +pixman_contract_from_float (uint32_t *dst,
> + const argb_t *src,
> + int width)
> +{
> + int i;
> +
> + for (i = 0; i < width; ++i)
> + {
> + uint8_t a, r, g, b;
> +
> + a = float_to_unorm (src[i].a, 8);
> + r = float_to_unorm (src[i].r, 8);
> + g = float_to_unorm (src[i].g, 8);
> + b = float_to_unorm (src[i].b, 8);
> +
> + dst[i] = (a << 24) | (r << 16) | (g << 8) | (b << 0);
> + }
> +}
> +
> /*
> * Contracting is easier than expanding. We just need to truncate the
> * components.
> --
> 1.7.4
>
> _______________________________________________
> Pixman mailing list
> Pixman at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/pixman
As I'm sure you know, these functions can be done with SSE 2 or 4.1 if
we could convert 4 pixels at once. How can we override their
implementations with optimized ones?
SSE2 expand to float could be something like
__m128i vsrc = _mm_loadu_si128 (src); /* vsrc = r g b a r g b a r g b a r g b a */
Get individual components of vsrc, such that
pix1 = 0 0 0 r 0 0 0 r 0 0 0 r 0 0 0 r
pix2 = 0 0 0 g 0 0 0 g 0 0 0 g 0 0 0 g
pix3 = 0 0 0 b 0 0 0 b 0 0 0 b 0 0 0 b
pix4 = 0 0 0 a 0 0 0 a 0 0 0 a 0 0 0 a
Convert to floats with _mm_cvtepi32_ps:
__m128 R = _mm_cvtepi32_ps(pix1); /* - R 1 -:- R 2 -:- R 3 -:- R 4 - */
__m128 G = _mm_cvtepi32_ps(pix2); /* - G 1 -:- G 2 -:- G 3 -:- G 4 - */
__m128 B = _mm_cvtepi32_ps(pix3); /* - B 1 -:- B 2 -:- B 3 -:- B 4 - */
__m128 A = _mm_cvtepi32_ps(pix4); /* - A 1 -:- A 2 -:- A 3 -:- A 4 - */
And finally transpose before storing.
_MM_TRANSPOSE4_PS (R, G, B, A);
Or, with SSE4 we can use _mm_cvtepu8_epi32:
__m128i vsrc = _mm_loadu_si128 (src); /* vsrc = r g b a r g b a r g b a r g b a */
__m128i pix1 = _mm_cvtepu8_epi32 (vsrc);
__m128i pix2 = _mm_cvtepu8_epi32 (_mm_srli_si128 (vsrc, 4));
__m128i pix3 = _mm_cvtepu8_epi32 (_mm_srli_si128 (vsrc, 8));
__m128i pix4 = _mm_cvtepu8_epi32 (_mm_srli_si128 (vsrc, 12));
__m128 fpix1 = _mm_cvtepi32_ps (pix1);
__m128 fpix2 = _mm_cvtepi32_ps (pix2);
__m128 fpix3 = _mm_cvtepi32_ps (pix3);
__m128 fpix4 = _mm_cvtepi32_ps (pix4);
Totally untested. Probably has bugs. Waiting for Siarhei to show me
how to make this better. :)
More information about the Pixman
mailing list