[Mesa-dev] [PATCH 2/3] mesa/main: Add sse2 streaming clamping
Bruno Jimenez
brunojimen at gmail.com
Wed Nov 12 09:36:37 PST 2014
On Wed, 2014-11-12 at 14:50 +0200, Juha-Pekka Heikkila wrote:
> Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
> ---
> src/mesa/Makefile.am | 8 +++
> src/mesa/main/sse2_clamping.c | 138 ++++++++++++++++++++++++++++++++++++++++++
> src/mesa/main/sse2_clamping.h | 49 +++++++++++++++
> 3 files changed, 195 insertions(+)
> create mode 100644 src/mesa/main/sse2_clamping.c
> create mode 100644 src/mesa/main/sse2_clamping.h
>
> diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
> index 932db4f..43dbe87 100644
> --- a/src/mesa/Makefile.am
> +++ b/src/mesa/Makefile.am
> @@ -111,6 +111,10 @@ if SSE41_SUPPORTED
> ARCH_LIBS += libmesa_sse41.la
> endif
>
> +if SSE2_SUPPORTED
> +ARCH_LIBS += libmesa_sse2.la
> +endif
> +
> MESA_ASM_FILES_FOR_ARCH =
>
> if HAVE_X86_ASM
> @@ -155,6 +159,10 @@ libmesa_sse41_la_SOURCES = \
> main/sse_minmax.c
> libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) -msse4.1
>
> +libmesa_sse2_la_SOURCES = \
> + main/sse2_clamping.c
> +libmesa_sse2_la_CFLAGS = $(AM_CFLAGS) -msse2
> +
> pkgconfigdir = $(libdir)/pkgconfig
> pkgconfig_DATA = gl.pc
>
> diff --git a/src/mesa/main/sse2_clamping.c b/src/mesa/main/sse2_clamping.c
> new file mode 100644
> index 0000000..66c7dc7
> --- /dev/null
> +++ b/src/mesa/main/sse2_clamping.c
> @@ -0,0 +1,138 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + * Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
> + *
> + */
> +
> +#ifdef __SSE2__
> +#include "main/macros.h"
> +#include "main/sse2_clamping.h"
> +#include <emmintrin.h>
> +
> +/**
> + * Clamp four float values to [min,max]
> + */
> +static inline void
> +_mesa_clamp_float_rgba(GLfloat src[4], GLfloat result[4], const float min,
> + const float max)
> +{
> + __m128 operand, minval, maxval;
> +
> + operand = _mm_loadu_ps(src);
> + minval = _mm_set1_ps(min);
> + maxval = _mm_set1_ps(max);
> + operand = _mm_max_ps(operand, minval);
> + operand = _mm_min_ps(operand, maxval);
> + _mm_storeu_ps(result, operand);
> +}
> +
> +
> +/* Clamp n amount float rgba pixels to [min,max] using SSE2
> + */
> +__attribute__((optimize("unroll-loops")))
> +void
> +_mesa_streaming_clamp_float_rgba(const GLuint n, GLfloat rgba_src[][4],
> + GLfloat rgba_dst[][4], const GLfloat min,
> + const GLfloat max)
> +{
> + int c, prefetch_c;
> + float* worker = &rgba_src[0][0];
> + __m128 operand[2], minval, maxval;
> +
> + _mm_prefetch((char*) (((unsigned long)worker)|0x1f) + 65, _MM_HINT_T0);
^^^^ ^^^
Hi,
May I ask why precisely these numbers?
> +
> + minval = _mm_set1_ps(min);
> + maxval = _mm_set1_ps(max);
> +
> + for (c = n*4; c > 0 && (((unsigned long)worker)&0x1f) != 0; c--, worker++) {
^^^^^
I guess that this is for alignment, but you only need to align to a
16-byte boundary, not 32. Or maybe I am missing something obvious.
> + operand[0] = _mm_load_ss(worker);
> + operand[0] = _mm_max_ss(operand[0], minval);
> + operand[0] = _mm_min_ss(operand[0], maxval);
> + _mm_store_ss(worker, operand[0]);
> + }
> +
> + while (c >= 8) {
> + _mm_prefetch((char*) worker + 64, _MM_HINT_T0);
^^^
> +
> + for (prefetch_c = 64/8; prefetch_c > 0 && c >= 8; prefetch_c--, c-=8,
^^^^
May I also ask why these numbers?
Thanks in advance!
Bruno
> + worker += 8) {
> +
> + operand[0] = _mm_load_ps(worker);
> + operand[1] = _mm_load_ps(worker+4);
> + operand[0] = _mm_max_ps(operand[0], minval);
> + operand[1] = _mm_max_ps(operand[1], minval);
> + operand[0] = _mm_min_ps(operand[0], maxval);
> + operand[1] = _mm_min_ps(operand[1], maxval);
> +
> + _mm_store_ps(worker, operand[0]);
> + _mm_store_ps(worker+4, operand[1]);
> + }
> + }
> +
> + for (; c > 0; c--, worker++) {
> + operand[0] = _mm_load_ss(worker);
> + operand[0] = _mm_max_ss(operand[0], minval);
> + operand[0] = _mm_min_ss(operand[0], maxval);
> + _mm_store_ss(worker, operand[0]);
> + }
> +}
> +
> +
> +/* Clamp n amount float rgba pixels to [min,max] using SSE2 and apply
> + * scaling and mapping to components.
> + *
> + * this replace handling of [RGBA] channels:
> + * rgba_temp[RCOMP] = CLAMP(rgba[i][RCOMP], 0.0F, 1.0F);
> + * rgba[i][RCOMP] = rMap[F_TO_I(rgba_temp[RCOMP] * scale[RCOMP])];
> + */
> +void
> +_mesa_clamp_float_rgba_scale_and_map(const GLuint n, GLfloat rgba_src[][4],
> + GLfloat rgba_dst[][4], const GLfloat min,
> + const GLfloat max,
> + const GLfloat scale[4],
> + const GLfloat* rMap, const GLfloat* gMap,
> + const GLfloat* bMap, const GLfloat* aMap)
> +{
> + int i;
> + GLfloat __attribute__((aligned(16))) temp[4];
> + __m128 *operand = (__m128*) &temp, multiplier, mmove;
> + __m128i truncated_integers;
> +
> + const unsigned int* map_p = (const unsigned int*) &truncated_integers;
> +
> + multiplier = _mm_loadu_ps(scale);
> +
> + for(i = 0; i < n; i++) {
> + _mesa_clamp_float_rgba(rgba_src[i], temp, min, max);
> +
> + *operand = _mm_mul_ps(multiplier, *operand);
> + truncated_integers = _mm_cvttps_epi32(*operand);
> + mmove = _mm_set_ps(aMap[map_p[ACOMP]], bMap[map_p[BCOMP]],
> + gMap[map_p[GCOMP]], rMap[map_p[RCOMP]] );
> +
> + _mm_storeu_ps(rgba_dst[i], mmove);
> + }
> +}
> +
> +#endif /* __SSE2__ */
> diff --git a/src/mesa/main/sse2_clamping.h b/src/mesa/main/sse2_clamping.h
> new file mode 100644
> index 0000000..688fab7
> --- /dev/null
> +++ b/src/mesa/main/sse2_clamping.h
> @@ -0,0 +1,49 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + * Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
> + *
> + */
> +
> +#ifdef __SSE2__
> +
> +/* Clamp n amount float rgba pixels to [min,max] using SSE2
> + */
> +void
> +_mesa_streaming_clamp_float_rgba(const GLuint n, GLfloat rgba_src[][4],
> + GLfloat rgba_dst[][4], const GLfloat min,
> + const GLfloat max);
> +
> +
> +/* Clamp n amount float rgba pixels to [min,max] using SSE2 and apply
> + * scaling and mapping to components.
> + */
> +void
> +_mesa_clamp_float_rgba_scale_and_map(const GLuint n, GLfloat rgba_src[][4],
> + GLfloat rgba_dst[][4], const GLfloat min,
> + const GLfloat max,
> + const GLfloat scale[4],
> + const GLfloat* rMap, const GLfloat* gMap,
> + const GLfloat* bMap, const GLfloat* aMap);
> +
> +#endif /* __SSE2__ */
More information about the mesa-dev
mailing list