[Mesa-dev] [PATCH 2/3] mesa/main: Add sse2 streaming clamping

Juha-Pekka Heikkila juhapekka.heikkila at gmail.com
Wed Nov 12 04:50:15 PST 2014


Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
---
 src/mesa/Makefile.am          |   8 +++
 src/mesa/main/sse2_clamping.c | 138 ++++++++++++++++++++++++++++++++++++++++++
 src/mesa/main/sse2_clamping.h |  49 +++++++++++++++
 3 files changed, 195 insertions(+)
 create mode 100644 src/mesa/main/sse2_clamping.c
 create mode 100644 src/mesa/main/sse2_clamping.h

diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
index 932db4f..43dbe87 100644
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -111,6 +111,10 @@ if SSE41_SUPPORTED
 ARCH_LIBS += libmesa_sse41.la
 endif
 
+if SSE2_SUPPORTED
+ARCH_LIBS += libmesa_sse2.la
+endif
+
 MESA_ASM_FILES_FOR_ARCH =
 
 if HAVE_X86_ASM
@@ -155,6 +159,10 @@ libmesa_sse41_la_SOURCES = \
 	main/sse_minmax.c
 libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) -msse4.1
 
+libmesa_sse2_la_SOURCES = \
+	main/sse2_clamping.c
+libmesa_sse2_la_CFLAGS = $(AM_CFLAGS) -msse2
+
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = gl.pc
 
diff --git a/src/mesa/main/sse2_clamping.c b/src/mesa/main/sse2_clamping.c
new file mode 100644
index 0000000..66c7dc7
--- /dev/null
+++ b/src/mesa/main/sse2_clamping.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
+ *
+ */
+
+#ifdef __SSE2__
+#include "main/macros.h"
+#include "main/sse2_clamping.h"
+#include <emmintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Clamp the four floats of one RGBA pixel to [min, max].
+ *
+ * \param src     the four components to read (unaligned load)
+ * \param result  receives the four clamped components (unaligned store)
+ * \param min     lower bound applied to every component
+ * \param max     upper bound applied to every component
+ */
+static inline void
+_mesa_clamp_float_rgba(GLfloat src[4], GLfloat result[4], const float min,
+                       const float max)
+{
+   const __m128 pixel = _mm_loadu_ps(src);
+   const __m128 lower = _mm_set1_ps(min);
+   const __m128 upper = _mm_set1_ps(max);
+
+   /* Raise to the lower bound first, then cap at the upper bound. */
+   _mm_storeu_ps(result, _mm_min_ps(_mm_max_ps(pixel, lower), upper));
+}
+
+
+/**
+ * Clamp n float RGBA pixels (4 * n floats) to [min, max] using SSE2.
+ *
+ * The floats are read from rgba_src and the clamped values are written to
+ * the same positions of rgba_dst.  The two arguments may name the same array
+ * (in-place clamp); partially overlapping arrays are not supported.
+ *
+ * \param n         number of RGBA pixels to process
+ * \param rgba_src  pixels to read
+ * \param rgba_dst  receives the clamped pixels
+ * \param min       lower bound applied to every component
+ * \param max       upper bound applied to every component
+ */
+__attribute__((optimize("unroll-loops")))
+void
+_mesa_streaming_clamp_float_rgba(const GLuint n, GLfloat rgba_src[][4],
+                                 GLfloat rgba_dst[][4], const GLfloat min,
+                                 const GLfloat max)
+{
+   const float *src = &rgba_src[0][0];
+   float *dst = &rgba_dst[0][0];
+   /* size_t so that 4 * n cannot wrap into a negative int for large n. */
+   size_t remaining = (size_t) n * 4;
+   const __m128 minval = _mm_set1_ps(min);
+   const __m128 maxval = _mm_set1_ps(max);
+   __m128 lo, hi;
+   int group;
+
+   /* Same address as ((address | 0x1f) + 65), but formed with pointer
+    * arithmetic so the pointer is never rebuilt from a narrower integer.
+    */
+   _mm_prefetch((const char *) src + (0x1f - ((uintptr_t) src & 0x1f)) + 65,
+                _MM_HINT_T0);
+
+   /* Head: one float at a time until the source is 32-byte aligned. */
+   for (; remaining > 0 && ((uintptr_t) src & 0x1f) != 0;
+        remaining--, src++, dst++) {
+      lo = _mm_load_ss(src);
+      lo = _mm_max_ss(lo, minval);
+      lo = _mm_min_ss(lo, maxval);
+      _mm_store_ss(dst, lo);
+   }
+
+   /* Body: eight floats (two pixels) per iteration, with one prefetch issued
+    * for every group of up to 64/8 iterations.
+    */
+   while (remaining >= 8) {
+      _mm_prefetch((const char *) src + 64, _MM_HINT_T0);
+
+      for (group = 64 / 8; group > 0 && remaining >= 8;
+           group--, remaining -= 8, src += 8, dst += 8) {
+         /* The head loop aligned src, so aligned loads are safe here. */
+         lo = _mm_load_ps(src);
+         hi = _mm_load_ps(src + 4);
+         lo = _mm_max_ps(lo, minval);
+         hi = _mm_max_ps(hi, minval);
+         lo = _mm_min_ps(lo, maxval);
+         hi = _mm_min_ps(hi, maxval);
+
+         /* Only src was aligned above; dst may sit at any float-aligned
+          * address, hence the unaligned stores.
+          */
+         _mm_storeu_ps(dst, lo);
+         _mm_storeu_ps(dst + 4, hi);
+      }
+   }
+
+   /* Tail: fewer than eight floats are left. */
+   for (; remaining > 0; remaining--, src++, dst++) {
+      lo = _mm_load_ss(src);
+      lo = _mm_max_ss(lo, minval);
+      lo = _mm_min_ss(lo, maxval);
+      _mm_store_ss(dst, lo);
+   }
+}
+
+
+/**
+ * Clamp n float RGBA pixels to [min, max] using SSE2, then scale every
+ * component and replace it with an entry of that channel's lookup table.
+ *
+ * For each pixel i and each channel this stands in for the scalar sequence
+ *    rgba_temp[RCOMP] = CLAMP(rgba[i][RCOMP], 0.0F, 1.0F);
+ *    rgba[i][RCOMP] = rMap[F_TO_I(rgba_temp[RCOMP] * scale[RCOMP])];
+ *
+ * NOTE(review): the float-to-index conversion here truncates toward zero
+ * (_mm_cvttps_epi32) -- confirm that this matches what F_TO_I does.
+ *
+ * The indices are used as unsigned values and no bounds check is made, so
+ * the caller must size each table for the largest max * scale[c] it passes.
+ *
+ * \param n         number of RGBA pixels to process
+ * \param rgba_src  pixels to read
+ * \param rgba_dst  receives the mapped pixels
+ * \param min       lower clamp bound
+ * \param max       upper clamp bound
+ * \param scale     per-component multiplier applied after clamping
+ * \param rMap      lookup table for the red channel
+ * \param gMap      lookup table for the green channel
+ * \param bMap      lookup table for the blue channel
+ * \param aMap      lookup table for the alpha channel
+ */
+void
+_mesa_clamp_float_rgba_scale_and_map(const GLuint n, GLfloat rgba_src[][4],
+                                     GLfloat rgba_dst[][4], const GLfloat min,
+                                     const GLfloat max,
+                                     const GLfloat scale[4],
+                                     const GLfloat* rMap, const GLfloat* gMap,
+                                     const GLfloat* bMap, const GLfloat* aMap)
+{
+   /* Plain arrays filled and read with explicit load/store intrinsics, so no
+    * SSE register object is ever accessed through a pointer of another type.
+    */
+   GLfloat __attribute__((aligned(16))) clamped[4];
+   unsigned int __attribute__((aligned(16))) index[4];
+   const __m128 multiplier = _mm_loadu_ps(scale);
+   __m128 scaled, mapped;
+   GLuint i;
+
+   for (i = 0; i < n; i++) {
+      _mesa_clamp_float_rgba(rgba_src[i], clamped, min, max);
+
+      scaled = _mm_mul_ps(multiplier, _mm_load_ps(clamped));
+      _mm_store_si128((__m128i *) index, _mm_cvttps_epi32(scaled));
+
+      /* _mm_set_ps takes its arguments from the highest element down. */
+      mapped = _mm_set_ps(aMap[index[ACOMP]], bMap[index[BCOMP]],
+                          gMap[index[GCOMP]], rMap[index[RCOMP]]);
+
+      _mm_storeu_ps(rgba_dst[i], mapped);
+   }
+}
+
+#endif /* __SSE2__ */
diff --git a/src/mesa/main/sse2_clamping.h b/src/mesa/main/sse2_clamping.h
new file mode 100644
index 0000000..688fab7
--- /dev/null
+++ b/src/mesa/main/sse2_clamping.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
+ *
+ */
+
+#ifdef __SSE2__
+
+/**
+ * Clamp n float RGBA pixels (4 * n floats) to [min, max] using SSE2.
+ *
+ * \param n         number of RGBA pixels to process
+ * \param rgba_src  pixels to clamp
+ * \param rgba_dst  destination array; callers should pass the same array as
+ *                  rgba_src (in-place clamp) -- TODO confirm whether
+ *                  distinct arrays are meant to be supported
+ * \param min       lower bound applied to every component
+ * \param max       upper bound applied to every component
+ */
+void
+_mesa_streaming_clamp_float_rgba(const GLuint n, GLfloat rgba_src[][4],
+                                 GLfloat rgba_dst[][4], const GLfloat min,
+                                 const GLfloat max);
+
+
+/**
+ * Clamp n float RGBA pixels to [min, max] using SSE2, then scale every
+ * component and replace it with an entry of that channel's lookup table.
+ *
+ * Each clamped component is multiplied by its scale[] entry, converted to an
+ * integer index and looked up in the table for its channel.  The lookups are
+ * not bounds checked.
+ *
+ * \param n         number of RGBA pixels to process
+ * \param rgba_src  pixels to read
+ * \param rgba_dst  receives the mapped pixels
+ * \param min       lower clamp bound
+ * \param max       upper clamp bound
+ * \param scale     per-component multiplier applied after clamping
+ * \param rMap      lookup table for the red channel
+ * \param gMap      lookup table for the green channel
+ * \param bMap      lookup table for the blue channel
+ * \param aMap      lookup table for the alpha channel
+ */
+void
+_mesa_clamp_float_rgba_scale_and_map(const GLuint n, GLfloat rgba_src[][4],
+                                     GLfloat rgba_dst[][4], const GLfloat min,
+                                     const GLfloat max,
+                                     const GLfloat scale[4],
+                                     const GLfloat* rMap, const GLfloat* gMap,
+                                     const GLfloat* bMap, const GLfloat* aMap);
+
+#endif /* __SSE2__ */
-- 
1.8.5.1



More information about the mesa-dev mailing list