[Mesa-dev] [PATCH 3/3] util: Optimize _mesa_roundeven with SSE 4.1.

Matt Turner mattst88 at gmail.com
Thu Mar 12 12:11:42 PDT 2015


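The SSE 4.1 ROUNDSS/ROUNDSD instructions let us implement roundeven directly:
the rounding behavior is encoded in the instruction's immediate operand, so the
result does not depend on the current floating-point rounding mode. When not
building for SSE 4.1, we assume that the rounding mode has not been modified
(as we do in the rest of Mesa) and use rint().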

glibc uses the ROUND instruction in rint() after a cpuid check. This
patch just lets us inline it directly when we're already building for
SSE 4.1.
---
 src/util/rounding.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/util/rounding.h b/src/util/rounding.h
index d128524..e82fb59 100644
--- a/src/util/rounding.h
+++ b/src/util/rounding.h
@@ -23,6 +23,10 @@
 
 #include <math.h>
 
+#ifdef __SSE4_1__
+#include <smmintrin.h>
+#endif
+
 /* The C standard library has functions round()/rint()/nearbyint() that round
  * their arguments according to the rounding mode set in the floating-point
  * control register. While there are trunc()/ceil()/floor() functions that do
@@ -38,10 +42,16 @@ static inline float
 _mesa_roundevenf(float x)
 {
    float ret;
+#ifdef __SSE4_1__
+   __m128 m = _mm_load_ss(&x);
+   m = _mm_round_ss(m, m, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+   _mm_store_ss(&ret, m);
+#else
    /* Assume that the floating-point rounding mode has not been changed from
     * the default (Round to nearest).
     */
    ret = rintf(x);
+#endif
    return ret;
 }
 
@@ -49,9 +59,15 @@ static inline double
 _mesa_roundeven(double x)
 {
    double ret;
+#ifdef __SSE4_1__
+   __m128d m = _mm_load_sd(&x);
+   m = _mm_round_sd(m, m, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+   _mm_store_sd(&ret, m);
+#else
    /* Assume that the floating-point rounding mode has not been changed from
     * the default (Round to nearest).
     */
    ret = rint(x);
+#endif
    return ret;
 }
-- 
2.0.5



More information about the mesa-dev mailing list