[Mesa-dev] [PATCH] util: try to use SSE instructions with MSVC and 32-bit gcc

Brian Paul brianp at vmware.com
Wed Aug 17 17:59:01 UTC 2016


The lrint() and lrintf() functions are pretty slow and make some
texture transfers very inefficient.  This patch makes a better effort
at using SSE intrinsics instead for 32-bit gcc and MSVC.

Note, this patch doesn't address the use of SSE4.1 with MSVC.

Reviewed-by: José Fonseca <jfonseca at vmware.com>
---
 src/util/rounding.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/util/rounding.h b/src/util/rounding.h
index afb38fb..ab55ebb 100644
--- a/src/util/rounding.h
+++ b/src/util/rounding.h
@@ -29,9 +29,11 @@
 #include <limits.h>
 #include <stdint.h>
 
-#ifdef __x86_64__
+#if defined(__SSE__) || defined(_MSC_VER)
+/* MSVC always has SSE nowadays */
 #include <xmmintrin.h>
 #include <emmintrin.h>
+#define ROUND_WITH_SSE 1
 #endif
 
 #ifdef __SSE4_1__
@@ -95,7 +97,7 @@ _mesa_roundeven(double x)
 static inline long
 _mesa_lroundevenf(float x)
 {
-#ifdef __x86_64__
+#ifdef ROUND_WITH_SSE
 #if LONG_MAX == INT64_MAX
    return _mm_cvtss_si64(_mm_load_ss(&x));
 #elif LONG_MAX == INT32_MAX
@@ -115,7 +117,7 @@ _mesa_lroundevenf(float x)
 static inline long
 _mesa_lroundeven(double x)
 {
-#ifdef __x86_64__
+#ifdef ROUND_WITH_SSE
 #if LONG_MAX == INT64_MAX
    return _mm_cvtsd_si64(_mm_load_sd(&x));
 #elif LONG_MAX == INT32_MAX
@@ -128,4 +130,7 @@ _mesa_lroundeven(double x)
 #endif
 }
 
+
+#undef ROUND_WITH_SSE
+
 #endif
-- 
1.9.1



More information about the mesa-dev mailing list