[PATCH 2/3] Improve CPU feature detection.

Liu Xinyun xinyun.liu at intel.com
Wed Dec 8 02:46:43 PST 2010


Add SSSE3 dynamic detection.

Signed-off-by: Liu Xinyun <xinyun.liu at intel.com>
Signed-off-by: Xu Samuel <samuel.xu at intel.com>
Signed-off-by: Ma Ling <ling.ma at intel.com>
Signed-off-by: Zhao Yakui <yakui.zhao at intel.com>
---
 pixman/pixman-cpu.c |  114 ++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 100 insertions(+), 14 deletions(-)

diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index e4fb1e4..389c6e5 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -25,10 +25,13 @@
 
 #include <string.h>
 
-#if defined(USE_ARM_SIMD) && defined(_MSC_VER)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#if defined(USE_ARM_SIMD)
 /* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */
 #include <windows.h>
-#endif
+#endif /* USE_ARM_SIMD */
+#endif /* _MSC_VER */
 
 #include "pixman-private.h"
 
@@ -332,14 +335,15 @@ pixman_have_arm_neon (void)
 
 #endif /* USE_ARM_SIMD || USE_ARM_NEON */
 
-#if defined(USE_MMX) || defined(USE_SSE2)
+#if defined(USE_MMX) || defined(USE_SSE2) || defined(USE_SSSE3)
 /* The CPU detection code needs to be in a file not compiled with
  * "-mmmx -msse", as gcc would generate CMOV instructions otherwise
  * that would lead to SIGILL instructions on old CPUs that don't have
  * it.
  */
-#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
-
+#if (!defined(__amd64__) && !defined(__x86_64__) && \
+     !defined(_M_AMD64)) || defined(USE_SSSE3)
+/*32 bit or (64 bit with USE_SSSE3 defined)*/
 #ifdef HAVE_GETISAX
 #include <sys/auxv.h>
 #endif
@@ -351,15 +355,19 @@ typedef enum
     MMX_EXTENSIONS = 0x2,
     SSE = 0x6,
     SSE2 = 0x8,
-    CMOV = 0x10
+    CMOV = 0x10,
+    SSSE3 = 0x20
 } cpu_features_t;
 
+#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
 
+/* 32 bits implementation */
 static unsigned int
 detect_cpu_features (void)
 {
     unsigned int features = 0;
     unsigned int result = 0;
+    unsigned int result_c = 0;
 
 #ifdef HAVE_GETISAX
     if (getisax (&result, 1))
@@ -374,6 +382,8 @@ detect_cpu_features (void)
 	    features |= SSE;
 	if (result & AV_386_SSE2)
 	    features |= SSE2;
+	if (result & AV_386_SSSE3)
+	    features |= SSSE3;
     }
 #else
     char vendor[13];
@@ -419,10 +429,12 @@ detect_cpu_features (void)
         "pop %%ebx\n"
         "1:\n"
         "mov %%edx, %0\n"
+	"mov %%ecx, %4\n"
 	: "=r" (result),
-        "=m" (vendor[0]),
-        "=m" (vendor[4]),
-        "=m" (vendor[8])
+	"=m" (vendor[0]),
+	"=m" (vendor[4]),
+	"=m" (vendor[8]),
+	"=r" (result_c)
 	:
 	: "%eax", "%ecx", "%edx"
         );
@@ -456,6 +468,7 @@ detect_cpu_features (void)
 	pop ebx
     nocpuid:
 	mov result, edx
+	mov result_c, ecx
     }
     memmove (vendor + 0, &vendor0, 4);
     memmove (vendor + 4, &vendor1, 4);
@@ -466,7 +479,7 @@ detect_cpu_features (void)
 #endif
 
     features = 0;
-    if (result)
+    if (result || result_c)
     {
 	/* result now contains the standard feature bits */
 	if (result & (1 << 15))
@@ -477,6 +490,8 @@ detect_cpu_features (void)
 	    features |= SSE;
 	if (result & (1 << 26))
 	    features |= SSE2;
+	if (result_c & (1 << 9))
+	    features |= SSSE3;
 	if ((features & MMX) && !(features & SSE) &&
 	    (strcmp (vendor, "AuthenticAMD") == 0 ||
 	     strcmp (vendor, "Geode by NSC") == 0))
@@ -498,7 +513,7 @@ detect_cpu_features (void)
 		: "=r" (result)
 		:
 		: "%eax", "%ecx", "%edx"
-	        );
+		);
 #elif defined _MSC_VER
 	    _asm {
 		push ebx
@@ -523,6 +538,48 @@ detect_cpu_features (void)
     return features;
 }
 
+#else         /* end dt_cpu32() */
+/* start dt_cpu64(): 64-bit probe; SSSE3 is reported together with MMX, MMX_EXTENSIONS, SSE and SSE2 */
+static unsigned int detect_cpu_features(void)
+{
+    unsigned int features = 0;
+    unsigned int result_c = 0;	/* first getisax() word, or ECX from CPUID leaf 1 */
+
+#ifdef HAVE_GETISAX
+    if (getisax (&result_c, 1)) {	/* was '&result', which is not declared in this function */
+	if (result_c & AV_386_SSSE3)
+	    features |= (SSSE3|MMX|MMX_EXTENSIONS|SSE|SSE2);
+    }
+#elif defined(_MSC_VER)
+    int CPUInfo[4] = {-1};
+
+    __cpuid(CPUInfo, 1);	/* CPUInfo[2] receives ECX */
+    if (CPUInfo[2] & (1 << 9))	/* same bit the GCC branch below tests */
+	features |= (SSSE3|MMX|MMX_EXTENSIONS|SSE|SSE2);
+#elif defined(__GNUC__)
+    __asm__ (
+	"mov $1, %%eax\n"
+	"cpuid\n"
+	"mov %%ecx, %0\n"
+	: "=r" (result_c)
+	:
+	: "%rax", "%rbx", "%rcx", "%rdx"
+    );
+    if (result_c & (1 << 9))	/* ECX bit 9 is mapped to SSSE3 */
+	features |= (SSSE3|MMX|MMX_EXTENSIONS|SSE|SSE2);
+#else
+#   error unsupported compiler
+#endif
+
+    return features;
+}
+
+#endif /* end dt_cpu64() */
+
+#ifdef USE_MMX
+#if (!defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64))
+/*32 bit MMX*/
+
 static pixman_bool_t
 pixman_have_mmx (void)
 {
@@ -539,7 +596,15 @@ pixman_have_mmx (void)
     return mmx_present;
 }
 
+#else
+/*64 bit MMX*/
+#define pixman_have_mmx() TRUE
+#endif
+#endif
+
 #ifdef USE_SSE2
+#if (!defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64))
+/*32 bit SSE2*/
 static pixman_bool_t
 pixman_have_sse2 (void)
 {
@@ -555,18 +620,39 @@ pixman_have_sse2 (void)
 
     return sse2_present;
 }
+#else
+/*64 bit SSE2*/
+#define pixman_have_sse2() TRUE
+#endif
+#endif
+
+#ifdef USE_SSSE3
+static pixman_bool_t	/* TRUE when detect_cpu_features() reports SSSE3 and every older SIMD bit */
+pixman_have_ssse3 (void)
+{
+    static pixman_bool_t initialized = FALSE;	/* set to TRUE after the first probe */
+    static pixman_bool_t ssse3_present;	/* cached result returned by every call */
+
+    if (!initialized) {	/* probe only on the first call */
+	unsigned int features = detect_cpu_features();
+	ssse3_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2 |
+			SSSE3)) == (MMX | MMX_EXTENSIONS | SSE | SSE2 | SSSE3);	/* all five bits required */
+	initialized = TRUE;
+    }
+    return ssse3_present;
+}
 
 #endif
 
-#else /* __amd64__ */
+#else  /* (amd_64 && (MMX||SSE2))*/
 #ifdef USE_MMX
 #define pixman_have_mmx() TRUE
 #endif
 #ifdef USE_SSE2
 #define pixman_have_sse2() TRUE
 #endif
-#endif /* __amd64__ */
-#endif
+#endif /* end (amd_64 && (MMX || SSE2)) */
+#endif /* end (MMX || SSE2 || SSSE3) */
 
 pixman_implementation_t *
 _pixman_choose_implementation (void)
-- 
1.7.0.4


--d6Gm4EdcadzBjdND
Content-Type: text/x-diff; charset=iso-8859-1
Content-Disposition: attachment; filename="0003-Add-ssse3-fast-path-skeleton.patch"
Content-Transfer-Encoding: 8bit



More information about the Pixman mailing list