[PATCH 2/3] Improve CPU feature detection.
Liu Xinyun
xinyun.liu at intel.com
Wed Dec 8 02:46:43 PST 2010
Add SSSE3 dynamic detection.
Signed-off-by: Liu Xinyun <xinyun.liu at intel.com>
Signed-off-by: Xu Samuel <samuel.xu at intel.com>
Signed-off-by: Ma Ling <ling.ma at intel.com>
Signed-off-by: Zhao Yakui <yakui.zhao at intel.com>
---
pixman/pixman-cpu.c | 114 ++++++++++++++++++++++++++++++++++++++++++++------
1 files changed, 100 insertions(+), 14 deletions(-)
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index e4fb1e4..389c6e5 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -25,10 +25,13 @@
#include <string.h>
-#if defined(USE_ARM_SIMD) && defined(_MSC_VER)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#if defined(USE_ARM_SIMD)
/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */
#include <windows.h>
-#endif
+#endif /* USE_ARM_SIMD */
+#endif /* _MSC_VER */
#include "pixman-private.h"
@@ -332,14 +335,15 @@ pixman_have_arm_neon (void)
#endif /* USE_ARM_SIMD || USE_ARM_NEON */
-#if defined(USE_MMX) || defined(USE_SSE2)
+#if defined(USE_MMX) || defined(USE_SSE2) || defined(USE_SSSE3)
/* The CPU detection code needs to be in a file not compiled with
* "-mmmx -msse", as gcc would generate CMOV instructions otherwise
* that would lead to SIGILL instructions on old CPUs that don't have
* it.
*/
-#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
-
+#if (!defined(__amd64__) && !defined(__x86_64__) && \
+ !defined(_M_AMD64)) || defined(USE_SSSE3)
+/*32 bit or (64 bit with USE_SSSE3 defined)*/
#ifdef HAVE_GETISAX
#include <sys/auxv.h>
#endif
@@ -351,15 +355,19 @@ typedef enum
MMX_EXTENSIONS = 0x2,
SSE = 0x6,
SSE2 = 0x8,
- CMOV = 0x10
+ CMOV = 0x10,
+ SSSE3 = 0x20
} cpu_features_t;
+#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
+/* 32 bits implementation */
static unsigned int
detect_cpu_features (void)
{
unsigned int features = 0;
unsigned int result = 0;
+ unsigned int result_c = 0;
#ifdef HAVE_GETISAX
if (getisax (&result, 1))
@@ -374,6 +382,8 @@ detect_cpu_features (void)
features |= SSE;
if (result & AV_386_SSE2)
features |= SSE2;
+ if (result & AV_386_SSSE3)
+ features |= SSSE3;
}
#else
char vendor[13];
@@ -419,10 +429,12 @@ detect_cpu_features (void)
"pop %%ebx\n"
"1:\n"
"mov %%edx, %0\n"
+ "mov %%ecx, %4\n"
: "=r" (result),
- "=m" (vendor[0]),
- "=m" (vendor[4]),
- "=m" (vendor[8])
+ "=m" (vendor[0]),
+ "=m" (vendor[4]),
+ "=m" (vendor[8]),
+ "=r" (result_c)
:
: "%eax", "%ecx", "%edx"
);
@@ -456,6 +468,7 @@ detect_cpu_features (void)
pop ebx
nocpuid:
mov result, edx
+ mov result_c, ecx
}
memmove (vendor + 0, &vendor0, 4);
memmove (vendor + 4, &vendor1, 4);
@@ -466,7 +479,7 @@ detect_cpu_features (void)
#endif
features = 0;
- if (result)
+ if (result || result_c)
{
/* result now contains the standard feature bits */
if (result & (1 << 15))
@@ -477,6 +490,8 @@ detect_cpu_features (void)
features |= SSE;
if (result & (1 << 26))
features |= SSE2;
+ if (result_c & (1 << 9))
+ features |= SSSE3;
if ((features & MMX) && !(features & SSE) &&
(strcmp (vendor, "AuthenticAMD") == 0 ||
strcmp (vendor, "Geode by NSC") == 0))
@@ -498,7 +513,7 @@ detect_cpu_features (void)
: "=r" (result)
:
: "%eax", "%ecx", "%edx"
- );
+ );
#elif defined _MSC_VER
_asm {
push ebx
@@ -523,6 +538,48 @@ detect_cpu_features (void)
return features;
}
+#else /* end of 32-bit detect_cpu_features() */
+/* start of 64-bit detect_cpu_features() */
+static unsigned int detect_cpu_features(void)
+{
+    /* 64-bit probe: tests only SSSE3; when found, MMX/SSE/SSE2 flags are set too. */
+    unsigned int features = 0;
+    unsigned int result_c = 0; /* getisax() word, or ECX from CPUID leaf 1 */
+#ifdef HAVE_GETISAX
+    if (getisax (&result_c, 1)) { /* was &result, which is not declared here */
+	if (result_c & AV_386_SSSE3)
+	    features |= (SSSE3|MMX|MMX_EXTENSIONS|SSE|SSE2);
+    }
+#elif defined(_MSC_VER)
+    int CPUInfo[4] = {-1};
+    /* CPUInfo[2] receives ECX; mask 0x200 is bit 9, the same bit tested below. */
+    __cpuid(CPUInfo, 1);
+    if ((CPUInfo[2] & 0x200)>>9)
+	features |= (SSSE3|MMX|MMX_EXTENSIONS|SSE|SSE2);
+#elif defined(__GNUC__)
+    __asm__ (
+	"mov $1, %%eax\n"
+	"cpuid\n"
+	"mov %%ecx, %0\n"
+	: "=r" (result_c)
+	:
+	: "%rax", "%rbx", "%rcx", "%rdx"
+	);
+    if (result_c & (1 << 9))
+	features |= (SSSE3|MMX|MMX_EXTENSIONS|SSE|SSE2);
+#else
+# error unsupported compiler
+#endif
+
+    return features;
+}
+
+#endif /* end of 64-bit detect_cpu_features() */
+
+#ifdef USE_MMX
+#if (!defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64))
+/*32 bit MMX*/
+
static pixman_bool_t
pixman_have_mmx (void)
{
@@ -539,7 +596,15 @@ pixman_have_mmx (void)
return mmx_present;
}
+#else
+/*64 bit MMX*/
+#define pixman_have_mmx() TRUE
+#endif
+#endif
+
#ifdef USE_SSE2
+#if (!defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64))
+/*32 bit SSE2*/
static pixman_bool_t
pixman_have_sse2 (void)
{
@@ -555,18 +620,39 @@ pixman_have_sse2 (void)
return sse2_present;
}
+#else
+/*64 bit SSE2*/
+#define pixman_have_sse2() TRUE
+#endif
+#endif
+
+#ifdef USE_SSSE3
+static pixman_bool_t
+pixman_have_ssse3 (void)
+{
+    /* Probe once, then reuse the cached answer; every flag in `wanted` must be set. */
+    static pixman_bool_t probed = FALSE;
+    static pixman_bool_t have_ssse3;
+    const unsigned int wanted = MMX | MMX_EXTENSIONS | SSE | SSE2 | SSSE3;
+
+    if (probed)
+	return have_ssse3;
+    have_ssse3 = (detect_cpu_features () & wanted) == wanted;
+    probed = TRUE;
+    return have_ssse3;
+}
#endif
-#else /* __amd64__ */
+#else /* (amd_64 && (MMX||SSE2))*/
#ifdef USE_MMX
#define pixman_have_mmx() TRUE
#endif
#ifdef USE_SSE2
#define pixman_have_sse2() TRUE
#endif
-#endif /* __amd64__ */
-#endif
+#endif /* end (amd_64 && (MMX || SSE2)) */
+#endif /* end (MMX || SSE2 || SSSE3) */
pixman_implementation_t *
_pixman_choose_implementation (void)
--
1.7.0.4
--d6Gm4EdcadzBjdND
Content-Type: text/x-diff; charset=iso-8859-1
Content-Disposition: attachment; filename="0003-Add-ssse3-fast-path-skeleton.patch"
Content-Transfer-Encoding: 8bit
More information about the Pixman
mailing list