[Pixman] [PATCH 08/10] Cleanups and simplifications in x86 CPU feature detection

Søren Sandmann Pedersen sandmann at cs.au.dk
Fri Jun 29 13:44:53 PDT 2012


From: Søren Sandmann Pedersen <ssp at redhat.com>

A new function pixman_cpuid() is added that runs the cpuid instruction
and returns the results.

On GCC this function uses inline assembly that is written such that it
will work on both 32-bit and 64-bit x86. Compared to the old code, the
only difference is that %ebx is saved in %esi instead of on the stack.
Saving 32-bit registers on a 64-bit stack is difficult or impossible
because in 64-bit mode, the push and pop instructions work on 64-bit
registers.

On MSVC, the function calls the __cpuid intrinsic.

There is also a new function called have_cpuid() which detects whether
cpuid is available. On x86-64 and MSVC, it simply returns TRUE; on
32-bit x86, it checks whether the 22nd bit of eflags (bit 21 counting
from zero, mask 0x00200000) can be modified. On MSVC this does have the
consequence that pixman will no longer work on CPUs without cpuid
(i.e., older than 486 and some 486 models).

These two functions together make it possible to write a generic
detect_cpu_features() in plain C. This function is then used in a new
have_feature() function that checks whether a specific set of feature
bits is available.

Aside from the cleanups and simplifications, the main benefit from
this patch is that pixman can now do feature detection on x86-64, so
that newer instruction sets such as SSSE3 and SSE4.1 can be used. (And
apparently the assumption that x86-64 CPUs always have MMX and SSE2 is
no longer correct: Knights Corner is x86-64, but doesn't have them).
---
 pixman/pixman-x86.c |  311 +++++++++++++++++++++------------------------------
 1 file changed, 129 insertions(+), 182 deletions(-)

diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c
index 52ad3df..84590d2 100644
--- a/pixman/pixman-x86.c
+++ b/pixman/pixman-x86.c
@@ -32,30 +32,25 @@
  * that would lead to SIGILL instructions on old CPUs that don't have
  * it.
  */
-#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
-
-#ifdef HAVE_GETISAX
-#include <sys/auxv.h>
-#endif
 
 typedef enum
 {
-    NO_FEATURES = 0,
-    MMX = 0x1,
-    MMX_EXTENSIONS = 0x2,
-    SSE = 0x6,
-    SSE2 = 0x8,
-    CMOV = 0x10
+    X86_MMX			= (1 << 0),
+    X86_MMX_EXTENSIONS		= (1 << 1),
+    X86_SSE			= (1 << 2) | X86_MMX_EXTENSIONS,
+    X86_SSE2			= (1 << 3),
+    X86_CMOV			= (1 << 4)
 } cpu_features_t;
 
+#ifdef HAVE_GETISAX
 
-static unsigned int
+#include <sys/auxv.h>
+
+static cpu_features_t
 detect_cpu_features (void)
 {
-    unsigned int features = 0;
-    unsigned int result = 0;
-    
-#ifdef HAVE_GETISAX
+    cpu_features_t features;
+
     if (getisax (&result, 1))
     {
 	if (result & AV_386_CMOV)
@@ -69,15 +64,47 @@ detect_cpu_features (void)
 	if (result & AV_386_SSE2)
 	    features |= SSE2;
     }
+
+    return features;
+}
+
+#else
+
+static pixman_bool_t
+have_cpuid (void)
+{
+#if defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64) || defined (_MSC_VER)
+
+    return TRUE;
+
+#elif defined (__GNUC__)
+    uint32_t result;
+
+    __asm__ volatile (
+        "pushf"				"\n\t"
+        "pop %%eax"			"\n\t"
+        "mov %%eax, %%ecx"		"\n\t"
+        "xor $0x00200000, %%eax"	"\n\t"
+        "push %%eax"			"\n\t"
+        "popf"				"\n\t"
+        "pushf"				"\n\t"
+        "pop %%eax"			"\n\t"
+        "xor %%ecx, %%eax"		"\n\t"
+	"mov %%eax, %0"			"\n\t"
+	: "=r" (result)
+	:
+	: "%eax", "%ecx");
+
+    return !!result;
+
 #else
-    char vendor[13];
-#ifdef _MSC_VER
-    int vendor0 = 0, vendor1, vendor2;
+#error "Unknown compiler"
 #endif
-    vendor[0] = 0;
-    vendor[12] = 0;
-    
-#ifdef __GNUC__
+}
+
+static void
+pixman_cpuid (uint32_t feature, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
+{
     /* see p. 118 of amd64 instruction set manual Vol3 */
     /* We need to be careful about the handling of %ebx and
      * %esp here. We can't declare either one as clobbered
@@ -86,195 +113,115 @@ detect_cpu_features (void)
      * stack pointer), so we need to make sure they have their
      * original values when we access the output operands.
      */
-    __asm__ (
-        "pushf\n"
-        "pop %%eax\n"
-        "mov %%eax, %%ecx\n"
-        "xor $0x00200000, %%eax\n"
-        "push %%eax\n"
-        "popf\n"
-        "pushf\n"
-        "pop %%eax\n"
-        "mov $0x0, %%edx\n"
-        "xor %%ecx, %%eax\n"
-        "jz 1f\n"
-	
-        "mov $0x00000000, %%eax\n"
-        "push %%ebx\n"
-        "cpuid\n"
-        "mov %%ebx, %%eax\n"
-        "pop %%ebx\n"
-        "mov %%eax, %1\n"
-        "mov %%edx, %2\n"
-        "mov %%ecx, %3\n"
-        "mov $0x00000001, %%eax\n"
-        "push %%ebx\n"
-        "cpuid\n"
-        "pop %%ebx\n"
-        "1:\n"
-        "mov %%edx, %0\n"
-	: "=r" (result),
-	  "=m" (vendor[0]),
-	  "=m" (vendor[4]),
-	  "=m" (vendor[8])
-	:
-	: "%eax", "%ecx", "%edx"
-        );
-    
+#if defined (__GNUC__)
+    __asm__ volatile (
+        "mov %4, %%eax"			"\n\t"
+	"mov %%ebx, %%esi"		"\n\t"
+        "cpuid"				"\n\t"
+        "mov %%eax, %0"			"\n\t"
+	"mov %%ebx, %1"			"\n\t"
+	"mov %%ecx, %2"			"\n\t"
+        "mov %%edx, %3"			"\n\t"
+	"mov %%esi, %%ebx"		"\n\t"
+	: "=m" (*a), "=m" (*b), "=m" (*c), "=m" (*d)
+	: "r" (feature)
+	: "%eax", "%ecx", "%edx", "%esi");
 #elif defined (_MSC_VER)
-    
-    _asm {
-	pushfd
-	    pop eax
-	    mov ecx, eax
-	    xor eax, 00200000h
-	    push eax
-	    popfd
-	    pushfd
-	    pop eax
-	    mov edx, 0
-	    xor eax, ecx
-	    jz nocpuid
-	    
-	    mov eax, 0
-	    push ebx
-	    cpuid
-	    mov eax, ebx
-	    pop ebx
-	    mov vendor0, eax
-	    mov vendor1, edx
-	    mov vendor2, ecx
-	    mov eax, 1
-	    push ebx
-	    cpuid
-	    pop ebx
-	    nocpuid:
-	    mov result, edx
-	    }
-    memmove (vendor + 0, &vendor0, 4);
-    memmove (vendor + 4, &vendor1, 4);
-    memmove (vendor + 8, &vendor2, 4);
-    
+    int info[4];
+
+    __cpuid (info, feature);
+
+    *a = info[0];
+    *b = info[1];
+    *c = info[2];
+    *d = info[3];
 #else
-#   error unsupported compiler
+#error Unknown compiler
 #endif
-    
-    features = 0;
-    if (result)
+}
+
+static cpu_features_t
+detect_cpu_features (void)
+{
+    uint32_t a, b, c, d;
+    cpu_features_t features = 0;
+
+    if (!have_cpuid())
+	return features;
+
+    /* Get feature bits */
+    pixman_cpuid (0x01, &a, &b, &c, &d);
+    if (d & (1 << 15))
+	features |= X86_CMOV;
+    if (d & (1 << 23))
+	features |= X86_MMX;
+    if (d & (1 << 25))
+	features |= X86_SSE;
+    if (d & (1 << 26))
+	features |= X86_SSE2;
+
+    /* Check for AMD specific features */
+    if ((features & X86_MMX) && !(features & X86_SSE))
     {
-	/* result now contains the standard feature bits */
-	if (result & (1 << 15))
-	    features |= CMOV;
-	if (result & (1 << 23))
-	    features |= MMX;
-	if (result & (1 << 25))
-	    features |= SSE;
-	if (result & (1 << 26))
-	    features |= SSE2;
-	if ((features & MMX) && !(features & SSE) &&
-	    (strcmp (vendor, "AuthenticAMD") == 0 ||
-	     strcmp (vendor, "Geode by NSC") == 0))
+	char vendor[13];
+
+	/* Get vendor string */
+	memset (vendor, 0, sizeof vendor);
+
+	pixman_cpuid (0x00, &a, &b, &c, &d);
+	memcpy (vendor + 0, &b, 4);
+	memcpy (vendor + 4, &d, 4);
+	memcpy (vendor + 8, &c, 4);
+
+	if (strcmp (vendor, "AuthenticAMD") == 0 ||
+	    strcmp (vendor, "Geode by NSC") == 0)
 	{
-	    /* check for AMD MMX extensions */
-#ifdef __GNUC__
-	    __asm__ (
-	        "	push %%ebx\n"
-	        "	mov $0x80000000, %%eax\n"
-	        "	cpuid\n"
-	        "	xor %%edx, %%edx\n"
-	        "	cmp $0x1, %%eax\n"
-	        "	jge 2f\n"
-	        "	mov $0x80000001, %%eax\n"
-	        "	cpuid\n"
-	        "2:\n"
-	        "	pop %%ebx\n"
-	        "	mov %%edx, %0\n"
-		: "=r" (result)
-		:
-		: "%eax", "%ecx", "%edx"
-	        );
-#elif defined _MSC_VER
-	    _asm {
-		push ebx
-		    mov eax, 80000000h
-		    cpuid
-		    xor edx, edx
-		    cmp eax, 1
-		    jge notamd
-		    mov eax, 80000001h
-		    cpuid
-		    notamd:
-		    pop ebx
-		    mov result, edx
-		    }
-#endif
-	    if (result & (1 << 22))
-		features |= MMX_EXTENSIONS;
+	    pixman_cpuid (0x80000000, &a, &b, &c, &d);
+	    if (a >= 0x80000001)
+	    {
+		pixman_cpuid (0x80000001, &a, &b, &c, &d);
+
+		if (d & (1 << 22))
+		    features |= X86_MMX_EXTENSIONS;
+	    }
 	}
     }
-#endif /* HAVE_GETISAX */
-    
+
     return features;
 }
 
-#ifdef USE_X86_MMX
-static pixman_bool_t
-pixman_have_mmx (void)
-{
-    static pixman_bool_t initialized = FALSE;
-    static pixman_bool_t mmx_present;
-    
-    if (!initialized)
-    {
-	unsigned int features = detect_cpu_features ();
-	mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS);
-	initialized = TRUE;
-    }
-    
-    return mmx_present;
-}
 #endif
 
-#ifdef USE_SSE2
 static pixman_bool_t
-pixman_have_sse2 (void)
+have_feature (cpu_features_t feature)
 {
-    static pixman_bool_t initialized = FALSE;
-    static pixman_bool_t sse2_present;
-    
+    static pixman_bool_t initialized;
+    static cpu_features_t features;
+
     if (!initialized)
     {
-	unsigned int features = detect_cpu_features ();
-	sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2);
+	features = detect_cpu_features();
 	initialized = TRUE;
     }
-    
-    return sse2_present;
-}
 
-#endif
-
-#else /* __amd64__ */
-#ifdef USE_X86_MMX
-#define pixman_have_mmx() TRUE
-#endif
-#ifdef USE_SSE2
-#define pixman_have_sse2() TRUE
-#endif
-#endif /* __amd64__ */
+    return (features & feature) == feature;
+}
 
 #endif
 
 pixman_implementation_t *
 _pixman_x86_get_implementations (pixman_implementation_t *imp)
 {
+#define MMX_BITS  (X86_MMX | X86_MMX_EXTENSIONS)
+#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)
+
 #ifdef USE_X86_MMX
-    if (!_pixman_disabled ("mmx") && pixman_have_mmx())
+    if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS))
 	imp = _pixman_implementation_create_mmx (imp);
 #endif
 
 #ifdef USE_SSE2
-    if (!_pixman_disabled ("sse2") && pixman_have_sse2())
+    if (!_pixman_disabled ("sse2") && have_feature (SSE2_BITS))
 	imp = _pixman_implementation_create_sse2 (imp);
 #endif
 
-- 
1.7.10.4



More information about the Pixman mailing list