[Pixman] [PATCH 00/10] Cleanups to CPU detection

Søren Sandmann sandmann at cs.au.dk
Fri Jul 6 22:26:29 PDT 2012


Søren Sandmann Pedersen <sandmann at cs.au.dk> writes:

> The changes to x86 are the most involved. There is now a
> pixman_cpuid() function that uses inline assembly on GCC and the
> __cpuid intrinsic on MSVC. The assembly is written such that it will
> work on both 32 and 64 bit; the main change required was to save %ebx
> in %esi instead of on the stack.

I have pushed all the other changes, but saving %ebx in %esi is broken
because on x86-64 writing to a 32 bit register zeroes the upper 32 bits
of the corresponding 64 bit register.

I can't think of a way to save the value of a 32 bit register that both
works on x86-32 and doesn't lose the upper 32 bits on x86-64, so in the
new version below, there are some #ifdefs to deal with this issue.


Søren


>From 0037cbb84440e5cc6e64ea1c7b95ad7e80c21dd2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp at redhat.com>
Date: Thu, 28 Jun 2012 15:53:14 -0400
Subject: [PATCH] Cleanups and simplifications in x86 CPU feature detection

A new function pixman_cpuid() is added that runs the cpuid instruction
and returns the results.

On GCC this function uses inline assembly that is written such that it
will work on both 32 and 64 bit. Compared to the old code, the only
difference is %ebx is saved in %esi instead of on the stack. Saving 32
bit registers on a 64 bit stack is difficult or impossible because in
64 bit mode, the push and pop instructions work on 64 bit registers.

On MSVC, the function calls the __cpuid intrinsic.

There is also a new function called have_cpuid() which detects whether
cpuid is available. On x86-64 and MSVC, it simply returns TRUE; on
x86-32, it checks whether bit 21 (the ID flag) of eflags can be
modified. On MSVC this does have the consequence that pixman will no
longer work on CPUs without cpuid (i.e., older than 486 and some 486
models).

These two functions together make it possible to write a generic
detect_cpu_features() in plain C. This function is then used in a new
have_feature() function that checks whether a specific set of feature
bits is available.

Aside from the cleanups and simplifications, the main benefit from
this patch is that pixman now can do feature detection on x86-64, so
that newer instruction sets such as SSSE3 and SSE4.1 can be used. (And
apparently the assumption that x86-64 CPUs always have MMX and SSE2 is
no longer correct: Knight's Corner is x86-64, but doesn't have them).

V2: Rename the constants in the getisax() code, as pointed out by Alan
Coopersmith. Also reinstate the result variable and initialize
features to 0.

V3: Fixes for the fact that the upper 32 bits of a 64 bit register are
zeroed whenever the corresponding 32 bit register is written to.
---
 pixman/pixman-x86.c |  347 +++++++++++++++++++++++----------------------------
 1 file changed, 157 insertions(+), 190 deletions(-)

diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c
index 52ad3df..3c7bc91 100644
--- a/pixman/pixman-x86.c
+++ b/pixman/pixman-x86.c
@@ -32,249 +32,216 @@
  * that would lead to SIGILL instructions on old CPUs that don't have
  * it.
  */
-#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
-
-#ifdef HAVE_GETISAX
-#include <sys/auxv.h>
-#endif
 
 typedef enum
 {
-    NO_FEATURES = 0,
-    MMX = 0x1,
-    MMX_EXTENSIONS = 0x2,
-    SSE = 0x6,
-    SSE2 = 0x8,
-    CMOV = 0x10
+    X86_MMX			= (1 << 0),
+    X86_MMX_EXTENSIONS		= (1 << 1),
+    X86_SSE			= (1 << 2) | X86_MMX_EXTENSIONS,
+    X86_SSE2			= (1 << 3),
+    X86_CMOV			= (1 << 4)
 } cpu_features_t;
 
+#ifdef HAVE_GETISAX
 
-static unsigned int
+#include <sys/auxv.h>
+
+static cpu_features_t
 detect_cpu_features (void)
 {
-    unsigned int features = 0;
+    cpu_features_t features = 0;
     unsigned int result = 0;
-    
-#ifdef HAVE_GETISAX
+
     if (getisax (&result, 1))
     {
 	if (result & AV_386_CMOV)
-	    features |= CMOV;
+	    features |= X86_CMOV;
 	if (result & AV_386_MMX)
-	    features |= MMX;
+	    features |= X86_MMX;
 	if (result & AV_386_AMD_MMX)
-	    features |= MMX_EXTENSIONS;
+	    features |= X86_MMX_EXTENSIONS;
 	if (result & AV_386_SSE)
-	    features |= SSE;
+	    features |= X86_SSE;
 	if (result & AV_386_SSE2)
-	    features |= SSE2;
+	    features |= X86_SSE2;
     }
+
+    return features;
+}
+
+#else
+
+#define _PIXMAN_X86_64							\
+    (defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64))
+
+static pixman_bool_t
+have_cpuid (void)
+{
+#if _PIXMAN_X86_64 || defined (_MSC_VER)
+
+    return TRUE;
+
+#elif defined (__GNUC__)
+    uint32_t result;
+
+    __asm__ volatile (
+        "pushf"				"\n\t"
+        "pop %%eax"			"\n\t"
+        "mov %%eax, %%ecx"		"\n\t"
+        "xor $0x00200000, %%eax"	"\n\t"
+        "push %%eax"			"\n\t"
+        "popf"				"\n\t"
+        "pushf"				"\n\t"
+        "pop %%eax"			"\n\t"
+        "xor %%ecx, %%eax"		"\n\t"
+	"mov %%eax, %0"			"\n\t"
+	: "=r" (result)
+	:
+	: "%eax", "%ecx");
+
+    return !!result;
+
 #else
-    char vendor[13];
-#ifdef _MSC_VER
-    int vendor0 = 0, vendor1, vendor2;
+#error "Unknown compiler"
 #endif
-    vendor[0] = 0;
-    vendor[12] = 0;
-    
-#ifdef __GNUC__
+}
+
+static void
+pixman_cpuid (uint32_t feature,
+	      uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
+{
     /* see p. 118 of amd64 instruction set manual Vol3 */
-    /* We need to be careful about the handling of %ebx and
-     * %esp here. We can't declare either one as clobbered
+    /* On x86-32 we need to be careful about the handling of %ebx
+     * and %esp here. We can't declare either one as clobbered
      * since they are special registers (%ebx is the "PIC
      * register" holding an offset to global data, %esp the
-     * stack pointer), so we need to make sure they have their
-     * original values when we access the output operands.
+     * stack pointer), so we need to make sure that %ebx is
+     * preserved, and that %esp has its original value when
+     * accessing the output operands.
+     *
+     * On x86-64, writing to a 32-bit register zeroes the
+     * upper 32 bits of the corresponding 64 bit
+     * register, so we can't just save %ebx in some other
+     * register and restore it.
      */
-    __asm__ (
-        "pushf\n"
-        "pop %%eax\n"
-        "mov %%eax, %%ecx\n"
-        "xor $0x00200000, %%eax\n"
-        "push %%eax\n"
-        "popf\n"
-        "pushf\n"
-        "pop %%eax\n"
-        "mov $0x0, %%edx\n"
-        "xor %%ecx, %%eax\n"
-        "jz 1f\n"
-	
-        "mov $0x00000000, %%eax\n"
-        "push %%ebx\n"
-        "cpuid\n"
-        "mov %%ebx, %%eax\n"
-        "pop %%ebx\n"
-        "mov %%eax, %1\n"
-        "mov %%edx, %2\n"
-        "mov %%ecx, %3\n"
-        "mov $0x00000001, %%eax\n"
-        "push %%ebx\n"
-        "cpuid\n"
-        "pop %%ebx\n"
-        "1:\n"
-        "mov %%edx, %0\n"
-	: "=r" (result),
-	  "=m" (vendor[0]),
-	  "=m" (vendor[4]),
-	  "=m" (vendor[8])
-	:
-	: "%eax", "%ecx", "%edx"
-        );
-    
+#if defined (__GNUC__)
+    __asm__ volatile (
+        "mov %4, %%eax"			"\n\t"
+#if !_PIXMAN_X86_64
+	"mov %%ebx, %%esi"		"\n\t"
+#endif
+        "cpuid"				"\n\t"
+        "mov %%eax, %0"			"\n\t"
+	"mov %%ebx, %1"			"\n\t"
+	"mov %%ecx, %2"			"\n\t"
+        "mov %%edx, %3"			"\n\t"
+#if !_PIXMAN_X86_64
+	"mov %%esi, %%ebx"		"\n\t"
+#endif
+	: "=m" (*a), "=m" (*b), "=m" (*c), "=m" (*d)
+	: "r" (feature)
+#if !_PIXMAN_X86_64
+	: "%eax", "%esi", "%ecx", "%edx"
+#else
+	: "%rax", "%rbx", "%rcx", "%rdx"
+#endif
+	);
 #elif defined (_MSC_VER)
-    
-    _asm {
-	pushfd
-	    pop eax
-	    mov ecx, eax
-	    xor eax, 00200000h
-	    push eax
-	    popfd
-	    pushfd
-	    pop eax
-	    mov edx, 0
-	    xor eax, ecx
-	    jz nocpuid
-	    
-	    mov eax, 0
-	    push ebx
-	    cpuid
-	    mov eax, ebx
-	    pop ebx
-	    mov vendor0, eax
-	    mov vendor1, edx
-	    mov vendor2, ecx
-	    mov eax, 1
-	    push ebx
-	    cpuid
-	    pop ebx
-	    nocpuid:
-	    mov result, edx
-	    }
-    memmove (vendor + 0, &vendor0, 4);
-    memmove (vendor + 4, &vendor1, 4);
-    memmove (vendor + 8, &vendor2, 4);
-    
+    int info[4];
+
+    __cpuid (info, feature);
+
+    *a = info[0];
+    *b = info[1];
+    *c = info[2];
+    *d = info[3];
 #else
-#   error unsupported compiler
+#error Unknown compiler
 #endif
-    
-    features = 0;
-    if (result)
+}
+
+static cpu_features_t
+detect_cpu_features (void)
+{
+    uint32_t a, b, c, d;
+    cpu_features_t features = 0;
+
+    if (!have_cpuid())
+	return features;
+
+    /* Get feature bits */
+    pixman_cpuid (0x01, &a, &b, &c, &d);
+    if (d & (1 << 15))
+	features |= X86_CMOV;
+    if (d & (1 << 23))
+	features |= X86_MMX;
+    if (d & (1 << 25))
+	features |= X86_SSE;
+    if (d & (1 << 26))
+	features |= X86_SSE2;
+
+    /* Check for AMD specific features */
+    if ((features & X86_MMX) && !(features & X86_SSE))
     {
-	/* result now contains the standard feature bits */
-	if (result & (1 << 15))
-	    features |= CMOV;
-	if (result & (1 << 23))
-	    features |= MMX;
-	if (result & (1 << 25))
-	    features |= SSE;
-	if (result & (1 << 26))
-	    features |= SSE2;
-	if ((features & MMX) && !(features & SSE) &&
-	    (strcmp (vendor, "AuthenticAMD") == 0 ||
-	     strcmp (vendor, "Geode by NSC") == 0))
+	char vendor[13];
+
+	/* Get vendor string */
+	memset (vendor, 0, sizeof vendor);
+
+	pixman_cpuid (0x00, &a, &b, &c, &d);
+	memcpy (vendor + 0, &b, 4);
+	memcpy (vendor + 4, &d, 4);
+	memcpy (vendor + 8, &c, 4);
+
+	if (strcmp (vendor, "AuthenticAMD") == 0 ||
+	    strcmp (vendor, "Geode by NSC") == 0)
 	{
-	    /* check for AMD MMX extensions */
-#ifdef __GNUC__
-	    __asm__ (
-	        "	push %%ebx\n"
-	        "	mov $0x80000000, %%eax\n"
-	        "	cpuid\n"
-	        "	xor %%edx, %%edx\n"
-	        "	cmp $0x1, %%eax\n"
-	        "	jge 2f\n"
-	        "	mov $0x80000001, %%eax\n"
-	        "	cpuid\n"
-	        "2:\n"
-	        "	pop %%ebx\n"
-	        "	mov %%edx, %0\n"
-		: "=r" (result)
-		:
-		: "%eax", "%ecx", "%edx"
-	        );
-#elif defined _MSC_VER
-	    _asm {
-		push ebx
-		    mov eax, 80000000h
-		    cpuid
-		    xor edx, edx
-		    cmp eax, 1
-		    jge notamd
-		    mov eax, 80000001h
-		    cpuid
-		    notamd:
-		    pop ebx
-		    mov result, edx
-		    }
-#endif
-	    if (result & (1 << 22))
-		features |= MMX_EXTENSIONS;
+	    pixman_cpuid (0x80000000, &a, &b, &c, &d);
+	    if (a >= 0x80000001)
+	    {
+		pixman_cpuid (0x80000001, &a, &b, &c, &d);
+
+		if (d & (1 << 22))
+		    features |= X86_MMX_EXTENSIONS;
+	    }
 	}
     }
-#endif /* HAVE_GETISAX */
-    
+
     return features;
 }
 
-#ifdef USE_X86_MMX
-static pixman_bool_t
-pixman_have_mmx (void)
-{
-    static pixman_bool_t initialized = FALSE;
-    static pixman_bool_t mmx_present;
-    
-    if (!initialized)
-    {
-	unsigned int features = detect_cpu_features ();
-	mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS);
-	initialized = TRUE;
-    }
-    
-    return mmx_present;
-}
 #endif
 
-#ifdef USE_SSE2
 static pixman_bool_t
-pixman_have_sse2 (void)
+have_feature (cpu_features_t feature)
 {
-    static pixman_bool_t initialized = FALSE;
-    static pixman_bool_t sse2_present;
-    
+    static pixman_bool_t initialized;
+    static cpu_features_t features;
+
     if (!initialized)
     {
-	unsigned int features = detect_cpu_features ();
-	sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2);
+	features = detect_cpu_features();
 	initialized = TRUE;
     }
-    
-    return sse2_present;
-}
 
-#endif
-
-#else /* __amd64__ */
-#ifdef USE_X86_MMX
-#define pixman_have_mmx() TRUE
-#endif
-#ifdef USE_SSE2
-#define pixman_have_sse2() TRUE
-#endif
-#endif /* __amd64__ */
+    return (features & feature) == feature;
+}
 
 #endif
 
 pixman_implementation_t *
 _pixman_x86_get_implementations (pixman_implementation_t *imp)
 {
+#define MMX_BITS  (X86_MMX | X86_MMX_EXTENSIONS)
+#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)
+
 #ifdef USE_X86_MMX
-    if (!_pixman_disabled ("mmx") && pixman_have_mmx())
+    if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS))
 	imp = _pixman_implementation_create_mmx (imp);
 #endif
 
 #ifdef USE_SSE2
-    if (!_pixman_disabled ("sse2") && pixman_have_sse2())
+    if (!_pixman_disabled ("sse2") && have_feature (SSE2_BITS))
 	imp = _pixman_implementation_create_sse2 (imp);
 #endif
 
-- 
1.7.10.4



More information about the Pixman mailing list