[pulseaudio-discuss] [PATCH] core-util, cpu-x86: use __get_cpuid() instead of homegrown assembly
Arun Raghavan
arun at arunraghavan.net
Mon Dec 4 14:14:54 UTC 2017
On Wed, 22 Nov 2017, at 08:44 PM, Tanu Kaskinen wrote:
> The get_cpuid() function in cpu-x86.c was buggy on x86-64. When building
> without optimizations, the homegrown assembly code overwrote the
> beginning of the function argument list on the stack. That happened to
> work fine on regular x86-64, but caused crashing with the x32 ABI.
>
> At least GCC and clang provide cpuid.h, which has the __get_cpuid()
> function that can be used instead of the homegrown assembly.
Just as a sanity check -- does this introduce any compiler version
dependencies? The clang header seems to have been updated relatively
recently for SSE 4.1/4.2 (2016), but the gcc header seems quite old, so
probably okay.
> The PA_REG_* constants can be removed as well, because they're not used
> any more.
>
> BugLink: https://bugs.freedesktop.org/show_bug.cgi?id=103656
> ---
> configure.ac | 2 +-
> src/pulsecore/core-util.c | 35 ++++++++++++++++++++---------------
> src/pulsecore/cpu-x86.c | 37 +++++++++++++++++--------------------
> src/pulsecore/cpu-x86.h | 12 ------------
> 4 files changed, 38 insertions(+), 48 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 0c38fbb52..013918f1a 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -410,7 +410,7 @@ AC_SUBST([LIBLTDL])
> AC_HEADER_STDC
>
> # POSIX
> -AC_CHECK_HEADERS_ONCE([arpa/inet.h glob.h grp.h netdb.h netinet/in.h \
> +AC_CHECK_HEADERS_ONCE([arpa/inet.h cpuid.h glob.h grp.h netdb.h
> netinet/in.h \
> netinet/in_systm.h netinet/tcp.h poll.h pwd.h sched.h \
> sys/mman.h sys/select.h sys/socket.h sys/wait.h \
> sys/uio.h syslog.h sys/dl.h dlfcn.h linux/sockios.h])
> diff --git a/src/pulsecore/core-util.c b/src/pulsecore/core-util.c
> index d4cfa20c7..64e9f2171 100644
> --- a/src/pulsecore/core-util.c
> +++ b/src/pulsecore/core-util.c
> @@ -124,6 +124,10 @@
> #include <sys/personality.h>
> #endif
>
> +#ifdef HAVE_CPUID_H
> +#include <cpuid.h>
> +#endif
> +
> #include <pulse/xmalloc.h>
> #include <pulse/util.h>
> #include <pulse/utf8.h>
> @@ -136,7 +140,6 @@
> #include <pulsecore/strbuf.h>
> #include <pulsecore/usergroup.h>
> #include <pulsecore/strlist.h>
> -#include <pulsecore/cpu-x86.h>
> #include <pulsecore/pipe.h>
> #include <pulsecore/once.h>
>
> @@ -3663,11 +3666,13 @@ bool pa_running_in_vm(void) {
I wonder if the use-case for this is still valid (not being able to run
tsched in a VM).
>
> /* Both CPUID and DMI are x86 specific interfaces... */
>
> - uint32_t eax = 0x40000000;
> +#ifdef HAVE_CPUID_H
> + uint32_t eax;
> union {
> uint32_t sig32[3];
> char text[13];
> } sig;
> +#endif
>
> #ifdef __linux__
> const char *const dmi_vendors[] = {
> @@ -3701,19 +3706,18 @@ bool pa_running_in_vm(void) {
>
> #endif
>
> - /* http://lwn.net/Articles/301888/ */
> - pa_zero(sig);
> -
> - __asm__ __volatile__ (
> - /* ebx/rbx is being used for PIC! */
> - " push %%"PA_REG_b" \n\t"
> - " cpuid \n\t"
> - " mov %%ebx, %1 \n\t"
> - " pop %%"PA_REG_b" \n\t"
> +#ifdef HAVE_CPUID_H
>
> - : "=a" (eax), "=r" (sig.sig32[0]), "=c" (sig.sig32[1]), "=d"
> (sig.sig32[2])
> - : "0" (eax)
> - );
> + /* Hypervisors provide their signature on the 0x40000000 cpuid leaf.
> + * http://lwn.net/Articles/301888/
> + *
> + * XXX: Why are we checking a list of signatures instead of the
> + * "hypervisor present bit"? According to the LWN article, the
> "hypervisor
> + * present bit" would be available on bit 31 of ECX on leaf 0x1, and
> that
> + * bit would tell us directly whether we're in a virtual machine or
> not. */
> + pa_zero(sig);
> + if (__get_cpuid(0x40000000, &eax, &sig.sig32[0], &sig.sig32[1],
> &sig.sig32[2]) == 0)
> + return false;
>
> if (pa_streq(sig.text, "XenVMMXenVMM") ||
> pa_streq(sig.text, "KVMKVMKVM") ||
> @@ -3722,8 +3726,9 @@ bool pa_running_in_vm(void) {
> /* http://msdn.microsoft.com/en-us/library/bb969719.aspx */
> pa_streq(sig.text, "Microsoft Hv"))
> return true;
> +#endif /* HAVE_CPUID_H */
>
> -#endif
> +#endif /* defined(__i386__) || defined(__x86_64__) */
>
> return false;
> }
> diff --git a/src/pulsecore/cpu-x86.c b/src/pulsecore/cpu-x86.c
> index a86c26da4..4e59e14cc 100644
> --- a/src/pulsecore/cpu-x86.c
> +++ b/src/pulsecore/cpu-x86.c
> @@ -24,35 +24,28 @@
>
> #include <stdint.h>
>
> +#ifdef HAVE_CPUID_H
> +#include <cpuid.h>
> +#endif
> +
> #include <pulsecore/log.h>
>
> #include "cpu-x86.h"
>
> -#if defined (__i386__) || defined (__amd64__)
> -static void get_cpuid(uint32_t op, uint32_t *a, uint32_t *b, uint32_t
> *c, uint32_t *d) {
> - __asm__ __volatile__ (
> - " push %%"PA_REG_b" \n\t"
> - " cpuid \n\t"
> - " mov %%ebx, %%esi \n\t"
> - " pop %%"PA_REG_b" \n\t"
> -
> - : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
> - : "0" (op)
> - );
> -}
> -#endif
> -
> void pa_cpu_get_x86_flags(pa_cpu_x86_flag_t *flags) {
> -#if defined (__i386__) || defined (__amd64__)
> +#if (defined(__i386__) || defined(__amd64__)) && defined(HAVE_CPUID_H)
> uint32_t eax, ebx, ecx, edx;
> uint32_t level;
>
> *flags = 0;
>
> /* get standard level */
> - get_cpuid(0x00000000, &level, &ebx, &ecx, &edx);
> + if (__get_cpuid(0x00000000, &level, &ebx, &ecx, &edx) == 0)
> + goto finish;
> +
> if (level >= 1) {
> - get_cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
> + if (__get_cpuid(0x00000001, &eax, &ebx, &ecx, &edx) == 0)
> + goto finish;
>
> if (edx & (1<<15))
> *flags |= PA_CPU_X86_CMOV;
> @@ -80,9 +73,12 @@ void pa_cpu_get_x86_flags(pa_cpu_x86_flag_t *flags) {
> }
>
> /* get extended level */
> - get_cpuid(0x80000000, &level, &ebx, &ecx, &edx);
> + if (__get_cpuid(0x80000000, &level, &ebx, &ecx, &edx) == 0)
> + goto finish;
> +
> if (level >= 0x80000001) {
> - get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
> + if (__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx) == 0)
> + goto finish;
>
> if (edx & (1<<22))
> *flags |= PA_CPU_X86_MMXEXT;
> @@ -97,6 +93,7 @@ void pa_cpu_get_x86_flags(pa_cpu_x86_flag_t *flags) {
> *flags |= PA_CPU_X86_3DNOW;
> }
>
> +finish:
> pa_log_info("CPU flags: %s%s%s%s%s%s%s%s%s%s%s",
> (*flags & PA_CPU_X86_CMOV) ? "CMOV " : "",
> (*flags & PA_CPU_X86_MMX) ? "MMX " : "",
> @@ -109,7 +106,7 @@ void pa_cpu_get_x86_flags(pa_cpu_x86_flag_t *flags) {
> (*flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "",
> (*flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "",
> (*flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
> -#endif /* defined (__i386__) || defined (__amd64__) */
> +#endif /* (defined(__i386__) || defined(__amd64__)) &&
> defined(HAVE_CPUID_H) */
> }
>
> bool pa_cpu_init_x86(pa_cpu_x86_flag_t *flags) {
> diff --git a/src/pulsecore/cpu-x86.h b/src/pulsecore/cpu-x86.h
> index 30bbc80fd..eff201486 100644
> --- a/src/pulsecore/cpu-x86.h
> +++ b/src/pulsecore/cpu-x86.h
> @@ -43,20 +43,8 @@ bool pa_cpu_init_x86 (pa_cpu_x86_flag_t *flags);
>
> #if defined (__i386__)
> typedef int32_t pa_reg_x86;
> -#define PA_REG_a "eax"
> -#define PA_REG_b "ebx"
> -#define PA_REG_c "ecx"
> -#define PA_REG_d "edx"
> -#define PA_REG_D "edi"
> -#define PA_REG_S "esi"
> #elif defined (__amd64__)
> typedef int64_t pa_reg_x86;
> -#define PA_REG_a "rax"
> -#define PA_REG_b "rbx"
> -#define PA_REG_c "rcx"
> -#define PA_REG_d "rdx"
> -#define PA_REG_D "rdi"
> -#define PA_REG_S "rsi"
> #endif
>
> /* some optimized functions */
> --
Looks good to me.
-- Arun
More information about the pulseaudio-discuss
mailing list