[Mesa-dev] [PATCH 3/3] util: add avx2 and xop detection to cpu detection code
Jose Fonseca
jfonseca at vmware.com
Tue Aug 20 12:33:11 PDT 2013
Series looks good to me.
Jose
----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
>
> Going to need this soon (not going to bother with avx2 intrinsics at
> this time, but don't want to do workarounds for true vector shifts if
> llvm itself can use them just fine and won't need the gazillion
> instruction emulation).
> Not really tested other than my cpu returns 0 for these features...
> (I have no idea if llvm actually would emit avx2/xop instructions either...)
> ---
> src/gallium/auxiliary/gallivm/lp_bld_init.c | 11 ++++--
> src/gallium/auxiliary/util/u_cpu_detect.c | 48
> +++++++++++++++++++++++++++
> src/gallium/auxiliary/util/u_cpu_detect.h | 2 ++
> 3 files changed, 59 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c
> b/src/gallium/auxiliary/gallivm/lp_bld_init.c
> index 61eadb8..61b561f 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
> @@ -461,12 +461,15 @@ lp_build_init(void)
> lp_native_vector_width);
>
> if (lp_native_vector_width <= 128) {
> - /* Hide AVX support, as often LLVM AVX instrinsics are only guarded by
> + /* Hide AVX support, as often LLVM AVX intrinsics are only guarded by
> * "util_cpu_caps.has_avx" predicate, and lack the
> * "lp_native_vector_width > 128" predicate. And also to ensure a more
> * consistent behavior, allowing one to test SSE2 on AVX machines.
> + * XXX: should not play games with util_cpu_caps directly as it might
> + * get used for other things outside llvm too.
> */
> util_cpu_caps.has_avx = 0;
> + util_cpu_caps.has_avx2 = 0;
> }
>
> if (!HAVE_AVX) {
> @@ -476,13 +479,17 @@ lp_build_init(void)
> * omit it unnecessarily on amd cpus, see above).
> */
> util_cpu_caps.has_f16c = 0;
> + util_cpu_caps.has_xop = 0;
> }
>
> #ifdef PIPE_ARCH_PPC_64
> /* Set the NJ bit in VSCR to 0 so denormalized values are handled as
> - * specified by IEEE standard (PowerISA 2.06 - Section 6.3). This
> garantees
> + * specified by IEEE standard (PowerISA 2.06 - Section 6.3). This
> guarantees
> * that some rounding and half-float to float handling does not round
> * incorrectly to 0.
> + * XXX: should eventually follow same logic on all platforms.
> + * Right now denorms get explicitly disabled (but elsewhere) for x86,
> + * whereas ppc64 explicitly enables them...
> */
> if (util_cpu_caps.has_altivec) {
> unsigned short mask[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
> diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c
> b/src/gallium/auxiliary/util/u_cpu_detect.c
> index 87ad780..2ff40bb 100644
> --- a/src/gallium/auxiliary/util/u_cpu_detect.c
> +++ b/src/gallium/auxiliary/util/u_cpu_detect.c
> @@ -212,6 +212,44 @@ cpuid(uint32_t ax, uint32_t *p)
> #endif
> }
>
> +/**
> + * @sa cpuid.h included in gcc-4.4 onwards.
> + * @sa http://msdn.microsoft.com/en-us/library/hskdteyh%28v=vs.90%29.aspx
> + */
> +static INLINE void
> +cpuid_count(uint32_t ax, uint32_t cx, uint32_t *p)
> +{
> +#if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) &&
> defined(PIPE_ARCH_X86)
> + __asm __volatile (
> + "xchgl %%ebx, %1\n\t"
> + "cpuid\n\t"
> + "xchgl %%ebx, %1"
> + : "=a" (p[0]),
> + "=S" (p[1]),
> + "=c" (p[2]),
> + "=d" (p[3])
> + : "0" (ax), "2" (cx)
> + );
> +#elif (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) &&
> defined(PIPE_ARCH_X86_64)
> + __asm __volatile (
> + "cpuid\n\t"
> + : "=a" (p[0]),
> + "=b" (p[1]),
> + "=c" (p[2]),
> + "=d" (p[3])
> + : "0" (ax), "2" (cx)
> + );
> +#elif defined(PIPE_CC_MSVC)
> + __cpuidex(p, ax, cx);
> +#else
> + p[0] = 0;
> + p[1] = 0;
> + p[2] = 0;
> + p[3] = 0;
> +#endif
> +}
> +
> +
> static INLINE uint64_t xgetbv(void)
> {
> #if defined(PIPE_CC_GCC)
> @@ -341,6 +379,11 @@ util_cpu_detect(void)
> if (cacheline > 0)
> util_cpu_caps.cacheline = cacheline;
> }
> + if (util_cpu_caps.has_avx && regs[0] >= 0x00000007) {
> + uint32_t regs7[4];
> + cpuid_count(0x00000007, 0x00000000, regs7);
> + util_cpu_caps.has_avx2 = (regs7[1] >> 5) & 1;
> + }
>
> if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] ==
> 0x49656e69) {
> /* GenuineIntel */
> @@ -357,6 +400,9 @@ util_cpu_detect(void)
> util_cpu_caps.has_mmx2 |= (regs2[3] >> 22) & 1;
> util_cpu_caps.has_3dnow = (regs2[3] >> 31) & 1;
> util_cpu_caps.has_3dnow_ext = (regs2[3] >> 30) & 1;
> +
> + util_cpu_caps.has_xop = util_cpu_caps.has_avx &&
> + ((regs2[2] >> 11) & 1);
> }
>
> if (regs[0] >= 0x80000006) {
> @@ -394,10 +440,12 @@ util_cpu_detect(void)
> debug_printf("util_cpu_caps.has_sse4_1 = %u\n",
> util_cpu_caps.has_sse4_1);
> debug_printf("util_cpu_caps.has_sse4_2 = %u\n",
> util_cpu_caps.has_sse4_2);
> debug_printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx);
> + debug_printf("util_cpu_caps.has_avx2 = %u\n", util_cpu_caps.has_avx2);
> debug_printf("util_cpu_caps.has_f16c = %u\n", util_cpu_caps.has_f16c);
> debug_printf("util_cpu_caps.has_popcnt = %u\n",
> util_cpu_caps.has_popcnt);
> debug_printf("util_cpu_caps.has_3dnow = %u\n",
> util_cpu_caps.has_3dnow);
> debug_printf("util_cpu_caps.has_3dnow_ext = %u\n",
> util_cpu_caps.has_3dnow_ext);
> + debug_printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop);
> debug_printf("util_cpu_caps.has_altivec = %u\n",
> util_cpu_caps.has_altivec);
> debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);
> }
> diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h
> b/src/gallium/auxiliary/util/u_cpu_detect.h
> index cc3e0ce..5ccfc93 100644
> --- a/src/gallium/auxiliary/util/u_cpu_detect.h
> +++ b/src/gallium/auxiliary/util/u_cpu_detect.h
> @@ -64,9 +64,11 @@ struct util_cpu_caps {
> unsigned has_sse4_2:1;
> unsigned has_popcnt:1;
> unsigned has_avx:1;
> + unsigned has_avx2:1;
> unsigned has_f16c:1;
> unsigned has_3dnow:1;
> unsigned has_3dnow_ext:1;
> + unsigned has_xop:1;
> unsigned has_altivec:1;
> unsigned has_daz:1;
> };
> --
> 1.7.9.5
>
More information about the mesa-dev
mailing list