[PATCH i-g-t] lib: Inline igt_x86_features() into ifunc resolvers
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Wed Mar 13 09:09:11 UTC 2024
On Mon, Mar 04, 2024 at 05:16:40PM -0500, Matt Turner wrote:
> Quoting https://sourceware.org/glibc/wiki/GNU_IFUNC
>
> > When LD_BIND_NOW=1 or -Wl,z,now is in effect symbols must be
> > immediately resolved at startup. In cases where an external function
> > call depends needs to be made that may fail if such a call has not
> > been initialized yet (PLT-based relocation which is processed later).
> > For example calling strlen in an IFUNC resolver built with -Wl,z,now
> > may lead to a segfault because the PLT is not yet resolved.
>
> We cannot rely on function calls through the PLT in ifunc resolvers as
> the PLT may not have been initialized yet.
>
> In practice, this causes crashes when igt is linked with -Wl,-z,now or
> when linked with the mold linker.
>
> To avoid this problem, we do two things:
> 1. move igt_x86_features() to igt_x86.h so its definition is
> available to compilation units that call the function.
> 2. mark the ifunc resolvers with __attribute__((flatten)) to ensure
> igt_x86_features() is inlined.
>
> Bug: https://bugs.gentoo.org/788625
> Bug: https://bugs.gentoo.org/925348
> Signed-off-by: Matt Turner <mattst88 at gmail.com>
Hi.
I started review of your code, but this this touches some linking intrisics
I'm not familiar with yet so I need more time to explore this. I hope
this is not a problem for you.
--
Zbigniew
> ---
> lib/igt_halffloat.c | 2 +
> lib/igt_x86.c | 116 +------------------------------------------
> lib/igt_x86.h | 117 +++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 119 insertions(+), 116 deletions(-)
>
> diff --git a/lib/igt_halffloat.c b/lib/igt_halffloat.c
> index 5dbe08e01..67d26c225 100644
> --- a/lib/igt_halffloat.c
> +++ b/lib/igt_halffloat.c
> @@ -194,6 +194,7 @@ static void half_to_float(const uint16_t *h, float *f, unsigned int num)
> f[i] = _half_to_float(h[i]);
> }
>
> +__attribute__((flatten))
> static void (*resolve_float_to_half(void))(const float *f, uint16_t *h, unsigned int num)
> {
> if (igt_x86_features() & F16C)
> @@ -205,6 +206,7 @@ static void (*resolve_float_to_half(void))(const float *f, uint16_t *h, unsigned
> void igt_float_to_half(const float *f, uint16_t *h, unsigned int num)
> __attribute__((ifunc("resolve_float_to_half")));
>
> +__attribute__((flatten))
> static void (*resolve_half_to_float(void))(const uint16_t *h, float *f, unsigned int num)
> {
> if (igt_x86_features() & F16C)
> diff --git a/lib/igt_x86.c b/lib/igt_x86.c
> index 8c102fd13..f103b9820 100644
> --- a/lib/igt_x86.c
> +++ b/lib/igt_x86.c
> @@ -27,14 +27,6 @@
>
> #include "config.h"
>
> -#ifdef HAVE_CPUID_H
> -#include <cpuid.h>
> -#else
> -#define __get_cpuid_max(x, y) 0
> -#define __cpuid(level, a, b, c, d) a = b = c = d = 0
> -#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0
> -#endif
> -
> #include "igt_x86.h"
> #include "igt_aux.h"
>
> @@ -49,114 +41,7 @@
> * @include: igt_x86.h
> */
>
> -#define BASIC_CPUID 0x0
> -#define EXTENDED_CPUID 0x80000000
> -
> -#ifndef bit_MMX
> -#define bit_MMX (1 << 23)
> -#endif
> -
> -#ifndef bit_SSE
> -#define bit_SSE (1 << 25)
> -#endif
> -
> -#ifndef bit_SSE2
> -#define bit_SSE2 (1 << 26)
> -#endif
> -
> -#ifndef bit_SSE3
> -#define bit_SSE3 (1 << 0)
> -#endif
> -
> -#ifndef bit_SSSE3
> -#define bit_SSSE3 (1 << 9)
> -#endif
> -
> -#ifndef bit_SSE4_1
> -#define bit_SSE4_1 (1 << 19)
> -#endif
> -
> -#ifndef bit_SSE4_2
> -#define bit_SSE4_2 (1 << 20)
> -#endif
> -
> -#ifndef bit_OSXSAVE
> -#define bit_OSXSAVE (1 << 27)
> -#endif
> -
> -#ifndef bit_AVX
> -#define bit_AVX (1 << 28)
> -#endif
> -
> -#ifndef bit_F16C
> -#define bit_F16C (1 << 29)
> -#endif
> -
> -#ifndef bit_AVX2
> -#define bit_AVX2 (1<<5)
> -#endif
> -
> -#define xgetbv(index,eax,edx) \
> - __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))
> -
> -#define has_YMM 0x1
> -
> #if defined(__x86_64__) || defined(__i386__)
> -unsigned igt_x86_features(void)
> -{
> - unsigned max = __get_cpuid_max(BASIC_CPUID, 0);
> - unsigned eax, ebx, ecx, edx;
> - unsigned features = 0;
> - unsigned extra = 0;
> -
> - if (max >= 1) {
> - __cpuid(1, eax, ebx, ecx, edx);
> -
> - if (ecx & bit_SSE3)
> - features |= SSE3;
> -
> - if (ecx & bit_SSSE3)
> - features |= SSSE3;
> -
> - if (ecx & bit_SSE4_1)
> - features |= SSE4_1;
> -
> - if (ecx & bit_SSE4_2)
> - features |= SSE4_2;
> -
> - if (ecx & bit_OSXSAVE) {
> - unsigned int bv_eax, bv_ecx;
> - xgetbv(0, bv_eax, bv_ecx);
> - if ((bv_eax & 6) == 6)
> - extra |= has_YMM;
> - }
> -
> - if ((extra & has_YMM) && (ecx & bit_AVX))
> - features |= AVX;
> -
> - if (edx & bit_MMX)
> - features |= MMX;
> -
> - if (edx & bit_SSE)
> - features |= SSE;
> -
> - if (edx & bit_SSE2)
> - features |= SSE2;
> -
> - if (ecx & bit_F16C)
> - features |= F16C;
> - }
> -
> - if (max >= 7) {
> - __cpuid_count(7, 0, eax, ebx, ecx, edx);
> -
> - if ((extra & has_YMM) && (ebx & bit_AVX2))
> - features |= AVX2;
> - }
> -
> - return features;
> -}
> -
> char *igt_x86_features_to_string(unsigned features, char *line)
> {
> char *ret = line;
> @@ -284,6 +169,7 @@ static void memcpy_from_wc(void *dst, const void *src, unsigned long len)
> memcpy(dst, src, len);
> }
>
> +__attribute__((flatten))
> static void (*resolve_memcpy_from_wc(void))(void *, const void *, unsigned long)
> {
> if (igt_x86_features() & SSE4_1)
> diff --git a/lib/igt_x86.h b/lib/igt_x86.h
> index c7b84dec2..1e0195c4b 100644
> --- a/lib/igt_x86.h
> +++ b/lib/igt_x86.h
> @@ -30,6 +30,14 @@
> #ifndef IGT_X86_H
> #define IGT_X86_H
>
> +#ifdef HAVE_CPUID_H
> +#include <cpuid.h>
> +#else
> +#define __get_cpuid_max(x, y) 0
> +#define __cpuid(level, a, b, c, d) a = b = c = d = 0
> +#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0
> +#endif
> +
> #define MMX 0x1
> #define SSE 0x2
> #define SSE2 0x4
> @@ -42,7 +50,114 @@
> #define F16C 0x200
>
> #if defined(__x86_64__) || defined(__i386__)
> -unsigned igt_x86_features(void);
> +
> +#define BASIC_CPUID 0x0
> +#define EXTENDED_CPUID 0x80000000
> +
> +#ifndef bit_MMX
> +#define bit_MMX (1 << 23)
> +#endif
> +
> +#ifndef bit_SSE
> +#define bit_SSE (1 << 25)
> +#endif
> +
> +#ifndef bit_SSE2
> +#define bit_SSE2 (1 << 26)
> +#endif
> +
> +#ifndef bit_SSE3
> +#define bit_SSE3 (1 << 0)
> +#endif
> +
> +#ifndef bit_SSSE3
> +#define bit_SSSE3 (1 << 9)
> +#endif
> +
> +#ifndef bit_SSE4_1
> +#define bit_SSE4_1 (1 << 19)
> +#endif
> +
> +#ifndef bit_SSE4_2
> +#define bit_SSE4_2 (1 << 20)
> +#endif
> +
> +#ifndef bit_OSXSAVE
> +#define bit_OSXSAVE (1 << 27)
> +#endif
> +
> +#ifndef bit_AVX
> +#define bit_AVX (1 << 28)
> +#endif
> +
> +#ifndef bit_F16C
> +#define bit_F16C (1 << 29)
> +#endif
> +
> +#ifndef bit_AVX2
> +#define bit_AVX2 (1<<5)
> +#endif
> +
> +#define xgetbv(index,eax,edx) \
> + __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))
> +
> +#define has_YMM 0x1
> +
> +static inline unsigned igt_x86_features(void)
> +{
> + unsigned max = __get_cpuid_max(BASIC_CPUID, 0);
> + unsigned eax, ebx, ecx, edx;
> + unsigned features = 0;
> + unsigned extra = 0;
> +
> + if (max >= 1) {
> + __cpuid(1, eax, ebx, ecx, edx);
> +
> + if (ecx & bit_SSE3)
> + features |= SSE3;
> +
> + if (ecx & bit_SSSE3)
> + features |= SSSE3;
> +
> + if (ecx & bit_SSE4_1)
> + features |= SSE4_1;
> +
> + if (ecx & bit_SSE4_2)
> + features |= SSE4_2;
> +
> + if (ecx & bit_OSXSAVE) {
> + unsigned int bv_eax, bv_ecx;
> + xgetbv(0, bv_eax, bv_ecx);
> + if ((bv_eax & 6) == 6)
> + extra |= has_YMM;
> + }
> +
> + if ((extra & has_YMM) && (ecx & bit_AVX))
> + features |= AVX;
> +
> + if (edx & bit_MMX)
> + features |= MMX;
> +
> + if (edx & bit_SSE)
> + features |= SSE;
> +
> + if (edx & bit_SSE2)
> + features |= SSE2;
> +
> + if (ecx & bit_F16C)
> + features |= F16C;
> + }
> +
> + if (max >= 7) {
> + __cpuid_count(7, 0, eax, ebx, ecx, edx);
> +
> + if ((extra & has_YMM) && (ebx & bit_AVX2))
> + features |= AVX2;
> + }
> +
> + return features;
> +}
> +
> char *igt_x86_features_to_string(unsigned features, char *line);
> #else
> static inline unsigned igt_x86_features(void)
> --
> 2.43.0
>
More information about the igt-dev
mailing list