[PATCH i-g-t] lib: Inline igt_x86_features() into ifunc resolvers

Matt Turner mattst88 at gmail.com
Tue Mar 12 15:24:39 UTC 2024


ping

On Mon, Mar 4, 2024 at 5:18 PM Matt Turner <mattst88 at gmail.com> wrote:
>
> Quoting https://sourceware.org/glibc/wiki/GNU_IFUNC
>
> > When LD_BIND_NOW=1 or -Wl,z,now is in effect symbols must be
> > immediately resolved at startup. In cases where an external function
> > call depends needs to be made that may fail if such a call has not
> > been initialized yet (PLT-based relocation which is processed later).
> > For example calling strlen in an IFUNC resolver built with -Wl,z,now
> > may lead to a segfault because the PLT is not yet resolved.
>
> We cannot rely on function calls through the PLT in ifunc resolvers as
> the PLT may not have been initialized yet.
>
> In practice, this causes crashes when igt is linked with -Wl,-z,now or
> when linked with the mold linker.
>
> To avoid this problem, we do two things:
>     1. move igt_x86_features() to igt_x86.h so its definition is
>        available to compilation units that call the function.
>     2. mark the ifunc resolvers with __attribute__((flatten)) to ensure
>        igt_x86_features() is inlined.
>
> Bug: https://bugs.gentoo.org/788625
> Bug: https://bugs.gentoo.org/925348
> Signed-off-by: Matt Turner <mattst88 at gmail.com>
> ---
>  lib/igt_halffloat.c |   2 +
>  lib/igt_x86.c       | 116 +------------------------------------------
>  lib/igt_x86.h       | 117 +++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 119 insertions(+), 116 deletions(-)
>
> diff --git a/lib/igt_halffloat.c b/lib/igt_halffloat.c
> index 5dbe08e01..67d26c225 100644
> --- a/lib/igt_halffloat.c
> +++ b/lib/igt_halffloat.c
> @@ -194,6 +194,7 @@ static void half_to_float(const uint16_t *h, float *f, unsigned int num)
>                 f[i] = _half_to_float(h[i]);
>  }
>
> +__attribute__((flatten))
>  static void (*resolve_float_to_half(void))(const float *f, uint16_t *h, unsigned int num)
>  {
>         if (igt_x86_features() & F16C)
> @@ -205,6 +206,7 @@ static void (*resolve_float_to_half(void))(const float *f, uint16_t *h, unsigned
>  void igt_float_to_half(const float *f, uint16_t *h, unsigned int num)
>         __attribute__((ifunc("resolve_float_to_half")));
>
> +__attribute__((flatten))
>  static void (*resolve_half_to_float(void))(const uint16_t *h, float *f, unsigned int num)
>  {
>         if (igt_x86_features() & F16C)
> diff --git a/lib/igt_x86.c b/lib/igt_x86.c
> index 8c102fd13..f103b9820 100644
> --- a/lib/igt_x86.c
> +++ b/lib/igt_x86.c
> @@ -27,14 +27,6 @@
>
>  #include "config.h"
>
> -#ifdef HAVE_CPUID_H
> -#include <cpuid.h>
> -#else
> -#define __get_cpuid_max(x, y) 0
> -#define __cpuid(level, a, b, c, d) a = b = c = d = 0
> -#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0
> -#endif
> -
>  #include "igt_x86.h"
>  #include "igt_aux.h"
>
> @@ -49,114 +41,7 @@
>   * @include: igt_x86.h
>   */
>
> -#define BASIC_CPUID 0x0
> -#define EXTENDED_CPUID 0x80000000
> -
> -#ifndef bit_MMX
> -#define bit_MMX                (1 << 23)
> -#endif
> -
> -#ifndef bit_SSE
> -#define bit_SSE                (1 << 25)
> -#endif
> -
> -#ifndef bit_SSE2
> -#define bit_SSE2       (1 << 26)
> -#endif
> -
> -#ifndef bit_SSE3
> -#define bit_SSE3       (1 << 0)
> -#endif
> -
> -#ifndef bit_SSSE3
> -#define bit_SSSE3      (1 << 9)
> -#endif
> -
> -#ifndef bit_SSE4_1
> -#define bit_SSE4_1     (1 << 19)
> -#endif
> -
> -#ifndef bit_SSE4_2
> -#define bit_SSE4_2     (1 << 20)
> -#endif
> -
> -#ifndef bit_OSXSAVE
> -#define bit_OSXSAVE    (1 << 27)
> -#endif
> -
> -#ifndef bit_AVX
> -#define bit_AVX                (1 << 28)
> -#endif
> -
> -#ifndef bit_F16C
> -#define bit_F16C       (1 << 29)
> -#endif
> -
> -#ifndef bit_AVX2
> -#define bit_AVX2       (1<<5)
> -#endif
> -
> -#define xgetbv(index,eax,edx) \
> -       __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))
> -
> -#define has_YMM 0x1
> -
>  #if defined(__x86_64__) || defined(__i386__)
> -unsigned igt_x86_features(void)
> -{
> -       unsigned max = __get_cpuid_max(BASIC_CPUID, 0);
> -       unsigned eax, ebx, ecx, edx;
> -       unsigned features = 0;
> -       unsigned extra = 0;
> -
> -       if (max >= 1) {
> -               __cpuid(1, eax, ebx, ecx, edx);
> -
> -               if (ecx & bit_SSE3)
> -                       features |= SSE3;
> -
> -               if (ecx & bit_SSSE3)
> -                       features |= SSSE3;
> -
> -               if (ecx & bit_SSE4_1)
> -                       features |= SSE4_1;
> -
> -               if (ecx & bit_SSE4_2)
> -                       features |= SSE4_2;
> -
> -               if (ecx & bit_OSXSAVE) {
> -                       unsigned int bv_eax, bv_ecx;
> -                       xgetbv(0, bv_eax, bv_ecx);
> -                       if ((bv_eax & 6) == 6)
> -                               extra |= has_YMM;
> -               }
> -
> -               if ((extra & has_YMM) && (ecx & bit_AVX))
> -                       features |= AVX;
> -
> -               if (edx & bit_MMX)
> -                       features |= MMX;
> -
> -               if (edx & bit_SSE)
> -                       features |= SSE;
> -
> -               if (edx & bit_SSE2)
> -                       features |= SSE2;
> -
> -               if (ecx & bit_F16C)
> -                       features |= F16C;
> -       }
> -
> -       if (max >= 7) {
> -               __cpuid_count(7, 0, eax, ebx, ecx, edx);
> -
> -               if ((extra & has_YMM) && (ebx & bit_AVX2))
> -                       features |= AVX2;
> -       }
> -
> -       return features;
> -}
> -
>  char *igt_x86_features_to_string(unsigned features, char *line)
>  {
>         char *ret = line;
> @@ -284,6 +169,7 @@ static void memcpy_from_wc(void *dst, const void *src, unsigned long len)
>         memcpy(dst, src, len);
>  }
>
> +__attribute__((flatten))
>  static void (*resolve_memcpy_from_wc(void))(void *, const void *, unsigned long)
>  {
>         if (igt_x86_features() & SSE4_1)
> diff --git a/lib/igt_x86.h b/lib/igt_x86.h
> index c7b84dec2..1e0195c4b 100644
> --- a/lib/igt_x86.h
> +++ b/lib/igt_x86.h
> @@ -30,6 +30,14 @@
>  #ifndef IGT_X86_H
>  #define IGT_X86_H
>
> +#ifdef HAVE_CPUID_H
> +#include <cpuid.h>
> +#else
> +#define __get_cpuid_max(x, y) 0
> +#define __cpuid(level, a, b, c, d) a = b = c = d = 0
> +#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0
> +#endif
> +
>  #define MMX    0x1
>  #define SSE    0x2
>  #define SSE2   0x4
> @@ -42,7 +50,114 @@
>  #define F16C   0x200
>
>  #if defined(__x86_64__) || defined(__i386__)
> -unsigned igt_x86_features(void);
> +
> +#define BASIC_CPUID 0x0
> +#define EXTENDED_CPUID 0x80000000
> +
> +#ifndef bit_MMX
> +#define bit_MMX                (1 << 23)
> +#endif
> +
> +#ifndef bit_SSE
> +#define bit_SSE                (1 << 25)
> +#endif
> +
> +#ifndef bit_SSE2
> +#define bit_SSE2       (1 << 26)
> +#endif
> +
> +#ifndef bit_SSE3
> +#define bit_SSE3       (1 << 0)
> +#endif
> +
> +#ifndef bit_SSSE3
> +#define bit_SSSE3      (1 << 9)
> +#endif
> +
> +#ifndef bit_SSE4_1
> +#define bit_SSE4_1     (1 << 19)
> +#endif
> +
> +#ifndef bit_SSE4_2
> +#define bit_SSE4_2     (1 << 20)
> +#endif
> +
> +#ifndef bit_OSXSAVE
> +#define bit_OSXSAVE    (1 << 27)
> +#endif
> +
> +#ifndef bit_AVX
> +#define bit_AVX                (1 << 28)
> +#endif
> +
> +#ifndef bit_F16C
> +#define bit_F16C       (1 << 29)
> +#endif
> +
> +#ifndef bit_AVX2
> +#define bit_AVX2       (1<<5)
> +#endif
> +
> +#define xgetbv(index,eax,edx) \
> +       __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))
> +
> +#define has_YMM 0x1
> +
> +static inline unsigned igt_x86_features(void)
> +{
> +       unsigned max = __get_cpuid_max(BASIC_CPUID, 0);
> +       unsigned eax, ebx, ecx, edx;
> +       unsigned features = 0;
> +       unsigned extra = 0;
> +
> +       if (max >= 1) {
> +               __cpuid(1, eax, ebx, ecx, edx);
> +
> +               if (ecx & bit_SSE3)
> +                       features |= SSE3;
> +
> +               if (ecx & bit_SSSE3)
> +                       features |= SSSE3;
> +
> +               if (ecx & bit_SSE4_1)
> +                       features |= SSE4_1;
> +
> +               if (ecx & bit_SSE4_2)
> +                       features |= SSE4_2;
> +
> +               if (ecx & bit_OSXSAVE) {
> +                       unsigned int bv_eax, bv_ecx;
> +                       xgetbv(0, bv_eax, bv_ecx);
> +                       if ((bv_eax & 6) == 6)
> +                               extra |= has_YMM;
> +               }
> +
> +               if ((extra & has_YMM) && (ecx & bit_AVX))
> +                       features |= AVX;
> +
> +               if (edx & bit_MMX)
> +                       features |= MMX;
> +
> +               if (edx & bit_SSE)
> +                       features |= SSE;
> +
> +               if (edx & bit_SSE2)
> +                       features |= SSE2;
> +
> +               if (ecx & bit_F16C)
> +                       features |= F16C;
> +       }
> +
> +       if (max >= 7) {
> +               __cpuid_count(7, 0, eax, ebx, ecx, edx);
> +
> +               if ((extra & has_YMM) && (ebx & bit_AVX2))
> +                       features |= AVX2;
> +       }
> +
> +       return features;
> +}
> +
>  char *igt_x86_features_to_string(unsigned features, char *line);
>  #else
>  static inline unsigned igt_x86_features(void)
> --
> 2.43.0
>


More information about the igt-dev mailing list