[PATCH v2] lib: Inline igt_x86_features() into ifunc resolvers

Kamil Konieczny kamil.konieczny at linux.intel.com
Tue Mar 26 17:36:21 UTC 2024


Hi Matt,
On 2024-03-25 at 13:15:17 -0400, Matt Turner wrote:
> Please commit me :)
> 

I merged your commit with some whitespaces corrected.
Thank you for your contribution!

Regards,
Kamil

> On Thu, Mar 21, 2024, 2:42 PM Matt Turner <mattst88 at gmail.com> wrote:
> 
> > Quoting https://sourceware.org/glibc/wiki/GNU_IFUNC
> >
> > > When LD_BIND_NOW=1 or -Wl,z,now is in effect symbols must be
> > > immediately resolved at startup. In cases where an external function
> > > call depends needs to be made that may fail if such a call has not
> > > been initialized yet (PLT-based relocation which is processed later).
> > > For example calling strlen in an IFUNC resolver built with -Wl,z,now
> > > may lead to a segfault because the PLT is not yet resolved.
> >
> > We cannot rely on function calls through the PLT in ifunc resolvers as
> > the PLT may not have been initialized yet.
> >
> > In practice, this causes crashes when igt is linked with -Wl,-z,now or
> > when linked with the mold linker.
> >
> > To avoid this problem, we do two things:
> >     1. move igt_x86_features() to igt_x86.h so its definition is
> >        available to compilation units that call the function.
> >     2. mark the ifunc resolvers with __attribute__((flatten)) to ensure
> >        igt_x86_features() is inlined. Since this function is only called
> >        from a few places it does not significantly increase binary size
> >        to inline it.
> >
> > Linux distros (at least Fedora since v23, Gentoo/Hardened, soon standard
> > Gentoo) use `-Wl,-z now` to improve security. By binding upfront, the
> > loader can mark the GOT as read-only for a security enhancement. See
> > https://wiki.gentoo.org/wiki/Hardened/Toolchain for more details.
> >
> > Bug: https://bugs.gentoo.org/788625
> > Bug: https://bugs.gentoo.org/925348
> > Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> > Signed-off-by: Matt Turner <mattst88 at gmail.com>
> > ---
> >  lib/igt_halffloat.c |   8 +++
> >  lib/igt_x86.c       | 119 ++------------------------------------------
> >  lib/igt_x86.h       | 117 ++++++++++++++++++++++++++++++++++++++++++-
> >  3 files changed, 128 insertions(+), 116 deletions(-)
> >
> > diff --git a/lib/igt_halffloat.c b/lib/igt_halffloat.c
> > index 5dbe08e01..560952d20 100644
> > --- a/lib/igt_halffloat.c
> > +++ b/lib/igt_halffloat.c
> > @@ -194,6 +194,10 @@ static void half_to_float(const uint16_t *h, float
> > *f, unsigned int num)
> >                 f[i] = _half_to_float(h[i]);
> >  }
> >
> > +/* The PLT is not initialized when ifunc resolvers run, so all external
> > + * functions must be inlined with __attribute__((flatten)).
> > + */
> > +__attribute__((flatten))
> >  static void (*resolve_float_to_half(void))(const float *f, uint16_t *h,
> > unsigned int num)
> >  {
> >         if (igt_x86_features() & F16C)
> > @@ -205,6 +209,10 @@ static void (*resolve_float_to_half(void))(const
> > float *f, uint16_t *h, unsigned
> >  void igt_float_to_half(const float *f, uint16_t *h, unsigned int num)
> >         __attribute__((ifunc("resolve_float_to_half")));
> >
> > +/* The PLT is not initialized when ifunc resolvers run, so all external
> > + * functions must be inlined with __attribute__((flatten)).
> > + */
> > +__attribute__((flatten))
> >  static void (*resolve_half_to_float(void))(const uint16_t *h, float *f,
> > unsigned int num)
> >  {
> >         if (igt_x86_features() & F16C)
> > diff --git a/lib/igt_x86.c b/lib/igt_x86.c
> > index 8c102fd13..f60568be5 100644
> > --- a/lib/igt_x86.c
> > +++ b/lib/igt_x86.c
> > @@ -27,14 +27,6 @@
> >
> >  #include "config.h"
> >
> > -#ifdef HAVE_CPUID_H
> > -#include <cpuid.h>
> > -#else
> > -#define __get_cpuid_max(x, y) 0
> > -#define __cpuid(level, a, b, c, d) a = b = c = d = 0
> > -#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0
> > -#endif
> > -
> >  #include "igt_x86.h"
> >  #include "igt_aux.h"
> >
> > @@ -49,114 +41,7 @@
> >   * @include: igt_x86.h
> >   */
> >
> > -#define BASIC_CPUID 0x0
> > -#define EXTENDED_CPUID 0x80000000
> > -
> > -#ifndef bit_MMX
> > -#define bit_MMX                (1 << 23)
> > -#endif
> > -
> > -#ifndef bit_SSE
> > -#define bit_SSE                (1 << 25)
> > -#endif
> > -
> > -#ifndef bit_SSE2
> > -#define bit_SSE2       (1 << 26)
> > -#endif
> > -
> > -#ifndef bit_SSE3
> > -#define bit_SSE3       (1 << 0)
> > -#endif
> > -
> > -#ifndef bit_SSSE3
> > -#define bit_SSSE3      (1 << 9)
> > -#endif
> > -
> > -#ifndef bit_SSE4_1
> > -#define bit_SSE4_1     (1 << 19)
> > -#endif
> > -
> > -#ifndef bit_SSE4_2
> > -#define bit_SSE4_2     (1 << 20)
> > -#endif
> > -
> > -#ifndef bit_OSXSAVE
> > -#define bit_OSXSAVE    (1 << 27)
> > -#endif
> > -
> > -#ifndef bit_AVX
> > -#define bit_AVX                (1 << 28)
> > -#endif
> > -
> > -#ifndef bit_F16C
> > -#define bit_F16C       (1 << 29)
> > -#endif
> > -
> > -#ifndef bit_AVX2
> > -#define bit_AVX2       (1<<5)
> > -#endif
> > -
> > -#define xgetbv(index,eax,edx) \
> > -       __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))
> > -
> > -#define has_YMM 0x1
> > -
> >  #if defined(__x86_64__) || defined(__i386__)
> > -unsigned igt_x86_features(void)
> > -{
> > -       unsigned max = __get_cpuid_max(BASIC_CPUID, 0);
> > -       unsigned eax, ebx, ecx, edx;
> > -       unsigned features = 0;
> > -       unsigned extra = 0;
> > -
> > -       if (max >= 1) {
> > -               __cpuid(1, eax, ebx, ecx, edx);
> > -
> > -               if (ecx & bit_SSE3)
> > -                       features |= SSE3;
> > -
> > -               if (ecx & bit_SSSE3)
> > -                       features |= SSSE3;
> > -
> > -               if (ecx & bit_SSE4_1)
> > -                       features |= SSE4_1;
> > -
> > -               if (ecx & bit_SSE4_2)
> > -                       features |= SSE4_2;
> > -
> > -               if (ecx & bit_OSXSAVE) {
> > -                       unsigned int bv_eax, bv_ecx;
> > -                       xgetbv(0, bv_eax, bv_ecx);
> > -                       if ((bv_eax & 6) == 6)
> > -                               extra |= has_YMM;
> > -               }
> > -
> > -               if ((extra & has_YMM) && (ecx & bit_AVX))
> > -                       features |= AVX;
> > -
> > -               if (edx & bit_MMX)
> > -                       features |= MMX;
> > -
> > -               if (edx & bit_SSE)
> > -                       features |= SSE;
> > -
> > -               if (edx & bit_SSE2)
> > -                       features |= SSE2;
> > -
> > -               if (ecx & bit_F16C)
> > -                       features |= F16C;
> > -       }
> > -
> > -       if (max >= 7) {
> > -               __cpuid_count(7, 0, eax, ebx, ecx, edx);
> > -
> > -               if ((extra & has_YMM) && (ebx & bit_AVX2))
> > -                       features |= AVX2;
> > -       }
> > -
> > -       return features;
> > -}
> > -
> >  char *igt_x86_features_to_string(unsigned features, char *line)
> >  {
> >         char *ret = line;
> > @@ -284,6 +169,10 @@ static void memcpy_from_wc(void *dst, const void
> > *src, unsigned long len)
> >         memcpy(dst, src, len);
> >  }
> >
> > +/* The PLT is not initialized when ifunc resolvers run, so all external
> > + * functions must be inlined with __attribute__((flatten)).
> > + */
> > +__attribute__((flatten))
> >  static void (*resolve_memcpy_from_wc(void))(void *, const void *,
> > unsigned long)
> >  {
> >         if (igt_x86_features() & SSE4_1)
> > diff --git a/lib/igt_x86.h b/lib/igt_x86.h
> > index c7b84dec2..1e0195c4b 100644
> > --- a/lib/igt_x86.h
> > +++ b/lib/igt_x86.h
> > @@ -30,6 +30,14 @@
> >  #ifndef IGT_X86_H
> >  #define IGT_X86_H
> >
> > +#ifdef HAVE_CPUID_H
> > +#include <cpuid.h>
> > +#else
> > +#define __get_cpuid_max(x, y) 0
> > +#define __cpuid(level, a, b, c, d) a = b = c = d = 0
> > +#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0
> > +#endif
> > +
> >  #define MMX    0x1
> >  #define SSE    0x2
> >  #define SSE2   0x4
> > @@ -42,7 +50,114 @@
> >  #define F16C   0x200
> >
> >  #if defined(__x86_64__) || defined(__i386__)
> > -unsigned igt_x86_features(void);
> > +
> > +#define BASIC_CPUID 0x0
> > +#define EXTENDED_CPUID 0x80000000
> > +
> > +#ifndef bit_MMX
> > +#define bit_MMX                (1 << 23)
> > +#endif
> > +
> > +#ifndef bit_SSE
> > +#define bit_SSE                (1 << 25)
> > +#endif
> > +
> > +#ifndef bit_SSE2
> > +#define bit_SSE2       (1 << 26)
> > +#endif
> > +
> > +#ifndef bit_SSE3
> > +#define bit_SSE3       (1 << 0)
> > +#endif
> > +
> > +#ifndef bit_SSSE3
> > +#define bit_SSSE3      (1 << 9)
> > +#endif
> > +
> > +#ifndef bit_SSE4_1
> > +#define bit_SSE4_1     (1 << 19)
> > +#endif
> > +
> > +#ifndef bit_SSE4_2
> > +#define bit_SSE4_2     (1 << 20)
> > +#endif
> > +
> > +#ifndef bit_OSXSAVE
> > +#define bit_OSXSAVE    (1 << 27)
> > +#endif
> > +
> > +#ifndef bit_AVX
> > +#define bit_AVX                (1 << 28)
> > +#endif
> > +
> > +#ifndef bit_F16C
> > +#define bit_F16C       (1 << 29)
> > +#endif
> > +
> > +#ifndef bit_AVX2
> > +#define bit_AVX2       (1<<5)
> > +#endif
> > +
> > +#define xgetbv(index,eax,edx) \
> > +       __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))
> > +
> > +#define has_YMM 0x1
> > +
> > +static inline unsigned igt_x86_features(void)
> > +{
> > +       unsigned max = __get_cpuid_max(BASIC_CPUID, 0);
> > +       unsigned eax, ebx, ecx, edx;
> > +       unsigned features = 0;
> > +       unsigned extra = 0;
> > +
> > +       if (max >= 1) {
> > +               __cpuid(1, eax, ebx, ecx, edx);
> > +
> > +               if (ecx & bit_SSE3)
> > +                       features |= SSE3;
> > +
> > +               if (ecx & bit_SSSE3)
> > +                       features |= SSSE3;
> > +
> > +               if (ecx & bit_SSE4_1)
> > +                       features |= SSE4_1;
> > +
> > +               if (ecx & bit_SSE4_2)
> > +                       features |= SSE4_2;
> > +
> > +               if (ecx & bit_OSXSAVE) {
> > +                       unsigned int bv_eax, bv_ecx;
> > +                       xgetbv(0, bv_eax, bv_ecx);
> > +                       if ((bv_eax & 6) == 6)
> > +                               extra |= has_YMM;
> > +               }
> > +
> > +               if ((extra & has_YMM) && (ecx & bit_AVX))
> > +                       features |= AVX;
> > +
> > +               if (edx & bit_MMX)
> > +                       features |= MMX;
> > +
> > +               if (edx & bit_SSE)
> > +                       features |= SSE;
> > +
> > +               if (edx & bit_SSE2)
> > +                       features |= SSE2;
> > +
> > +               if (ecx & bit_F16C)
> > +                       features |= F16C;
> > +       }
> > +
> > +       if (max >= 7) {
> > +               __cpuid_count(7, 0, eax, ebx, ecx, edx);
> > +
> > +               if ((extra & has_YMM) && (ebx & bit_AVX2))
> > +                       features |= AVX2;
> > +       }
> > +
> > +       return features;
> > +}
> > +
> >  char *igt_x86_features_to_string(unsigned features, char *line);
> >  #else
> >  static inline unsigned igt_x86_features(void)
> > --
> > 2.43.2
> >
> >


More information about the igt-dev mailing list