<div dir="auto">Please commit me :)</div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Thu, Mar 21, 2024, 2:42 PM Matt Turner <<a href="mailto:mattst88@gmail.com">mattst88@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Quoting <a href="https://sourceware.org/glibc/wiki/GNU_IFUNC" rel="noreferrer noreferrer" target="_blank">https://sourceware.org/glibc/wiki/GNU_IFUNC</a><br>
<br>
> When LD_BIND_NOW=1 or -Wl,z,now is in effect symbols must be<br>
> immediately resolved at startup. In cases where an external function<br>
> call depends needs to be made that may fail if such a call has not<br>
> been initialized yet (PLT-based relocation which is processed later).<br>
> For example calling strlen in an IFUNC resolver built with -Wl,z,now<br>
> may lead to a segfault because the PLT is not yet resolved.<br>
<br>
We cannot rely on function calls through the PLT in ifunc resolvers as<br>
the PLT may not have been initialized yet.<br>
<br>
In practice, this causes crashes when igt is linked with -Wl,-z,now or<br>
when linked with the mold linker.<br>
<br>
To avoid this problem, we do two things:<br>
1. move igt_x86_features() to igt_x86.h so its definition is<br>
available to compilation units that call the function.<br>
2. mark the ifunc resolvers with __attribute__((flatten)) to ensure<br>
igt_x86_features() is inlined. Since this function is only called<br>
from a few places it does not significantly increase binary size<br>
to inline it.<br>
<br>
Linux distros (at least Fedora since v23, Gentoo/Hardened, soon standard<br>
Gentoo) use `-Wl,-z now` to improve security. By binding upfront, the<br>
loader can mark the GOT as read-only for a security enhancement. See<br>
<a href="https://wiki.gentoo.org/wiki/Hardened/Toolchain" rel="noreferrer noreferrer" target="_blank">https://wiki.gentoo.org/wiki/Hardened/Toolchain</a> for more details.<br>
<br>
Bug: <a href="https://bugs.gentoo.org/788625" rel="noreferrer noreferrer" target="_blank">https://bugs.gentoo.org/788625</a><br>
Bug: <a href="https://bugs.gentoo.org/925348" rel="noreferrer noreferrer" target="_blank">https://bugs.gentoo.org/925348</a><br>
Reviewed-by: Zbigniew Kempczyński <<a href="mailto:zbigniew.kempczynski@intel.com" target="_blank" rel="noreferrer">zbigniew.kempczynski@intel.com</a>><br>
Signed-off-by: Matt Turner <<a href="mailto:mattst88@gmail.com" target="_blank" rel="noreferrer">mattst88@gmail.com</a>><br>
---<br>
lib/igt_halffloat.c | 8 +++<br>
lib/igt_x86.c | 119 ++------------------------------------------<br>
lib/igt_x86.h | 117 ++++++++++++++++++++++++++++++++++++++++++-<br>
3 files changed, 128 insertions(+), 116 deletions(-)<br>
<br>
diff --git a/lib/igt_halffloat.c b/lib/igt_halffloat.c<br>
index 5dbe08e01..560952d20 100644<br>
--- a/lib/igt_halffloat.c<br>
+++ b/lib/igt_halffloat.c<br>
@@ -194,6 +194,10 @@ static void half_to_float(const uint16_t *h, float *f, unsigned int num)<br>
f[i] = _half_to_float(h[i]);<br>
}<br>
<br>
+/* The PLT is not initialized when ifunc resolvers run, so all external<br>
+ * functions must be inlined with __attribute__((flatten)).<br>
+ */<br>
+__attribute__((flatten))<br>
static void (*resolve_float_to_half(void))(const float *f, uint16_t *h, unsigned int num)<br>
{<br>
if (igt_x86_features() & F16C)<br>
@@ -205,6 +209,10 @@ static void (*resolve_float_to_half(void))(const float *f, uint16_t *h, unsigned<br>
void igt_float_to_half(const float *f, uint16_t *h, unsigned int num)<br>
__attribute__((ifunc("resolve_float_to_half")));<br>
<br>
+/* The PLT is not initialized when ifunc resolvers run, so all external<br>
+ * functions must be inlined with __attribute__((flatten)).<br>
+ */<br>
+__attribute__((flatten))<br>
static void (*resolve_half_to_float(void))(const uint16_t *h, float *f, unsigned int num)<br>
{<br>
if (igt_x86_features() & F16C)<br>
diff --git a/lib/igt_x86.c b/lib/igt_x86.c<br>
index 8c102fd13..f60568be5 100644<br>
--- a/lib/igt_x86.c<br>
+++ b/lib/igt_x86.c<br>
@@ -27,14 +27,6 @@<br>
<br>
#include "config.h"<br>
<br>
-#ifdef HAVE_CPUID_H<br>
-#include <cpuid.h><br>
-#else<br>
-#define __get_cpuid_max(x, y) 0<br>
-#define __cpuid(level, a, b, c, d) a = b = c = d = 0<br>
-#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0<br>
-#endif<br>
-<br>
#include "igt_x86.h"<br>
#include "igt_aux.h"<br>
<br>
@@ -49,114 +41,7 @@<br>
* @include: igt_x86.h<br>
*/<br>
<br>
-#define BASIC_CPUID 0x0<br>
-#define EXTENDED_CPUID 0x80000000<br>
-<br>
-#ifndef bit_MMX<br>
-#define bit_MMX (1 << 23)<br>
-#endif<br>
-<br>
-#ifndef bit_SSE<br>
-#define bit_SSE (1 << 25)<br>
-#endif<br>
-<br>
-#ifndef bit_SSE2<br>
-#define bit_SSE2 (1 << 26)<br>
-#endif<br>
-<br>
-#ifndef bit_SSE3<br>
-#define bit_SSE3 (1 << 0)<br>
-#endif<br>
-<br>
-#ifndef bit_SSSE3<br>
-#define bit_SSSE3 (1 << 9)<br>
-#endif<br>
-<br>
-#ifndef bit_SSE4_1<br>
-#define bit_SSE4_1 (1 << 19)<br>
-#endif<br>
-<br>
-#ifndef bit_SSE4_2<br>
-#define bit_SSE4_2 (1 << 20)<br>
-#endif<br>
-<br>
-#ifndef bit_OSXSAVE<br>
-#define bit_OSXSAVE (1 << 27)<br>
-#endif<br>
-<br>
-#ifndef bit_AVX<br>
-#define bit_AVX (1 << 28)<br>
-#endif<br>
-<br>
-#ifndef bit_F16C<br>
-#define bit_F16C (1 << 29)<br>
-#endif<br>
-<br>
-#ifndef bit_AVX2<br>
-#define bit_AVX2 (1<<5)<br>
-#endif<br>
-<br>
-#define xgetbv(index,eax,edx) \<br>
- __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))<br>
-<br>
-#define has_YMM 0x1<br>
-<br>
#if defined(__x86_64__) || defined(__i386__)<br>
-unsigned igt_x86_features(void)<br>
-{<br>
- unsigned max = __get_cpuid_max(BASIC_CPUID, 0);<br>
- unsigned eax, ebx, ecx, edx;<br>
- unsigned features = 0;<br>
- unsigned extra = 0;<br>
-<br>
- if (max >= 1) {<br>
- __cpuid(1, eax, ebx, ecx, edx);<br>
-<br>
- if (ecx & bit_SSE3)<br>
- features |= SSE3;<br>
-<br>
- if (ecx & bit_SSSE3)<br>
- features |= SSSE3;<br>
-<br>
- if (ecx & bit_SSE4_1)<br>
- features |= SSE4_1;<br>
-<br>
- if (ecx & bit_SSE4_2)<br>
- features |= SSE4_2;<br>
-<br>
- if (ecx & bit_OSXSAVE) {<br>
- unsigned int bv_eax, bv_ecx;<br>
- xgetbv(0, bv_eax, bv_ecx);<br>
- if ((bv_eax & 6) == 6)<br>
- extra |= has_YMM;<br>
- }<br>
-<br>
- if ((extra & has_YMM) && (ecx & bit_AVX))<br>
- features |= AVX;<br>
-<br>
- if (edx & bit_MMX)<br>
- features |= MMX;<br>
-<br>
- if (edx & bit_SSE)<br>
- features |= SSE;<br>
-<br>
- if (edx & bit_SSE2)<br>
- features |= SSE2;<br>
-<br>
- if (ecx & bit_F16C)<br>
- features |= F16C;<br>
- }<br>
-<br>
- if (max >= 7) {<br>
- __cpuid_count(7, 0, eax, ebx, ecx, edx);<br>
-<br>
- if ((extra & has_YMM) && (ebx & bit_AVX2))<br>
- features |= AVX2;<br>
- }<br>
-<br>
- return features;<br>
-}<br>
-<br>
char *igt_x86_features_to_string(unsigned features, char *line)<br>
{<br>
char *ret = line;<br>
@@ -284,6 +169,10 @@ static void memcpy_from_wc(void *dst, const void *src, unsigned long len)<br>
memcpy(dst, src, len);<br>
}<br>
<br>
+/* The PLT is not initialized when ifunc resolvers run, so all external<br>
+ * functions must be inlined with __attribute__((flatten)). <br>
+ */<br>
+__attribute__((flatten))<br>
static void (*resolve_memcpy_from_wc(void))(void *, const void *, unsigned long)<br>
{<br>
if (igt_x86_features() & SSE4_1)<br>
diff --git a/lib/igt_x86.h b/lib/igt_x86.h<br>
index c7b84dec2..1e0195c4b 100644<br>
--- a/lib/igt_x86.h<br>
+++ b/lib/igt_x86.h<br>
@@ -30,6 +30,14 @@<br>
#ifndef IGT_X86_H<br>
#define IGT_X86_H<br>
<br>
+#ifdef HAVE_CPUID_H<br>
+#include <cpuid.h><br>
+#else<br>
+#define __get_cpuid_max(x, y) 0<br>
+#define __cpuid(level, a, b, c, d) a = b = c = d = 0<br>
+#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0<br>
+#endif<br>
+<br>
#define MMX 0x1<br>
#define SSE 0x2<br>
#define SSE2 0x4<br>
@@ -42,7 +50,114 @@<br>
#define F16C 0x200<br>
<br>
#if defined(__x86_64__) || defined(__i386__)<br>
-unsigned igt_x86_features(void);<br>
+<br>
+#define BASIC_CPUID 0x0<br>
+#define EXTENDED_CPUID 0x80000000<br>
+<br>
+#ifndef bit_MMX<br>
+#define bit_MMX (1 << 23)<br>
+#endif<br>
+<br>
+#ifndef bit_SSE<br>
+#define bit_SSE (1 << 25)<br>
+#endif<br>
+<br>
+#ifndef bit_SSE2<br>
+#define bit_SSE2 (1 << 26)<br>
+#endif<br>
+<br>
+#ifndef bit_SSE3<br>
+#define bit_SSE3 (1 << 0)<br>
+#endif<br>
+<br>
+#ifndef bit_SSSE3<br>
+#define bit_SSSE3 (1 << 9)<br>
+#endif<br>
+<br>
+#ifndef bit_SSE4_1<br>
+#define bit_SSE4_1 (1 << 19)<br>
+#endif<br>
+<br>
+#ifndef bit_SSE4_2<br>
+#define bit_SSE4_2 (1 << 20)<br>
+#endif<br>
+<br>
+#ifndef bit_OSXSAVE<br>
+#define bit_OSXSAVE (1 << 27)<br>
+#endif<br>
+<br>
+#ifndef bit_AVX<br>
+#define bit_AVX (1 << 28)<br>
+#endif<br>
+<br>
+#ifndef bit_F16C<br>
+#define bit_F16C (1 << 29)<br>
+#endif<br>
+<br>
+#ifndef bit_AVX2<br>
+#define bit_AVX2 (1<<5)<br>
+#endif<br>
+<br>
+#define xgetbv(index,eax,edx) \<br>
+ __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))<br>
+<br>
+#define has_YMM 0x1<br>
+<br>
+static inline unsigned igt_x86_features(void)<br>
+{<br>
+ unsigned max = __get_cpuid_max(BASIC_CPUID, 0);<br>
+ unsigned eax, ebx, ecx, edx;<br>
+ unsigned features = 0;<br>
+ unsigned extra = 0;<br>
+<br>
+ if (max >= 1) {<br>
+ __cpuid(1, eax, ebx, ecx, edx);<br>
+<br>
+ if (ecx & bit_SSE3)<br>
+ features |= SSE3;<br>
+<br>
+ if (ecx & bit_SSSE3)<br>
+ features |= SSSE3;<br>
+<br>
+ if (ecx & bit_SSE4_1)<br>
+ features |= SSE4_1;<br>
+<br>
+ if (ecx & bit_SSE4_2)<br>
+ features |= SSE4_2;<br>
+<br>
+ if (ecx & bit_OSXSAVE) {<br>
+ unsigned int bv_eax, bv_ecx;<br>
+ xgetbv(0, bv_eax, bv_ecx);<br>
+ if ((bv_eax & 6) == 6)<br>
+ extra |= has_YMM;<br>
+ }<br>
+<br>
+ if ((extra & has_YMM) && (ecx & bit_AVX))<br>
+ features |= AVX;<br>
+<br>
+ if (edx & bit_MMX)<br>
+ features |= MMX;<br>
+<br>
+ if (edx & bit_SSE)<br>
+ features |= SSE;<br>
+<br>
+ if (edx & bit_SSE2)<br>
+ features |= SSE2;<br>
+<br>
+ if (ecx & bit_F16C)<br>
+ features |= F16C;<br>
+ }<br>
+<br>
+ if (max >= 7) {<br>
+ __cpuid_count(7, 0, eax, ebx, ecx, edx);<br>
+<br>
+ if ((extra & has_YMM) && (ebx & bit_AVX2))<br>
+ features |= AVX2;<br>
+ }<br>
+<br>
+ return features;<br>
+}<br>
+<br>
char *igt_x86_features_to_string(unsigned features, char *line);<br>
#else<br>
static inline unsigned igt_x86_features(void)<br>
-- <br>
2.43.2<br>
<br>
</blockquote></div>