[Pixman] [PATCH] Workaround for _mm_empty() related miscompilation problem
Siarhei Siamashka
siarhei.siamashka at gmail.com
Wed Feb 16 07:16:07 PST 2011
From: Siarhei Siamashka <siarhei.siamashka at nokia.com>
In some cases floating point registers may get corrupted, and as a result
pixman tests fail. The most likely cause is some kind of miscompilation
of an inlined function containing MMX/SSE2 code:
https://bugs.freedesktop.org/show_bug.cgi?id=33069
This patch just puts the MMX/SSE2 code into a normal (non-inlined) function,
which is now called from an additional small inline wrapper function. The
size of the compiled code also becomes smaller.
---
pixman/pixman-sse2.c | 60 +++++++++++++++++++++++++++++++++++---------------
1 files changed, 42 insertions(+), 18 deletions(-)
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 2e135e2..c323d1f 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5794,14 +5794,12 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
}
/* A variant of 'core_combine_over_u_sse2' with minor tweaks */
-static force_inline void
+static void
scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
const uint32_t* ps,
int32_t w,
pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx,
- pixman_bool_t fully_transparent_src)
+ pixman_fixed_t unit_x)
{
uint32_t s, d;
const uint32_t* pm = NULL;
@@ -5810,9 +5808,6 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
- if (fully_transparent_src)
- return;
-
/* Align dst on a 16-byte boundary */
while (w && ((unsigned long)pd & 15))
{
@@ -5888,32 +5883,45 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
_mm_empty ();
}
+static force_inline void
+scaled_nearest_scanline_sse2_8888_8888_OVER_wrapper (uint32_t* pd,
+ const uint32_t* ps,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src)
+{
+ if (zero_src)
+ return;
+ scaled_nearest_scanline_sse2_8888_8888_OVER (pd, ps, w, vx, unit_x);
+}
+
+
FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
- scaled_nearest_scanline_sse2_8888_8888_OVER,
+ scaled_nearest_scanline_sse2_8888_8888_OVER_wrapper,
uint32_t, uint32_t, COVER)
FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
- scaled_nearest_scanline_sse2_8888_8888_OVER,
+ scaled_nearest_scanline_sse2_8888_8888_OVER_wrapper,
uint32_t, uint32_t, NONE)
FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
- scaled_nearest_scanline_sse2_8888_8888_OVER,
+ scaled_nearest_scanline_sse2_8888_8888_OVER_wrapper,
uint32_t, uint32_t, PAD)
-static force_inline void
+static void
scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
uint32_t * dst,
const uint32_t * src,
int32_t w,
pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx,
- pixman_bool_t zero_src)
+ pixman_fixed_t unit_x)
{
__m128i xmm_mask;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
- if (zero_src || (*mask >> 24) == 0)
+ if ((*mask >> 24) == 0)
return;
xmm_mask = create_mask_16_128 (*mask >> 24);
@@ -6001,14 +6009,30 @@ scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
_mm_empty ();
}
+static force_inline void
+scaled_nearest_scanline_sse2_8888_n_8888_OVER_wrapper (const uint32_t * mask,
+ uint32_t * dst,
+ const uint32_t * src,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src)
+{
+ if (zero_src)
+ return;
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src, w,
+ vx, unit_x);
+}
+
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
- scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER_wrapper,
uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
- scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER_wrapper,
uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
- scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER_wrapper,
uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
static const pixman_fast_path_t sse2_fast_paths[] =
--
1.7.3.4
More information about the Pixman mailing list