[Pixman] [PATCH] Workaround for _mm_empty() related miscompilation problem

Siarhei Siamashka siarhei.siamashka at gmail.com
Wed Feb 16 07:16:07 PST 2011


From: Siarhei Siamashka <siarhei.siamashka at nokia.com>

In some cases floating-point registers may get corrupted, and as a result
pixman tests fail. The most likely cause is some kind of miscompilation
of an inlined function containing MMX/SSE2 code:
    https://bugs.freedesktop.org/show_bug.cgi?id=33069

This patch just puts the MMX/SSE2 code into a normal (non-inlined) function,
which is now called from an additional small inline wrapper function. The
size of the compiled code also becomes smaller.
---
 pixman/pixman-sse2.c |   60 +++++++++++++++++++++++++++++++++++---------------
 1 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 2e135e2..c323d1f 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5794,14 +5794,12 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
 }
 
 /* A variant of 'core_combine_over_u_sse2' with minor tweaks */
-static force_inline void
+static void
 scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t*       pd,
                                              const uint32_t* ps,
                                              int32_t         w,
                                              pixman_fixed_t  vx,
-                                             pixman_fixed_t  unit_x,
-                                             pixman_fixed_t  max_vx,
-                                             pixman_bool_t   fully_transparent_src)
+                                             pixman_fixed_t  unit_x)
 {
     uint32_t s, d;
     const uint32_t* pm = NULL;
@@ -5810,9 +5808,6 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t*       pd,
     __m128i xmm_src_lo, xmm_src_hi;
     __m128i xmm_alpha_lo, xmm_alpha_hi;
 
-    if (fully_transparent_src)
-	return;
-
     /* Align dst on a 16-byte boundary */
     while (w && ((unsigned long)pd & 15))
     {
@@ -5888,32 +5883,45 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t*       pd,
     _mm_empty ();
 }
 
+static force_inline void
+scaled_nearest_scanline_sse2_8888_8888_OVER_wrapper (uint32_t*       pd,
+                                                     const uint32_t* ps,
+                                                     int32_t         w,
+                                                     pixman_fixed_t  vx,
+                                                     pixman_fixed_t  unit_x,
+                                                     pixman_fixed_t  max_vx,
+                                                     pixman_bool_t   zero_src)
+{
+    if (zero_src)
+	return;
+    scaled_nearest_scanline_sse2_8888_8888_OVER (pd, ps, w, vx, unit_x);
+}
+
+
 FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
-		       scaled_nearest_scanline_sse2_8888_8888_OVER,
+		       scaled_nearest_scanline_sse2_8888_8888_OVER_wrapper,
 		       uint32_t, uint32_t, COVER)
 FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
-		       scaled_nearest_scanline_sse2_8888_8888_OVER,
+		       scaled_nearest_scanline_sse2_8888_8888_OVER_wrapper,
 		       uint32_t, uint32_t, NONE)
 FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
-		       scaled_nearest_scanline_sse2_8888_8888_OVER,
+		       scaled_nearest_scanline_sse2_8888_8888_OVER_wrapper,
 		       uint32_t, uint32_t, PAD)
 
-static force_inline void
+static void
 scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
 					       uint32_t *       dst,
 					       const uint32_t * src,
 					       int32_t          w,
 					       pixman_fixed_t   vx,
-					       pixman_fixed_t   unit_x,
-					       pixman_fixed_t   max_vx,
-					       pixman_bool_t    zero_src)
+					       pixman_fixed_t   unit_x)
 {
     __m128i xmm_mask;
     __m128i xmm_src, xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_alpha_lo, xmm_alpha_hi;
 
-    if (zero_src || (*mask >> 24) == 0)
+    if ((*mask >> 24) == 0)
 	return;
 
     xmm_mask = create_mask_16_128 (*mask >> 24);
@@ -6001,14 +6009,30 @@ scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
     _mm_empty ();
 }
 
+static force_inline void
+scaled_nearest_scanline_sse2_8888_n_8888_OVER_wrapper (const uint32_t * mask,
+						       uint32_t *       dst,
+						       const uint32_t * src,
+						       int32_t          w,
+						       pixman_fixed_t   vx,
+						       pixman_fixed_t   unit_x,
+						       pixman_fixed_t   max_vx,
+						       pixman_bool_t    zero_src)
+{
+    if (zero_src)
+	return;
+    scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src, w,
+						   vx, unit_x);
+}
+
 FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
-			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+			      scaled_nearest_scanline_sse2_8888_n_8888_OVER_wrapper,
 			      uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
 FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
-			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+			      scaled_nearest_scanline_sse2_8888_n_8888_OVER_wrapper,
 			      uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
 FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
-			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+			      scaled_nearest_scanline_sse2_8888_n_8888_OVER_wrapper,
 			      uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
 
 static const pixman_fast_path_t sse2_fast_paths[] =
-- 
1.7.3.4



More information about the Pixman mailing list