[Pixman] [PATCH/RFC] Use OpenMP for bilinear scaled fast paths

Siarhei Siamashka siarhei.siamashka at gmail.com
Sun Jun 24 16:00:27 PDT 2012


Does it actually make sense to parallelize these fast paths? I remember
somebody strongly opposing the idea of spawning threads in pixman in the
past, but I can't find that e-mail right now.

Even if using multithreaded rendering is acceptable, the next question is
whether to rely on OpenMP for it. Currently OpenMP is disabled in the Android
toolchain by default:
    https://groups.google.com/forum/#!topic/android-ndk/pUfqxURgNbQ
Clang/LLVM does not support OpenMP either.

Some benchmarks with cairo-perf-trace (gcc 4.7.1, CFLAGS="-O2 -fopenmp"):

=== Core i7 860 @2.8GHz ===

before patch:
[  0]    image             firefox-fishtank   66.912   66.931   0.13%    3/3

export OMP_NUM_THREADS=1
[  0]    image             firefox-fishtank   67.285   67.393   0.12%    3/3

export OMP_NUM_THREADS=2
[  0]    image             firefox-fishtank   40.156   40.192   0.07%    3/3

export OMP_NUM_THREADS=3
[  0]    image             firefox-fishtank   31.152   31.241   0.21%    3/3

export OMP_NUM_THREADS=4
[  0]    image             firefox-fishtank   26.507   26.540   0.15%    3/3

=== Radeon HD 6770 (xf86-video-ati-6.14.4, Mesa 8.1-devel (git-6e7756d)) ====

[  0]     xlib             firefox-fishtank   34.135   34.156   0.23%    3/3
[  0]       gl             firefox-fishtank    5.671    5.755   0.89%    3/3

---
 pixman/pixman-inlines.h |   24 +++++++++++++++---------
 1 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/pixman/pixman-inlines.h b/pixman/pixman-inlines.h
index 3532867..7ba0d09 100644
--- a/pixman/pixman-inlines.h
+++ b/pixman/pixman-inlines.h
@@ -765,6 +765,14 @@ bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
  *       range and can fit into unsigned byte or be used with 8-bit SIMD
  *       multiplication instructions.
  */
+/* OpenMP clauses for the bilinear row loop; mask is firstprivate so each thread keeps its pre-loop value on rows where the loop body does not reassign it. */
+#define OMP_BILINEAR_PARALLEL_FOR _Pragma("omp parallel for default(none)	\
+     firstprivate(height,dst_line,dst_stride,unit_y,unit_x,src_first_line,	\
+        src_stride,max_vx,right_pad,left_pad,left_tz,right_tz,src_width,	\
+        src_width_fixed,src_image,need_src_extension,mask_line,			\
+        mask_stride,v,vy,width)							\
+     firstprivate(mask) private(vx,y1,y2) schedule(static) if(height > 1)")
+
 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
 				  dst_type_t, repeat_mode, flags)				\
 static void											\
@@ -782,7 +790,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
     pixman_fixed_t unit_x, unit_y;								\
     int32_t left_pad, left_tz, right_tz, right_pad;						\
 												\
-    dst_type_t *dst;										\
+    int i;											\
     mask_type_t solid_mask;									\
     const mask_type_t *mask = &solid_mask;							\
     int src_stride, mask_stride, dst_stride;							\
@@ -864,20 +872,19 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
 	src_width_fixed = pixman_int_to_fixed (src_width);					\
     }												\
 												\
-    while (--height >= 0)									\
+    OMP_BILINEAR_PARALLEL_FOR									\
+    for (i = 0; i < height; i++)								\
     {												\
 	int weight1, weight2;									\
-	dst = dst_line;										\
-	dst_line += dst_stride;									\
+	dst_type_t *dst = dst_line + (uintptr_t)dst_stride * i;					\
 	vx = v.vector[0];									\
 	if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
 	{											\
-	    mask = mask_line;									\
-	    mask_line += mask_stride;								\
+	    mask = mask_line + (uintptr_t)mask_stride * i;					\
 	}											\
 												\
-	y1 = pixman_fixed_to_int (vy);								\
-	weight2 = (vy >> 8) & 0xff;								\
+	y1 = pixman_fixed_to_int (vy + unit_y * i);						\
+	weight2 = ((vy + unit_y * i) >> 8) & 0xff;						\
 	if (weight2)										\
 	{											\
 	    /* normal case, both row weights are in 0-255 range and fit unsigned byte */	\
@@ -890,7 +897,6 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
 	    y2 = y1;										\
 	    weight1 = weight2 = 128;								\
 	}											\
-	vy += unit_y;										\
 	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
 	{											\
 	    src_type_t *src1, *src2;								\
-- 
1.7.3.4



More information about the Pixman mailing list