xf86-video-intel: 2 commits - src/sna/sna.h src/sna/sna_threads.c src/sna/sna_trapezoids.c

Thu Jan 24 11:19:40 PST 2013

 src/sna/sna.h            |    4 +
 src/sna/sna_threads.c    |   67 +++++++++++++---------
 src/sna/sna_trapezoids.c |  139 ++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 159 insertions(+), 51 deletions(-)

New commits:
commit f597b647180c1e7bf83693060f244926191b7462
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 24 18:45:35 2013 +0000

    sna: Tidy construction of data for threaded composite
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_threads.c b/src/sna/sna_threads.c
index 0bd797e..3f70f8f 100644
--- a/src/sna/sna_threads.c
+++ b/src/sna/sna_threads.c
@@ -203,31 +203,49 @@ void sna_image_composite(pixman_op_t        op,
 				       dst_x, dst_y,
 				       width, height);
 	} else {
-		struct thread_composite threads[num_threads];
+		struct thread_composite data[num_threads];
 		int y, dy, n;
 
+		DBG(("%s: using %d threads for compositing %dx%d\n",
+		     __FUNCTION__, num_threads, width, height));
+
 		y = dst_y;
 		dy = (height + num_threads - 1) / num_threads;
-		for (n = 0; n < num_threads; n++) {
-			threads[n].op = op;
-			threads[n].src = src;
-			threads[n].mask = mask;
-			threads[n].dst = dst;
-			threads[n].src_x = src_x;
-			threads[n].src_y = src_y + y - dst_y;
-			threads[n].mask_x = mask_x;
-			threads[n].mask_y = mask_y + y - dst_y;
-			threads[n].dst_x = dst_x;
-			threads[n].dst_y = y;
-			threads[n].width = width;
-			threads[n].height = dy;
-
-			sna_threads_run(thread_composite, &threads[n]);
 
+		data[0].op = op;
+		data[0].src = src;
+		data[0].mask = mask;
+		data[0].dst = dst;
+		data[0].src_x = src_x;
+		data[0].src_y = src_y;
+		data[0].mask_x = mask_x;
+		data[0].mask_y = mask_y;
+		data[0].dst_x = dst_x;
+		data[0].dst_y = y;
+		data[0].width = width;
+		data[0].height = dy;
+
+		for (n = 0; n < num_threads - 1; n++) {
+			data[n] = data[0];
+			data[n].src_y += y - dst_y;
+			data[n].mask_y += y - dst_y;
+			data[n].dst_y = y;
 			y += dy;
-			if (y + dy > dst_y + height)
-				dy = dst_y + height - y;
+
+			sna_threads_run(thread_composite, &data[n]);
 		}
+
+		if (y + dy > dst_y + height)
+			dy = dst_y + height - y;
+
+		data[n] = data[0];
+		data[n].src_y += y - dst_y;
+		data[n].mask_y += y - dst_y;
+		data[n].dst_y = y;
+		data[n].height = dy;
+
+		sna_threads_run(thread_composite, &data[n]);
+
 		sna_threads_wait();
 	}
 }
commit 1643c97f8f7b49738b649b5f7d1e574d689d167e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 24 18:24:02 2013 +0000

    sna: Use threads for simple mask generation
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna.h b/src/sna/sna.h
index 389884f..5832c99 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -845,6 +845,10 @@ inline static bool is_clipped(const RegionRec *r,
 }
 
 void sna_threads_init(void);
+int sna_use_threads (int width, int height, int threshold);
+void sna_threads_run(void (*func)(void *arg), void *arg);
+void sna_threads_wait(void);
+
 void sna_image_composite(pixman_op_t        op,
 			 pixman_image_t    *src,
 			 pixman_image_t    *mask,
diff --git a/src/sna/sna_threads.c b/src/sna/sna_threads.c
index afa260f..0bd797e 100644
--- a/src/sna/sna_threads.c
+++ b/src/sna/sna_threads.c
@@ -97,8 +97,7 @@ bail:
 	max_threads = 0;
 }
 
-static void
-threads_run(void (*func)(void *arg), void *arg)
+void sna_threads_run(void (*func)(void *arg), void *arg)
 {
 	int n;
 
@@ -129,7 +128,7 @@ execute:
 	pthread_mutex_unlock(&threads[n].mutex);
 }
 
-static void threads_wait(void)
+void sna_threads_wait(void)
 {
 	int n;
 
@@ -146,8 +145,7 @@ static void threads_wait(void)
 	}
 }
 
-static int
-use_threads (int width, int height, int threshold)
+int sna_use_threads(int width, int height, int threshold)
 {
 	int num_threads;
 
@@ -197,14 +195,13 @@ void sna_image_composite(pixman_op_t        op,
 {
 	int num_threads;
 
-	num_threads = use_threads(width, height, 16);
+	num_threads = sna_use_threads(width, height, 16);
 	if (num_threads <= 1) {
 		pixman_image_composite(op, src, mask, dst,
 				       src_x, src_y,
 				       mask_x, mask_y,
 				       dst_x, dst_y,
 				       width, height);
-		return;
 	} else {
 		struct thread_composite threads[num_threads];
 		int y, dy, n;
@@ -225,12 +222,12 @@ void sna_image_composite(pixman_op_t        op,
 			threads[n].width = width;
 			threads[n].height = dy;
 
-			threads_run(thread_composite, &threads[n]);
+			sna_threads_run(thread_composite, &threads[n]);
 
 			y += dy;
 			if (y + dy > dst_y + height)
 				dy = dst_y + height - y;
 		}
-		threads_wait();
+		sna_threads_wait();
 	}
 }
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 0b81018..f7a146c 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -5277,13 +5277,53 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 	return true;
 }
 
+struct inplace_thread {
+	xTrapezoid *traps;
+	RegionPtr clip;
+	span_func_t span;
+	struct inplace inplace;
+	BoxRec extents;
+	int dx, dy;
+	int draw_x, draw_y;
+	bool unbounded;
+	int ntrap;
+};
+
+static void inplace_thread(void *arg)
+{
+	struct inplace_thread *thread = arg;
+	struct tor tor;
+	int n;
+
+	if (tor_init(&tor, &thread->extents, 2*thread->ntrap))
+		return;
+
+	for (n = 0; n < thread->ntrap; n++) {
+		xTrapezoid t;
+
+		if (!project_trapezoid_onto_grid(&thread->traps[n], thread->dx, thread->dy, &t))
+			continue;
+
+		if (pixman_fixed_to_int(thread->traps[n].top) >= thread->extents.y2 - thread->draw_y ||
+		    pixman_fixed_to_int(thread->traps[n].bottom) < thread->extents.y1 - thread->draw_y)
+			continue;
+
+		tor_add_edge(&tor, &t, &t.left, 1);
+		tor_add_edge(&tor, &t, &t.right, -1);
+	}
+
+	tor_render(NULL, &tor, (void*)&thread->inplace,
+		   thread->clip, thread->span, thread->unbounded);
+
+	tor_fini(&tor);
+}
+
 static bool
 trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 		       PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
 		       int ntrap, xTrapezoid *traps,
 		       bool fallback)
 {
-	struct tor tor;
 	struct inplace inplace;
 	span_func_t span;
 	PixmapPtr pixmap;
@@ -5293,7 +5333,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 	bool unbounded;
 	int16_t dst_x, dst_y;
 	int dx, dy;
-	int n;
+	int num_threads, n;
 
 	if (NO_SCAN_CONVERTER)
 		return false;
@@ -5424,26 +5464,6 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 	     region.extents.x1, region.extents.y1,
 	     region.extents.x2, region.extents.y2));
 
-	if (tor_init(&tor, &region.extents, 2*ntrap))
-		return true;
-
-	dx = dst->pDrawable->x * FAST_SAMPLES_X;
-	dy = dst->pDrawable->y * FAST_SAMPLES_Y;
-
-	for (n = 0; n < ntrap; n++) {
-		xTrapezoid t;
-
-		if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
-			continue;
-
-		if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
-		    pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
-			continue;
-
-		tor_add_edge(&tor, &t, &t.left, 1);
-		tor_add_edge(&tor, &t, &t.right, -1);
-	}
-
 	if (op == PictOpSrc) {
 		if (dst->pCompositeClip->data)
 			span = tor_blt_src_clipped;
@@ -5468,6 +5488,9 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 					     op == PictOpSrc ? MOVE_WRITE | MOVE_INPLACE_HINT : MOVE_WRITE | MOVE_READ))
 		return true;
 
+	dx = dst->pDrawable->x * FAST_SAMPLES_X;
+	dy = dst->pDrawable->y * FAST_SAMPLES_Y;
+
 	get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y);
 
 	inplace.ptr = pixmap->devPrivate.ptr;
@@ -5475,10 +5498,76 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 	inplace.stride = pixmap->devKind;
 	inplace.opacity = color >> 24;
 
-	tor_render(NULL, &tor, (void*)&inplace,
-		   dst->pCompositeClip, span, unbounded);
+	num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
+				      region.extents.y2 - region.extents.y1,
+				      8);
+	if (num_threads == 1) {
+		struct tor tor;
 
-	tor_fini(&tor);
+		if (tor_init(&tor, &region.extents, 2*ntrap))
+			return true;
+
+		for (n = 0; n < ntrap; n++) {
+			xTrapezoid t;
+
+			if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
+				continue;
+
+			if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
+			    pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
+				continue;
+
+			tor_add_edge(&tor, &t, &t.left, 1);
+			tor_add_edge(&tor, &t, &t.right, -1);
+		}
+
+		tor_render(NULL, &tor, (void*)&inplace,
+			   dst->pCompositeClip, span, unbounded);
+
+		tor_fini(&tor);
+	} else {
+		struct inplace_thread threads[num_threads];
+		int y, h;
+
+		DBG(("%s: using %d threads for inplace compositing %dx%d\n",
+		     __FUNCTION__, num_threads,
+		     region.extents.x2 - region.extents.x1,
+		     region.extents.y2 - region.extents.y1));
+
+		threads[0].traps = traps;
+		threads[0].ntrap = ntrap;
+		threads[0].inplace = inplace;
+		threads[0].extents = region.extents;
+		threads[0].clip = dst->pCompositeClip;
+		threads[0].span = span;
+		threads[0].unbounded = unbounded;
+		threads[0].dx = dx;
+		threads[0].dy = dy;
+		threads[0].draw_x = dst->pDrawable->x;
+		threads[0].draw_y = dst->pDrawable->y;
+
+		y = region.extents.y1;
+		h = region.extents.y2 - region.extents.y1;
+		h = (h + num_threads - 1) / num_threads;
+
+		for (n = 0; n < num_threads - 1; n++) {
+			threads[n] = threads[0];
+			threads[n].extents.y1 = y;
+			threads[n].extents.y2 = y += h;
+
+			sna_threads_run(inplace_thread, &threads[n]);
+		}
+
+		if (y + h > region.extents.y2)
+			h = region.extents.y2 - y;
+
+		threads[n] = threads[0];
+		threads[n].extents.y1 = y;
+		threads[n].extents.y2 = y + h;
+		inplace_thread(&threads[n]);
+
+		sna_threads_wait();
+	}
 
 	return true;
 }