xf86-video-intel: 3 commits - src/sna/gen4_vertex.c src/sna/sna_gradient.c src/sna/sna_render.h src/sna/sna_threads.c src/sna/sna_trapezoids_boxes.c src/sna/sna_trapezoids.c src/sna/sna_trapezoids.h src/sna/sna_trapezoids_imprecise.c src/sna/sna_trapezoids_mono.c src/sna/sna_trapezoids_precise.c

Chris Wilson ickle at kemper.freedesktop.org
Mon Oct 7 16:05:18 PDT 2013


 src/sna/gen4_vertex.c              |    3 
 src/sna/sna_gradient.c             |    9 -
 src/sna/sna_render.h               |    2 
 src/sna/sna_threads.c              |    2 
 src/sna/sna_trapezoids.c           |   13 +-
 src/sna/sna_trapezoids.h           |   41 ++++---
 src/sna/sna_trapezoids_boxes.c     |    2 
 src/sna/sna_trapezoids_imprecise.c |  152 +++++++++++++----------------
 src/sna/sna_trapezoids_mono.c      |    1 
 src/sna/sna_trapezoids_precise.c   |  191 ++++++++++++++++++-------------------
 10 files changed, 216 insertions(+), 200 deletions(-)

New commits:
commit 06a8ad9690590a605b1564012d062b98c60546a6
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Oct 7 23:21:38 2013 +0100

    sna/trapezoids: Recompute num_threads to match range
    
    We need to be careful not to execute threads past the end of the allotted
    buffer by making sure the clip extents correctly align.
    
    Reported-by: Joseph Yasi <joe.yasi at gmail.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70204
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_threads.c b/src/sna/sna_threads.c
index b771bda..3e10e5a 100644
--- a/src/sna/sna_threads.c
+++ b/src/sna/sna_threads.c
@@ -274,6 +274,7 @@ void sna_image_composite(pixman_op_t        op,
 
 		y = dst_y;
 		dy = (height + num_threads - 1) / num_threads;
+		num_threads = (height + dy - 1) / dy;
 
 		data[0].op = op;
 		data[0].src = src;
@@ -298,6 +299,7 @@ void sna_image_composite(pixman_op_t        op,
 			sna_threads_run(thread_composite, &data[n]);
 		}
 
+		assert(y < dst_y + height);
 		if (y + dy > dst_y + height)
 			dy = dst_y + height - y;
 
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index c9c031a..8827b9c 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -390,6 +390,7 @@ trapezoids_fallback(struct sna *sna,
 
 				y = bounds.y1;
 				dy = (height + num_threads - 1) / num_threads;
+				num_threads = (bounds.y2 - bounds.y1 + dy - 1) / dy;
 
 				for (n = 1; n < num_threads; n++) {
 					threads[n] = threads[0];
@@ -400,9 +401,9 @@ trapezoids_fallback(struct sna *sna,
 					sna_threads_run(rasterize_traps_thread, &threads[n]);
 				}
 
+				assert(y < threads[0].bounds.y2);
 				threads[0].ptr += (y - bounds.y1) * threads[0].stride;
 				threads[0].bounds.y1 = y;
-				threads[0].bounds.y2 = bounds.y2;
 				rasterize_traps_thread(&threads[0]);
 
 				sna_threads_wait();
diff --git a/src/sna/sna_trapezoids_boxes.c b/src/sna/sna_trapezoids_boxes.c
index d7861d2..4b0346b 100644
--- a/src/sna/sna_trapezoids_boxes.c
+++ b/src/sna/sna_trapezoids_boxes.c
@@ -1171,6 +1171,7 @@ composite_unaligned_boxes_inplace(struct sna *sna,
 
 			y = clip.extents.y1;
 			dy = (clip.extents.y2 - clip.extents.y1 + num_threads - 1) / num_threads;
+			num_threads = (clip.extents.y2 - clip.extents.y1 + dy - 1) / dy;
 
 			for (i = 1; i < num_threads; i++) {
 				thread[i] = thread[0];
@@ -1179,6 +1180,7 @@ composite_unaligned_boxes_inplace(struct sna *sna,
 				sna_threads_run(rectilinear_inplace_thread, &thread[i]);
 			}
 
+			assert(y < clip.extents.y2);
 			thread[0].y1 = y;
 			thread[0].y2 = clip.extents.y2;
 			rectilinear_inplace_thread(&thread[0]);
diff --git a/src/sna/sna_trapezoids_imprecise.c b/src/sna/sna_trapezoids_imprecise.c
index 88cb660..4809e9c 100644
--- a/src/sna/sna_trapezoids_imprecise.c
+++ b/src/sna/sna_trapezoids_imprecise.c
@@ -255,9 +255,7 @@ struct tor {
     struct active_list	active[1];
     struct cell_list	coverages[1];
 
-    /* Clip box. */
-    int xmin, xmax;
-    int ymin, ymax;
+    BoxRec extents;
 };
 
 /* Compute the floored division a/b. Assumes / and % perform symmetric
@@ -362,10 +360,7 @@ cell_list_alloc(struct cell_list *cells,
 inline static struct cell *
 cell_list_find(struct cell_list *cells, int x)
 {
-	struct cell *tail = cells->cursor;
-
-	if (tail->x == x)
-		return tail;
+	struct cell *tail;
 
 	if (x >= cells->x2)
 		return &cells->tail;
@@ -373,6 +368,10 @@ cell_list_find(struct cell_list *cells, int x)
 	if (x < cells->x1)
 		return &cells->head;
 
+	tail = cells->cursor;
+	if (tail->x == x)
+		return tail;
+
 	do {
 		if (tail->next->x > x)
 			break;
@@ -459,13 +458,9 @@ polygon_fini(struct polygon *polygon)
 }
 
 static bool
-polygon_init(struct polygon *polygon,
-	     int num_edges,
-	     int ymin,
-	     int ymax)
+polygon_init(struct polygon *polygon, int num_edges, int ymin, int ymax)
 {
-	unsigned num_buckets =
-		EDGE_Y_BUCKET_INDEX(ymax+EDGE_Y_BUCKET_HEIGHT-1, ymin);
+	unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax-1, ymin) + 1;
 
 	if (unlikely(ymax - ymin > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
 		return false;
@@ -502,6 +497,7 @@ _polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
 {
 	unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
 	struct edge **ptail = &polygon->y_buckets[ix];
+	assert(e->ytop < polygon->ymax);
 	e->next = *ptail;
 	*ptail = e;
 }
@@ -928,10 +924,7 @@ tor_init(struct tor *converter, const BoxRec *box, int num_edges)
 	       FAST_SAMPLES_X, FAST_SAMPLES_Y,
 	       num_edges));
 
-	converter->xmin = box->x1;
-	converter->ymin = box->y1;
-	converter->xmax = box->x2;
-	converter->ymax = box->y2;
+	converter->extents = *box;
 
 	if (!cell_list_init(converter->coverages, box->x1, box->x2))
 		return false;
@@ -1076,6 +1069,7 @@ tor_blt_span_mono_unbounded_clipped(struct sna *sna,
 
 static void
 tor_blt(struct sna *sna,
+	struct tor *converter,
 	struct sna_composite_spans_op *op,
 	pixman_region16_t *clip,
 	void (*span)(struct sna *sna,
@@ -1083,18 +1077,18 @@ tor_blt(struct sna *sna,
 		     pixman_region16_t *clip,
 		     const BoxRec *box,
 		     int coverage),
-	struct cell_list *cells,
 	int y, int height,
-	int xmin, int xmax,
 	int unbounded)
 {
+	struct cell_list *cells = converter->coverages;
 	struct cell *cell;
 	BoxRec box;
 	int cover;
 
-	box.y1 = y;
-	box.y2 = y + height;
-	box.x1 = xmin;
+	box.y1 = y + converter->extents.y1;
+	box.y2 = box.y1 + height;
+	assert(box.y2 <= converter->extents.y2);
+	box.x1 = converter->extents.x1;
 
 	/* Form the spans from the coverages and areas. */
 	cover = cells->head.covered_height*FAST_SAMPLES_X*2;
@@ -1102,8 +1096,8 @@ tor_blt(struct sna *sna,
 	for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
 		int x = cell->x;
 
-		assert(x >= xmin);
-		assert(x < xmax);
+		assert(x >= converter->extents.x1);
+		assert(x < converter->extents.x2);
 		__DBG(("%s: cell=(%d, %d, %d), cover=%d, max=%d\n", __FUNCTION__,
 		       cell->x, cell->covered_height, cell->uncovered_area,
 		       cover, xmax));
@@ -1137,7 +1131,7 @@ tor_blt(struct sna *sna,
 		}
 	}
 
-	box.x2 = xmax;
+	box.x2 = converter->extents.x2;
 	if (box.x2 > box.x1 && (unbounded || cover)) {
 		__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
 		       box.x1, box.y1,
@@ -1148,28 +1142,6 @@ tor_blt(struct sna *sna,
 	}
 }
 
-static void
-tor_blt_empty(struct sna *sna,
-	      struct sna_composite_spans_op *op,
-	      pixman_region16_t *clip,
-	      void (*span)(struct sna *sna,
-			   struct sna_composite_spans_op *op,
-			   pixman_region16_t *clip,
-			   const BoxRec *box,
-			   int coverage),
-	      int y, int height,
-	      int xmin, int xmax)
-{
-	BoxRec box;
-
-	box.x1 = xmin;
-	box.x2 = xmax;
-	box.y1 = y;
-	box.y2 = y + height;
-
-	span(sna, op, clip, &box, 0);
-}
-
 flatten static void
 tor_render(struct sna *sna,
 	   struct tor *converter,
@@ -1182,14 +1154,11 @@ tor_render(struct sna *sna,
 			int coverage),
 	   int unbounded)
 {
-	int ymin = converter->ymin;
-	int xmin = converter->xmin;
-	int xmax = converter->xmax;
-	int i, j, h = converter->ymax - ymin;
 	struct polygon *polygon = converter->polygon;
 	struct cell_list *coverages = converter->coverages;
 	struct active_list *active = converter->active;
 	struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
+	int16_t i, j, h = converter->extents.y2 - converter->extents.y1;
 
 	__DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));
 
@@ -1201,15 +1170,23 @@ tor_render(struct sna *sna,
 
 		/* Determine if we can ignore this row or use the full pixel
 		 * stepper. */
-		if (!polygon->y_buckets[i]) {
+		if (polygon->y_buckets[i] == NULL) {
 			if (active->head.next == &active->tail) {
-				for (; !polygon->y_buckets[j]; j++)
+				for (; polygon->y_buckets[j] == NULL; j++)
 					;
 				__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
 				       __FUNCTION__, i, j));
 
-				if (unbounded)
-					tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax);
+				assert(j <= h);
+				if (unbounded) {
+					BoxRec box;
+
+					box = converter->extents;
+					box.y1 += i;
+					box.y2 = converter->extents.y1 + j;
+
+					span(sna, op, clip, &box, 0);
+				}
 				continue;
 			}
 
@@ -1228,6 +1205,7 @@ tor_render(struct sna *sna,
 				do_full_step -= FAST_SAMPLES_Y;
 				j++;
 			}
+			assert(j >= i + 1 && j <= h);
 			if (j != i + 1)
 				step_edges(active, j - (i + 1));
 
@@ -1249,9 +1227,8 @@ tor_render(struct sna *sna,
 			}
 		}
 
-		tor_blt(sna, op, clip, span, coverages,
-			i+ymin, j-i, xmin, xmax,
-			unbounded);
+		assert(j > i);
+		tor_blt(sna, converter, op, clip, span, i, j-i, unbounded);
 		cell_list_reset(coverages);
 	}
 }
@@ -1531,7 +1508,7 @@ inplace_end_subrows(struct active_list *active, uint8_t *row,
 static void
 tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
 {
-	int i, j, h = converter->ymax;
+	int i, j, h = converter->extents.y2;
 	struct polygon *polygon = converter->polygon;
 	struct active_list *active = converter->active;
 	struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
@@ -1541,8 +1518,8 @@ tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
 
 	__DBG(("%s: mono=%d, buf?=%d\n", __FUNCTION__, mono, buf != NULL));
 	assert(!mono);
-	assert(converter->ymin == 0);
-	assert(converter->xmin == 0);
+	assert(converter->extents.y1 == 0);
+	assert(converter->extents.x1 == 0);
 	assert(scratch->drawable.depth == 8);
 
 	/* Render each pixel row. */
@@ -2005,6 +1982,7 @@ imprecise_trapezoid_span_converter(struct sna *sna,
 		y = clip.extents.y1;
 		h = clip.extents.y2 - clip.extents.y1;
 		h = (h + num_threads - 1) / num_threads;
+		num_threads = (clip.extents.y2 - clip.extents.y1 + h - 1) / h;
 
 		for (n = 1; n < num_threads; n++) {
 			threads[n] = threads[0];
@@ -2014,6 +1992,7 @@ imprecise_trapezoid_span_converter(struct sna *sna,
 			sna_threads_run(span_thread, &threads[n]);
 		}
 
+		assert(y < threads[0].extents.y2);
 		threads[0].extents.y1 = y;
 		span_thread(&threads[0]);
 
@@ -2862,6 +2841,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 		y = region.extents.y1;
 		h = region.extents.y2 - region.extents.y1;
 		h = (h + num_threads - 1) / num_threads;
+		num_threads = (region.extents.y2 - region.extents.y1 + h - 1) / h;
 
 		for (n = 1; n < num_threads; n++) {
 			threads[n] = threads[0];
@@ -2871,8 +2851,8 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 			sna_threads_run(inplace_x8r8g8b8_thread, &threads[n]);
 		}
 
+		assert(y < threads[0].extents.y2);
 		threads[0].extents.y1 = y;
-		threads[0].extents.y2 = region.extents.y2;
 		inplace_x8r8g8b8_thread(&threads[0]);
 
 		sna_threads_wait();
@@ -3132,6 +3112,7 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
 		y = region.extents.y1;
 		h = region.extents.y2 - region.extents.y1;
 		h = (h + num_threads - 1) / num_threads;
+		num_threads = (region.extents.y2 - region.extents.y1 + h - 1) / h;
 
 		for (n = 1; n < num_threads; n++) {
 			threads[n] = threads[0];
@@ -3141,8 +3122,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
 			sna_threads_run(inplace_thread, &threads[n]);
 		}
 
+		assert(y < threads[0].extents.y2);
 		threads[0].extents.y1 = y;
-		threads[0].extents.y2 = region.extents.y2;
 		inplace_thread(&threads[0]);
 
 		sna_threads_wait();
diff --git a/src/sna/sna_trapezoids_mono.c b/src/sna/sna_trapezoids_mono.c
index e5cb7a6..790863e 100644
--- a/src/sna/sna_trapezoids_mono.c
+++ b/src/sna/sna_trapezoids_mono.c
@@ -843,6 +843,7 @@ mono_trapezoids_span_converter(struct sna *sna,
 		y = extents.y1;
 		h = extents.y2 - extents.y1;
 		h = (h + num_threads - 1) / num_threads;
+		num_threads = (extents.y2 - extents.y1 + h - 1) / h;
 
 		for (n = 1; n < num_threads; n++) {
 			threads[n] = threads[0];
diff --git a/src/sna/sna_trapezoids_precise.c b/src/sna/sna_trapezoids_precise.c
index b4d7592..3720d8b 100644
--- a/src/sna/sna_trapezoids_precise.c
+++ b/src/sna/sna_trapezoids_precise.c
@@ -268,9 +268,7 @@ struct tor {
     struct active_list	active[1];
     struct cell_list	coverages[1];
 
-    /* Clip box. */
-    int xmin, xmax;
-    int ymin, ymax;
+    BoxRec extents;
 };
 
 /* Compute the floored division a/b. Assumes / and % perform symmetric
@@ -473,13 +471,9 @@ polygon_fini(struct polygon *polygon)
 }
 
 static bool
-polygon_init(struct polygon *polygon,
-	     int num_edges,
-	     int ymin,
-	     int ymax)
+polygon_init(struct polygon *polygon, int num_edges, int ymin, int ymax)
 {
-	unsigned num_buckets =
-		EDGE_Y_BUCKET_INDEX(ymax+EDGE_Y_BUCKET_HEIGHT-1, ymin);
+	unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax-1, ymin) + 1;
 
 	if (unlikely(ymax - ymin > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
 		return false;
@@ -516,6 +510,7 @@ _polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
 {
 	unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
 	struct edge **ptail = &polygon->y_buckets[ix];
+	assert(e->ytop < polygon->ymax);
 	e->next = *ptail;
 	*ptail = e;
 }
@@ -534,7 +529,7 @@ polygon_add_edge(struct polygon *polygon,
 	int ymin = polygon->ymin;
 	int ymax = polygon->ymax;
 
-	assert (dy > 0);
+	assert(dy > 0);
 
 	e->dy = dy;
 	e->dir = dir;
@@ -934,10 +929,7 @@ tor_init(struct tor *converter, const BoxRec *box, int num_edges)
 	       SAMPLES_X, SAMPLES_Y,
 	       num_edges));
 
-	converter->xmin = box->x1;
-	converter->ymin = box->y1;
-	converter->xmax = box->x2;
-	converter->ymax = box->y2;
+	converter->extents = *box;
 
 	if (!cell_list_init(converter->coverages, box->x1, box->x2))
 		return false;
@@ -1033,6 +1025,7 @@ tor_blt_span_clipped(struct sna *sna,
 
 static void
 tor_blt(struct sna *sna,
+	struct tor *converter,
 	struct sna_composite_spans_op *op,
 	pixman_region16_t *clip,
 	void (*span)(struct sna *sna,
@@ -1040,18 +1033,18 @@ tor_blt(struct sna *sna,
 		     pixman_region16_t *clip,
 		     const BoxRec *box,
 		     int coverage),
-	struct cell_list *cells,
 	int y, int height,
-	int xmin, int xmax,
 	int unbounded)
 {
+	struct cell_list *cells = converter->coverages;
 	struct cell *cell;
 	BoxRec box;
 	int cover;
 
-	box.y1 = y;
-	box.y2 = y + height;
-	box.x1 = xmin;
+	box.y1 = y + converter->extents.y1;
+	box.y2 = box.y1 + height;
+	assert(box.y2 <= converter->extents.y2);
+	box.x1 = converter->extents.x1;
 
 	/* Form the spans from the coverages and areas. */
 	cover = cells->head.covered_height*SAMPLES_X*2;
@@ -1059,8 +1052,8 @@ tor_blt(struct sna *sna,
 	for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
 		int x = cell->x;
 
-		assert(x >= xmin);
-		assert(x < xmax);
+		assert(x >= converter->extents.x1);
+		assert(x < converter->extents.x2);
 		__DBG(("%s: cell=(%d, %d, %d), cover=%d, max=%d\n", __FUNCTION__,
 		       cell->x, cell->covered_height, cell->uncovered_area,
 		       cover, xmax));
@@ -1094,7 +1087,7 @@ tor_blt(struct sna *sna,
 		}
 	}
 
-	box.x2 = xmax;
+	box.x2 = converter->extents.x2;
 	if (box.x2 > box.x1 && (unbounded || cover)) {
 		__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
 		       box.x1, box.y1,
@@ -1105,28 +1098,6 @@ tor_blt(struct sna *sna,
 	}
 }
 
-static void
-tor_blt_empty(struct sna *sna,
-	      struct sna_composite_spans_op *op,
-	      pixman_region16_t *clip,
-	      void (*span)(struct sna *sna,
-			   struct sna_composite_spans_op *op,
-			   pixman_region16_t *clip,
-			   const BoxRec *box,
-			   int coverage),
-	      int y, int height,
-	      int xmin, int xmax)
-{
-	BoxRec box;
-
-	box.x1 = xmin;
-	box.x2 = xmax;
-	box.y1 = y;
-	box.y2 = y + height;
-
-	span(sna, op, clip, &box, 0);
-}
-
 flatten static void
 tor_render(struct sna *sna,
 	   struct tor *converter,
@@ -1139,14 +1110,11 @@ tor_render(struct sna *sna,
 			int coverage),
 	   int unbounded)
 {
-	int ymin = converter->ymin;
-	int xmin = converter->xmin;
-	int xmax = converter->xmax;
-	int i, j, h = converter->ymax - ymin;
 	struct polygon *polygon = converter->polygon;
 	struct cell_list *coverages = converter->coverages;
 	struct active_list *active = converter->active;
 	struct edge *buckets[SAMPLES_Y] = { 0 };
+	int16_t i, j, h = converter->extents.y2 - converter->extents.y1;
 
 	__DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));
 
@@ -1158,15 +1126,23 @@ tor_render(struct sna *sna,
 
 		/* Determine if we can ignore this row or use the full pixel
 		 * stepper. */
-		if (!polygon->y_buckets[i]) {
+		if (polygon->y_buckets[i] == NULL) {
 			if (active->head.next == &active->tail) {
-				for (; !polygon->y_buckets[j]; j++)
+				for (; polygon->y_buckets[j] == NULL; j++)
 					;
 				__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
 				       __FUNCTION__, i, j));
 
-				if (unbounded)
-					tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax);
+				assert(j <= h);
+				if (unbounded) {
+					BoxRec box;
+
+					box = converter->extents;
+					box.y1 += i;
+					box.y2 = converter->extents.y1 + j;
+
+					span(sna, op, clip, &box, 0);
+				}
 				continue;
 			}
 
@@ -1185,6 +1161,7 @@ tor_render(struct sna *sna,
 				do_full_step -= SAMPLES_Y;
 				j++;
 			}
+			assert(j >= i + 1 && j <= h);
 			if (j != i + 1)
 				step_edges(active, j - (i + 1));
 
@@ -1193,7 +1170,7 @@ tor_render(struct sna *sna,
 		} else {
 			int suby;
 
-			fill_buckets(active, polygon->y_buckets[i], (i+ymin)*SAMPLES_Y, buckets);
+			fill_buckets(active, polygon->y_buckets[i], (i+converter->extents.y1)*SAMPLES_Y, buckets);
 
 			/* Subsample this row. */
 			for (suby = 0; suby < SAMPLES_Y; suby++) {
@@ -1206,9 +1183,8 @@ tor_render(struct sna *sna,
 			}
 		}
 
-		tor_blt(sna, op, clip, span, coverages,
-			i+ymin, j-i, xmin, xmax,
-			unbounded);
+		assert(j > i);
+		tor_blt(sna, converter, op, clip, span, i, j-i, unbounded);
 		cell_list_reset(coverages);
 	}
 }
@@ -1478,7 +1454,7 @@ flatten static void
 tor_inplace(struct tor *converter, PixmapPtr scratch)
 {
 	uint8_t buf[TOR_INPLACE_SIZE];
-	int i, j, h = converter->ymax - converter->ymin;
+	int i, j, h = converter->extents.y2 - converter->extents.y1;
 	struct polygon *polygon = converter->polygon;
 	struct active_list *active = converter->active;
 	struct edge *buckets[SAMPLES_Y] = { 0 };
@@ -1487,10 +1463,10 @@ tor_inplace(struct tor *converter, PixmapPtr scratch)
 	int width = scratch->drawable.width;
 
 	__DBG(("%s: buf?=%d\n", __FUNCTION__, buf != NULL));
-	assert(converter->xmin == 0);
+	assert(converter->extents.x1 == 0);
 	assert(scratch->drawable.depth == 8);
 
-	row += converter->ymin * stride;
+	row += converter->extents.y1 * stride;
 
 	/* Render each pixel row. */
 	for (i = 0; i < h; i = j) {
@@ -1540,7 +1516,7 @@ tor_inplace(struct tor *converter, PixmapPtr scratch)
 		} else {
 			int suby;
 
-			fill_buckets(active, polygon->y_buckets[i], (i+converter->ymin)*SAMPLES_Y, buckets);
+			fill_buckets(active, polygon->y_buckets[i], (i+converter->extents.y1)*SAMPLES_Y, buckets);
 
 			/* Subsample this row. */
 			memset(ptr, 0, width);
@@ -1931,6 +1907,7 @@ precise_trapezoid_span_converter(struct sna *sna,
 		y = clip.extents.y1;
 		h = clip.extents.y2 - clip.extents.y1;
 		h = (h + num_threads - 1) / num_threads;
+		num_threads = (clip.extents.y2 - clip.extents.y1 + h - 1) / h;
 
 		for (n = 1; n < num_threads; n++) {
 			threads[n] = threads[0];
@@ -1940,6 +1917,7 @@ precise_trapezoid_span_converter(struct sna *sna,
 			sna_threads_run(span_thread, &threads[n]);
 		}
 
+		assert(y < threads[0].extents.y2);
 		threads[0].extents.y1 = y;
 		span_thread(&threads[0]);
 
@@ -2155,6 +2133,7 @@ precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 		y = extents.y1;
 		h = extents.y2 - extents.y1;
 		h = (h + num_threads - 1) / num_threads;
+		num_threads = (extents.y2 - extents.y1 + h - 1) / h;
 
 		for (n = 1; n < num_threads; n++) {
 			threads[n] = threads[0];
@@ -2164,6 +2143,7 @@ precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 			sna_threads_run(mask_thread, &threads[n]);
 		}
 
+		assert(y < threads[0].extents.y2);
 		threads[0].extents.y1 = y;
 		mask_thread(&threads[0]);
 
@@ -2859,6 +2839,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 		y = region.extents.y1;
 		h = region.extents.y2 - region.extents.y1;
 		h = (h + num_threads - 1) / num_threads;
+		num_threads = (region.extents.y2 - region.extents.y1 + h - 1) / h;
 
 		for (n = 1; n < num_threads; n++) {
 			threads[n] = threads[0];
@@ -2868,8 +2849,8 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 			sna_threads_run(inplace_x8r8g8b8_thread, &threads[n]);
 		}
 
+		assert(y < threads[0].extents.y2);
 		threads[0].extents.y1 = y;
-		threads[0].extents.y2 = region.extents.y2;
 		inplace_x8r8g8b8_thread(&threads[0]);
 
 		sna_threads_wait();
@@ -3130,6 +3111,7 @@ precise_trapezoid_span_inplace(struct sna *sna,
 		y = region.extents.y1;
 		h = region.extents.y2 - region.extents.y1;
 		h = (h + num_threads - 1) / num_threads;
+		num_threads = (region.extents.y2 - region.extents.y1 + h - 1) / h;
 
 		for (n = 1; n < num_threads; n++) {
 			threads[n] = threads[0];
@@ -3139,8 +3121,8 @@ precise_trapezoid_span_inplace(struct sna *sna,
 			sna_threads_run(inplace_thread, &threads[n]);
 		}
 
+		assert(y < threads[0].extents.y2);
 		threads[0].extents.y1 = y;
-		threads[0].extents.y2 = region.extents.y2;
 		inplace_thread(&threads[0]);
 
 		sna_threads_wait();
@@ -3273,6 +3255,7 @@ precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 		y = extents.y1;
 		h = extents.y2 - extents.y1;
 		h = (h + num_threads - 1) / num_threads;
+		num_threads = (extents.y2 - extents.y1 + h - 1) / h;
 
 		for (n = 1; n < num_threads; n++) {
 			threads[n] = threads[0];
@@ -3282,6 +3265,7 @@ precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 			sna_threads_run(mask_thread, &threads[n]);
 		}
 
+		assert(y < threads[0].extents.y2);
 		threads[0].extents.y1 = y;
 		mask_thread(&threads[0]);
 
commit 1fb4f60671cfb0e461a2e5969ee9d0f0e39d93a4
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Oct 7 22:04:22 2013 +0100

    sna: Trim color cache allocation to a single page
    
    Instead of trying to allocate 4100 bytes, fix the logic to only require a
    maximum of 4096 bytes in the cache buffer.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_gradient.c b/src/sna/sna_gradient.c
index 26e4167..818a8b7 100644
--- a/src/sna/sna_gradient.c
+++ b/src/sna/sna_gradient.c
@@ -246,8 +246,8 @@ sna_render_finish_solid(struct sna *sna, bool force)
 	struct kgem_bo *old;
 	int i;
 
-	DBG(("sna_render_finish_solid(force=%d, domain=%d, busy=%d, dirty=%d)\n",
-	     force, cache->cache_bo->domain, cache->cache_bo->rq != NULL, cache->dirty));
+	DBG(("sna_render_finish_solid(force=%d, domain=%d, busy=%d, dirty=%d, size=%d)\n",
+	     force, cache->cache_bo->domain, cache->cache_bo->rq != NULL, cache->dirty, cache->size));
 
 	if (!force && cache->cache_bo->domain != DOMAIN_GPU)
 		return;
@@ -353,9 +353,10 @@ sna_render_get_solid(struct sna *sna, uint32_t color)
 		}
 	}
 
-	sna_render_finish_solid(sna, i == 1024);
+	sna_render_finish_solid(sna, i == ARRAY_SIZE(cache->color));
 
 	i = cache->size++;
+	assert(i < ARRAY_SIZE(cache->color));
 	cache->color[i] = color;
 	cache->dirty = 1;
 	DBG(("sna_render_get_solid(%d) = %x (new)\n", i, color));
@@ -429,7 +430,7 @@ static bool sna_solid_cache_init(struct sna *sna)
 	if (!cache->cache_bo)
 		return false;
 
-	cache->last = 1024;
+	cache->last = 0;
 	cache->color[cache->last] = 0;
 	cache->dirty = 0;
 	cache->size = 0;
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index ad8121e..7f4b8e6 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -287,7 +287,7 @@ struct sna_render {
 	struct sna_solid_cache {
 		struct kgem_bo *cache_bo;
 		struct kgem_bo *bo[1024];
-		uint32_t color[1025];
+		uint32_t color[1024];
 		int last;
 		int size;
 		int dirty;
commit 55cd67485ff34a28ab8eaa7b1b6958b96c072317
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Oct 7 14:06:31 2013 +0100

    sna/trapezoids: Only use a single thread to emit rectilinear spans
    
    If the trapezoids are rectilinear, they should hit a fast path through
    the span compositors and so threading them seems pointless. Except
    possibly for inplace pixman operations.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index 85e7413..e2a4808 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -45,7 +45,9 @@ void gen4_vertex_flush(struct sna *sna)
 	     sna->render.vertex_index - sna->render.vertex_start));
 
 	assert(sna->render.vertex_offset);
+	assert(sna->render.vertex_offset <= sna->kgem.nbatch);
 	assert(sna->render.vertex_index > sna->render.vertex_start);
+	assert(sna->render.vertex_used <= sna->render.vertex_size);
 
 	sna->kgem.batch[sna->render.vertex_offset] =
 		sna->render.vertex_index - sna->render.vertex_start;
@@ -62,6 +64,7 @@ int gen4_vertex_finish(struct sna *sna)
 	     sna->render.vertex_used, sna->render.vertex_size));
 	assert(sna->render.vertex_offset == 0);
 	assert(sna->render.vertex_used);
+	assert(sna->render.vertex_used <= sna->render.vertex_size);
 
 	sna_vertex_wait__locked(&sna->render);
 
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 7c02840..c9c031a 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -660,7 +660,7 @@ sna_composite_trapezoids(CARD8 op,
 
 	if (trapezoid_spans_maybe_inplace(sna, op, src, dst, maskFormat)) {
 		flags |= COMPOSITE_SPANS_INPLACE_HINT;
-		if (trapezoid_span_inplace(sna, op, src, dst, maskFormat,
+		if (trapezoid_span_inplace(sna, op, src, dst, maskFormat, flags,
 					   xSrc, ySrc, ntrap, traps,
 					   false))
 			return;
@@ -670,22 +670,22 @@ sna_composite_trapezoids(CARD8 op,
 				     xSrc, ySrc, ntrap, traps))
 		return;
 
-	if (trapezoid_span_inplace(sna, op, src, dst, maskFormat,
+	if (trapezoid_span_inplace(sna, op, src, dst, maskFormat, flags,
 				   xSrc, ySrc, ntrap, traps,
 				   false))
 		return;
 
-	if (trapezoid_mask_converter(op, src, dst, maskFormat,
+	if (trapezoid_mask_converter(op, src, dst, maskFormat, flags,
 				     xSrc, ySrc, ntrap, traps))
 		return;
 
 fallback:
-	if (trapezoid_span_inplace(sna, op, src, dst, maskFormat,
+	if (trapezoid_span_inplace(sna, op, src, dst, maskFormat, flags,
 				   xSrc, ySrc, ntrap, traps,
 				   true))
 		return;
 
-	if (trapezoid_span_fallback(op, src, dst, maskFormat,
+	if (trapezoid_span_fallback(op, src, dst, maskFormat, flags,
 				    xSrc, ySrc, ntrap, traps))
 		return;
 
diff --git a/src/sna/sna_trapezoids.h b/src/sna/sna_trapezoids.h
index 729334c..d101726 100644
--- a/src/sna/sna_trapezoids.h
+++ b/src/sna/sna_trapezoids.h
@@ -62,7 +62,8 @@ mono_triangles_span_converter(struct sna *sna,
 bool
 imprecise_trapezoid_span_inplace(struct sna *sna,
 				 CARD8 op, PicturePtr src, PicturePtr dst,
-				 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+				 PictFormatPtr maskFormat, unsigned flags,
+				 INT16 src_x, INT16 src_y,
 				 int ntrap, xTrapezoid *traps,
 				 bool fallback);
 
@@ -75,18 +76,21 @@ imprecise_trapezoid_span_converter(struct sna *sna,
 
 bool
 imprecise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
-				   PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+				   PictFormatPtr maskFormat, unsigned flags,
+				   INT16 src_x, INT16 src_y,
 				   int ntrap, xTrapezoid *traps);
 
 bool
 imprecise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
-				  PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+				  PictFormatPtr maskFormat, unsigned flags,
+				  INT16 src_x, INT16 src_y,
 				  int ntrap, xTrapezoid *traps);
 
 bool
 precise_trapezoid_span_inplace(struct sna *sna,
 				 CARD8 op, PicturePtr src, PicturePtr dst,
-				 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+				 PictFormatPtr maskFormat, unsigned flags,
+				 INT16 src_x, INT16 src_y,
 				 int ntrap, xTrapezoid *traps,
 				 bool fallback);
 
@@ -99,13 +103,15 @@ precise_trapezoid_span_converter(struct sna *sna,
 
 bool
 precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
-				   PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+				   PictFormatPtr maskFormat, unsigned flags,
+				   INT16 src_x, INT16 src_y,
 				   int ntrap, xTrapezoid *traps);
 
 bool
 precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
-				  PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
-				  int ntrap, xTrapezoid *traps);
+				PictFormatPtr maskFormat, unsigned flags,
+				INT16 src_x, INT16 src_y,
+				int ntrap, xTrapezoid *traps);
 
 static inline bool is_mono(PicturePtr dst, PictFormatPtr mask)
 {
@@ -120,7 +126,8 @@ static inline bool is_precise(PicturePtr dst, PictFormatPtr mask)
 static inline bool
 trapezoid_span_inplace(struct sna *sna,
 		       CARD8 op, PicturePtr src, PicturePtr dst,
-		       PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+		       PictFormatPtr maskFormat, unsigned flags,
+		       INT16 src_x, INT16 src_y,
 		       int ntrap, xTrapezoid *traps,
 		       bool fallback)
 {
@@ -143,9 +150,9 @@ trapezoid_span_inplace(struct sna *sna,
 	if (is_mono(dst, maskFormat))
 		return mono_trapezoid_span_inplace(sna, op, src, dst, src_x, src_y, ntrap, traps);
 	else if (is_precise(dst, maskFormat))
-		return precise_trapezoid_span_inplace(sna, op, src, dst, maskFormat, src_x, src_y, ntrap, traps, fallback);
+		return precise_trapezoid_span_inplace(sna, op, src, dst, maskFormat, flags, src_x, src_y, ntrap, traps, fallback);
 	else
-		return imprecise_trapezoid_span_inplace(sna, op, src, dst, maskFormat, src_x, src_y, ntrap, traps, fallback);
+		return imprecise_trapezoid_span_inplace(sna, op, src, dst, maskFormat, flags, src_x, src_y, ntrap, traps, fallback);
 }
 
 static inline bool
@@ -168,30 +175,32 @@ trapezoid_span_converter(struct sna *sna,
 
 static inline bool
 trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
-			 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+			 PictFormatPtr maskFormat, unsigned flags,
+			 INT16 src_x, INT16 src_y,
 			 int ntrap, xTrapezoid *traps)
 {
 	if (NO_SCAN_CONVERTER)
 		return false;
 
 	if (is_precise(dst, maskFormat))
-		return precise_trapezoid_mask_converter(op, src, dst, maskFormat, src_x, src_y, ntrap, traps);
+		return precise_trapezoid_mask_converter(op, src, dst, maskFormat, flags, src_x, src_y, ntrap, traps);
 	else
-		return imprecise_trapezoid_mask_converter(op, src, dst, maskFormat, src_x, src_y, ntrap, traps);
+		return imprecise_trapezoid_mask_converter(op, src, dst, maskFormat, flags, src_x, src_y, ntrap, traps);
 }
 
 static inline bool
 trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
-			PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+			PictFormatPtr maskFormat, unsigned flags,
+			INT16 src_x, INT16 src_y,
 			int ntrap, xTrapezoid *traps)
 {
 	if (NO_SCAN_CONVERTER)
 		return false;
 
 	if (is_precise(dst, maskFormat))
-		return precise_trapezoid_span_fallback(op, src, dst, maskFormat, src_x, src_y, ntrap, traps);
+		return precise_trapezoid_span_fallback(op, src, dst, maskFormat, flags, src_x, src_y, ntrap, traps);
 	else
-		return imprecise_trapezoid_span_fallback(op, src, dst, maskFormat, src_x, src_y, ntrap, traps);
+		return imprecise_trapezoid_span_fallback(op, src, dst, maskFormat, flags, src_x, src_y, ntrap, traps);
 }
 
 bool
diff --git a/src/sna/sna_trapezoids_imprecise.c b/src/sna/sna_trapezoids_imprecise.c
index 6ca3fed..88cb660 100644
--- a/src/sna/sna_trapezoids_imprecise.c
+++ b/src/sna/sna_trapezoids_imprecise.c
@@ -389,7 +389,7 @@ cell_list_find(struct cell_list *cells, int x)
 	} while (1);
 
 	if (tail->x != x)
-		tail = cell_list_alloc (cells, tail, x);
+		tail = cell_list_alloc(cells, tail, x);
 
 	return cells->cursor = tail;
 }
@@ -777,7 +777,7 @@ merge_unsorted_edges(struct edge *head, struct edge *unsorted)
 
 /* Test if the edges on the active list can be safely advanced by a
  * full row without intersections or any edges ending. */
-inline static bool
+inline static int
 can_full_step(struct active_list *active)
 {
 	const struct edge *e;
@@ -785,7 +785,7 @@ can_full_step(struct active_list *active)
 
 	assert(active->head.next != &active->tail);
 	for (e = active->head.next; &active->tail != e; e = e->next) {
-		assert(e->height_left >= 0);
+		assert(e->height_left > 0);
 
 		if (e->dy != 0)
 			return 0;
@@ -842,6 +842,7 @@ nonzero_subrow(struct active_list *active, struct cell_list *coverages)
 			xstart = edge->next->x.quo;
 		}
 
+		assert(edge->height_left > 0);
 		if (--edge->height_left) {
 			if (edge->dy) {
 				edge->x.quo += edge->dxdy.quo;
@@ -885,7 +886,7 @@ nonzero_row(struct active_list *active, struct cell_list *coverages)
 
 		left->height_left -= FAST_SAMPLES_Y;
 		assert(left->height_left >= 0);
-		if (! left->height_left) {
+		if (!left->height_left) {
 			left->prev->next = left->next;
 			left->next->prev = left->prev;
 		}
@@ -967,6 +968,7 @@ step_edges(struct active_list *active, int count)
 	count *= FAST_SAMPLES_Y;
 	for (edge = active->head.next; edge != &active->tail; edge = edge->next) {
 		edge->height_left -= count;
+		assert(edge->height_left >= 0);
 		if (!edge->height_left) {
 			edge->prev->next = edge->next;
 			edge->next->prev = edge->prev;
@@ -1266,6 +1268,7 @@ inplace_row(struct active_list *active, uint8_t *row, int width)
 		int lix, rix;
 
 		left->height_left -= FAST_SAMPLES_Y;
+		assert(left->height_left >= 0);
 		if (!left->height_left) {
 			left->prev->next = left->next;
 			left->next->prev = left->prev;
@@ -1274,6 +1277,7 @@ inplace_row(struct active_list *active, uint8_t *row, int width)
 		right = left->next;
 		do {
 			right->height_left -= FAST_SAMPLES_Y;
+			assert(right->height_left >= 0);
 			if (!right->height_left) {
 				right->prev->next = right->next;
 				right->next->prev = right->prev;
@@ -1414,6 +1418,7 @@ inplace_subrow(struct active_list *active, int8_t *row,
 			xstart = MAX(edge->x.quo, 0);
 		}
 
+		assert(edge->height_left > 0);
 		if (--edge->height_left) {
 			if (edge->dy) {
 				edge->x.quo += edge->dxdy.quo;
@@ -1704,8 +1709,8 @@ struct span_thread {
 #define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
 struct span_thread_boxes {
 	const struct sna_composite_spans_op *op;
-	struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
 	int num_boxes;
+	struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
 };
 
 static void span_thread_add_boxes(struct sna *sna, void *data,
@@ -1943,7 +1948,9 @@ imprecise_trapezoid_span_converter(struct sna *sna,
 	dy *= FAST_SAMPLES_Y;
 
 	num_threads = 1;
-	if (!NO_GPU_THREADS && tmp.thread_boxes &&
+	if (!NO_GPU_THREADS &&
+	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0 &&
+	    tmp.thread_boxes &&
 	    thread_choose_span(&tmp, dst, maskFormat, &clip))
 		num_threads = sna_use_threads(clip.extents.x2-clip.extents.x1,
 					      clip.extents.y2-clip.extents.y1,
@@ -2063,7 +2070,8 @@ tor_blt_mask_mono(struct sna *sna,
 
 bool
 imprecise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
-				   PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+				   PictFormatPtr maskFormat, unsigned flags,
+				   INT16 src_x, INT16 src_y,
 				   int ntrap, xTrapezoid *traps)
 {
 	struct tor tor;
@@ -2083,7 +2091,7 @@ imprecise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 		     __FUNCTION__));
 		do {
 			/* XXX unwind errors? */
-			if (!imprecise_trapezoid_mask_converter(op, src, dst, NULL,
+			if (!imprecise_trapezoid_mask_converter(op, src, dst, NULL, flags,
 								src_x, src_y, 1, traps++))
 				return false;
 		} while (--ntrap);
@@ -2183,8 +2191,8 @@ imprecise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 }
 
 struct inplace {
-	uint32_t stride;
 	uint8_t *ptr;
+	uint32_t stride;
 	union {
 		uint8_t opacity;
 		uint32_t color;
@@ -2917,7 +2925,8 @@ static void inplace_thread(void *arg)
 bool
 imprecise_trapezoid_span_inplace(struct sna *sna,
 				 CARD8 op, PicturePtr src, PicturePtr dst,
-				 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+				 PictFormatPtr maskFormat, unsigned flags,
+				 INT16 src_x, INT16 src_y,
 				 int ntrap, xTrapezoid *traps,
 				 bool fallback)
 {
@@ -3007,7 +3016,7 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
 		     __FUNCTION__));
 		do {
 			/* XXX unwind errors? */
-			if (!imprecise_trapezoid_span_inplace(sna, op, src, dst, NULL,
+			if (!imprecise_trapezoid_span_inplace(sna, op, src, dst, NULL, flags,
 							      src_x, src_y, 1, traps++,
 							      fallback))
 				return false;
@@ -3064,16 +3073,17 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
 	dx = dst->pDrawable->x * FAST_SAMPLES_X;
 	dy = dst->pDrawable->y * FAST_SAMPLES_Y;
 
-
 	inplace.ptr = pixmap->devPrivate.ptr;
 	if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
 		inplace.ptr += dst_y * pixmap->devKind + dst_x;
 	inplace.stride = pixmap->devKind;
 	inplace.opacity = color >> 24;
 
-	num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
-				      region.extents.y2 - region.extents.y1,
-				      16);
+	num_threads = 1;
+	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
+		num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
+					      region.extents.y2 - region.extents.y1,
+					      16);
 	if (num_threads == 1) {
 		struct tor tor;
 
@@ -3143,7 +3153,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
 
 bool
 imprecise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
-				  PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+				  PictFormatPtr maskFormat, unsigned flags,
+				  INT16 src_x, INT16 src_y,
 				  int ntrap, xTrapezoid *traps)
 {
 	struct tor tor;
@@ -3163,7 +3174,7 @@ imprecise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 		     __FUNCTION__));
 		do {
 			/* XXX unwind errors? */
-			if (!imprecise_trapezoid_span_fallback(op, src, dst, NULL,
+			if (!imprecise_trapezoid_span_fallback(op, src, dst, NULL, flags,
 							       src_x, src_y, 1, traps++))
 				return false;
 		} while (--ntrap);
diff --git a/src/sna/sna_trapezoids_precise.c b/src/sna/sna_trapezoids_precise.c
index bfafca0..b4d7592 100644
--- a/src/sna/sna_trapezoids_precise.c
+++ b/src/sna/sna_trapezoids_precise.c
@@ -782,7 +782,7 @@ merge_unsorted_edges(struct edge *head, struct edge *unsorted)
 
 /* Test if the edges on the active list can be safely advanced by a
  * full row without intersections or any edges ending. */
-inline static bool
+inline static int
 can_full_step(struct active_list *active)
 {
 	const struct edge *e;
@@ -790,7 +790,7 @@ can_full_step(struct active_list *active)
 
 	assert(active->head.next != &active->tail);
 	for (e = active->head.next; &active->tail != e; e = e->next) {
-		assert(e->height_left >= 0);
+		assert(e->height_left > 0);
 
 		if (e->dy != 0)
 			return 0;
@@ -848,6 +848,7 @@ nonzero_subrow(struct active_list *active, struct cell_list *coverages)
 			xstart = edge->next->x.quo;
 		}
 
+		assert(edge->height_left > 0);
 		if (--edge->height_left) {
 			if (edge->dy) {
 				edge->x.quo += edge->dxdy.quo;
@@ -972,6 +973,7 @@ step_edges(struct active_list *active, int count)
 	count *= SAMPLES_Y;
 	for (edge = active->head.next; edge != &active->tail; edge = edge->next) {
 		edge->height_left -= count;
+		assert(edge->height_left >= 0);
 		if (!edge->height_left) {
 			edge->prev->next = edge->next;
 			edge->next->prev = edge->prev;
@@ -1223,6 +1225,7 @@ inplace_row(struct active_list *active, uint8_t *row, int width)
 		int lix, rix;
 
 		left->height_left -= SAMPLES_Y;
+		assert(left->height_left >= 0);
 		if (!left->height_left) {
 			left->prev->next = left->next;
 			left->next->prev = left->prev;
@@ -1231,6 +1234,7 @@ inplace_row(struct active_list *active, uint8_t *row, int width)
 		right = left->next;
 		do {
 			right->height_left -= SAMPLES_Y;
+			assert(right->height_left >= 0);
 			if (!right->height_left) {
 				right->prev->next = right->next;
 				right->next->prev = right->prev;
@@ -1342,6 +1346,7 @@ inplace_subrow(struct active_list *active, int8_t *row, int width)
 		} else
 			SAMPLES_X_TO_INT_FRAC(edge->x.quo, lix, lfx);
 
+		assert(edge->height_left > 0);
 		if (--edge->height_left) {
 			if (edge->dy) {
 				edge->x.quo += edge->dxdy.quo;
@@ -1377,6 +1382,7 @@ inplace_subrow(struct active_list *active, int8_t *row, int width)
 			if (0 == winding && edge->x.quo != next->x.quo)
 				break;
 
+			assert(edge->height_left > 0);
 			if (--edge->height_left) {
 				if (edge->dy) {
 					edge->x.quo += edge->dxdy.quo;
@@ -1416,6 +1422,7 @@ inplace_subrow(struct active_list *active, int8_t *row, int width)
 		} else
 			SAMPLES_X_TO_INT_FRAC(edge->x.quo, rix, rfx);
 
+		assert(edge->height_left > 0);
 		if (--edge->height_left) {
 			if (edge->dy) {
 				edge->x.quo += edge->dxdy.quo;
@@ -1509,9 +1516,8 @@ tor_inplace(struct tor *converter, PixmapPtr scratch)
 			do_full_step = can_full_step(active);
 		}
 
-		__DBG(("%s: y=%d, do_full_step=%d, new edges=%d, min_height=%d, vertical=%d\n",
-		       __FUNCTION__,
-		       i, do_full_step,
+		__DBG(("%s: y=%d, do_full_step=%d, new edges=%d\n",
+		       __FUNCTION__, i, do_full_step,
 		       polygon->y_buckets[i] != NULL));
 		if (do_full_step) {
 			memset(ptr, 0, width);
@@ -1633,8 +1639,8 @@ struct span_thread {
 #define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
 struct span_thread_boxes {
 	const struct sna_composite_spans_op *op;
-	struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
 	int num_boxes;
+	struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
 };
 
 static void span_thread_add_boxes(struct sna *sna, void *data,
@@ -1792,8 +1798,9 @@ precise_trapezoid_span_converter(struct sna *sna,
 
 #if 1
 	if (((clip.extents.y2 - clip.extents.y1) | (clip.extents.x2 - clip.extents.x1)) < 32) {
-		DBG(("%s: fallback -- traps extents too small %dx%d\n",
-		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
+		DBG(("%s: fallback -- traps extents too small %dx%d\n", __FUNCTION__,
+		     clip.extents.y2 - clip.extents.y1,
+		     clip.extents.x2 - clip.extents.x1));
 		return false;
 	}
 #endif
@@ -1867,7 +1874,9 @@ precise_trapezoid_span_converter(struct sna *sna,
 	dy *= SAMPLES_Y;
 
 	num_threads = 1;
-	if (!NO_GPU_THREADS && tmp.thread_boxes &&
+	if (!NO_GPU_THREADS &&
+	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0 &&
+	    tmp.thread_boxes &&
 	    thread_choose_span(&tmp, dst, maskFormat, &clip))
 		num_threads = sna_use_threads(clip.extents.x2-clip.extents.x1,
 					      clip.extents.y2-clip.extents.y1,
@@ -2022,7 +2031,8 @@ mask_thread(void *arg)
 
 bool
 precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
-				 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+				 PictFormatPtr maskFormat, unsigned flags,
+				 INT16 src_x, INT16 src_y,
 				 int ntrap, xTrapezoid *traps)
 {
 	ScreenPtr screen = dst->pDrawable->pScreen;
@@ -2042,7 +2052,7 @@ precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 		     __FUNCTION__));
 		do {
 			/* XXX unwind errors? */
-			if (!precise_trapezoid_mask_converter(op, src, dst, NULL,
+			if (!precise_trapezoid_mask_converter(op, src, dst, NULL, flags,
 							      src_x, src_y, 1, traps++))
 				return false;
 		} while (--ntrap);
@@ -2088,9 +2098,11 @@ precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 	DBG(("%s: created buffer %p, stride %d\n",
 	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
 
-	num_threads = sna_use_threads(extents.x2 - extents.x1,
-				      extents.y2 - extents.y1,
-				      4);
+	num_threads = 1;
+	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
+		num_threads = sna_use_threads(extents.x2 - extents.x1,
+					      extents.y2 - extents.y1,
+					      4);
 	if (num_threads == 1) {
 		struct tor tor;
 
@@ -2180,8 +2192,8 @@ precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 }
 
 struct inplace {
-	uint32_t stride;
 	uint8_t *ptr;
+	uint32_t stride;
 	union {
 		uint8_t opacity;
 		uint32_t color;
@@ -2624,7 +2636,7 @@ static bool
 trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 				 PicturePtr dst,
 				 PicturePtr src, int16_t src_x, int16_t src_y,
-				 PictFormatPtr maskFormat,
+				 PictFormatPtr maskFormat, unsigned flags,
 				 int ntrap, xTrapezoid *traps)
 {
 	uint32_t color;
@@ -2664,7 +2676,8 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 			/* XXX unwind errors? */
 			if (!trapezoid_span_inplace__x8r8g8b8(op, dst,
 							      src, src_x, src_y,
-							      NULL, 1, traps++))
+							      NULL, flags,
+							      1, traps++))
 				return false;
 		} while (--ntrap);
 		return true;
@@ -2711,9 +2724,11 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 	dx = dst->pDrawable->x * SAMPLES_X;
 	dy = dst->pDrawable->y * SAMPLES_Y;
 
-	num_threads = sna_use_threads(4*(region.extents.x2 - region.extents.x1),
-				      region.extents.y2 - region.extents.y1,
-				      4);
+	num_threads = 1;
+	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0 && (lerp || is_solid))
+		num_threads = sna_use_threads(4*(region.extents.x2 - region.extents.x1),
+					      region.extents.y2 - region.extents.y1,
+					      4);
 
 	DBG(("%s: %dx%d, format=%x, op=%d, lerp?=%d, num_threads=%d\n",
 	     __FUNCTION__,
@@ -2907,7 +2922,8 @@ static void inplace_thread(void *arg)
 bool
 precise_trapezoid_span_inplace(struct sna *sna,
 			       CARD8 op, PicturePtr src, PicturePtr dst,
-			       PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+			       PictFormatPtr maskFormat, unsigned flags,
+			       INT16 src_x, INT16 src_y,
 			       int ntrap, xTrapezoid *traps,
 			       bool fallback)
 {
@@ -2928,7 +2944,7 @@ precise_trapezoid_span_inplace(struct sna *sna,
 	if (dst->format == PICT_a8r8g8b8 || dst->format == PICT_x8r8g8b8)
 		return trapezoid_span_inplace__x8r8g8b8(op, dst,
 							src, src_x, src_y,
-							maskFormat,
+							maskFormat, flags,
 							ntrap, traps);
 
 	if (!sna_picture_is_solid(src, &color)) {
@@ -2997,7 +3013,7 @@ precise_trapezoid_span_inplace(struct sna *sna,
 		     __FUNCTION__));
 		do {
 			/* XXX unwind errors? */
-			if (!precise_trapezoid_span_inplace(sna, op, src, dst, NULL,
+			if (!precise_trapezoid_span_inplace(sna, op, src, dst, NULL, flags,
 							    src_x, src_y, 1, traps++,
 							    fallback))
 				return false;
@@ -3061,9 +3077,11 @@ precise_trapezoid_span_inplace(struct sna *sna,
 	inplace.stride = pixmap->devKind;
 	inplace.opacity = color >> 24;
 
-	num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
-				      region.extents.y2 - region.extents.y1,
-				      4);
+	num_threads = 1;
+	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
+		num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
+					      region.extents.y2 - region.extents.y1,
+					      4);
 	if (num_threads == 1) {
 		struct tor tor;
 
@@ -3133,7 +3151,8 @@ precise_trapezoid_span_inplace(struct sna *sna,
 
 bool
 precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
-				PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+				PictFormatPtr maskFormat, unsigned flags,
+				INT16 src_x, INT16 src_y,
 				int ntrap, xTrapezoid *traps)
 {
 	ScreenPtr screen = dst->pDrawable->pScreen;
@@ -3152,7 +3171,7 @@ precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 		     __FUNCTION__));
 		do {
 			/* XXX unwind errors? */
-			if (!precise_trapezoid_span_fallback(op, src, dst, NULL,
+			if (!precise_trapezoid_span_fallback(op, src, dst, NULL, flags,
 							     src_x, src_y, 1, traps++))
 				return false;
 		} while (--ntrap);
@@ -3197,9 +3216,11 @@ precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 	DBG(("%s: created buffer %p, stride %d\n",
 	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
 
-	num_threads = sna_use_threads(extents.x2 - extents.x1,
-				      extents.y2 - extents.y1,
-				      4);
+	num_threads = 1;
+	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
+		num_threads = sna_use_threads(extents.x2 - extents.x1,
+					      extents.y2 - extents.y1,
+					      4);
 	if (num_threads == 1) {
 		struct tor tor;
 


More information about the xorg-commit mailing list