xf86-video-intel: 3 commits - src/sna/gen4_vertex.c src/sna/sna_gradient.c src/sna/sna_render.h src/sna/sna_threads.c src/sna/sna_trapezoids_boxes.c src/sna/sna_trapezoids.c src/sna/sna_trapezoids.h src/sna/sna_trapezoids_imprecise.c src/sna/sna_trapezoids_mono.c src/sna/sna_trapezoids_precise.c
Chris Wilson
ickle at kemper.freedesktop.org
Mon Oct 7 16:05:18 PDT 2013
src/sna/gen4_vertex.c | 3
src/sna/sna_gradient.c | 9 -
src/sna/sna_render.h | 2
src/sna/sna_threads.c | 2
src/sna/sna_trapezoids.c | 13 +-
src/sna/sna_trapezoids.h | 41 ++++---
src/sna/sna_trapezoids_boxes.c | 2
src/sna/sna_trapezoids_imprecise.c | 152 +++++++++++++----------------
src/sna/sna_trapezoids_mono.c | 1
src/sna/sna_trapezoids_precise.c | 191 ++++++++++++++++++-------------------
10 files changed, 216 insertions(+), 200 deletions(-)
New commits:
commit 06a8ad9690590a605b1564012d062b98c60546a6
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 7 23:21:38 2013 +0100
sna/trapezoids: Recompute num_threads to match range
We need to be careful not to execute threads past the end of the allotted
buffer by making sure the clip extents correctly align.
Reported-by: Joseph Yasi <joe.yasi at gmail.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70204
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_threads.c b/src/sna/sna_threads.c
index b771bda..3e10e5a 100644
--- a/src/sna/sna_threads.c
+++ b/src/sna/sna_threads.c
@@ -274,6 +274,7 @@ void sna_image_composite(pixman_op_t op,
y = dst_y;
dy = (height + num_threads - 1) / num_threads;
+ num_threads = (height + dy - 1) / dy;
data[0].op = op;
data[0].src = src;
@@ -298,6 +299,7 @@ void sna_image_composite(pixman_op_t op,
sna_threads_run(thread_composite, &data[n]);
}
+ assert(y < dst_y + height);
if (y + dy > dst_y + height)
dy = dst_y + height - y;
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index c9c031a..8827b9c 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -390,6 +390,7 @@ trapezoids_fallback(struct sna *sna,
y = bounds.y1;
dy = (height + num_threads - 1) / num_threads;
+ num_threads = (bounds.y2 - bounds.y1 + dy - 1) / dy;
for (n = 1; n < num_threads; n++) {
threads[n] = threads[0];
@@ -400,9 +401,9 @@ trapezoids_fallback(struct sna *sna,
sna_threads_run(rasterize_traps_thread, &threads[n]);
}
+ assert(y < threads[0].bounds.y2);
threads[0].ptr += (y - bounds.y1) * threads[0].stride;
threads[0].bounds.y1 = y;
- threads[0].bounds.y2 = bounds.y2;
rasterize_traps_thread(&threads[0]);
sna_threads_wait();
diff --git a/src/sna/sna_trapezoids_boxes.c b/src/sna/sna_trapezoids_boxes.c
index d7861d2..4b0346b 100644
--- a/src/sna/sna_trapezoids_boxes.c
+++ b/src/sna/sna_trapezoids_boxes.c
@@ -1171,6 +1171,7 @@ composite_unaligned_boxes_inplace(struct sna *sna,
y = clip.extents.y1;
dy = (clip.extents.y2 - clip.extents.y1 + num_threads - 1) / num_threads;
+ num_threads = (clip.extents.y2 - clip.extents.y1 + dy - 1) / dy;
for (i = 1; i < num_threads; i++) {
thread[i] = thread[0];
@@ -1179,6 +1180,7 @@ composite_unaligned_boxes_inplace(struct sna *sna,
sna_threads_run(rectilinear_inplace_thread, &thread[i]);
}
+ assert(y < clip.extents.y2);
thread[0].y1 = y;
thread[0].y2 = clip.extents.y2;
rectilinear_inplace_thread(&thread[0]);
diff --git a/src/sna/sna_trapezoids_imprecise.c b/src/sna/sna_trapezoids_imprecise.c
index 88cb660..4809e9c 100644
--- a/src/sna/sna_trapezoids_imprecise.c
+++ b/src/sna/sna_trapezoids_imprecise.c
@@ -255,9 +255,7 @@ struct tor {
struct active_list active[1];
struct cell_list coverages[1];
- /* Clip box. */
- int xmin, xmax;
- int ymin, ymax;
+ BoxRec extents;
};
/* Compute the floored division a/b. Assumes / and % perform symmetric
@@ -362,10 +360,7 @@ cell_list_alloc(struct cell_list *cells,
inline static struct cell *
cell_list_find(struct cell_list *cells, int x)
{
- struct cell *tail = cells->cursor;
-
- if (tail->x == x)
- return tail;
+ struct cell *tail;
if (x >= cells->x2)
return &cells->tail;
@@ -373,6 +368,10 @@ cell_list_find(struct cell_list *cells, int x)
if (x < cells->x1)
return &cells->head;
+ tail = cells->cursor;
+ if (tail->x == x)
+ return tail;
+
do {
if (tail->next->x > x)
break;
@@ -459,13 +458,9 @@ polygon_fini(struct polygon *polygon)
}
static bool
-polygon_init(struct polygon *polygon,
- int num_edges,
- int ymin,
- int ymax)
+polygon_init(struct polygon *polygon, int num_edges, int ymin, int ymax)
{
- unsigned num_buckets =
- EDGE_Y_BUCKET_INDEX(ymax+EDGE_Y_BUCKET_HEIGHT-1, ymin);
+ unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax-1, ymin) + 1;
if (unlikely(ymax - ymin > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
return false;
@@ -502,6 +497,7 @@ _polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
{
unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
struct edge **ptail = &polygon->y_buckets[ix];
+ assert(e->ytop < polygon->ymax);
e->next = *ptail;
*ptail = e;
}
@@ -928,10 +924,7 @@ tor_init(struct tor *converter, const BoxRec *box, int num_edges)
FAST_SAMPLES_X, FAST_SAMPLES_Y,
num_edges));
- converter->xmin = box->x1;
- converter->ymin = box->y1;
- converter->xmax = box->x2;
- converter->ymax = box->y2;
+ converter->extents = *box;
if (!cell_list_init(converter->coverages, box->x1, box->x2))
return false;
@@ -1076,6 +1069,7 @@ tor_blt_span_mono_unbounded_clipped(struct sna *sna,
static void
tor_blt(struct sna *sna,
+ struct tor *converter,
struct sna_composite_spans_op *op,
pixman_region16_t *clip,
void (*span)(struct sna *sna,
@@ -1083,18 +1077,18 @@ tor_blt(struct sna *sna,
pixman_region16_t *clip,
const BoxRec *box,
int coverage),
- struct cell_list *cells,
int y, int height,
- int xmin, int xmax,
int unbounded)
{
+ struct cell_list *cells = converter->coverages;
struct cell *cell;
BoxRec box;
int cover;
- box.y1 = y;
- box.y2 = y + height;
- box.x1 = xmin;
+ box.y1 = y + converter->extents.y1;
+ box.y2 = box.y1 + height;
+ assert(box.y2 <= converter->extents.y2);
+ box.x1 = converter->extents.x1;
/* Form the spans from the coverages and areas. */
cover = cells->head.covered_height*FAST_SAMPLES_X*2;
@@ -1102,8 +1096,8 @@ tor_blt(struct sna *sna,
for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
int x = cell->x;
- assert(x >= xmin);
- assert(x < xmax);
+ assert(x >= converter->extents.x1);
+ assert(x < converter->extents.x2);
__DBG(("%s: cell=(%d, %d, %d), cover=%d, max=%d\n", __FUNCTION__,
cell->x, cell->covered_height, cell->uncovered_area,
cover, xmax));
@@ -1137,7 +1131,7 @@ tor_blt(struct sna *sna,
}
}
- box.x2 = xmax;
+ box.x2 = converter->extents.x2;
if (box.x2 > box.x1 && (unbounded || cover)) {
__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
box.x1, box.y1,
@@ -1148,28 +1142,6 @@ tor_blt(struct sna *sna,
}
}
-static void
-tor_blt_empty(struct sna *sna,
- struct sna_composite_spans_op *op,
- pixman_region16_t *clip,
- void (*span)(struct sna *sna,
- struct sna_composite_spans_op *op,
- pixman_region16_t *clip,
- const BoxRec *box,
- int coverage),
- int y, int height,
- int xmin, int xmax)
-{
- BoxRec box;
-
- box.x1 = xmin;
- box.x2 = xmax;
- box.y1 = y;
- box.y2 = y + height;
-
- span(sna, op, clip, &box, 0);
-}
-
flatten static void
tor_render(struct sna *sna,
struct tor *converter,
@@ -1182,14 +1154,11 @@ tor_render(struct sna *sna,
int coverage),
int unbounded)
{
- int ymin = converter->ymin;
- int xmin = converter->xmin;
- int xmax = converter->xmax;
- int i, j, h = converter->ymax - ymin;
struct polygon *polygon = converter->polygon;
struct cell_list *coverages = converter->coverages;
struct active_list *active = converter->active;
struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
+ int16_t i, j, h = converter->extents.y2 - converter->extents.y1;
__DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));
@@ -1201,15 +1170,23 @@ tor_render(struct sna *sna,
/* Determine if we can ignore this row or use the full pixel
* stepper. */
- if (!polygon->y_buckets[i]) {
+ if (polygon->y_buckets[i] == NULL) {
if (active->head.next == &active->tail) {
- for (; !polygon->y_buckets[j]; j++)
+ for (; polygon->y_buckets[j] == NULL; j++)
;
__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
__FUNCTION__, i, j));
- if (unbounded)
- tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax);
+ assert(j <= h);
+ if (unbounded) {
+ BoxRec box;
+
+ box = converter->extents;
+ box.y1 += i;
+ box.y2 = converter->extents.y1 + j;
+
+ span(sna, op, clip, &box, 0);
+ }
continue;
}
@@ -1228,6 +1205,7 @@ tor_render(struct sna *sna,
do_full_step -= FAST_SAMPLES_Y;
j++;
}
+ assert(j >= i + 1 && j <= h);
if (j != i + 1)
step_edges(active, j - (i + 1));
@@ -1249,9 +1227,8 @@ tor_render(struct sna *sna,
}
}
- tor_blt(sna, op, clip, span, coverages,
- i+ymin, j-i, xmin, xmax,
- unbounded);
+ assert(j > i);
+ tor_blt(sna, converter, op, clip, span, i, j-i, unbounded);
cell_list_reset(coverages);
}
}
@@ -1531,7 +1508,7 @@ inplace_end_subrows(struct active_list *active, uint8_t *row,
static void
tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
{
- int i, j, h = converter->ymax;
+ int i, j, h = converter->extents.y2;
struct polygon *polygon = converter->polygon;
struct active_list *active = converter->active;
struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
@@ -1541,8 +1518,8 @@ tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
__DBG(("%s: mono=%d, buf?=%d\n", __FUNCTION__, mono, buf != NULL));
assert(!mono);
- assert(converter->ymin == 0);
- assert(converter->xmin == 0);
+ assert(converter->extents.y1 == 0);
+ assert(converter->extents.x1 == 0);
assert(scratch->drawable.depth == 8);
/* Render each pixel row. */
@@ -2005,6 +1982,7 @@ imprecise_trapezoid_span_converter(struct sna *sna,
y = clip.extents.y1;
h = clip.extents.y2 - clip.extents.y1;
h = (h + num_threads - 1) / num_threads;
+ num_threads = (clip.extents.y2 - clip.extents.y1 + h - 1) / h;
for (n = 1; n < num_threads; n++) {
threads[n] = threads[0];
@@ -2014,6 +1992,7 @@ imprecise_trapezoid_span_converter(struct sna *sna,
sna_threads_run(span_thread, &threads[n]);
}
+ assert(y < threads[0].extents.y2);
threads[0].extents.y1 = y;
span_thread(&threads[0]);
@@ -2862,6 +2841,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
y = region.extents.y1;
h = region.extents.y2 - region.extents.y1;
h = (h + num_threads - 1) / num_threads;
+ num_threads = (region.extents.y2 - region.extents.y1 + h - 1) / h;
for (n = 1; n < num_threads; n++) {
threads[n] = threads[0];
@@ -2871,8 +2851,8 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
sna_threads_run(inplace_x8r8g8b8_thread, &threads[n]);
}
+ assert(y < threads[0].extents.y2);
threads[0].extents.y1 = y;
- threads[0].extents.y2 = region.extents.y2;
inplace_x8r8g8b8_thread(&threads[0]);
sna_threads_wait();
@@ -3132,6 +3112,7 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
y = region.extents.y1;
h = region.extents.y2 - region.extents.y1;
h = (h + num_threads - 1) / num_threads;
+ num_threads = (region.extents.y2 - region.extents.y1 + h - 1) / h;
for (n = 1; n < num_threads; n++) {
threads[n] = threads[0];
@@ -3141,8 +3122,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
sna_threads_run(inplace_thread, &threads[n]);
}
+ assert(y < threads[0].extents.y2);
threads[0].extents.y1 = y;
- threads[0].extents.y2 = region.extents.y2;
inplace_thread(&threads[0]);
sna_threads_wait();
diff --git a/src/sna/sna_trapezoids_mono.c b/src/sna/sna_trapezoids_mono.c
index e5cb7a6..790863e 100644
--- a/src/sna/sna_trapezoids_mono.c
+++ b/src/sna/sna_trapezoids_mono.c
@@ -843,6 +843,7 @@ mono_trapezoids_span_converter(struct sna *sna,
y = extents.y1;
h = extents.y2 - extents.y1;
h = (h + num_threads - 1) / num_threads;
+ num_threads = (extents.y2 - extents.y1 + h - 1) / h;
for (n = 1; n < num_threads; n++) {
threads[n] = threads[0];
diff --git a/src/sna/sna_trapezoids_precise.c b/src/sna/sna_trapezoids_precise.c
index b4d7592..3720d8b 100644
--- a/src/sna/sna_trapezoids_precise.c
+++ b/src/sna/sna_trapezoids_precise.c
@@ -268,9 +268,7 @@ struct tor {
struct active_list active[1];
struct cell_list coverages[1];
- /* Clip box. */
- int xmin, xmax;
- int ymin, ymax;
+ BoxRec extents;
};
/* Compute the floored division a/b. Assumes / and % perform symmetric
@@ -473,13 +471,9 @@ polygon_fini(struct polygon *polygon)
}
static bool
-polygon_init(struct polygon *polygon,
- int num_edges,
- int ymin,
- int ymax)
+polygon_init(struct polygon *polygon, int num_edges, int ymin, int ymax)
{
- unsigned num_buckets =
- EDGE_Y_BUCKET_INDEX(ymax+EDGE_Y_BUCKET_HEIGHT-1, ymin);
+ unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax-1, ymin) + 1;
if (unlikely(ymax - ymin > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
return false;
@@ -516,6 +510,7 @@ _polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
{
unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
struct edge **ptail = &polygon->y_buckets[ix];
+ assert(e->ytop < polygon->ymax);
e->next = *ptail;
*ptail = e;
}
@@ -534,7 +529,7 @@ polygon_add_edge(struct polygon *polygon,
int ymin = polygon->ymin;
int ymax = polygon->ymax;
- assert (dy > 0);
+ assert(dy > 0);
e->dy = dy;
e->dir = dir;
@@ -934,10 +929,7 @@ tor_init(struct tor *converter, const BoxRec *box, int num_edges)
SAMPLES_X, SAMPLES_Y,
num_edges));
- converter->xmin = box->x1;
- converter->ymin = box->y1;
- converter->xmax = box->x2;
- converter->ymax = box->y2;
+ converter->extents = *box;
if (!cell_list_init(converter->coverages, box->x1, box->x2))
return false;
@@ -1033,6 +1025,7 @@ tor_blt_span_clipped(struct sna *sna,
static void
tor_blt(struct sna *sna,
+ struct tor *converter,
struct sna_composite_spans_op *op,
pixman_region16_t *clip,
void (*span)(struct sna *sna,
@@ -1040,18 +1033,18 @@ tor_blt(struct sna *sna,
pixman_region16_t *clip,
const BoxRec *box,
int coverage),
- struct cell_list *cells,
int y, int height,
- int xmin, int xmax,
int unbounded)
{
+ struct cell_list *cells = converter->coverages;
struct cell *cell;
BoxRec box;
int cover;
- box.y1 = y;
- box.y2 = y + height;
- box.x1 = xmin;
+ box.y1 = y + converter->extents.y1;
+ box.y2 = box.y1 + height;
+ assert(box.y2 <= converter->extents.y2);
+ box.x1 = converter->extents.x1;
/* Form the spans from the coverages and areas. */
cover = cells->head.covered_height*SAMPLES_X*2;
@@ -1059,8 +1052,8 @@ tor_blt(struct sna *sna,
for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
int x = cell->x;
- assert(x >= xmin);
- assert(x < xmax);
+ assert(x >= converter->extents.x1);
+ assert(x < converter->extents.x2);
__DBG(("%s: cell=(%d, %d, %d), cover=%d, max=%d\n", __FUNCTION__,
cell->x, cell->covered_height, cell->uncovered_area,
cover, xmax));
@@ -1094,7 +1087,7 @@ tor_blt(struct sna *sna,
}
}
- box.x2 = xmax;
+ box.x2 = converter->extents.x2;
if (box.x2 > box.x1 && (unbounded || cover)) {
__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
box.x1, box.y1,
@@ -1105,28 +1098,6 @@ tor_blt(struct sna *sna,
}
}
-static void
-tor_blt_empty(struct sna *sna,
- struct sna_composite_spans_op *op,
- pixman_region16_t *clip,
- void (*span)(struct sna *sna,
- struct sna_composite_spans_op *op,
- pixman_region16_t *clip,
- const BoxRec *box,
- int coverage),
- int y, int height,
- int xmin, int xmax)
-{
- BoxRec box;
-
- box.x1 = xmin;
- box.x2 = xmax;
- box.y1 = y;
- box.y2 = y + height;
-
- span(sna, op, clip, &box, 0);
-}
-
flatten static void
tor_render(struct sna *sna,
struct tor *converter,
@@ -1139,14 +1110,11 @@ tor_render(struct sna *sna,
int coverage),
int unbounded)
{
- int ymin = converter->ymin;
- int xmin = converter->xmin;
- int xmax = converter->xmax;
- int i, j, h = converter->ymax - ymin;
struct polygon *polygon = converter->polygon;
struct cell_list *coverages = converter->coverages;
struct active_list *active = converter->active;
struct edge *buckets[SAMPLES_Y] = { 0 };
+ int16_t i, j, h = converter->extents.y2 - converter->extents.y1;
__DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));
@@ -1158,15 +1126,23 @@ tor_render(struct sna *sna,
/* Determine if we can ignore this row or use the full pixel
* stepper. */
- if (!polygon->y_buckets[i]) {
+ if (polygon->y_buckets[i] == NULL) {
if (active->head.next == &active->tail) {
- for (; !polygon->y_buckets[j]; j++)
+ for (; polygon->y_buckets[j] == NULL; j++)
;
__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
__FUNCTION__, i, j));
- if (unbounded)
- tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax);
+ assert(j <= h);
+ if (unbounded) {
+ BoxRec box;
+
+ box = converter->extents;
+ box.y1 += i;
+ box.y2 = converter->extents.y1 + j;
+
+ span(sna, op, clip, &box, 0);
+ }
continue;
}
@@ -1185,6 +1161,7 @@ tor_render(struct sna *sna,
do_full_step -= SAMPLES_Y;
j++;
}
+ assert(j >= i + 1 && j <= h);
if (j != i + 1)
step_edges(active, j - (i + 1));
@@ -1193,7 +1170,7 @@ tor_render(struct sna *sna,
} else {
int suby;
- fill_buckets(active, polygon->y_buckets[i], (i+ymin)*SAMPLES_Y, buckets);
+ fill_buckets(active, polygon->y_buckets[i], (i+converter->extents.y1)*SAMPLES_Y, buckets);
/* Subsample this row. */
for (suby = 0; suby < SAMPLES_Y; suby++) {
@@ -1206,9 +1183,8 @@ tor_render(struct sna *sna,
}
}
- tor_blt(sna, op, clip, span, coverages,
- i+ymin, j-i, xmin, xmax,
- unbounded);
+ assert(j > i);
+ tor_blt(sna, converter, op, clip, span, i, j-i, unbounded);
cell_list_reset(coverages);
}
}
@@ -1478,7 +1454,7 @@ flatten static void
tor_inplace(struct tor *converter, PixmapPtr scratch)
{
uint8_t buf[TOR_INPLACE_SIZE];
- int i, j, h = converter->ymax - converter->ymin;
+ int i, j, h = converter->extents.y2 - converter->extents.y1;
struct polygon *polygon = converter->polygon;
struct active_list *active = converter->active;
struct edge *buckets[SAMPLES_Y] = { 0 };
@@ -1487,10 +1463,10 @@ tor_inplace(struct tor *converter, PixmapPtr scratch)
int width = scratch->drawable.width;
__DBG(("%s: buf?=%d\n", __FUNCTION__, buf != NULL));
- assert(converter->xmin == 0);
+ assert(converter->extents.x1 == 0);
assert(scratch->drawable.depth == 8);
- row += converter->ymin * stride;
+ row += converter->extents.y1 * stride;
/* Render each pixel row. */
for (i = 0; i < h; i = j) {
@@ -1540,7 +1516,7 @@ tor_inplace(struct tor *converter, PixmapPtr scratch)
} else {
int suby;
- fill_buckets(active, polygon->y_buckets[i], (i+converter->ymin)*SAMPLES_Y, buckets);
+ fill_buckets(active, polygon->y_buckets[i], (i+converter->extents.y1)*SAMPLES_Y, buckets);
/* Subsample this row. */
memset(ptr, 0, width);
@@ -1931,6 +1907,7 @@ precise_trapezoid_span_converter(struct sna *sna,
y = clip.extents.y1;
h = clip.extents.y2 - clip.extents.y1;
h = (h + num_threads - 1) / num_threads;
+ num_threads = (clip.extents.y2 - clip.extents.y1 + h - 1) / h;
for (n = 1; n < num_threads; n++) {
threads[n] = threads[0];
@@ -1940,6 +1917,7 @@ precise_trapezoid_span_converter(struct sna *sna,
sna_threads_run(span_thread, &threads[n]);
}
+ assert(y < threads[0].extents.y2);
threads[0].extents.y1 = y;
span_thread(&threads[0]);
@@ -2155,6 +2133,7 @@ precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
y = extents.y1;
h = extents.y2 - extents.y1;
h = (h + num_threads - 1) / num_threads;
+ num_threads = (extents.y2 - extents.y1 + h - 1) / h;
for (n = 1; n < num_threads; n++) {
threads[n] = threads[0];
@@ -2164,6 +2143,7 @@ precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
sna_threads_run(mask_thread, &threads[n]);
}
+ assert(y < threads[0].extents.y2);
threads[0].extents.y1 = y;
mask_thread(&threads[0]);
@@ -2859,6 +2839,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
y = region.extents.y1;
h = region.extents.y2 - region.extents.y1;
h = (h + num_threads - 1) / num_threads;
+ num_threads = (region.extents.y2 - region.extents.y1 + h - 1) / h;
for (n = 1; n < num_threads; n++) {
threads[n] = threads[0];
@@ -2868,8 +2849,8 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
sna_threads_run(inplace_x8r8g8b8_thread, &threads[n]);
}
+ assert(y < threads[0].extents.y2);
threads[0].extents.y1 = y;
- threads[0].extents.y2 = region.extents.y2;
inplace_x8r8g8b8_thread(&threads[0]);
sna_threads_wait();
@@ -3130,6 +3111,7 @@ precise_trapezoid_span_inplace(struct sna *sna,
y = region.extents.y1;
h = region.extents.y2 - region.extents.y1;
h = (h + num_threads - 1) / num_threads;
+ num_threads = (region.extents.y2 - region.extents.y1 + h - 1) / h;
for (n = 1; n < num_threads; n++) {
threads[n] = threads[0];
@@ -3139,8 +3121,8 @@ precise_trapezoid_span_inplace(struct sna *sna,
sna_threads_run(inplace_thread, &threads[n]);
}
+ assert(y < threads[0].extents.y2);
threads[0].extents.y1 = y;
- threads[0].extents.y2 = region.extents.y2;
inplace_thread(&threads[0]);
sna_threads_wait();
@@ -3273,6 +3255,7 @@ precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
y = extents.y1;
h = extents.y2 - extents.y1;
h = (h + num_threads - 1) / num_threads;
+ num_threads = (extents.y2 - extents.y1 + h - 1) / h;
for (n = 1; n < num_threads; n++) {
threads[n] = threads[0];
@@ -3282,6 +3265,7 @@ precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
sna_threads_run(mask_thread, &threads[n]);
}
+ assert(y < threads[0].extents.y2);
threads[0].extents.y1 = y;
mask_thread(&threads[0]);
commit 1fb4f60671cfb0e461a2e5969ee9d0f0e39d93a4
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 7 22:04:22 2013 +0100
sna: Trim color cache allocation to a single page
Instead of trying to allocate 4100 bytes, fix the logic to only require a
maximum of 4096 bytes in the cache buffer.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_gradient.c b/src/sna/sna_gradient.c
index 26e4167..818a8b7 100644
--- a/src/sna/sna_gradient.c
+++ b/src/sna/sna_gradient.c
@@ -246,8 +246,8 @@ sna_render_finish_solid(struct sna *sna, bool force)
struct kgem_bo *old;
int i;
- DBG(("sna_render_finish_solid(force=%d, domain=%d, busy=%d, dirty=%d)\n",
- force, cache->cache_bo->domain, cache->cache_bo->rq != NULL, cache->dirty));
+ DBG(("sna_render_finish_solid(force=%d, domain=%d, busy=%d, dirty=%d, size=%d)\n",
+ force, cache->cache_bo->domain, cache->cache_bo->rq != NULL, cache->dirty, cache->size));
if (!force && cache->cache_bo->domain != DOMAIN_GPU)
return;
@@ -353,9 +353,10 @@ sna_render_get_solid(struct sna *sna, uint32_t color)
}
}
- sna_render_finish_solid(sna, i == 1024);
+ sna_render_finish_solid(sna, i == ARRAY_SIZE(cache->color));
i = cache->size++;
+ assert(i < ARRAY_SIZE(cache->color));
cache->color[i] = color;
cache->dirty = 1;
DBG(("sna_render_get_solid(%d) = %x (new)\n", i, color));
@@ -429,7 +430,7 @@ static bool sna_solid_cache_init(struct sna *sna)
if (!cache->cache_bo)
return false;
- cache->last = 1024;
+ cache->last = 0;
cache->color[cache->last] = 0;
cache->dirty = 0;
cache->size = 0;
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index ad8121e..7f4b8e6 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -287,7 +287,7 @@ struct sna_render {
struct sna_solid_cache {
struct kgem_bo *cache_bo;
struct kgem_bo *bo[1024];
- uint32_t color[1025];
+ uint32_t color[1024];
int last;
int size;
int dirty;
commit 55cd67485ff34a28ab8eaa7b1b6958b96c072317
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 7 14:06:31 2013 +0100
sna/trapezoids: Only use a single thread to emit rectilinear spans
If the trapezoids are rectilinear, they should hit a fast path through
the span compositors and so threading them seems pointless. Except
possibly for inplace pixman operations.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index 85e7413..e2a4808 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -45,7 +45,9 @@ void gen4_vertex_flush(struct sna *sna)
sna->render.vertex_index - sna->render.vertex_start));
assert(sna->render.vertex_offset);
+ assert(sna->render.vertex_offset <= sna->kgem.nbatch);
assert(sna->render.vertex_index > sna->render.vertex_start);
+ assert(sna->render.vertex_used <= sna->render.vertex_size);
sna->kgem.batch[sna->render.vertex_offset] =
sna->render.vertex_index - sna->render.vertex_start;
@@ -62,6 +64,7 @@ int gen4_vertex_finish(struct sna *sna)
sna->render.vertex_used, sna->render.vertex_size));
assert(sna->render.vertex_offset == 0);
assert(sna->render.vertex_used);
+ assert(sna->render.vertex_used <= sna->render.vertex_size);
sna_vertex_wait__locked(&sna->render);
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 7c02840..c9c031a 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -660,7 +660,7 @@ sna_composite_trapezoids(CARD8 op,
if (trapezoid_spans_maybe_inplace(sna, op, src, dst, maskFormat)) {
flags |= COMPOSITE_SPANS_INPLACE_HINT;
- if (trapezoid_span_inplace(sna, op, src, dst, maskFormat,
+ if (trapezoid_span_inplace(sna, op, src, dst, maskFormat, flags,
xSrc, ySrc, ntrap, traps,
false))
return;
@@ -670,22 +670,22 @@ sna_composite_trapezoids(CARD8 op,
xSrc, ySrc, ntrap, traps))
return;
- if (trapezoid_span_inplace(sna, op, src, dst, maskFormat,
+ if (trapezoid_span_inplace(sna, op, src, dst, maskFormat, flags,
xSrc, ySrc, ntrap, traps,
false))
return;
- if (trapezoid_mask_converter(op, src, dst, maskFormat,
+ if (trapezoid_mask_converter(op, src, dst, maskFormat, flags,
xSrc, ySrc, ntrap, traps))
return;
fallback:
- if (trapezoid_span_inplace(sna, op, src, dst, maskFormat,
+ if (trapezoid_span_inplace(sna, op, src, dst, maskFormat, flags,
xSrc, ySrc, ntrap, traps,
true))
return;
- if (trapezoid_span_fallback(op, src, dst, maskFormat,
+ if (trapezoid_span_fallback(op, src, dst, maskFormat, flags,
xSrc, ySrc, ntrap, traps))
return;
diff --git a/src/sna/sna_trapezoids.h b/src/sna/sna_trapezoids.h
index 729334c..d101726 100644
--- a/src/sna/sna_trapezoids.h
+++ b/src/sna/sna_trapezoids.h
@@ -62,7 +62,8 @@ mono_triangles_span_converter(struct sna *sna,
bool
imprecise_trapezoid_span_inplace(struct sna *sna,
CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps,
bool fallback);
@@ -75,18 +76,21 @@ imprecise_trapezoid_span_converter(struct sna *sna,
bool
imprecise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps);
bool
imprecise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps);
bool
precise_trapezoid_span_inplace(struct sna *sna,
CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps,
bool fallback);
@@ -99,13 +103,15 @@ precise_trapezoid_span_converter(struct sna *sna,
bool
precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps);
bool
precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
- int ntrap, xTrapezoid *traps);
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
+ int ntrap, xTrapezoid *traps);
static inline bool is_mono(PicturePtr dst, PictFormatPtr mask)
{
@@ -120,7 +126,8 @@ static inline bool is_precise(PicturePtr dst, PictFormatPtr mask)
static inline bool
trapezoid_span_inplace(struct sna *sna,
CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps,
bool fallback)
{
@@ -143,9 +150,9 @@ trapezoid_span_inplace(struct sna *sna,
if (is_mono(dst, maskFormat))
return mono_trapezoid_span_inplace(sna, op, src, dst, src_x, src_y, ntrap, traps);
else if (is_precise(dst, maskFormat))
- return precise_trapezoid_span_inplace(sna, op, src, dst, maskFormat, src_x, src_y, ntrap, traps, fallback);
+ return precise_trapezoid_span_inplace(sna, op, src, dst, maskFormat, flags, src_x, src_y, ntrap, traps, fallback);
else
- return imprecise_trapezoid_span_inplace(sna, op, src, dst, maskFormat, src_x, src_y, ntrap, traps, fallback);
+ return imprecise_trapezoid_span_inplace(sna, op, src, dst, maskFormat, flags, src_x, src_y, ntrap, traps, fallback);
}
static inline bool
@@ -168,30 +175,32 @@ trapezoid_span_converter(struct sna *sna,
static inline bool
trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps)
{
if (NO_SCAN_CONVERTER)
return false;
if (is_precise(dst, maskFormat))
- return precise_trapezoid_mask_converter(op, src, dst, maskFormat, src_x, src_y, ntrap, traps);
+ return precise_trapezoid_mask_converter(op, src, dst, maskFormat, flags, src_x, src_y, ntrap, traps);
else
- return imprecise_trapezoid_mask_converter(op, src, dst, maskFormat, src_x, src_y, ntrap, traps);
+ return imprecise_trapezoid_mask_converter(op, src, dst, maskFormat, flags, src_x, src_y, ntrap, traps);
}
static inline bool
trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps)
{
if (NO_SCAN_CONVERTER)
return false;
if (is_precise(dst, maskFormat))
- return precise_trapezoid_span_fallback(op, src, dst, maskFormat, src_x, src_y, ntrap, traps);
+ return precise_trapezoid_span_fallback(op, src, dst, maskFormat, flags, src_x, src_y, ntrap, traps);
else
- return imprecise_trapezoid_span_fallback(op, src, dst, maskFormat, src_x, src_y, ntrap, traps);
+ return imprecise_trapezoid_span_fallback(op, src, dst, maskFormat, flags, src_x, src_y, ntrap, traps);
}
bool
diff --git a/src/sna/sna_trapezoids_imprecise.c b/src/sna/sna_trapezoids_imprecise.c
index 6ca3fed..88cb660 100644
--- a/src/sna/sna_trapezoids_imprecise.c
+++ b/src/sna/sna_trapezoids_imprecise.c
@@ -389,7 +389,7 @@ cell_list_find(struct cell_list *cells, int x)
} while (1);
if (tail->x != x)
- tail = cell_list_alloc (cells, tail, x);
+ tail = cell_list_alloc(cells, tail, x);
return cells->cursor = tail;
}
@@ -777,7 +777,7 @@ merge_unsorted_edges(struct edge *head, struct edge *unsorted)
/* Test if the edges on the active list can be safely advanced by a
* full row without intersections or any edges ending. */
-inline static bool
+inline static int
can_full_step(struct active_list *active)
{
const struct edge *e;
@@ -785,7 +785,7 @@ can_full_step(struct active_list *active)
assert(active->head.next != &active->tail);
for (e = active->head.next; &active->tail != e; e = e->next) {
- assert(e->height_left >= 0);
+ assert(e->height_left > 0);
if (e->dy != 0)
return 0;
@@ -842,6 +842,7 @@ nonzero_subrow(struct active_list *active, struct cell_list *coverages)
xstart = edge->next->x.quo;
}
+ assert(edge->height_left > 0);
if (--edge->height_left) {
if (edge->dy) {
edge->x.quo += edge->dxdy.quo;
@@ -885,7 +886,7 @@ nonzero_row(struct active_list *active, struct cell_list *coverages)
left->height_left -= FAST_SAMPLES_Y;
assert(left->height_left >= 0);
- if (! left->height_left) {
+ if (!left->height_left) {
left->prev->next = left->next;
left->next->prev = left->prev;
}
@@ -967,6 +968,7 @@ step_edges(struct active_list *active, int count)
count *= FAST_SAMPLES_Y;
for (edge = active->head.next; edge != &active->tail; edge = edge->next) {
edge->height_left -= count;
+ assert(edge->height_left >= 0);
if (!edge->height_left) {
edge->prev->next = edge->next;
edge->next->prev = edge->prev;
@@ -1266,6 +1268,7 @@ inplace_row(struct active_list *active, uint8_t *row, int width)
int lix, rix;
left->height_left -= FAST_SAMPLES_Y;
+ assert(left->height_left >= 0);
if (!left->height_left) {
left->prev->next = left->next;
left->next->prev = left->prev;
@@ -1274,6 +1277,7 @@ inplace_row(struct active_list *active, uint8_t *row, int width)
right = left->next;
do {
right->height_left -= FAST_SAMPLES_Y;
+ assert(right->height_left >= 0);
if (!right->height_left) {
right->prev->next = right->next;
right->next->prev = right->prev;
@@ -1414,6 +1418,7 @@ inplace_subrow(struct active_list *active, int8_t *row,
xstart = MAX(edge->x.quo, 0);
}
+ assert(edge->height_left > 0);
if (--edge->height_left) {
if (edge->dy) {
edge->x.quo += edge->dxdy.quo;
@@ -1704,8 +1709,8 @@ struct span_thread {
#define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
struct span_thread_boxes {
const struct sna_composite_spans_op *op;
- struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
int num_boxes;
+ struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
};
static void span_thread_add_boxes(struct sna *sna, void *data,
@@ -1943,7 +1948,9 @@ imprecise_trapezoid_span_converter(struct sna *sna,
dy *= FAST_SAMPLES_Y;
num_threads = 1;
- if (!NO_GPU_THREADS && tmp.thread_boxes &&
+ if (!NO_GPU_THREADS &&
+ (flags & COMPOSITE_SPANS_RECTILINEAR) == 0 &&
+ tmp.thread_boxes &&
thread_choose_span(&tmp, dst, maskFormat, &clip))
num_threads = sna_use_threads(clip.extents.x2-clip.extents.x1,
clip.extents.y2-clip.extents.y1,
@@ -2063,7 +2070,8 @@ tor_blt_mask_mono(struct sna *sna,
bool
imprecise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps)
{
struct tor tor;
@@ -2083,7 +2091,7 @@ imprecise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
__FUNCTION__));
do {
/* XXX unwind errors? */
- if (!imprecise_trapezoid_mask_converter(op, src, dst, NULL,
+ if (!imprecise_trapezoid_mask_converter(op, src, dst, NULL, flags,
src_x, src_y, 1, traps++))
return false;
} while (--ntrap);
@@ -2183,8 +2191,8 @@ imprecise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
}
struct inplace {
- uint32_t stride;
uint8_t *ptr;
+ uint32_t stride;
union {
uint8_t opacity;
uint32_t color;
@@ -2917,7 +2925,8 @@ static void inplace_thread(void *arg)
bool
imprecise_trapezoid_span_inplace(struct sna *sna,
CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps,
bool fallback)
{
@@ -3007,7 +3016,7 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
__FUNCTION__));
do {
/* XXX unwind errors? */
- if (!imprecise_trapezoid_span_inplace(sna, op, src, dst, NULL,
+ if (!imprecise_trapezoid_span_inplace(sna, op, src, dst, NULL, flags,
src_x, src_y, 1, traps++,
fallback))
return false;
@@ -3064,16 +3073,17 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
dx = dst->pDrawable->x * FAST_SAMPLES_X;
dy = dst->pDrawable->y * FAST_SAMPLES_Y;
-
inplace.ptr = pixmap->devPrivate.ptr;
if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
inplace.ptr += dst_y * pixmap->devKind + dst_x;
inplace.stride = pixmap->devKind;
inplace.opacity = color >> 24;
- num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
- region.extents.y2 - region.extents.y1,
- 16);
+ num_threads = 1;
+ if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
+ num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
+ region.extents.y2 - region.extents.y1,
+ 16);
if (num_threads == 1) {
struct tor tor;
@@ -3143,7 +3153,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
bool
imprecise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps)
{
struct tor tor;
@@ -3163,7 +3174,7 @@ imprecise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
__FUNCTION__));
do {
/* XXX unwind errors? */
- if (!imprecise_trapezoid_span_fallback(op, src, dst, NULL,
+ if (!imprecise_trapezoid_span_fallback(op, src, dst, NULL, flags,
src_x, src_y, 1, traps++))
return false;
} while (--ntrap);
diff --git a/src/sna/sna_trapezoids_precise.c b/src/sna/sna_trapezoids_precise.c
index bfafca0..b4d7592 100644
--- a/src/sna/sna_trapezoids_precise.c
+++ b/src/sna/sna_trapezoids_precise.c
@@ -782,7 +782,7 @@ merge_unsorted_edges(struct edge *head, struct edge *unsorted)
/* Test if the edges on the active list can be safely advanced by a
* full row without intersections or any edges ending. */
-inline static bool
+inline static int
can_full_step(struct active_list *active)
{
const struct edge *e;
@@ -790,7 +790,7 @@ can_full_step(struct active_list *active)
assert(active->head.next != &active->tail);
for (e = active->head.next; &active->tail != e; e = e->next) {
- assert(e->height_left >= 0);
+ assert(e->height_left > 0);
if (e->dy != 0)
return 0;
@@ -848,6 +848,7 @@ nonzero_subrow(struct active_list *active, struct cell_list *coverages)
xstart = edge->next->x.quo;
}
+ assert(edge->height_left > 0);
if (--edge->height_left) {
if (edge->dy) {
edge->x.quo += edge->dxdy.quo;
@@ -972,6 +973,7 @@ step_edges(struct active_list *active, int count)
count *= SAMPLES_Y;
for (edge = active->head.next; edge != &active->tail; edge = edge->next) {
edge->height_left -= count;
+ assert(edge->height_left >= 0);
if (!edge->height_left) {
edge->prev->next = edge->next;
edge->next->prev = edge->prev;
@@ -1223,6 +1225,7 @@ inplace_row(struct active_list *active, uint8_t *row, int width)
int lix, rix;
left->height_left -= SAMPLES_Y;
+ assert(left->height_left >= 0);
if (!left->height_left) {
left->prev->next = left->next;
left->next->prev = left->prev;
@@ -1231,6 +1234,7 @@ inplace_row(struct active_list *active, uint8_t *row, int width)
right = left->next;
do {
right->height_left -= SAMPLES_Y;
+ assert(right->height_left >= 0);
if (!right->height_left) {
right->prev->next = right->next;
right->next->prev = right->prev;
@@ -1342,6 +1346,7 @@ inplace_subrow(struct active_list *active, int8_t *row, int width)
} else
SAMPLES_X_TO_INT_FRAC(edge->x.quo, lix, lfx);
+ assert(edge->height_left > 0);
if (--edge->height_left) {
if (edge->dy) {
edge->x.quo += edge->dxdy.quo;
@@ -1377,6 +1382,7 @@ inplace_subrow(struct active_list *active, int8_t *row, int width)
if (0 == winding && edge->x.quo != next->x.quo)
break;
+ assert(edge->height_left > 0);
if (--edge->height_left) {
if (edge->dy) {
edge->x.quo += edge->dxdy.quo;
@@ -1416,6 +1422,7 @@ inplace_subrow(struct active_list *active, int8_t *row, int width)
} else
SAMPLES_X_TO_INT_FRAC(edge->x.quo, rix, rfx);
+ assert(edge->height_left > 0);
if (--edge->height_left) {
if (edge->dy) {
edge->x.quo += edge->dxdy.quo;
@@ -1509,9 +1516,8 @@ tor_inplace(struct tor *converter, PixmapPtr scratch)
do_full_step = can_full_step(active);
}
- __DBG(("%s: y=%d, do_full_step=%d, new edges=%d, min_height=%d, vertical=%d\n",
- __FUNCTION__,
- i, do_full_step,
+ __DBG(("%s: y=%d, do_full_step=%d, new edges=%d\n",
+ __FUNCTION__, i, do_full_step,
polygon->y_buckets[i] != NULL));
if (do_full_step) {
memset(ptr, 0, width);
@@ -1633,8 +1639,8 @@ struct span_thread {
#define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
struct span_thread_boxes {
const struct sna_composite_spans_op *op;
- struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
int num_boxes;
+ struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
};
static void span_thread_add_boxes(struct sna *sna, void *data,
@@ -1792,8 +1798,9 @@ precise_trapezoid_span_converter(struct sna *sna,
#if 1
if (((clip.extents.y2 - clip.extents.y1) | (clip.extents.x2 - clip.extents.x1)) < 32) {
- DBG(("%s: fallback -- traps extents too small %dx%d\n",
- __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
+ DBG(("%s: fallback -- traps extents too small %dx%d\n", __FUNCTION__,
+ clip.extents.y2 - clip.extents.y1,
+ clip.extents.x2 - clip.extents.x1));
return false;
}
#endif
@@ -1867,7 +1874,9 @@ precise_trapezoid_span_converter(struct sna *sna,
dy *= SAMPLES_Y;
num_threads = 1;
- if (!NO_GPU_THREADS && tmp.thread_boxes &&
+ if (!NO_GPU_THREADS &&
+ (flags & COMPOSITE_SPANS_RECTILINEAR) == 0 &&
+ tmp.thread_boxes &&
thread_choose_span(&tmp, dst, maskFormat, &clip))
num_threads = sna_use_threads(clip.extents.x2-clip.extents.x1,
clip.extents.y2-clip.extents.y1,
@@ -2022,7 +2031,8 @@ mask_thread(void *arg)
bool
precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps)
{
ScreenPtr screen = dst->pDrawable->pScreen;
@@ -2042,7 +2052,7 @@ precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
__FUNCTION__));
do {
/* XXX unwind errors? */
- if (!precise_trapezoid_mask_converter(op, src, dst, NULL,
+ if (!precise_trapezoid_mask_converter(op, src, dst, NULL, flags,
src_x, src_y, 1, traps++))
return false;
} while (--ntrap);
@@ -2088,9 +2098,11 @@ precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
DBG(("%s: created buffer %p, stride %d\n",
__FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
- num_threads = sna_use_threads(extents.x2 - extents.x1,
- extents.y2 - extents.y1,
- 4);
+ num_threads = 1;
+ if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
+ num_threads = sna_use_threads(extents.x2 - extents.x1,
+ extents.y2 - extents.y1,
+ 4);
if (num_threads == 1) {
struct tor tor;
@@ -2180,8 +2192,8 @@ precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
}
struct inplace {
- uint32_t stride;
uint8_t *ptr;
+ uint32_t stride;
union {
uint8_t opacity;
uint32_t color;
@@ -2624,7 +2636,7 @@ static bool
trapezoid_span_inplace__x8r8g8b8(CARD8 op,
PicturePtr dst,
PicturePtr src, int16_t src_x, int16_t src_y,
- PictFormatPtr maskFormat,
+ PictFormatPtr maskFormat, unsigned flags,
int ntrap, xTrapezoid *traps)
{
uint32_t color;
@@ -2664,7 +2676,8 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
/* XXX unwind errors? */
if (!trapezoid_span_inplace__x8r8g8b8(op, dst,
src, src_x, src_y,
- NULL, 1, traps++))
+ NULL, flags,
+ 1, traps++))
return false;
} while (--ntrap);
return true;
@@ -2711,9 +2724,11 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
dx = dst->pDrawable->x * SAMPLES_X;
dy = dst->pDrawable->y * SAMPLES_Y;
- num_threads = sna_use_threads(4*(region.extents.x2 - region.extents.x1),
- region.extents.y2 - region.extents.y1,
- 4);
+ num_threads = 1;
+ if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0 && (lerp || is_solid))
+ num_threads = sna_use_threads(4*(region.extents.x2 - region.extents.x1),
+ region.extents.y2 - region.extents.y1,
+ 4);
DBG(("%s: %dx%d, format=%x, op=%d, lerp?=%d, num_threads=%d\n",
__FUNCTION__,
@@ -2907,7 +2922,8 @@ static void inplace_thread(void *arg)
bool
precise_trapezoid_span_inplace(struct sna *sna,
CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps,
bool fallback)
{
@@ -2928,7 +2944,7 @@ precise_trapezoid_span_inplace(struct sna *sna,
if (dst->format == PICT_a8r8g8b8 || dst->format == PICT_x8r8g8b8)
return trapezoid_span_inplace__x8r8g8b8(op, dst,
src, src_x, src_y,
- maskFormat,
+ maskFormat, flags,
ntrap, traps);
if (!sna_picture_is_solid(src, &color)) {
@@ -2997,7 +3013,7 @@ precise_trapezoid_span_inplace(struct sna *sna,
__FUNCTION__));
do {
/* XXX unwind errors? */
- if (!precise_trapezoid_span_inplace(sna, op, src, dst, NULL,
+ if (!precise_trapezoid_span_inplace(sna, op, src, dst, NULL, flags,
src_x, src_y, 1, traps++,
fallback))
return false;
@@ -3061,9 +3077,11 @@ precise_trapezoid_span_inplace(struct sna *sna,
inplace.stride = pixmap->devKind;
inplace.opacity = color >> 24;
- num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
- region.extents.y2 - region.extents.y1,
- 4);
+ num_threads = 1;
+ if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
+ num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
+ region.extents.y2 - region.extents.y1,
+ 4);
if (num_threads == 1) {
struct tor tor;
@@ -3133,7 +3151,8 @@ precise_trapezoid_span_inplace(struct sna *sna,
bool
precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
- PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ PictFormatPtr maskFormat, unsigned flags,
+ INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps)
{
ScreenPtr screen = dst->pDrawable->pScreen;
@@ -3152,7 +3171,7 @@ precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
__FUNCTION__));
do {
/* XXX unwind errors? */
- if (!precise_trapezoid_span_fallback(op, src, dst, NULL,
+ if (!precise_trapezoid_span_fallback(op, src, dst, NULL, flags,
src_x, src_y, 1, traps++))
return false;
} while (--ntrap);
@@ -3197,9 +3216,11 @@ precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
DBG(("%s: created buffer %p, stride %d\n",
__FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
- num_threads = sna_use_threads(extents.x2 - extents.x1,
- extents.y2 - extents.y1,
- 4);
+ num_threads = 1;
+ if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
+ num_threads = sna_use_threads(extents.x2 - extents.x1,
+ extents.y2 - extents.y1,
+ 4);
if (num_threads == 1) {
struct tor tor;
More information about the xorg-commit mailing list