xf86-video-intel: 5 commits - src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna_render.h src/sna/sna_trapezoids.c
Chris Wilson
ickle at kemper.freedesktop.org
Sun Mar 11 12:55:24 PDT 2012
src/sna/kgem.c | 6
src/sna/kgem.h | 1
src/sna/sna_accel.c | 67 +++-
src/sna/sna_blt.c | 31 +-
src/sna/sna_render.h | 1
src/sna/sna_trapezoids.c | 687 ++++++++++++++++++++++++++++++++++++++++++++---
6 files changed, 736 insertions(+), 57 deletions(-)
New commits:
commit 8136bc5e113ae06c30def3c91b1615e5fab8af44
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Mar 11 19:45:55 2012 +0000
sna: Make the maximum BLT pitch assertions consistent
The maximum permissible BLT pitch value is 32767, so make the assertions
match...
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=47206
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index d70e30e..eb8dbf8 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -118,6 +118,7 @@ static bool sna_blt_fill_init(struct sna *sna,
{
struct kgem *kgem = &sna->kgem;
+ assert(kgem_bo_can_blt (kgem, bo));
assert(bo->tiling != I915_TILING_Y);
blt->bo[0] = bo;
@@ -127,7 +128,7 @@ static bool sna_blt_fill_init(struct sna *sna,
blt->cmd |= BLT_DST_TILED;
blt->br13 >>= 2;
}
- assert(blt->br13 < MAXSHORT);
+ assert(blt->br13 <= MAXSHORT);
if (alu == GXclear)
pixel = 0;
@@ -258,6 +259,9 @@ static Bool sna_blt_copy_init(struct sna *sna,
{
struct kgem *kgem = &sna->kgem;
+ assert(kgem_bo_can_blt (kgem, src));
+ assert(kgem_bo_can_blt (kgem, dst));
+
blt->bo[0] = src;
blt->bo[1] = dst;
@@ -270,14 +274,14 @@ static Bool sna_blt_copy_init(struct sna *sna,
blt->cmd |= BLT_SRC_TILED;
blt->pitch[0] >>= 2;
}
- assert(blt->pitch[0] < MAXSHORT);
+ assert(blt->pitch[0] <= MAXSHORT);
blt->pitch[1] = dst->pitch;
if (kgem->gen >= 40 && dst->tiling) {
blt->cmd |= BLT_DST_TILED;
blt->pitch[1] >>= 2;
}
- assert(blt->pitch[1] < MAXSHORT);
+ assert(blt->pitch[1] <= MAXSHORT);
blt->overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1];
@@ -308,6 +312,9 @@ static Bool sna_blt_alpha_fixup_init(struct sna *sna,
{
struct kgem *kgem = &sna->kgem;
+ assert(kgem_bo_can_blt (kgem, src));
+ assert(kgem_bo_can_blt (kgem, dst));
+
blt->bo[0] = src;
blt->bo[1] = dst;
@@ -317,14 +324,14 @@ static Bool sna_blt_alpha_fixup_init(struct sna *sna,
blt->cmd |= BLT_SRC_TILED;
blt->pitch[0] >>= 2;
}
- assert(blt->pitch[0] < MAXSHORT);
+ assert(blt->pitch[0] <= MAXSHORT);
blt->pitch[1] = dst->pitch;
if (kgem->gen >= 40 && dst->tiling) {
blt->cmd |= BLT_DST_TILED;
blt->pitch[1] >>= 2;
}
- assert(blt->pitch[1] < MAXSHORT);
+ assert(blt->pitch[1] <= MAXSHORT);
blt->overwrites = 1;
blt->br13 = (0xfc << 16) | blt->pitch[1];
@@ -1829,6 +1836,8 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
uint32_t br13, cmd, *b;
bool overwrites;
+ assert(kgem_bo_can_blt (kgem, bo));
+
DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
@@ -1841,7 +1850,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
cmd |= BLT_DST_TILED;
br13 >>= 2;
}
- assert(br13 < MAXSHORT);
+ assert(br13 <= MAXSHORT);
br13 |= fill_ROP[alu] << 16;
switch (bpp) {
@@ -1954,7 +1963,7 @@ Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
cmd |= 1 << 11;
br13 >>= 2;
}
- assert(br13 < MAXSHORT);
+ assert(br13 <= MAXSHORT);
br13 |= 1<<31 | fill_ROP[alu] << 16;
switch (bpp) {
@@ -2105,7 +2114,7 @@ Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
cmd |= BLT_DST_TILED;
br13 >>= 2;
}
- assert(br13 < MAXSHORT);
+ assert(br13 <= MAXSHORT);
br13 |= copy_ROP[alu] << 16;
switch (bpp) {
commit 989615493608525fc252e4e94ac7259cba0741f5
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Mar 9 22:44:16 2012 +0000
sna: Feed fallback mono trapezoids through the mono rasteriser
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 8420730..d70e30e 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -631,6 +631,12 @@ sna_rgba_for_color(uint32_t color, int depth)
return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8);
}
+uint32_t
+sna_rgba_to_color(uint32_t rgba, uint32_t format)
+{
+ return color_convert(rgba, PICT_a8r8g8b8, format);
+}
+
static uint32_t
get_pixel(PicturePtr picture)
{
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 71a6fc5..e6015af 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -482,6 +482,7 @@ sna_render_get_gradient(struct sna *sna,
PictGradient *pattern);
uint32_t sna_rgba_for_color(uint32_t color, int depth);
+uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format);
Bool sna_picture_is_solid(PicturePtr picture, uint32_t *color);
void no_render_init(struct sna *sna);
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 4493331..3e2802e 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -3681,6 +3681,262 @@ tor_blt_add_clipped_mono(struct sna *sna,
tor_blt_add_clipped(sna, op, clip, box, FAST_SAMPLES_XY);
}
+struct mono_inplace_composite {
+ pixman_image_t *src, *dst;
+ int dx, dy;
+ int sx, sy;
+ int op;
+};
+struct mono_inplace_fill {
+ uint32_t *data, stride;
+ uint32_t color;
+ int bpp;
+};
+
+fastcall static void
+mono_inplace_fill_box(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box)
+{
+ struct mono_inplace_fill *fill = op->priv;
+
+ DBG(("(%s: (%d, %d)x(%d, %d):%08x\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1,
+ fill->color));
+ pixman_fill(fill->data, fill->stride, fill->bpp,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1,
+ fill->color);
+}
+
+static void
+mono_inplace_fill_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ struct mono_inplace_fill *fill = op->priv;
+
+ do {
+ DBG(("(%s: (%d, %d)x(%d, %d):%08x\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1,
+ fill->color));
+ pixman_fill(fill->data, fill->stride, fill->bpp,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1,
+ fill->color);
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
+mono_inplace_composite_box(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box)
+{
+ struct mono_inplace_composite *c = op->priv;
+
+ pixman_image_composite(c->op, c->src, NULL, c->dst,
+ box->x1 + c->sx, box->y1 + c->sy,
+ 0, 0,
+ box->x1 + c->dx, box->y1 + c->dy,
+ box->x2 - box->x1,
+ box->y2 - box->y1);
+}
+
+static void
+mono_inplace_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ struct mono_inplace_composite *c = op->priv;
+
+ do {
+ pixman_image_composite(c->op, c->src, NULL, c->dst,
+ box->x1 + c->sx, box->y1 + c->sy,
+ 0, 0,
+ box->x1 + c->dx, box->y1 + c->dy,
+ box->x2 - box->x1,
+ box->y2 - box->y1);
+ box++;
+ } while (--nbox);
+}
+
+static bool
+trapezoid_span_mono_inplace(CARD8 op,
+ PicturePtr src,
+ PicturePtr dst,
+ INT16 src_x, INT16 src_y,
+ int ntrap, xTrapezoid *traps)
+{
+ struct mono mono;
+ union {
+ struct mono_inplace_fill fill;
+ struct mono_inplace_composite composite;
+ } inplace;
+ int was_clear;
+ int x, y, n;
+
+ trapezoids_bounds(ntrap, traps, &mono.clip.extents);
+ if (mono.clip.extents.y1 >= mono.clip.extents.y2 ||
+ mono.clip.extents.x1 >= mono.clip.extents.x2)
+ return true;
+
+ DBG(("%s: extents (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ mono.clip.extents.x1, mono.clip.extents.y1,
+ mono.clip.extents.x2, mono.clip.extents.y2));
+
+ if (!sna_compute_composite_region(&mono.clip,
+ src, NULL, dst,
+ src_x, src_y,
+ 0, 0,
+ mono.clip.extents.x1, mono.clip.extents.y1,
+ mono.clip.extents.x2 - mono.clip.extents.x1,
+ mono.clip.extents.y2 - mono.clip.extents.y1)) {
+ DBG(("%s: trapezoids do not intersect drawable clips\n",
+ __FUNCTION__)) ;
+ return true;
+ }
+
+ DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ mono.clip.extents.x1, mono.clip.extents.y1,
+ mono.clip.extents.x2, mono.clip.extents.y2));
+
+ was_clear = sna_drawable_is_clear(dst->pDrawable);
+ if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &mono.clip,
+ MOVE_WRITE | MOVE_READ))
+ return true;
+
+ mono.sna = to_sna_from_drawable(dst->pDrawable);
+ if (!mono_init(&mono, 2*ntrap))
+ return false;
+
+ mono.op.damage = NULL;
+
+ x = dst->pDrawable->x;
+ y = dst->pDrawable->y;
+
+ for (n = 0; n < ntrap; n++) {
+ if (!xTrapezoidValid(&traps[n]))
+ continue;
+
+ if (pixman_fixed_to_int(traps[n].top) + y >= mono.clip.extents.y2 ||
+ pixman_fixed_to_int(traps[n].bottom) + y < mono.clip.extents.y1)
+ continue;
+
+ mono_add_line(&mono, x, y,
+ traps[n].top, traps[n].bottom,
+ &traps[n].left.p1, &traps[n].left.p2, 1);
+ mono_add_line(&mono, x, y,
+ traps[n].top, traps[n].bottom,
+ &traps[n].right.p1, &traps[n].right.p2, -1);
+ }
+
+ if (sna_picture_is_solid(src, &inplace.fill.color) &&
+ (op == PictOpSrc || op == PictOpClear ||
+ (op == PictOpOver && inplace.fill.color >> 24 == 0xff))) {
+ PixmapPtr pixmap;
+ int16_t dx, dy;
+ uint8_t *ptr;
+
+unbounded_pass:
+ pixmap = get_drawable_pixmap(dst->pDrawable);
+ get_drawable_deltas(dst->pDrawable, pixmap, &dx, &dy);
+
+ ptr = pixmap->devPrivate.ptr;
+ ptr += dy * pixmap->devKind + dx * pixmap->drawable.bitsPerPixel / 8;
+ inplace.fill.data = (uint32_t *)ptr;
+ inplace.fill.stride = pixmap->devKind / sizeof(uint32_t);
+ inplace.fill.bpp = pixmap->drawable.bitsPerPixel;
+
+ if (op == PictOpClear)
+ inplace.fill.color = 0;
+ else if (dst->format != PICT_a8r8g8b8)
+ inplace.fill.color = sna_rgba_to_color(inplace.fill.color, dst->format);
+
+ DBG(("%s: fill %x\n", __FUNCTION__, inplace.fill.color));
+
+ mono.op.priv = &inplace.fill;
+ mono.op.box = mono_inplace_fill_box;
+ mono.op.boxes = mono_inplace_fill_boxes;
+
+ op = 0;
+ } else {
+ inplace.composite.dst = image_from_pict(dst, FALSE,
+ &inplace.composite.dx,
+ &inplace.composite.dy);
+ inplace.composite.src = image_from_pict(src, FALSE,
+ &inplace.composite.sx,
+ &inplace.composite.sy);
+ inplace.composite.sx +=
+ src_x - pixman_fixed_to_int(traps[0].left.p1.x),
+ inplace.composite.sy +=
+ src_y - pixman_fixed_to_int(traps[0].left.p1.y),
+ inplace.composite.op = op;
+
+ mono.op.priv = &inplace.composite;
+ mono.op.box = mono_inplace_composite_box;
+ mono.op.boxes = mono_inplace_composite_boxes;
+ }
+ mono_render(&mono);
+ mono_fini(&mono);
+
+ if (op) {
+ free_pixman_pict(src, inplace.composite.src);
+ free_pixman_pict(dst, inplace.composite.dst);
+
+ if (!was_clear && !operator_is_bounded(op)) {
+ xPointFixed p1, p2;
+
+ DBG(("%s: unbounded fixup\n", __FUNCTION__));
+
+ if (!mono_init(&mono, 2+2*ntrap))
+ return false;
+
+ p1.y = mono.clip.extents.y1 * pixman_fixed_1;
+ p2.y = mono.clip.extents.y2 * pixman_fixed_1;
+
+ p1.x = mono.clip.extents.x1 * pixman_fixed_1;
+ p2.x = mono.clip.extents.x1 * pixman_fixed_1;
+ mono_add_line(&mono, 0, 0, p1.y, p2.y, &p1, &p2, -1);
+
+ p1.x = mono.clip.extents.x2 * pixman_fixed_1;
+ p2.x = mono.clip.extents.x2 * pixman_fixed_1;
+ mono_add_line(&mono, 0, 0, p1.y, p2.y, &p1, &p2, 1);
+
+ for (n = 0; n < ntrap; n++) {
+ if (!xTrapezoidValid(&traps[n]))
+ continue;
+
+ if (pixman_fixed_to_int(traps[n].top) + x >= mono.clip.extents.y2 ||
+ pixman_fixed_to_int(traps[n].bottom) + y < mono.clip.extents.y1)
+ continue;
+
+ mono_add_line(&mono, x, y,
+ traps[n].top, traps[n].bottom,
+ &traps[n].left.p1, &traps[n].left.p2, 1);
+ mono_add_line(&mono, x, y,
+ traps[n].top, traps[n].bottom,
+ &traps[n].right.p1, &traps[n].right.p2, -1);
+ }
+
+ op = PictOpClear;
+ goto unbounded_pass;
+ }
+ }
+
+ return true;
+}
+
static bool
trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
@@ -3713,7 +3969,24 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
return false;
}
- if (dst->format != PICT_a8 || !sna_picture_is_solid(src, &color)) {
+ if (!fallback && is_gpu(dst->pDrawable)) {
+ DBG(("%s: fallback -- can not perform operation in place, destination busy\n",
+ __FUNCTION__));
+
+ return false;
+ }
+
+ if (is_mono(dst, maskFormat))
+ return trapezoid_span_mono_inplace(op, src, dst,
+ src_x, src_y, ntrap, traps);
+
+ if (!sna_picture_is_solid(src, &color)) {
+ DBG(("%s: fallback -- can not perform operation in place, requires solid source\n",
+ __FUNCTION__));
+ return false;
+ }
+
+ if (dst->format != PICT_a8) {
DBG(("%s: fallback -- can not perform operation in place, format=%x\n",
__FUNCTION__, dst->format));
return false;
@@ -3744,8 +4017,6 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
__FUNCTION__, op));
return false;
}
- if (!fallback && is_gpu(dst->pDrawable))
- return false;
DBG(("%s: format=%x, op=%d, color=%x\n",
__FUNCTION__, dst->format, op, color));
commit 552e4fbd2c25eb5ab0ae77e11f5f8ba2fdb29daa
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Mar 9 20:02:44 2012 +0000
sna/traps: Add a fast path for narrow masks
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 5773d66..db579d0 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3785,6 +3785,12 @@ done:
return kgem_create_proxy(&bo->base, offset, size);
}
+bool kgem_buffer_is_inplace(struct kgem_bo *_bo)
+{
+ struct kgem_partial_bo *bo = (struct kgem_partial_bo *)_bo->proxy;
+ return bo->write & KGEM_BUFFER_WRITE_INPLACE;
+}
+
struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
int width, int height, int bpp,
uint32_t flags,
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 6c31f33..dff8bb2 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -503,6 +503,7 @@ struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
int width, int height, int bpp,
uint32_t flags,
void **ret);
+bool kgem_buffer_is_inplace(struct kgem_bo *bo);
void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo);
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 3429438..3619101 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2007,17 +2007,10 @@ sna_pixmap_create_upload(ScreenPtr screen,
pixmap = sna->freed_pixmap;
sna->freed_pixmap = NULL;
- pixmap->usage_hint = CREATE_PIXMAP_USAGE_SCRATCH;
pixmap->drawable.serialNumber = NEXT_SERIAL_NUMBER;
pixmap->refcnt = 1;
-
- DBG(("%s: serial=%ld, usage=%d\n",
- __FUNCTION__,
- pixmap->drawable.serialNumber,
- pixmap->usage_hint));
} else {
- pixmap = create_pixmap(sna, screen, 0, 0, depth,
- CREATE_PIXMAP_USAGE_SCRATCH);
+ pixmap = create_pixmap(sna, screen, 0, 0, depth, 0);
if (!pixmap)
return NullPixmap;
@@ -2035,8 +2028,7 @@ sna_pixmap_create_upload(ScreenPtr screen,
priv->gpu_bo = kgem_create_buffer_2d(&sna->kgem,
width, height, bpp,
- flags,
- &ptr);
+ flags, &ptr);
if (!priv->gpu_bo) {
free(priv);
fbDestroyPixmap(pixmap);
@@ -2058,6 +2050,15 @@ sna_pixmap_create_upload(ScreenPtr screen,
pixmap->devKind = priv->gpu_bo->pitch;
pixmap->devPrivate.ptr = ptr;
+ pixmap->usage_hint = 0;
+ if (!kgem_buffer_is_inplace(priv->gpu_bo))
+ pixmap->usage_hint = 1;
+
+ DBG(("%s: serial=%ld, usage=%d\n",
+ __FUNCTION__,
+ pixmap->drawable.serialNumber,
+ pixmap->usage_hint));
+
return pixmap;
}
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 8c6cf34..4493331 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -1409,6 +1409,342 @@ tor_render(struct sna *sna,
}
}
+static void
+inplace_row(struct active_list *active, uint8_t *row, int width)
+{
+ struct edge *left = active->head.next;
+
+ assert(active->is_vertical);
+
+ while (&active->tail != left) {
+ struct edge *right;
+ int winding = left->dir;
+ grid_scaled_x_t lfx, rfx;
+ int lix, rix;
+
+ left->height_left -= FAST_SAMPLES_Y;
+ if (!left->height_left) {
+ left->prev->next = left->next;
+ left->next->prev = left->prev;
+ }
+
+ right = left->next;
+ do {
+ right->height_left -= FAST_SAMPLES_Y;
+ if (!right->height_left) {
+ right->prev->next = right->next;
+ right->next->prev = right->prev;
+ }
+
+ winding += right->dir;
+ if (0 == winding)
+ break;
+
+ right = right->next;
+ } while (1);
+
+ if (left->x.quo < 0) {
+ lix = lfx = 0;
+ } else if (left->x.quo > width * FAST_SAMPLES_X) {
+ lix = width;
+ lfx = 0;
+ } else
+ FAST_SAMPLES_X_TO_INT_FRAC(left->x.quo, lix, lfx);
+
+ if (right->x.quo < 0) {
+ rix = rfx = 0;
+ } else if (right->x.quo > width * FAST_SAMPLES_X) {
+ rix = width;
+ rfx = 0;
+ } else
+ FAST_SAMPLES_X_TO_INT_FRAC(right->x.quo, rix, rfx);
+ if (lix == rix) {
+ if (rfx != lfx)
+ row[lix] += (rfx-lfx) * 256 / FAST_SAMPLES_X;
+ } else {
+ if (lfx == 0)
+ row[lix] = 0xff;
+ else
+ row[lix] += 256 - lfx * 256 / FAST_SAMPLES_X;
+
+ if (rfx)
+ row[rix] += rfx * 256 / FAST_SAMPLES_X;
+
+ if (rix > ++lix) {
+ rix -= lix;
+#if 0
+ if (rix == 1)
+ row[lix] = 0xff;
+ else
+ memset(row+lix, 0xff, rix);
+#else
+ while (rix && lix & 3)
+ row[lix++] = 0xff, rix--;
+ while (rix > 4) {
+ *(uint32_t *)(row+lix) = 0xffffffff;
+ lix += 4;
+ rix -= 4;
+ }
+ if (rix & 2) {
+ *(uint16_t *)(row+lix) = 0xffff;
+ lix += 2;
+ }
+ if (rix & 1)
+ row[lix] = 0xff;
+#endif
+ }
+ }
+
+ left = right->next;
+ }
+}
+
+static inline uint8_t clip255(int x)
+{
+ if (x > 255)
+ return 255;
+
+ return x;
+}
+
+inline static void
+inplace_subrow(struct active_list *active, int8_t *row,
+ int width, int *min, int *max)
+{
+ struct edge *edge = active->head.next;
+ grid_scaled_x_t prev_x = INT_MIN;
+ int winding = 0, xstart = INT_MIN;
+
+ while (&active->tail != edge) {
+ struct edge *next = edge->next;
+
+ winding += edge->dir;
+ if (0 == winding) {
+ if (edge->x.quo >= FAST_SAMPLES_X * width) {
+ *max = width;
+ } else if (edge->next->x.quo != edge->x.quo) {
+ grid_scaled_x_t fx;
+ int ix;
+
+ xstart = edge->x.quo;
+ FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
+ row[ix++] -= FAST_SAMPLES_X - fx;
+ if (ix < width)
+ row[ix] -= fx;
+
+ if (ix > *max)
+ *max = ix;
+
+ xstart = INT_MIN;
+ }
+ } else if (xstart < 0) {
+ grid_scaled_x_t fx;
+ int ix;
+
+ xstart = MAX(edge->x.quo, 0);
+ FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
+ if (ix < *min)
+ *min = ix;
+
+ row[ix++] += FAST_SAMPLES_X - fx;
+ row[ix] += fx;
+ }
+
+ if (--edge->height_left) {
+ if (!edge->vertical) {
+ edge->x.quo += edge->dxdy.quo;
+ edge->x.rem += edge->dxdy.rem;
+ if (edge->x.rem >= 0) {
+ ++edge->x.quo;
+ edge->x.rem -= edge->dy;
+ }
+ }
+
+ if (edge->x.quo < prev_x) {
+ struct edge *pos = edge->prev;
+ pos->next = next;
+ next->prev = pos;
+ do {
+ pos = pos->prev;
+ } while (edge->x.quo < pos->x.quo);
+ pos->next->prev = edge;
+ edge->next = pos->next;
+ edge->prev = pos;
+ pos->next = edge;
+ } else
+ prev_x = edge->x.quo;
+ } else {
+ edge->prev->next = next;
+ next->prev = edge->prev;
+ }
+
+ edge = next;
+ }
+}
+
+inline static void
+inplace_end_subrows(struct active_list *active, uint8_t *row,
+ int8_t *buf, int width)
+{
+ int cover = 0;
+
+ while (width > 4) {
+ uint32_t dw;
+ int v;
+
+ dw = *(uint32_t *)buf;
+ buf += 4;
+
+ if (dw == 0){
+ v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+ v -= v >> 8;
+ v |= v << 8;
+ dw = v | v << 16;
+ } else if (dw) {
+ cover += (int8_t)(dw & 0xff);
+ assert(cover >= 0);
+ v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+ v -= v >> 8;
+ dw >>= 8;
+ dw |= v << 24;
+
+ cover += (int8_t)(dw & 0xff);
+ assert(cover >= 0);
+ v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+ v -= v >> 8;
+ dw >>= 8;
+ dw |= v << 24;
+
+ cover += (int8_t)(dw & 0xff);
+ assert(cover >= 0);
+ v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+ v -= v >> 8;
+ dw >>= 8;
+ dw |= v << 24;
+
+ cover += (int8_t)(dw & 0xff);
+ assert(cover >= 0);
+ v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+ v -= v >> 8;
+ dw >>= 8;
+ dw |= v << 24;
+ }
+
+ *(uint32_t *)row = dw;
+ row += 4;
+
+ width -= 4;
+ }
+
+ while (width--) {
+ int v;
+
+ cover += *buf++;
+ assert(cover >= 0);
+
+ v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+ v -= v >> 8;
+ *row++ = v;
+ }
+}
+
+#define TOR_INPLACE_SIZE 128
+static void
+tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
+{
+ int i, j, h = converter->ymax;
+ struct polygon *polygon = converter->polygon;
+ struct active_list *active = converter->active;
+ struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
+ uint8_t *row = scratch->devPrivate.ptr;
+ int stride = scratch->devKind;
+ int width = scratch->drawable.width;
+
+ __DBG(("%s: mono=%d, buf=%d\n", __FUNCTION__, mono, buf));
+ assert(!mono);
+
+ /* Render each pixel row. */
+ for (i = 0; i < h; i = j) {
+ int do_full_step = 0;
+ void *ptr = buf ?: row;
+
+ j = i + 1;
+
+ /* Determine if we can ignore this row or use the full pixel
+ * stepper. */
+ if (!polygon->y_buckets[i]) {
+ if (active->head.next == &active->tail) {
+ active->min_height = INT_MAX;
+ active->is_vertical = 1;
+ for (; j < h && !polygon->y_buckets[j]; j++)
+ ;
+ __DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
+ __FUNCTION__, i, j));
+
+ memset(row, 0, stride*(j-i));
+ row += stride*(j-i);
+ continue;
+ }
+
+ do_full_step = can_full_step(active);
+ }
+
+ __DBG(("%s: y=%d [%d], do_full_step=%d, new edges=%d, min_height=%d, vertical=%d\n",
+ __FUNCTION__,
+ i, i+ymin, do_full_step,
+ polygon->y_buckets[i] != NULL,
+ active->min_height,
+ active->is_vertical));
+ if (do_full_step) {
+ memset(ptr, 0, width);
+ inplace_row(active, ptr, width);
+ if (row != ptr)
+ memcpy(row, ptr, width);
+
+ if (active->is_vertical) {
+ while (j < h &&
+ polygon->y_buckets[j] == NULL &&
+ active->min_height >= 2*FAST_SAMPLES_Y)
+ {
+ active->min_height -= FAST_SAMPLES_Y;
+ row += stride;
+ memcpy(row, ptr, width);
+ j++;
+ }
+ if (j != i + 1)
+ step_edges(active, j - (i + 1));
+
+ __DBG(("%s: vertical edges, full step (%d, %d)\n",
+ __FUNCTION__, i, j));
+ }
+ } else {
+ grid_scaled_y_t suby;
+ int min = width, max = 0;
+
+ fill_buckets(active, polygon->y_buckets[i], buckets);
+
+ /* Subsample this row. */
+ memset(ptr, 0, width);
+ for (suby = 0; suby < FAST_SAMPLES_Y; suby++) {
+ if (buckets[suby]) {
+ merge_edges(active, buckets[suby]);
+ buckets[suby] = NULL;
+ }
+
+ inplace_subrow(active, ptr, width, &min, &max);
+ }
+ memset(row, 0, min);
+ if (max > min)
+ inplace_end_subrows(active, row+min, (int8_t*)ptr+min, max-min);
+ if (max < width)
+ memset(row+max, 0, width-max);
+ }
+
+ active->min_height -= FAST_SAMPLES_Y;
+ row += stride;
+ }
+}
+
struct mono_edge {
struct mono_edge *next, *prev;
@@ -1936,7 +2272,7 @@ trapezoids_bounds(int n, const xTrapezoid *t, BoxPtr box)
if (((x2 - t->right.p1.x) | (x2 - t->right.p2.x)) < 0) {
if (pixman_fixed_floor(t->right.p1.x) == pixman_fixed_floor(t->right.p2.x)) {
x2 = pixman_fixed_ceil(t->right.p1.x);
- } else {
+ } else {
if (t->right.p1.y == t->top)
fx1 = t->right.p1.x;
else
@@ -3007,7 +3343,6 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
int ntrap, xTrapezoid *traps)
{
struct tor tor;
- span_func_t span;
ScreenPtr screen = dst->pDrawable->pScreen;
PixmapPtr scratch;
PicturePtr mask;
@@ -3041,8 +3376,8 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
return true;
- DBG(("%s: extents (%d, %d), (%d, %d)\n",
- __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
+ DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
+ __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
if (!sna_compute_composite_extents(&extents,
src, NULL, dst,
@@ -3096,15 +3431,18 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
tor_add_edge(&tor, &t, &t.right, -1);
}
- if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp)
- span = tor_blt_mask_mono;
- else
- span = tor_blt_mask;
-
- tor_render(NULL, &tor,
- scratch->devPrivate.ptr,
- (void *)(intptr_t)scratch->devKind,
- span, true);
+ if (extents.x2 <= TOR_INPLACE_SIZE) {
+ uint8_t buf[TOR_INPLACE_SIZE];
+ tor_inplace(&tor, scratch, is_mono(dst, maskFormat),
+ scratch->usage_hint ? NULL : buf);
+ } else {
+ tor_render(NULL, &tor,
+ scratch->devPrivate.ptr,
+ (void *)(intptr_t)scratch->devKind,
+ is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
+ true);
+ }
+ tor_fini(&tor);
mask = CreatePicture(0, &scratch->drawable,
PictureMatchFormat(screen, 8, PICT_a8),
@@ -3119,7 +3457,6 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
extents.x2, extents.y2);
FreePicture(mask, 0);
}
- tor_fini(&tor);
return true;
}
@@ -3535,7 +3872,6 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
int ntrap, xTrapezoid *traps)
{
struct tor tor;
- span_func_t span;
ScreenPtr screen = dst->pDrawable->pScreen;
PixmapPtr scratch;
PicturePtr mask;
@@ -3569,8 +3905,8 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
return true;
- DBG(("%s: extents (%d, %d), (%d, %d)\n",
- __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
+ DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
+ __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
if (!sna_compute_composite_extents(&extents,
src, NULL, dst,
@@ -3624,15 +3960,16 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
tor_add_edge(&tor, &t, &t.right, -1);
}
- if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp)
- span = tor_blt_mask_mono;
- else
- span = tor_blt_mask;
-
- tor_render(NULL, &tor,
- scratch->devPrivate.ptr,
- (void *)(intptr_t)scratch->devKind,
- span, true);
+ if (extents.x2 <= TOR_INPLACE_SIZE) {
+ tor_inplace(&tor, scratch, is_mono(dst, maskFormat), NULL);
+ } else {
+ tor_render(NULL, &tor,
+ scratch->devPrivate.ptr,
+ (void *)(intptr_t)scratch->devKind,
+ is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
+ true);
+ }
+ tor_fini(&tor);
mask = CreatePicture(0, &scratch->drawable,
PictureMatchFormat(screen, 8, PICT_a8),
@@ -3675,7 +4012,6 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
done:
FreePicture(mask, 0);
}
- tor_fini(&tor);
return true;
}
commit 494edfaaacaae13adfa5e727c66a83cb2294d330
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Mar 9 12:19:33 2012 +0000
sna: Handle partial reads with a pending clear
Skip the filling of the whole pixmap if we have a small read and we
know the GPU bo is clear. Also choose to operate inplace on the GPU bo
if we meet the usual criteria.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 5aad88b..3429438 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1257,13 +1257,6 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
return _sna_pixmap_move_to_cpu(pixmap, flags);
}
- if (priv->clear) {
- DBG(("%s: pending clear, moving whole pixmap\n", __FUNCTION__));
- if (dx | dy)
- RegionTranslate(region, -dx, -dy);
- return _sna_pixmap_move_to_cpu(pixmap, flags | MOVE_READ);
- }
-
if ((flags & MOVE_READ) == 0) {
DBG(("%s: no read, checking to see if we can stream the write into the GPU bo\n",
__FUNCTION__));
@@ -1295,6 +1288,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
sna_damage_add(&priv->gpu_damage,
region);
+ priv->clear = false;
return true;
}
}
@@ -1333,6 +1327,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
} else
sna_damage_add(&priv->gpu_damage, region);
+ priv->clear = false;
return true;
}
}
@@ -1354,12 +1349,20 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
pixmap->devKind = priv->gpu_bo->pitch;
if (!DAMAGE_IS_ALL(priv->gpu_damage))
sna_damage_add(&priv->gpu_damage, region);
+ priv->clear = false;
return true;
}
priv->mapped = false;
}
+ if (priv->clear && flags & MOVE_WRITE) {
+ DBG(("%s: pending clear, moving whole pixmap for partial write\n", __FUNCTION__));
+ if (dx | dy)
+ RegionTranslate(region, -dx, -dy);
+ return _sna_pixmap_move_to_cpu(pixmap, flags | MOVE_READ);
+ }
+
if (priv->mapped) {
pixmap->devPrivate.ptr = NULL;
priv->mapped = false;
@@ -1372,6 +1375,35 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
if (priv->gpu_bo == NULL)
goto done;
+ if (priv->clear) {
+ int n = REGION_NUM_RECTS(region);
+ BoxPtr box = REGION_RECTS(region);
+
+ DBG(("%s: pending clear, doing partial fill\n", __FUNCTION__));
+ if (priv->cpu_bo) {
+ DBG(("%s: syncing CPU bo\n", __FUNCTION__));
+ kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
+ }
+
+ do {
+ pixman_fill(pixmap->devPrivate.ptr,
+ pixmap->devKind/sizeof(uint32_t),
+ pixmap->drawable.bitsPerPixel,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1,
+ priv->clear_color);
+ box++;
+ } while (--n);
+
+ if (region->extents.x2 - region->extents.x1 > 1 ||
+ region->extents.y2 - region->extents.y1 > 1) {
+ sna_damage_subtract(&priv->gpu_damage, region);
+ priv->clear = false;
+ }
+ goto done;
+ }
+
if ((flags & MOVE_READ) == 0) {
assert(flags & MOVE_WRITE);
sna_damage_subtract(&priv->gpu_damage, region);
commit bd62dc73dcdbab34aa5c83382e46c7315d554a1a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Mar 9 10:37:34 2012 +0000
sna/traps: Apply some simple but common operator reductions for clipmasks
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index e28c669..8c6cf34 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -3354,8 +3354,10 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
struct inplace inplace;
span_func_t span;
PixmapPtr pixmap;
+ struct sna_pixmap *priv;
RegionRec region;
uint32_t color;
+ bool unbounded;
int16_t dst_x, dst_y;
int dx, dy;
int n;
@@ -3380,18 +3382,33 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
return false;
}
+ pixmap = get_drawable_pixmap(dst->pDrawable);
+ priv = sna_pixmap(pixmap);
+ if (priv == NULL) {
+ DBG(("%s: fallback -- unattached\n", __FUNCTION__));
+ return false;
+ }
+
+ unbounded = false;
switch (op) {
case PictOpIn:
+ unbounded = true;
+ if (priv->clear && priv->clear_color == 0xff)
+ op = PictOpSrc;
+ break;
case PictOpAdd:
+ if (priv->clear && priv->clear_color == 0)
+ op = PictOpSrc;
+ break;
case PictOpSrc:
- if (!fallback && is_gpu(dst->pDrawable))
- return false;
break;
default:
DBG(("%s: fallback -- can not perform op [%d] in place\n",
__FUNCTION__, op));
return false;
}
+ if (!fallback && is_gpu(dst->pDrawable))
+ return false;
DBG(("%s: format=%x, op=%d, color=%x\n",
__FUNCTION__, dst->format, op, color));
@@ -3497,7 +3514,6 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
op == PictOpSrc ? MOVE_WRITE : MOVE_WRITE | MOVE_READ))
return true;
- pixmap = get_drawable_pixmap(dst->pDrawable);
get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y);
inplace.ptr = pixmap->devPrivate.ptr;
@@ -3506,7 +3522,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
inplace.opacity = color >> 24;
tor_render(NULL, &tor, (void*)&inplace,
- dst->pCompositeClip, span, op == PictOpIn);
+ dst->pCompositeClip, span, unbounded);
tor_fini(&tor);
More information about the xorg-commit
mailing list