xf86-video-intel: 8 commits - src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_render.c src/sna/sna_trapezoids.c
Chris Wilson
ickle at kemper.freedesktop.org
Sun Mar 18 09:52:07 PDT 2012
src/sna/gen3_render.c | 52 +++++++++++++++++++++++--
src/sna/gen4_render.c | 57 ++++++++++++++++++++-------
src/sna/gen5_render.c | 25 ++++++++++--
src/sna/kgem.c | 53 ++++++++++++++++++++-----
src/sna/kgem.h | 4 -
src/sna/sna_render.c | 24 ++++++++---
src/sna/sna_trapezoids.c | 96 +++++++++++++++++++++++++++++++++--------------
7 files changed, 240 insertions(+), 71 deletions(-)
New commits:
commit 17c19ea8e21e1b20eee446045573dfd94ce6f537
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Mar 18 13:23:26 2012 +0000
sna/traps: Remove separate edge->vertical flag
Mark vertical edges with dy==0 to reduce structure size and reduce
memory load during edge walking.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index f2caf9a..4752e48 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -190,7 +190,6 @@ struct edge {
struct edge *next, *prev;
int dir;
- int vertical;
grid_scaled_y_t height_left;
@@ -713,13 +712,12 @@ polygon_add_edge(struct polygon *polygon,
e->height_left = ybot - ytop;
if (dx == 0) {
- e->vertical = true;
e->x.quo = x1;
e->x.rem = 0;
+ e->dy = 0;
e->dxdy.quo = 0;
e->dxdy.rem = 0;
} else {
- e->vertical = false;
e->dxdy = floored_divrem(dx, dy);
if (ytop == y1) {
e->x.quo = x1;
@@ -776,13 +774,12 @@ polygon_add_line(struct polygon *polygon,
e->height_left = bot - top;
if (dx == 0) {
- e->vertical = true;
e->x.quo = p1->x;
e->x.rem = -dy;
e->dxdy.quo = 0;
e->dxdy.rem = 0;
+ e->dy = 0;
} else {
- e->vertical = false;
e->dxdy = floored_divrem(dx, dy);
if (top == p1->y) {
e->x.quo = p1->x;
@@ -819,16 +816,16 @@ polygon_add_line(struct polygon *polygon,
static void
active_list_reset(struct active_list *active)
{
- active->head.vertical = 1;
active->head.height_left = INT_MAX;
active->head.x.quo = INT_MIN;
+ active->head.dy = 0;
active->head.prev = NULL;
active->head.next = &active->tail;
active->tail.prev = &active->head;
active->tail.next = NULL;
active->tail.x.quo = INT_MAX;
active->tail.height_left = INT_MAX;
- active->tail.vertical = 1;
+ active->tail.dy = 0;
active->min_height = INT_MAX;
active->is_vertical = 1;
}
@@ -934,7 +931,7 @@ can_full_step(struct active_list *active)
for (e = active->head.next; &active->tail != e; e = e->next) {
if (e->height_left < min_height)
min_height = e->height_left;
- is_vertical &= e->vertical;
+ is_vertical &= e->dy == 0;
}
active->is_vertical = is_vertical;
@@ -971,7 +968,7 @@ fill_buckets(struct active_list *active,
*b = edge;
if (edge->height_left < min_height)
min_height = edge->height_left;
- is_vertical &= edge->vertical;
+ is_vertical &= edge->dy == 0;
edge = next;
}
@@ -1002,7 +999,7 @@ nonzero_subrow(struct active_list *active, struct cell_list *coverages)
xstart = edge->x.quo;
if (--edge->height_left) {
- if (!edge->vertical) {
+ if (edge->dy) {
edge->x.quo += edge->dxdy.quo;
edge->x.rem += edge->dxdy.rem;
if (edge->x.rem >= 0) {
@@ -1595,7 +1592,7 @@ inplace_subrow(struct active_list *active, int8_t *row,
}
if (--edge->height_left) {
- if (!edge->vertical) {
+ if (edge->dy) {
edge->x.quo += edge->dxdy.quo;
edge->x.rem += edge->dxdy.rem;
if (edge->x.rem >= 0) {
@@ -1805,7 +1802,6 @@ struct mono_edge {
int32_t height_left;
int32_t dir;
- int32_t vertical;
int32_t dy;
struct quorem x;
@@ -1925,14 +1921,12 @@ mono_add_line(struct mono *mono,
dy = p2->y - p1->y;
if (dx == 0) {
- e->vertical = TRUE;
e->x.quo = p1->x;
e->x.rem = 0;
e->dxdy.quo = 0;
e->dxdy.rem = 0;
e->dy = 0;
} else {
- e->vertical = FALSE;
e->dxdy = floored_muldivrem (dx, pixman_fixed_1, dy);
e->dy = dy;
@@ -2079,7 +2073,7 @@ mono_merge_edges(struct mono *c, struct mono_edge *edges)
DBG_MONO_EDGES(edges);
for (e = edges; c->is_vertical && e; e = e->next)
- c->is_vertical = e->vertical;
+ c->is_vertical = e->dy == 0;
c->head.next = mono_merge_unsorted_edges(c->head.next, edges);
}
@@ -2137,11 +2131,13 @@ mono_row(struct mono *c, int16_t y, int16_t h)
int16_t xend = I(edge->x.quo);
if (--edge->height_left) {
- edge->x.quo += edge->dxdy.quo;
- edge->x.rem += edge->dxdy.rem;
- if (edge->x.rem >= 0) {
- ++edge->x.quo;
- edge->x.rem -= edge->dy;
+ if (edge->dy) {
+ edge->x.quo += edge->dxdy.quo;
+ edge->x.rem += edge->dxdy.rem;
+ if (edge->x.rem >= 0) {
+ ++edge->x.quo;
+ edge->x.rem -= edge->dy;
+ }
}
if (edge->x.quo < prev_x) {
@@ -2185,7 +2181,7 @@ mono_init(struct mono *c, int num_edges)
if (!mono_polygon_init(&c->polygon, &c->clip.extents, num_edges))
return false;
- c->head.vertical = 1;
+ c->head.dy = 0;
c->head.height_left = INT_MAX;
c->head.x.quo = INT16_MIN << 16;
c->head.prev = NULL;
@@ -2194,7 +2190,7 @@ mono_init(struct mono *c, int num_edges)
c->tail.next = NULL;
c->tail.x.quo = INT16_MAX << 16;
c->tail.height_left = INT_MAX;
- c->tail.vertical = 1;
+ c->tail.dy = 0;
c->is_vertical = 1;
commit 82023397cd05c05cfcb8939829a95e287d438875
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Mar 18 11:10:14 2012 +0000
sna/gen3: Improve clear-to-solid reduction
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 8c88722..b236761 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2258,6 +2258,27 @@ gen3_init_radial(struct sna *sna,
}
static Bool
+sna_picture_is_clear(PicturePtr picture,
+ int x, int y, int w, int h,
+ uint32_t *color)
+{
+ struct sna_pixmap *priv;
+
+ if (!picture->pDrawable)
+ return FALSE;
+
+ priv = sna_pixmap(get_drawable_pixmap(picture->pDrawable));
+ if (priv == NULL || !priv->clear)
+ return FALSE;
+
+ if (!source_is_covered(picture, x, y, w, h))
+ return FALSE;
+
+ *color = priv->clear_color;
+ return TRUE;
+}
+
+static Bool
gen3_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_op *op,
@@ -2310,6 +2331,9 @@ gen3_composite_picture(struct sna *sna,
if (sna_picture_is_solid(picture, &color))
return gen3_init_solid(channel, color);
+ if (sna_picture_is_clear(picture, x, y, w, h, &color))
+ return gen3_init_solid(channel, color);
+
if (!gen3_check_repeat(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
commit 7ce5f4a11389b2fe9f27a6f09c4848ac71424d5d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Mar 18 10:55:17 2012 +0000
sna/gen3: Do not force tiling for large pixmaps
As the extraction routine is now smarter and can construct
subsurfaces without copying, we do not need to force tiling.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 8a2222c..bb09214 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2300,17 +2300,11 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
return tiling < 0 ? tiling : I915_TILING_NONE;
if (kgem->gen < 40) {
- if (tiling) {
- if (width * bpp > 8192 * 8) {
- DBG(("%s: pitch too large for tliing [%d]\n",
- __FUNCTION__, width*bpp/8));
- tiling = I915_TILING_NONE;
- goto done;
- } else if ((width|height) > 2048) {
- DBG(("%s: large buffer (%dx%d), forcing TILING_X\n",
- __FUNCTION__, width, height));
- tiling = -I915_TILING_X;
- }
+ if (tiling && width * bpp > 8192 * 8) {
+ DBG(("%s: pitch too large for tliing [%d]\n",
+ __FUNCTION__, width*bpp/8));
+ tiling = I915_TILING_NONE;
+ goto done;
}
} else {
if (width*bpp > (MAXSHORT-512) * 8) {
commit 342dda3fe361c8be2f3af5af1516cdc6a5fdcaa9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Mar 18 10:55:06 2012 +0000
sna/gen3: Prevent copy-fallback if we cannot blit
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 67c8956..8c88722 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -3987,6 +3987,10 @@ gen3_render_copy_boxes(struct sna *sna, uint8_t alu,
src_bo->pitch > MAX_3D_PITCH ||
too_large(src->drawable.width, src->drawable.height)) {
fallback_blt:
+ if (!kgem_bo_can_blt(&sna->kgem, src_bo) ||
+ !kgem_bo_can_blt(&sna->kgem, dst_bo))
+ return FALSE;
+
return sna_blt_copy_boxes_fallback(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
commit 28c089781fdf74a9bac2e138e65383748558fcc1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Mar 18 10:55:27 2012 +0000
sna: Fixup the cpu shadow mappings before uploading the box
On the off-chance we arrive here with a pointer to the GTT mapping.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 8be3e72..542cdb9 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -442,6 +442,7 @@ static struct kgem_bo *upload(struct sna *sna,
PixmapPtr pixmap,
BoxPtr box)
{
+ struct sna_pixmap *priv;
struct kgem_bo *bo;
DBG(("%s: box=(%d, %d), (%d, %d), pixmap=%dx%d\n",
@@ -451,6 +452,19 @@ static struct kgem_bo *upload(struct sna *sna,
assert(box->x2 <= pixmap->drawable.width);
assert(box->y2 <= pixmap->drawable.height);
+ priv = sna_pixmap(pixmap);
+ if (priv) {
+ /* As we know this box is on the CPU just fixup the shadow */
+ if (priv->mapped) {
+ pixmap->devPrivate.ptr = NULL;
+ priv->mapped = false;
+ }
+ if (pixmap->devPrivate.ptr == NULL) {
+ pixmap->devPrivate.ptr = priv->ptr;
+ pixmap->devKind = priv->stride;
+ }
+ }
+
bo = kgem_upload_source_image(&sna->kgem,
pixmap->devPrivate.ptr, box,
pixmap->devKind,
@@ -463,13 +477,11 @@ static struct kgem_bo *upload(struct sna *sna,
channel->scale[0] = 1.f/channel->width;
channel->scale[1] = 1.f/channel->height;
- if (pixmap->usage_hint == 0 &&
+ if (priv &&
+ pixmap->usage_hint == 0 &&
channel->width == pixmap->drawable.width &&
- channel->height == pixmap->drawable.height) {
- struct sna_pixmap *priv = sna_pixmap(pixmap);
- if (priv)
- kgem_proxy_bo_attach(bo, &priv->gpu_bo);
- }
+ channel->height == pixmap->drawable.height)
+ kgem_proxy_bo_attach(bo, &priv->gpu_bo);
}
return bo;
commit 79258fe560d08dd3dd22b4782360e1597e54948c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Mar 18 09:45:27 2012 +0000
sna/traps: Apply some more operator and unbounded reductions
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 5594023..f2caf9a 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -2698,6 +2698,8 @@ composite_unaligned_box(struct sna *sna,
float opacity,
pixman_region16_t *clip)
{
+ assert(opacity != 0.);
+
if (clip) {
pixman_region16_t region;
@@ -3017,6 +3019,7 @@ composite_unaligned_boxes(struct sna *sna,
{
BoxRec extents;
struct sna_composite_spans_op tmp;
+ struct sna_pixmap *priv;
pixman_region16_t clip, *c;
int dst_x, dst_y;
int dx, dy, n;
@@ -3030,7 +3033,8 @@ composite_unaligned_boxes(struct sna *sna,
if (ntrap > 1 && maskFormat)
return false;
- if (!sna->render.composite_spans)
+ priv = sna_pixmap(get_drawable_pixmap(dst->pDrawable));
+ if (priv == NULL || !sna->render.composite_spans)
return composite_unaligned_boxes_fallback(op, src, dst, src_x, src_y, ntrap, traps);
dst_x = extents.x1 = pixman_fixed_to_int(traps[0].left.p1.x);
@@ -3090,6 +3094,17 @@ composite_unaligned_boxes(struct sna *sna,
src_x + extents.x1 - dst_x - dx,
src_y + extents.y1 - dst_y - dy));
+ switch (op) {
+ case PictOpAdd:
+ if (priv->clear && priv->clear_color == 0)
+ op = PictOpSrc;
+ break;
+ case PictOpIn:
+ if (priv->clear && priv->clear_color == 0)
+ return true;
+ }
+ assert((priv->clear && priv->clear_color == 0) || operator_is_bounded(op));
+
memset(&tmp, 0, sizeof(tmp));
if (!sna->render.composite_spans(sna, op, src, dst,
src_x + extents.x1 - dst_x - dx,
@@ -3321,6 +3336,7 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
BoxRec extents;
pixman_region16_t clip;
int16_t dst_x, dst_y;
+ bool was_clear;
int dx, dy, n;
if (NO_SCAN_CONVERTER)
@@ -3388,6 +3404,19 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
src_x + extents.x1 - dst_x - dx,
src_y + extents.y1 - dst_y - dy));
+ was_clear = sna_drawable_is_clear(dst->pDrawable);
+ switch (op) {
+ case PictOpAdd:
+ case PictOpOver:
+ if (was_clear)
+ op = PictOpSrc;
+ break;
+ case PictOpIn:
+ if (was_clear)
+ return true;
+ break;
+ }
+
memset(&tmp, 0, sizeof(tmp));
if (!sna->render.composite_spans(sna, op, src, dst,
src_x + extents.x1 - dst_x - dx,
@@ -3422,7 +3451,7 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
tor_render(sna, &tor, &tmp, &clip,
choose_span(&tmp, dst, maskFormat, op, &clip),
- maskFormat && !operator_is_bounded(op));
+ !was_clear && maskFormat && !operator_is_bounded(op));
skip:
tor_fini(&tor);
@@ -5150,6 +5179,7 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
pixman_region16_t clip;
int16_t dst_x, dst_y;
int dx, dy, n;
+ bool was_clear;
if (NO_SCAN_CONVERTER)
return false;
@@ -5216,6 +5246,8 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
src_x + extents.x1 - dst_x - dx,
src_y + extents.y1 - dst_y - dy));
+ was_clear = sna_drawable_is_clear(dst->pDrawable);
+
memset(&tmp, 0, sizeof(tmp));
if (!sna->render.composite_spans(sna, op, src, dst,
src_x + extents.x1 - dst_x - dx,
@@ -5248,7 +5280,7 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
tor_render(sna, &tor, &tmp, &clip,
choose_span(&tmp, dst, maskFormat, op, &clip),
- maskFormat && !operator_is_bounded(op));
+ !was_clear && maskFormat && !operator_is_bounded(op));
skip:
tor_fini(&tor);
@@ -5508,6 +5540,7 @@ tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
int16_t dst_x, dst_y;
int dx, dy;
int cw, ccw, n;
+ bool was_clear;
if (NO_SCAN_CONVERTER)
return false;
@@ -5569,6 +5602,8 @@ tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
src_x + extents.x1 - dst_x - dx,
src_y + extents.y1 - dst_y - dy));
+ was_clear = sna_drawable_is_clear(dst->pDrawable);
+
memset(&tmp, 0, sizeof(tmp));
if (!sna->render.composite_spans(sna, op, src, dst,
src_x + extents.x1 - dst_x - dx,
@@ -5611,7 +5646,7 @@ tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
tor_render(sna, &tor, &tmp, &clip,
choose_span(&tmp, dst, maskFormat, op, &clip),
- maskFormat && !operator_is_bounded(op));
+ !was_clear && maskFormat && !operator_is_bounded(op));
skip:
tor_fini(&tor);
commit fe8866d6112c3e187d6682e9e4610325668427a0
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Mar 17 21:49:56 2012 +0000
sna/gen[345]: Convert CPU mappings to GTT for vertices on submit
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 0991a98..67c8956 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -1680,14 +1680,21 @@ static void gen3_vertex_close(struct sna *sna)
bo = sna->render.vbo;
if (bo) {
- if (IS_CPU_MAP(bo->map) ||
- sna->render.vertex_size - sna->render.vertex_used < 64) {
- DBG(("%s: discarding vbo (was CPU mapped)\n",
- __FUNCTION__));
+ if (sna->render.vertex_size - sna->render.vertex_used < 64) {
+ DBG(("%s: discarding full vbo\n", __FUNCTION__));
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
+ } else if (IS_CPU_MAP(bo->map)) {
+ DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
+ sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
+ if (sna->render.vertices == NULL) {
+ sna->render.vbo = NULL;
+ sna->render.vertices = sna->render.vertex_data;
+ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ free_bo = bo;
+ }
}
} else {
if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
@@ -1950,6 +1957,15 @@ gen3_render_reset(struct sna *sna)
state->last_floats_per_vertex = 0;
state->last_vertex_offset = 0;
state->vertex_offset = 0;
+
+ if (sna->render.vbo &&
+ !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
+ DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
+ kgem_bo_destroy(&sna->kgem, sna->render.vbo);
+ sna->render.vbo = NULL;
+ sna->render.vertices = sna->render.vertex_data;
+ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ }
}
static void
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index a69852e..def5d19 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -419,9 +419,14 @@ static int gen4_vertex_finish(struct sna *sna)
static void gen4_vertex_close(struct sna *sna)
{
- struct kgem_bo *bo;
+ struct kgem_bo *bo, *free_bo = NULL;
unsigned int i, delta = 0;
+ assert(sna->render_state.gen4.vertex_offset == 0);
+
+ DBG(("%s: used=%d, vbo active? %d\n",
+ __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL));
+
if (!sna->render.vertex_used) {
assert(sna->render.vbo == NULL);
assert(sna->render.vertices == sna->render.vertex_data);
@@ -429,10 +434,26 @@ static void gen4_vertex_close(struct sna *sna)
return;
}
- DBG(("%s: used=%d\n", __FUNCTION__, sna->render.vertex_used));
-
bo = sna->render.vbo;
- if (bo == NULL) {
+ if (bo) {
+ if (sna->render.vertex_size - sna->render.vertex_used < 64) {
+ DBG(("%s: discarding full vbo\n", __FUNCTION__));
+ sna->render.vbo = NULL;
+ sna->render.vertices = sna->render.vertex_data;
+ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ free_bo = bo;
+ } else if (IS_CPU_MAP(bo->map)) {
+ DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
+ sna->render.vertices =
+ kgem_bo_map__gtt(&sna->kgem, sna->render.vbo);
+ if (sna->render.vertices == NULL) {
+ sna->render.vbo = NULL;
+ sna->render.vertices = sna->render.vertex_data;
+ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ free_bo = bo;
+ }
+ }
+ } else {
if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
sna->render.vertex_used, sna->kgem.nbatch));
@@ -449,10 +470,11 @@ static void gen4_vertex_close(struct sna *sna)
sna->render.vertex_data,
4*sna->render.vertex_used)) {
kgem_bo_destroy(&sna->kgem, bo);
- goto reset;
+ bo = NULL;
}
DBG(("%s: new vbo: %d\n", __FUNCTION__,
sna->render.vertex_used));
+ free_bo = bo;
}
}
@@ -471,17 +493,13 @@ static void gen4_vertex_close(struct sna *sna)
}
}
- if (bo)
- kgem_bo_destroy(&sna->kgem, bo);
-
-reset:
- sna->render.vertex_used = 0;
- sna->render.vertex_index = 0;
- sna->render_state.gen4.vb_id = 0;
+ if (sna->render.vbo == NULL) {
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ }
- sna->render.vbo = NULL;
- sna->render.vertices = sna->render.vertex_data;
- sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ if (free_bo)
+ kgem_bo_destroy(&sna->kgem, free_bo);
}
@@ -3207,6 +3225,15 @@ static void gen4_render_reset(struct sna *sna)
sna->render_state.gen4.drawrect_offset = -1;
sna->render_state.gen4.drawrect_limit = -1;
sna->render_state.gen4.surface_table = -1;
+
+ if (sna->render.vbo &&
+ !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
+ DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
+ kgem_bo_destroy(&sna->kgem, sna->render.vbo);
+ sna->render.vbo = NULL;
+ sna->render.vertices = sna->render.vertex_data;
+ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ }
}
static void gen4_render_fini(struct sna *sna)
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 01604ef..565d22a 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -428,14 +428,22 @@ static void gen5_vertex_close(struct sna *sna)
bo = sna->render.vbo;
if (bo) {
- if (IS_CPU_MAP(bo->map) ||
- sna->render.vertex_size - sna->render.vertex_used < 64) {
- DBG(("%s: discarding vbo (was CPU mapped)\n",
- __FUNCTION__));
+ if (sna->render.vertex_size - sna->render.vertex_used < 64) {
+ DBG(("%s: discarding full vbo\n", __FUNCTION__));
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
+ } else if (IS_CPU_MAP(bo->map)) {
+ DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
+ sna->render.vertices =
+ kgem_bo_map__gtt(&sna->kgem, sna->render.vbo);
+ if (sna->render.vertices == NULL) {
+ sna->render.vbo = NULL;
+ sna->render.vertices = sna->render.vertex_data;
+ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ free_bo = bo;
+ }
}
} else {
if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
@@ -3655,6 +3663,15 @@ static void gen5_render_reset(struct sna *sna)
sna->render_state.gen5.drawrect_offset = -1;
sna->render_state.gen5.drawrect_limit = -1;
sna->render_state.gen5.surface_table = -1;
+
+ if (sna->render.vbo &&
+ !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
+ DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
+ kgem_bo_destroy(&sna->kgem, sna->render.vbo);
+ sna->render.vbo = NULL;
+ sna->render.vertices = sna->render.vertex_data;
+ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ }
}
static void gen5_render_fini(struct sna *sna)
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index daca7af..8a2222c 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3190,6 +3190,43 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
return ptr;
}
+void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
+{
+ void *ptr;
+
+ DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
+ bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
+
+ assert(!bo->purged);
+ assert(bo->exec == NULL);
+ assert(list_is_empty(&bo->list));
+
+ if (IS_CPU_MAP(bo->map))
+ kgem_bo_release_map(kgem, bo);
+
+ ptr = bo->map;
+ if (ptr == NULL) {
+ assert(bytes(bo) <= kgem->aperture_mappable / 4);
+
+ kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
+
+ ptr = gem_mmap(kgem->fd, bo->handle, bytes(bo),
+ PROT_READ | PROT_WRITE);
+ if (ptr == NULL)
+ return NULL;
+
+ /* Cache this mapping to avoid the overhead of an
+ * excruciatingly slow GTT pagefault. This is more an
+ * issue with compositing managers which need to frequently
+ * flush CPU damage to their GPU bo.
+ */
+ bo->map = ptr;
+ DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
+ }
+
+ return ptr;
+}
+
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
{
if (bo->map)
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 98534d9..27e0e04 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -364,6 +364,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
uint32_t delta);
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo);
+void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo);
@@ -425,9 +426,6 @@ static inline bool kgem_bo_is_mappable(struct kgem *kgem,
if (bo->domain == DOMAIN_GTT)
return true;
- if (IS_GTT_MAP(bo->map))
- return true;
-
if (kgem->gen < 40 && bo->tiling &&
bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1))
return false;
commit 97cd0c7da51024400e8900e46f51620a5f7ad402
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Mar 17 23:57:46 2012 +0000
sna/traps: Upon reducing an ADD to a SRC, we need to apply the pending clear
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=47444
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 3a93450..5594023 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -4077,6 +4077,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
struct sna_pixmap *priv;
RegionRec region;
uint32_t color;
+ bool unbounded;
int16_t dst_x, dst_y;
int dx, dy;
int n;
@@ -4125,19 +4126,27 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
return false;
}
+ unbounded = false;
switch (op) {
case PictOpAdd:
- if (priv->clear && priv->clear_color == 0)
+ if (priv->clear && priv->clear_color == 0) {
+ unbounded = true;
op = PictOpSrc;
+ }
if ((color >> 24) == 0)
return true;
break;
case PictOpIn:
if (priv->clear && priv->clear_color == 0)
return true;
+ if (priv->clear && priv->clear_color == 0xff)
+ op = PictOpSrc;
if ((color >> 24) == 0)
return true;
+ unbounded = true;
+ break;
case PictOpSrc:
+ unbounded = !(priv->clear && priv->clear_color == 0);
break;
default:
DBG(("%s: fallback -- can not perform op [%d] in place\n",
@@ -4237,7 +4246,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
inplace.opacity = color >> 24;
tor_render(NULL, &tor, (void*)&inplace,
- dst->pCompositeClip, span, op == PictOpIn);
+ dst->pCompositeClip, span, unbounded);
tor_fini(&tor);
More information about the xorg-commit
mailing list