xf86-video-intel: 4 commits - src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/sna_composite.c src/sna/sna_gradient.c src/sna/sna_render.c src/sna/sna_render.h

Chris Wilson ickle at kemper.freedesktop.org
Mon Sep 12 04:30:56 PDT 2011


 src/sna/gen2_render.c   |    4 ++--
 src/sna/gen3_render.c   |    4 ++--
 src/sna/gen4_render.c   |    8 ++++----
 src/sna/gen5_render.c   |    8 ++++----
 src/sna/gen6_render.c   |   10 ++++------
 src/sna/gen7_render.c   |    4 ++--
 src/sna/sna_composite.c |   22 ++++++++++++++++------
 src/sna/sna_gradient.c  |   44 +++++++++++++++++++++++++++++++++++++++++---
 src/sna/sna_render.c    |   24 ++++++++++++------------
 src/sna/sna_render.h    |    5 +++++
 10 files changed, 92 insertions(+), 41 deletions(-)

New commits:
commit 03a7fc16f9ef00ca5591655337621ec67bc37cba
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Sep 12 12:20:24 2011 +0100

    sna: Avoid the call overhead for the trivial clip case
    
    Profile-guided micro-optimisation. /o\
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c
index 9c623fd..bda40c3 100644
--- a/src/sna/sna_composite.c
+++ b/src/sna/sna_composite.c
@@ -61,6 +61,12 @@ static void dst_move_area_to_cpu(PicturePtr picture,
 
 #define BOUND(v)	(INT16) ((v) < MINSHORT ? MINSHORT : (v) > MAXSHORT ? MAXSHORT : (v))
 
+static inline bool
+region_is_singular(pixman_region16_t *region)
+{
+	return region->data == NULL;
+}
+
 static inline pixman_bool_t
 clip_to_dst(pixman_region16_t *region,
 	    pixman_region16_t *clip,
@@ -76,10 +82,9 @@ clip_to_dst(pixman_region16_t *region,
 	     clip->extents.x1, clip->extents.y1,
 	     clip->extents.x2, clip->extents.y2));
 
-	if (pixman_region_n_rects(region) == 1 &&
-	    pixman_region_n_rects(clip) == 1) {
-		pixman_box16_t *r = pixman_region_rectangles(region, NULL);
-		pixman_box16_t *c = pixman_region_rectangles(clip, NULL);
+	if (region_is_singular(region) && region_is_singular(clip)) {
+		pixman_box16_t *r = &region->extents;
+		pixman_box16_t *c = &clip->extents;
 		int v;
 
 		if (r->x1 < (v = c->x1 + dx))
@@ -91,8 +96,12 @@ clip_to_dst(pixman_region16_t *region,
 		if (r->y2 > (v = c->y2 + dy))
 			r->y2 = BOUND(v);
 
-		if (r->x1 >= r->x2 || r->y1 >= r->y2)
+		if (r->x1 >= r->x2 || r->y1 >= r->y2) {
 			pixman_region_init(region);
+			return FALSE;
+		}
+
+		return TRUE;
 	} else if (!pixman_region_not_empty(clip)) {
 		return FALSE;
 	} else {
@@ -102,8 +111,9 @@ clip_to_dst(pixman_region16_t *region,
 			return FALSE;
 		if (dx | dy)
 			pixman_region_translate(region, dx, dy);
+
+		return pixman_region_not_empty(region);
 	}
-	return pixman_region_not_empty(region);
 }
 
 static inline Bool
commit 2540c877d4811b318e1c2cd707745e5b06c9fa4b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Sep 12 11:54:46 2011 +0100

    sna/gen6: Prefer RENDER for copies as it compacts better
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 6fb7275..03ddf75 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2307,8 +2307,7 @@ gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
 	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
 	     src_bo == dst_bo));
 
-	/* XXX benchmark me! */
-	if (sna->kgem.mode != KGEM_RENDER &&
+	if (sna->kgem.mode == KGEM_BLT &&
 	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy_boxes(sna, alu,
 			       src_bo, src_dx, src_dy,
@@ -2465,8 +2464,7 @@ gen6_render_copy(struct sna *sna, uint8_t alu,
 	     src->drawable.width, src->drawable.height,
 	     dst->drawable.width, dst->drawable.height));
 
-	/* XXX benchmark me! */
-	if (sna->kgem.mode != KGEM_RENDER &&
+	if (sna->kgem.mode == KGEM_BLT &&
 	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy(sna, alu,
 			 src_bo, dst_bo,
commit f2a617dcc5fbeaf503db4777f0ec7b3d862085b9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Sep 12 11:54:02 2011 +0100

    sna: Use single precision for computing the texcoord scale factor
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index ff20032..ce6dced 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -2019,8 +2019,8 @@ gen2_render_copy_setup_source(struct sna *sna,
 	channel->repeat = RepeatNone;
 	channel->width  = pixmap->drawable.width;
 	channel->height = pixmap->drawable.height;
-	channel->scale[0] = 1./pixmap->drawable.width;
-	channel->scale[1] = 1./pixmap->drawable.height;
+	channel->scale[0] = 1.f/pixmap->drawable.width;
+	channel->scale[1] = 1.f/pixmap->drawable.height;
 	channel->offset[0] = 0;
 	channel->offset[1] = 0;
 	channel->pict_format = sna_format_for_depth(pixmap->drawable.depth);
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 8526301..429d504 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -3233,8 +3233,8 @@ gen3_render_copy_setup_source(struct sna *sna,
 	channel->repeat = gen3_texture_repeat(RepeatNone);
 	channel->width  = pixmap->drawable.width;
 	channel->height = pixmap->drawable.height;
-	channel->scale[0] = 1./pixmap->drawable.width;
-	channel->scale[1] = 1./pixmap->drawable.height;
+	channel->scale[0] = 1.f/pixmap->drawable.width;
+	channel->scale[1] = 1.f/pixmap->drawable.height;
 	channel->offset[0] = 0;
 	channel->offset[1] = 0;
 	gen3_composite_channel_set_format(channel,
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index bddc681..d2b3746 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -2224,8 +2224,8 @@ gen4_render_copy_boxes(struct sna *sna, uint8_t alu,
 	gen4_copy_bind_surfaces(sna, &tmp);
 	gen4_align_vertex(sna, &tmp);
 
-	tmp.src.scale[0] = 1. / src->drawable.width;
-	tmp.src.scale[1] = 1. / src->drawable.height;
+	tmp.src.scale[0] = 1.f/src->drawable.width;
+	tmp.src.scale[1] = 1.f/src->drawable.height;
 	do {
 		gen4_render_copy_one(sna, &tmp,
 				     box->x1 + src_dx, box->y1 + src_dy,
@@ -2303,8 +2303,8 @@ gen4_render_copy(struct sna *sna, uint8_t alu,
 		gen4_get_card_format_for_depth(src->drawable.depth),
 	op->base.src.width  = src->drawable.width;
 	op->base.src.height = src->drawable.height;
-	op->base.src.scale[0] = 1./src->drawable.width;
-	op->base.src.scale[1] = 1./src->drawable.height;
+	op->base.src.scale[0] = 1.f/src->drawable.width;
+	op->base.src.scale[1] = 1.f/src->drawable.height;
 	op->base.src.filter = SAMPLER_FILTER_NEAREST;
 	op->base.src.repeat = SAMPLER_EXTEND_NONE;
 
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 703b361..f60c081 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2163,8 +2163,8 @@ gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
 	gen5_copy_bind_surfaces(sna, &tmp);
 	gen5_align_vertex(sna, &tmp);
 
-	tmp.src.scale[0] = 1. / src->drawable.width;
-	tmp.src.scale[1] = 1. / src->drawable.height;
+	tmp.src.scale[0] = 1.f/src->drawable.width;
+	tmp.src.scale[1] = 1.f/src->drawable.height;
 	do {
 		int n_this_time = gen5_get_rectangles(sna, &tmp, n);
 		if (n_this_time == 0) {
@@ -2277,8 +2277,8 @@ gen5_render_copy(struct sna *sna, uint8_t alu,
 		gen5_get_card_format_for_depth(src->drawable.depth),
 	op->base.src.width  = src->drawable.width;
 	op->base.src.height = src->drawable.height;
-	op->base.src.scale[0] = 1./src->drawable.width;
-	op->base.src.scale[1] = 1./src->drawable.height;
+	op->base.src.scale[0] = 1.f/src->drawable.width;
+	op->base.src.scale[1] = 1.f/src->drawable.height;
 	op->base.src.filter = SAMPLER_FILTER_NEAREST;
 	op->base.src.repeat = SAMPLER_EXTEND_NONE;
 
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index eb67fc6..6fb7275 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2498,8 +2498,8 @@ gen6_render_copy(struct sna *sna, uint8_t alu,
 		gen6_get_card_format_for_depth(src->drawable.depth),
 	op->base.src.width  = src->drawable.width;
 	op->base.src.height = src->drawable.height;
-	op->base.src.scale[0] = 1./src->drawable.width;
-	op->base.src.scale[1] = 1./src->drawable.height;
+	op->base.src.scale[0] = 1.f/src->drawable.width;
+	op->base.src.scale[1] = 1.f/src->drawable.height;
 	op->base.src.filter = SAMPLER_FILTER_NEAREST;
 	op->base.src.repeat = SAMPLER_EXTEND_NONE;
 
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 379ff56..c7906ca 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2633,8 +2633,8 @@ gen7_render_copy(struct sna *sna, uint8_t alu,
 		gen7_get_card_format_for_depth(src->drawable.depth),
 	op->base.src.width  = src->drawable.width;
 	op->base.src.height = src->drawable.height;
-	op->base.src.scale[0] = 1./src->drawable.width;
-	op->base.src.scale[1] = 1./src->drawable.height;
+	op->base.src.scale[0] = 1.f/src->drawable.width;
+	op->base.src.scale[1] = 1.f/src->drawable.height;
 	op->base.src.filter = SAMPLER_FILTER_NEAREST;
 	op->base.src.repeat = SAMPLER_EXTEND_NONE;
 
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 4a38f39..dc740fc 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -319,8 +319,8 @@ static struct kgem_bo *upload(struct sna *sna,
 	if (bo) {
 		channel->offset[0] -= box->x1;
 		channel->offset[1] -= box->y1;
-		channel->scale[0] = 1./w;
-		channel->scale[1] = 1./h;
+		channel->scale[0] = 1.f/w;
+		channel->scale[1] = 1.f/h;
 		channel->width  = w;
 		channel->height = h;
 	}
@@ -387,8 +387,8 @@ sna_render_pixmap_bo(struct sna *sna,
 
 	channel->height = pixmap->drawable.height;
 	channel->width  = pixmap->drawable.width;
-	channel->scale[0] = 1. / pixmap->drawable.width;
-	channel->scale[1] = 1. / pixmap->drawable.height;
+	channel->scale[0] = 1.f / pixmap->drawable.width;
+	channel->scale[1] = 1.f / pixmap->drawable.height;
 	channel->offset[0] = x - dst_x;
 	channel->offset[1] = y - dst_y;
 
@@ -648,8 +648,8 @@ static int sna_render_picture_downsample(struct sna *sna,
 
 	channel->offset[0] = x - dst_x;
 	channel->offset[1] = y - dst_y;
-	channel->scale[0] = 1./w;
-	channel->scale[1] = 1./h;
+	channel->scale[0] = 1.f/w;
+	channel->scale[1] = 1.f/h;
 	channel->width  = w / 2;
 	channel->height = h / 2;
 	channel->bo = bo;
@@ -824,8 +824,8 @@ sna_render_picture_extract(struct sna *sna,
 
 	channel->offset[0] = x - dst_x;
 	channel->offset[1] = y - dst_y;
-	channel->scale[0] = 1./w;
-	channel->scale[1] = 1./h;
+	channel->scale[0] = 1.f/w;
+	channel->scale[1] = 1.f/h;
 	channel->width  = w;
 	channel->height = h;
 	channel->bo = bo;
@@ -941,8 +941,8 @@ sna_render_picture_fixup(struct sna *sna,
 	channel->repeat = RepeatNone;
 	channel->is_affine = TRUE;
 
-	channel->scale[0] = 1./w;
-	channel->scale[1] = 1./h;
+	channel->scale[0] = 1.f/w;
+	channel->scale[1] = 1.f/h;
 	channel->offset[0] = -dst_x;
 	channel->offset[1] = -dst_y;
 	channel->transform = NULL;
@@ -1061,8 +1061,8 @@ sna_render_picture_convert(struct sna *sna,
 	channel->width  = w;
 	channel->height = h;
 
-	channel->scale[0] = 1. / w;
-	channel->scale[1] = 1. / h;
+	channel->scale[0] = 1.f/w;
+	channel->scale[1] = 1.f/h;
 	channel->offset[0] = x - dst_x - box.x1;
 	channel->offset[1] = y - dst_y - box.y1;
 
commit c2e19987c333de31b258a3af37a2ed1f63ef949a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Sep 12 10:37:24 2011 +0100

    sna: Provide a direct lookup cache of alpha values
    
    To reflect recent changes in cairo.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_gradient.c b/src/sna/sna_gradient.c
index 6b85b10..20850c8 100644
--- a/src/sna/sna_gradient.c
+++ b/src/sna/sna_gradient.c
@@ -260,8 +260,11 @@ sna_render_get_solid(struct sna *sna, uint32_t color)
 	struct sna_solid_cache *cache = &sna->render.solid_cache;
 	unsigned int i;
 
-	if (color == 0) {
-		DBG(("%s(clear)\n", __FUNCTION__));
+	if ((color & 0xffffff) == 0) /* alpha only */
+		return kgem_bo_reference(sna->render.alpha_cache.bo[color>>24]);
+
+	if (color == 0xffffffff) {
+		DBG(("%s(white)\n", __FUNCTION__));
 		return kgem_bo_reference(cache->bo[0]);
 	}
 
@@ -302,6 +305,27 @@ done:
 	return kgem_bo_reference(cache->bo[i]);
 }
 
+static Bool sna_alpha_cache_init(struct sna *sna)
+{
+	struct sna_alpha_cache *cache = &sna->render.alpha_cache;
+	uint32_t color[256];
+	int i;
+
+	cache->cache_bo = kgem_create_linear(&sna->kgem, sizeof(color));
+	if (!cache->cache_bo)
+		return FALSE;
+
+	for (i = 0; i < 256; i++) {
+		color[i] = i << 24;
+		cache->bo[i] = kgem_create_proxy(cache->cache_bo,
+						 sizeof(uint32_t)*i,
+						 sizeof(uint32_t));
+		cache->bo[i]->pitch = 4;
+	}
+	kgem_bo_write(&sna->kgem, cache->cache_bo, color, sizeof(color));
+	return TRUE;
+}
+
 static Bool sna_solid_cache_init(struct sna *sna)
 {
 	struct sna_solid_cache *cache = &sna->render.solid_cache;
@@ -311,6 +335,7 @@ static Bool sna_solid_cache_init(struct sna *sna)
 	if (!cache->cache_bo)
 		return FALSE;
 
+	cache->color[0] = 0xffffffff;
 	cache->bo[0] = kgem_create_proxy(cache->cache_bo, 0, sizeof(uint32_t));
 	cache->bo[0]->pitch = 4;
 	cache->size = 1;
@@ -320,13 +345,26 @@ static Bool sna_solid_cache_init(struct sna *sna)
 
 Bool sna_gradients_create(struct sna *sna)
 {
-	return sna_solid_cache_init(sna);
+	if (!sna_alpha_cache_init(sna))
+		return FALSE;
+
+	if (!sna_solid_cache_init(sna))
+		return FALSE;
+
+	return TRUE;
 }
 
 void sna_gradients_close(struct sna *sna)
 {
 	int i;
 
+	for (i = 0; i < 256; i++) {
+		if (sna->render.alpha_cache.bo[i])
+			kgem_bo_destroy(&sna->kgem, sna->render.alpha_cache.bo[i]);
+	}
+	if (sna->render.alpha_cache.cache_bo)
+		kgem_bo_destroy(&sna->kgem, sna->render.alpha_cache.cache_bo);
+
 	if (sna->render.solid_cache.cache_bo)
 		kgem_bo_destroy(&sna->kgem, sna->render.solid_cache.cache_bo);
 	for (i = 0; i < sna->render.solid_cache.size; i++) {
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index e59c017..65e1f35 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -215,6 +215,11 @@ struct sna_render {
 	void (*reset)(struct sna *sna);
 	void (*fini)(struct sna *sna);
 
+	struct sna_alpha_cache {
+		struct kgem_bo *cache_bo;
+		struct kgem_bo *bo[256];
+	} alpha_cache;
+
 	struct sna_solid_cache {
 		struct kgem_bo *cache_bo;
 		uint32_t color[1024];


More information about the xorg-commit mailing list