xf86-video-intel: 10 commits - src/intel_options.c src/intel_options.h src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen4_source.c src/sna/gen4_source.h src/sna/gen4_vertex.c src/sna/gen4_vertex.h src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/Makefile.am src/sna/sna_accel.c src/sna/sna_driver.c src/sna/sna_glyphs.c src/sna/sna.h src/sna/sna_render.h src/sna/sna_render_inline.h src/sna/sna_trapezoids.c

Wed Jan 2 05:01:17 PST 2013

src/intel_options.c         |    2 
 src/intel_options.h         |    2 
 src/sna/Makefile.am         |    2 
 src/sna/gen2_render.c       |  207 +++++++++---------------------------
 src/sna/gen3_render.c       |   16 +-
 src/sna/gen4_render.c       |  252 ++++++--------------------------------------
 src/sna/gen4_source.c       |  179 +++++++++++++++++++++++++++++++
 src/sna/gen4_source.h       |   22 +++
 src/sna/gen4_vertex.c       |  205 +++++++++++++++++++++++++++++++++--
 src/sna/gen4_vertex.h       |   26 ----
 src/sna/gen5_render.c       |  227 ++++-----------------------------------
 src/sna/gen6_render.c       |  240 ++++-------------------------------------
 src/sna/gen7_render.c       |  242 ++++--------------------------------------
 src/sna/kgem.c              |    4 
 src/sna/sna.h               |   20 +--
 src/sna/sna_accel.c         |   12 --
 src/sna/sna_driver.c        |  170 ++++++++++++++---------------
 src/sna/sna_glyphs.c        |   53 ++++++---
 src/sna/sna_render.h        |    8 -
 src/sna/sna_render_inline.h |    7 +
 src/sna/sna_trapezoids.c    |   64 +++++++++++
 21 files changed, 784 insertions(+), 1176 deletions(-)

New commits:
commit 2559cfcc4cbc1d0d84b048565cad3bfee61df8da
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Jan 2 10:22:14 2013 +0000

    sna/gen4+: Specialise linear vertex emission
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am
index 8b654d7..bcf757e 100644
--- a/src/sna/Makefile.am
+++ b/src/sna/Makefile.am
@@ -73,6 +73,8 @@ libsna_la_SOURCES = \
 	gen3_render.h \
 	gen4_render.c \
 	gen4_render.h \
+	gen4_source.c \
+	gen4_source.h \
 	gen4_vertex.c \
 	gen4_vertex.h \
 	gen5_render.c \
diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index c5ad07a..b539e27 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -762,9 +762,9 @@ gen2_emit_composite_linear(struct sna *sna,
 {
 	float v;
 
-	v = (x * channel->u.gen2.linear_dx +
-	     y * channel->u.gen2.linear_dy +
-	     channel->u.gen2.linear_offset);
+	v = (x * channel->u.linear.dx +
+	     y * channel->u.linear.dy +
+	     channel->u.linear.offset);
 	DBG(("%s: (%d, %d) -> %f\n", __FUNCTION__, x, y, v));
 	VERTEX(v);
 	VERTEX(v);
@@ -1262,12 +1262,12 @@ gen2_composite_linear_init(struct sna *sna,
 	dx /= sf;
 	dy /= sf;
 
-	channel->u.gen2.linear_dx = dx;
-	channel->u.gen2.linear_dy = dy;
-	channel->u.gen2.linear_offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y);
+	channel->u.linear.dx = dx;
+	channel->u.linear.dy = dy;
+	channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y);
 
 	DBG(("%s: dx=%f, dy=%f, offset=%f\n",
-	     __FUNCTION__, dx, dy, channel->u.gen2.linear_offset));
+	     __FUNCTION__, dx, dy, channel->u.linear.offset));
 
 	return channel->bo != NULL;
 }
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 893c5ee..ac099a5 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -42,6 +42,7 @@
 
 #include "brw/brw.h"
 #include "gen4_render.h"
+#include "gen4_source.h"
 #include "gen4_vertex.h"
 
 /* gen4 has a serious issue with its shaders that we need to flush
@@ -1333,141 +1334,6 @@ gen4_render_video(struct sna *sna,
 	return true;
 }
 
-static bool
-gen4_composite_solid_init(struct sna *sna,
-			  struct sna_composite_channel *channel,
-			  uint32_t color)
-{
-	channel->filter = PictFilterNearest;
-	channel->repeat = RepeatNormal;
-	channel->is_affine = true;
-	channel->is_solid  = true;
-	channel->transform = NULL;
-	channel->width  = 1;
-	channel->height = 1;
-	channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
-
-	channel->bo = sna_render_get_solid(sna, color);
-
-	channel->scale[0]  = channel->scale[1]  = 1;
-	channel->offset[0] = channel->offset[1] = 0;
-	return channel->bo != NULL;
-}
-
-static bool
-gen4_composite_linear_init(struct sna *sna,
-			   PicturePtr picture,
-			   struct sna_composite_channel *channel,
-			   int x, int y,
-			   int w, int h,
-			   int dst_x, int dst_y)
-{
-	PictLinearGradient *linear =
-		(PictLinearGradient *)picture->pSourcePict;
-	pixman_fixed_t tx, ty;
-	float x0, y0, sf;
-	float dx, dy;
-
-	DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
-	     __FUNCTION__,
-	     pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y),
-	     pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y),
-	     x, y, dst_x, dst_y, w, h));
-
-	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
-		return 0;
-
-	if (!sna_transform_is_affine(picture->transform)) {
-		DBG(("%s: fallback due to projective transform\n",
-		     __FUNCTION__));
-		return sna_render_picture_fixup(sna, picture, channel,
-						x, y, w, h, dst_x, dst_y);
-	}
-
-	channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
-	if (!channel->bo)
-		return 0;
-
-	channel->filter = PictFilterNearest;
-	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
-	channel->width  = channel->bo->pitch / 4;
-	channel->height = 1;
-	channel->pict_format = PICT_a8r8g8b8;
-
-	channel->scale[0]  = channel->scale[1]  = 1;
-	channel->offset[0] = channel->offset[1] = 0;
-
-	if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
-		dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x);
-		dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y);
-
-		x0 = pixman_fixed_to_double(linear->p1.x);
-		y0 = pixman_fixed_to_double(linear->p1.y);
-
-		if (tx | ty) {
-			x0 -= pixman_fixed_to_double(tx);
-			y0 -= pixman_fixed_to_double(ty);
-		}
-	} else {
-		struct pixman_f_vector p1, p2;
-		struct pixman_f_transform m, inv;
-
-		pixman_f_transform_from_pixman_transform(&m, picture->transform);
-		DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
-		     __FUNCTION__,
-		     m.m[0][0], m.m[0][1], m.m[0][2],
-		     m.m[1][0], m.m[1][1], m.m[1][2],
-		     m.m[2][0], m.m[2][1], m.m[2][2]));
-		if (!pixman_f_transform_invert(&inv, &m))
-			return 0;
-
-		p1.v[0] = pixman_fixed_to_double(linear->p1.x);
-		p1.v[1] = pixman_fixed_to_double(linear->p1.y);
-		p1.v[2] = 1.;
-		pixman_f_transform_point(&inv, &p1);
-
-		p2.v[0] = pixman_fixed_to_double(linear->p2.x);
-		p2.v[1] = pixman_fixed_to_double(linear->p2.y);
-		p2.v[2] = 1.;
-		pixman_f_transform_point(&inv, &p2);
-
-		DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
-		     __FUNCTION__,
-		     p1.v[0], p1.v[1], p1.v[2],
-		     p2.v[0], p2.v[1], p2.v[2]));
-
-		dx = p2.v[0] - p1.v[0];
-		dy = p2.v[1] - p1.v[1];
-
-		x0 = p1.v[0];
-		y0 = p1.v[1];
-	}
-
-	sf = dx*dx + dy*dy;
-	dx /= sf;
-	dy /= sf;
-
-	channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx);
-	channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy);
-	channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y));
-
-	channel->embedded_transform.matrix[1][0] = 0;
-	channel->embedded_transform.matrix[1][1] = 0;
-	channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5);
-
-	channel->embedded_transform.matrix[2][0] = 0;
-	channel->embedded_transform.matrix[2][1] = 0;
-	channel->embedded_transform.matrix[2][2] = pixman_fixed_1;
-
-	channel->transform = &channel->embedded_transform;
-	channel->is_affine = 1;
-
-	DBG(("%s: dx=%f, dy=%f, offset=%f\n",
-	     __FUNCTION__, dx, dy, -dx*(x0-x+dst_x) + -dy*(y0-y+dst_y)));
-
-	return channel->bo != NULL;
-}
-
 static int
 gen4_composite_picture(struct sna *sna,
 		       PicturePtr picture,
@@ -1488,16 +1354,16 @@ gen4_composite_picture(struct sna *sna,
 	channel->card_format = -1;
 
 	if (sna_picture_is_solid(picture, &color))
-		return gen4_composite_solid_init(sna, channel, color);
+		return gen4_channel_init_solid(sna, channel, color);
 
 	if (picture->pDrawable == NULL) {
 		int ret;
 
 		if (picture->pSourcePict->type == SourcePictTypeLinear)
-			return gen4_composite_linear_init(sna, picture, channel,
-							  x, y,
-							  w, h,
-							  dst_x, dst_y);
+			return gen4_channel_init_linear(sna, picture, channel,
+							x, y,
+							w, h,
+							dst_x, dst_y);
 
 		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
 		ret = -1;
@@ -1845,7 +1711,7 @@ reuse_source(struct sna *sna,
 	}
 
 	if (sna_picture_is_solid(mask, &color))
-		return gen4_composite_solid_init(sna, mc, color);
+		return gen4_channel_init_solid(sna, mc, color);
 
 	if (sc->is_solid)
 		return false;
@@ -1937,7 +1803,7 @@ gen4_render_composite(struct sna *sna,
 		DBG(("%s: failed to prepare source\n", __FUNCTION__));
 		goto cleanup_dst;
 	case 0:
-		if (!gen4_composite_solid_init(sna, &tmp->src, 0))
+		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
 			goto cleanup_dst;
 		/* fall through to fixup */
 	case 1:
@@ -1989,7 +1855,7 @@ gen4_render_composite(struct sna *sna,
 				DBG(("%s: failed to prepare mask\n", __FUNCTION__));
 				goto cleanup_src;
 			case 0:
-				if (!gen4_composite_solid_init(sna, &tmp->mask, 0))
+				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
 					goto cleanup_src;
 				/* fall through to fixup */
 			case 1:
@@ -2000,7 +1866,6 @@ gen4_render_composite(struct sna *sna,
 
 		tmp->is_affine &= tmp->mask.is_affine;
 	}
-	gen4_choose_composite_emitter(tmp);
 
 	/* XXX using more then one thread causes corruption? */
 	tmp->u.gen4.sf = (tmp->mask.bo == NULL &&
@@ -2012,7 +1877,7 @@ gen4_render_composite(struct sna *sna,
 					     tmp->mask.bo != NULL,
 					     tmp->has_component_alpha,
 					     tmp->is_affine);
-	tmp->u.gen4.ve_id = gen4_choose_composite_vertex_buffer(tmp);
+	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp);
 
 	tmp->blt   = gen4_render_composite_blt;
 	tmp->box   = gen4_render_composite_box;
@@ -2189,7 +2054,7 @@ gen4_render_composite_spans(struct sna *sna,
 	case -1:
 		goto cleanup_dst;
 	case 0:
-		if (!gen4_composite_solid_init(sna, &tmp->base.src, 0))
+		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
 			goto cleanup_dst;
 		/* fall through to fixup */
 	case 1:
@@ -2204,10 +2069,8 @@ gen4_render_composite_spans(struct sna *sna,
 	tmp->base.need_magic_ca_pass = false;
 
 	tmp->base.u.gen4.sf = !tmp->base.src.is_solid;
-	gen4_choose_spans_emitter(tmp);
-
+	tmp->base.u.gen4.ve_id = gen4_choose_spans_emitter(tmp);
 	tmp->base.u.gen4.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
-	tmp->base.u.gen4.ve_id = gen4_choose_spans_vertex_buffer(&tmp->base);
 
 	tmp->box   = gen4_render_composite_spans_box;
 	tmp->boxes = gen4_render_composite_spans_boxes;
@@ -2634,7 +2497,7 @@ gen4_render_fill_boxes(struct sna *sna,
 	tmp.dst.format = format;
 	tmp.dst.bo = dst_bo;
 
-	gen4_composite_solid_init(sna, &tmp.src, pixel);
+	gen4_channel_init_solid(sna, &tmp.src, pixel);
 
 	tmp.is_affine = true;
 	tmp.floats_per_vertex = 2;
@@ -2792,9 +2655,9 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
 	op->base.need_magic_ca_pass = 0;
 	op->base.has_component_alpha = 0;
 
-	gen4_composite_solid_init(sna, &op->base.src,
-				  sna_rgba_for_color(color,
-						     dst->drawable.depth));
+	gen4_channel_init_solid(sna, &op->base.src,
+				sna_rgba_for_color(color,
+						   dst->drawable.depth));
 	op->base.mask.bo = NULL;
 
 	op->base.is_affine = true;
@@ -2869,9 +2732,9 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	tmp.dst.bo = bo;
 	tmp.dst.x = tmp.dst.y = 0;
 
-	gen4_composite_solid_init(sna, &tmp.src,
-				  sna_rgba_for_color(color,
-						     dst->drawable.depth));
+	gen4_channel_init_solid(sna, &tmp.src,
+				sna_rgba_for_color(color,
+						   dst->drawable.depth));
 	tmp.mask.bo = NULL;
 
 	tmp.is_affine = true;
diff --git a/src/sna/gen4_source.c b/src/sna/gen4_source.c
new file mode 100644
index 0000000..8ea1617
--- /dev/null
+++ b/src/sna/gen4_source.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright Â© 2011,2012,2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris at chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+#include "gen4_source.h"
+#include "gen4_render.h"
+
+bool
+gen4_channel_init_solid(struct sna *sna,
+			struct sna_composite_channel *channel,
+			uint32_t color)
+{
+	channel->filter = PictFilterNearest;
+	channel->repeat = RepeatNormal;
+	channel->is_affine = true;
+	channel->is_solid  = true;
+	channel->transform = NULL;
+	channel->width  = 1;
+	channel->height = 1;
+	channel->pict_format = PICT_a8r8g8b8;
+	channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+	channel->bo = sna_render_get_solid(sna, color);
+
+	channel->scale[0]  = channel->scale[1]  = 1;
+	channel->offset[0] = channel->offset[1] = 0;
+	return channel->bo != NULL;
+}
+
+
+bool
+gen4_channel_init_linear(struct sna *sna,
+			 PicturePtr picture,
+			 struct sna_composite_channel *channel,
+			 int x, int y,
+			 int w, int h,
+			 int dst_x, int dst_y)
+{
+	PictLinearGradient *linear =
+		(PictLinearGradient *)picture->pSourcePict;
+	pixman_fixed_t tx, ty;
+	float x0, y0, sf;
+	float dx, dy;
+
+	DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
+	     __FUNCTION__,
+	     pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y),
+	     pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y),
+	     x, y, dst_x, dst_y, w, h));
+
+	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
+		return 0;
+
+	if (!sna_transform_is_affine(picture->transform)) {
+		DBG(("%s: fallback due to projective transform\n",
+		     __FUNCTION__));
+		return sna_render_picture_fixup(sna, picture, channel,
+						x, y, w, h, dst_x, dst_y);
+	}
+
+	channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
+	if (!channel->bo)
+		return 0;
+
+	channel->filter = PictFilterNearest;
+	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+	channel->width  = channel->bo->pitch / 4;
+	channel->height = 1;
+	channel->pict_format = PICT_a8r8g8b8;
+	channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
+	channel->is_linear = 1;
+	channel->is_affine = 1;
+
+	channel->scale[0]  = channel->scale[1]  = 1;
+	channel->offset[0] = channel->offset[1] = 0;
+
+	if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
+		dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x);
+		dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y);
+
+		x0 = pixman_fixed_to_double(linear->p1.x);
+		y0 = pixman_fixed_to_double(linear->p1.y);
+
+		if (tx | ty) {
+			x0 -= pixman_fixed_to_double(tx);
+			y0 -= pixman_fixed_to_double(ty);
+		}
+	} else {
+		struct pixman_f_vector p1, p2;
+		struct pixman_f_transform m, inv;
+
+		pixman_f_transform_from_pixman_transform(&m, picture->transform);
+		DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
+		     __FUNCTION__,
+		     m.m[0][0], m.m[0][1], m.m[0][2],
+		     m.m[1][0], m.m[1][1], m.m[1][2],
+		     m.m[2][0], m.m[2][1], m.m[2][2]));
+		if (!pixman_f_transform_invert(&inv, &m))
+			return 0;
+
+		p1.v[0] = pixman_fixed_to_double(linear->p1.x);
+		p1.v[1] = pixman_fixed_to_double(linear->p1.y);
+		p1.v[2] = 1.;
+		pixman_f_transform_point(&inv, &p1);
+
+		p2.v[0] = pixman_fixed_to_double(linear->p2.x);
+		p2.v[1] = pixman_fixed_to_double(linear->p2.y);
+		p2.v[2] = 1.;
+		pixman_f_transform_point(&inv, &p2);
+
+		DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
+		     __FUNCTION__,
+		     p1.v[0], p1.v[1], p1.v[2],
+		     p2.v[0], p2.v[1], p2.v[2]));
+
+		dx = p2.v[0] - p1.v[0];
+		dy = p2.v[1] - p1.v[1];
+
+		x0 = p1.v[0];
+		y0 = p1.v[1];
+	}
+
+	sf = dx*dx + dy*dy;
+	dx /= sf;
+	dy /= sf;
+
+	channel->u.linear.dx = dx;
+	channel->u.linear.dy = dy;
+	channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y);
+
+	channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx);
+	channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy);
+	channel->embedded_transform.matrix[0][2] = pixman_double_to_fixed(channel->u.linear.offset);
+
+	channel->embedded_transform.matrix[1][0] = 0;
+	channel->embedded_transform.matrix[1][1] = 0;
+	channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5);
+
+	channel->embedded_transform.matrix[2][0] = 0;
+	channel->embedded_transform.matrix[2][1] = 0;
+	channel->embedded_transform.matrix[2][2] = pixman_fixed_1;
+
+	channel->transform = &channel->embedded_transform;
+
+	DBG(("%s: dx=%f, dy=%f, offset=%f\n",
+	     __FUNCTION__, dx, dy, channel->u.linear.offset));
+
+	return channel->bo != NULL;
+}
diff --git a/src/sna/gen4_source.h b/src/sna/gen4_source.h
new file mode 100644
index 0000000..c73afac
--- /dev/null
+++ b/src/sna/gen4_source.h
@@ -0,0 +1,22 @@
+#ifndef GEN4_SOURCE_H
+#define GEN4_SOURCE_H
+
+#include "compiler.h"
+
+#include "sna.h"
+#include "sna_render.h"
+
+bool
+gen4_channel_init_solid(struct sna *sna,
+			struct sna_composite_channel *channel,
+			uint32_t color);
+
+bool
+gen4_channel_init_linear(struct sna *sna,
+			 PicturePtr picture,
+			 struct sna_composite_channel *channel,
+			 int x, int y,
+			 int w, int h,
+			 int dst_x, int dst_y);
+
+#endif /* GEN4_SOURCE_H */
diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index ba88ca5..f8becce 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -223,6 +223,15 @@ void gen4_vertex_close(struct sna *sna)
 #define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) /* XXX assert(!too_large(x, y)); */
 #define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
+inline static float
+compute_linear(const struct sna_composite_channel *channel,
+	       int16_t x, int16_t y)
+{
+	return ((x+channel->offset[0]) * channel->u.linear.dx +
+		(y+channel->offset[1]) * channel->u.linear.dy +
+		channel->u.linear.offset);
+}
+
 inline static void
 emit_texcoord(struct sna *sna,
 	      const struct sna_composite_channel *channel,
@@ -346,6 +355,36 @@ emit_primitive_solid(struct sna *sna,
 }
 
 fastcall static void
+emit_primitive_linear(struct sna *sna,
+		      const struct sna_composite_op *op,
+		      const struct sna_composite_rectangles *r)
+{
+	float *v;
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst;
+
+	assert(op->floats_per_rect == 6);
+	assert((sna->render.vertex_used % 2) == 0);
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 6;
+	assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+	dst.p.x = r->dst.x + r->width;
+	dst.p.y = r->dst.y + r->height;
+	v[0] = dst.f;
+	dst.p.x = r->dst.x;
+	v[2] = dst.f;
+	dst.p.y = r->dst.y;
+	v[4] = dst.f;
+
+	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
+	v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
+	v[5] = compute_linear(&op->src, r->src.x, r->src.y);
+}
+
+fastcall static void
 emit_primitive_identity_source(struct sna *sna,
 			       const struct sna_composite_op *op,
 			       const struct sna_composite_rectangles *r)
@@ -499,6 +538,51 @@ emit_primitive_identity_mask(struct sna *sna,
 }
 
 fastcall static void
+emit_primitive_linear_identity_mask(struct sna *sna,
+				    const struct sna_composite_op *op,
+				    const struct sna_composite_rectangles *r)
+{
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst;
+	float msk_x, msk_y;
+	float w, h;
+	float *v;
+
+	msk_x = r->mask.x + op->mask.offset[0];
+	msk_y = r->mask.y + op->mask.offset[1];
+	w = r->width;
+	h = r->height;
+
+	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
+	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
+
+	assert(op->floats_per_rect == 12);
+	assert((sna->render.vertex_used % 4) == 0);
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 12;
+
+	dst.p.x = r->dst.x + r->width;
+	dst.p.y = r->dst.y + r->height;
+	v[0] = dst.f;
+	v[2] = (msk_x + w) * op->mask.scale[0];
+	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
+
+	dst.p.x = r->dst.x;
+	v[4] = dst.f;
+	v[10] = v[6] = msk_x * op->mask.scale[0];
+
+	dst.p.y = r->dst.y;
+	v[8] = dst.f;
+	v[11] = msk_y * op->mask.scale[1];
+
+	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
+	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
+	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
+}
+
+fastcall static void
 emit_primitive_identity_source_mask(struct sna *sna,
 				    const struct sna_composite_op *op,
 				    const struct sna_composite_rectangles *r)
@@ -661,20 +745,28 @@ emit_composite_texcoord_affine(struct sna *sna,
 	OUT_VERTEX_F(t[1] * channel->scale[1]);
 }
 
-void gen4_choose_composite_emitter(struct sna_composite_op *tmp)
+
+unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
 {
-	tmp->prim_emit = emit_primitive;
-	tmp->floats_per_vertex = 1 + (tmp->src.is_solid ? 1 : 2 + !tmp->src.is_affine);
+	unsigned vb;
+
 	if (tmp->mask.bo) {
-		tmp->floats_per_vertex += 2 + !tmp->mask.is_affine;
-		tmp->prim_emit = emit_primitive_mask;
 		if (tmp->mask.transform == NULL) {
 			if (tmp->src.is_solid) {
 				DBG(("%s: solid, identity mask\n", __FUNCTION__));
 				tmp->prim_emit = emit_primitive_identity_mask;
+				tmp->floats_per_vertex = 4;
+				vb = 1 | 2 << 2;
+			} else if (tmp->src.is_linear) {
+				DBG(("%s: linear, identity mask\n", __FUNCTION__));
+				tmp->prim_emit = emit_primitive_linear_identity_mask;
+				tmp->floats_per_vertex = 4;
+				vb = 1 | 2 << 2;
 			} else if (tmp->src.transform == NULL) {
 				DBG(("%s: identity source, identity mask\n", __FUNCTION__));
 				tmp->prim_emit = emit_primitive_identity_source_mask;
+				tmp->floats_per_vertex = 5;
+				vb = 2 << 2 | 2;
 			} else if (tmp->src.is_affine) {
 				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
 				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
@@ -685,7 +777,19 @@ void gen4_choose_composite_emitter(struct sna_composite_op *tmp)
 					DBG(("%s: affine src, identity mask\n", __FUNCTION__));
 					tmp->prim_emit = emit_primitive_affine_source_identity;
 				}
+				tmp->floats_per_vertex = 5;
+				vb = 2 << 2 | 2;
+			} else {
+				tmp->prim_emit = emit_primitive_mask;
+				tmp->floats_per_vertex = 6;
+				vb = 2 << 2 | 3;
 			}
+		} else {
+			tmp->prim_emit = emit_primitive_mask;
+			tmp->floats_per_vertex = 1;
+			tmp->floats_per_vertex += 2 + !tmp->mask.is_affine;
+			tmp->floats_per_vertex += 2 + !tmp->src.is_affine;
+			vb = (2 + !tmp->src.is_affine) | (2 + !tmp->mask.is_affine) << 2;
 		}
 	} else {
 		if (tmp->src.is_solid) {
@@ -693,9 +797,17 @@ void gen4_choose_composite_emitter(struct sna_composite_op *tmp)
 			tmp->prim_emit = emit_primitive_solid;
 			if (tmp->src.is_opaque && tmp->op == PictOpOver)
 				tmp->op = PictOpSrc;
+			tmp->floats_per_vertex = 2;
+			vb = 1;
+		} else if (tmp->src.is_linear) {
+			tmp->prim_emit = emit_primitive_linear;
+			tmp->floats_per_vertex = 2;
+			vb = 1;
 		} else if (tmp->src.transform == NULL) {
 			DBG(("%s: identity src, no mask\n", __FUNCTION__));
 			tmp->prim_emit = emit_primitive_identity_source;
+			tmp->floats_per_vertex = 3;
+			vb = 2;
 		} else if (tmp->src.is_affine) {
 			tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
 			tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
@@ -706,9 +818,17 @@ void gen4_choose_composite_emitter(struct sna_composite_op *tmp)
 				DBG(("%s: affine src, no mask\n", __FUNCTION__));
 				tmp->prim_emit = emit_primitive_affine_source;
 			}
+			tmp->floats_per_vertex = 3;
+			vb = 2;
+		} else {
+			tmp->prim_emit = emit_primitive;
+			tmp->floats_per_vertex = 4;
+			vb = 3;
 		}
 	}
 	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
+
+	return vb;
 }
 
 inline static void
@@ -896,15 +1016,63 @@ emit_spans_affine(struct sna *sna,
 	v[11] = v[7] = v[3] = opacity;
 }
 
-void gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp)
+fastcall static void
+emit_spans_linear(struct sna *sna,
+		  const struct sna_composite_spans_op *op,
+		  const BoxRec *box,
+		  float opacity)
 {
-	tmp->prim_emit = emit_composite_spans_primitive;
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst;
+	float *v;
+
+	assert(op->base.floats_per_rect == 9);
+	assert((sna->render.vertex_used % 3) == 0);
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 9;
+
+	dst.p.x = box->x2;
+	dst.p.y = box->y2;
+	v[0] = dst.f;
+	dst.p.x = box->x1;
+	v[3] = dst.f;
+	dst.p.y = box->y1;
+	v[6] = dst.f;
+
+	v[1] = compute_linear(&op->base.src, box->x2, box->y2);
+	v[4] = compute_linear(&op->base.src, box->x1, box->y2);
+	v[7] = compute_linear(&op->base.src, box->x1, box->y1);
+
+	v[8] = v[5] = v[2] = opacity;
+}
+
+inline inline static uint32_t
+gen4_choose_spans_vertex_buffer(const struct sna_composite_op *op)
+{
+	int id = op->src.is_solid ? 1 : 2 + !op->src.is_affine;
+	DBG(("%s: id=%x (%d, 1)\n", __FUNCTION__, 1 << 2 | id, id));
+	return 1 << 2 | id;
+}
+
+
+unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp)
+{
+	unsigned vb;
+
 	if (tmp->base.src.is_solid) {
 		tmp->prim_emit = emit_spans_solid;
 		tmp->base.floats_per_vertex = 3;
+		vb = 1 << 2 | 1;
+	} else if (tmp->base.src.is_linear) {
+		tmp->prim_emit = emit_spans_linear;
+		tmp->base.floats_per_vertex = 3;
+		vb = 1 << 2 | 1;
 	} else if (tmp->base.src.transform == NULL) {
 		tmp->prim_emit = emit_spans_identity;
 		tmp->base.floats_per_vertex = 4;
+		vb = 1 << 2 | 2;
 	} else if (tmp->base.is_affine) {
 		tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
 		tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
@@ -913,7 +1081,12 @@ void gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp)
 		else
 			tmp->prim_emit = emit_spans_affine;
 		tmp->base.floats_per_vertex = 4;
-	} else
+		vb = 1 << 2 | 2;
+	} else {
+		tmp->prim_emit = emit_composite_spans_primitive;
 		tmp->base.floats_per_vertex = 5;
+		vb = 1 << 2 | 3;
+	}
 	tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
+	return vb;
 }
diff --git a/src/sna/gen4_vertex.h b/src/sna/gen4_vertex.h
index b6d1374..431b545 100644
--- a/src/sna/gen4_vertex.h
+++ b/src/sna/gen4_vertex.h
@@ -10,29 +10,7 @@ void gen4_vertex_flush(struct sna *sna);
 int gen4_vertex_finish(struct sna *sna);
 void gen4_vertex_close(struct sna *sna);
 
-inline static uint32_t
-gen4_choose_composite_vertex_buffer(const struct sna_composite_op *op)
-{
-	int id = op->src.is_solid ? 1 : 2 + !op->src.is_affine;
-	if (op->mask.bo)
-		id |= (2 + !op->mask.is_affine) << 2;
-	DBG(("%s: id=%x (%d, %d)\n", __FUNCTION__, id,
-	     op->src.is_solid ? 1 : 2 + !op->src.is_affine,
-	     op->mask.bo ?  2 + !op->mask.is_affine : 0));
-	assert(id > 0 && id < 16);
-	return id;
-}
-
-inline inline static uint32_t
-gen4_choose_spans_vertex_buffer(const struct sna_composite_op *op)
-{
-	int id = op->src.is_solid ? 1 : 2 + !op->src.is_affine;
-	DBG(("%s: id=%x (%d, 1)\n", __FUNCTION__, 1 << 2 | id, id));
-	return 1 << 2 | id;
-}
-
-void gen4_choose_composite_emitter(struct sna_composite_op *tmp);
-void gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp);
-
+unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp);
+unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp);
 
 #endif /* GEN4_VERTEX_H */
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 15f5f58..2e64b8e 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -42,6 +42,7 @@
 
 #include "brw/brw.h"
 #include "gen5_render.h"
+#include "gen4_source.h"
 #include "gen4_vertex.h"
 
 #define NO_COMPOSITE 0
@@ -1321,141 +1322,6 @@ gen5_render_video(struct sna *sna,
 	return true;
 }
 
-static bool
-gen5_composite_solid_init(struct sna *sna,
-			  struct sna_composite_channel *channel,
-			  uint32_t color)
-{
-	channel->filter = PictFilterNearest;
-	channel->repeat = RepeatNormal;
-	channel->is_affine = true;
-	channel->is_solid  = true;
-	channel->transform = NULL;
-	channel->width  = 1;
-	channel->height = 1;
-	channel->card_format = GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
-
-	channel->bo = sna_render_get_solid(sna, color);
-
-	channel->scale[0]  = channel->scale[1]  = 1;
-	channel->offset[0] = channel->offset[1] = 0;
-	return channel->bo != NULL;
-}
-
-static bool
-gen5_composite_linear_init(struct sna *sna,
-			   PicturePtr picture,
-			   struct sna_composite_channel *channel,
-			   int x, int y,
-			   int w, int h,
-			   int dst_x, int dst_y)
-{
-	PictLinearGradient *linear =
-		(PictLinearGradient *)picture->pSourcePict;
-	pixman_fixed_t tx, ty;
-	float x0, y0, sf;
-	float dx, dy;
-
-	DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
-	     __FUNCTION__,
-	     pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y),
-	     pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y),
-	     x, y, dst_x, dst_y, w, h));
-
-	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
-		return 0;
-
-	if (!sna_transform_is_affine(picture->transform)) {
-		DBG(("%s: fallback due to projective transform\n",
-		     __FUNCTION__));
-		return sna_render_picture_fixup(sna, picture, channel,
-						x, y, w, h, dst_x, dst_y);
-	}
-
-	channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
-	if (!channel->bo)
-		return 0;
-
-	channel->filter = PictFilterNearest;
-	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
-	channel->width  = channel->bo->pitch / 4;
-	channel->height = 1;
-	channel->pict_format = PICT_a8r8g8b8;
-
-	channel->scale[0]  = channel->scale[1]  = 1;
-	channel->offset[0] = channel->offset[1] = 0;
-
-	if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
-		dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x);
-		dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y);
-
-		x0 = pixman_fixed_to_double(linear->p1.x);
-		y0 = pixman_fixed_to_double(linear->p1.y);
-
-		if (tx | ty) {
-			x0 -= pixman_fixed_to_double(tx);
-			y0 -= pixman_fixed_to_double(ty);
-		}
-	} else {
-		struct pixman_f_vector p1, p2;
-		struct pixman_f_transform m, inv;
-
-		pixman_f_transform_from_pixman_transform(&m, picture->transform);
-		DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
-		     __FUNCTION__,
-		     m.m[0][0], m.m[0][1], m.m[0][2],
-		     m.m[1][0], m.m[1][1], m.m[1][2],
-		     m.m[2][0], m.m[2][1], m.m[2][2]));
-		if (!pixman_f_transform_invert(&inv, &m))
-			return 0;
-
-		p1.v[0] = pixman_fixed_to_double(linear->p1.x);
-		p1.v[1] = pixman_fixed_to_double(linear->p1.y);
-		p1.v[2] = 1.;
-		pixman_f_transform_point(&inv, &p1);
-
-		p2.v[0] = pixman_fixed_to_double(linear->p2.x);
-		p2.v[1] = pixman_fixed_to_double(linear->p2.y);
-		p2.v[2] = 1.;
-		pixman_f_transform_point(&inv, &p2);
-
-		DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
-		     __FUNCTION__,
-		     p1.v[0], p1.v[1], p1.v[2],
-		     p2.v[0], p2.v[1], p2.v[2]));
-
-		dx = p2.v[0] - p1.v[0];
-		dy = p2.v[1] - p1.v[1];
-
-		x0 = p1.v[0];
-		y0 = p1.v[1];
-	}
-
-	sf = dx*dx + dy*dy;
-	dx /= sf;
-	dy /= sf;
-
-	channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx);
-	channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy);
-	channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y));
-
-	channel->embedded_transform.matrix[1][0] = 0;
-	channel->embedded_transform.matrix[1][1] = 0;
-	channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5);
-
-	channel->embedded_transform.matrix[2][0] = 0;
-	channel->embedded_transform.matrix[2][1] = 0;
-	channel->embedded_transform.matrix[2][2] = pixman_fixed_1;
-
-	channel->transform = &channel->embedded_transform;
-	channel->is_affine = 1;
-
-	DBG(("%s: dx=%f, dy=%f, offset=%f\n",
-	     __FUNCTION__, dx, dy, -dx*(x0-x+dst_x) + -dy*(y0-y+dst_y)));
-
-	return channel->bo != NULL;
-}
-
 static int
 gen5_composite_picture(struct sna *sna,
 		       PicturePtr picture,
@@ -1476,16 +1342,16 @@ gen5_composite_picture(struct sna *sna,
 	channel->card_format = -1;
 
 	if (sna_picture_is_solid(picture, &color))
-		return gen5_composite_solid_init(sna, channel, color);
+		return gen4_channel_init_solid(sna, channel, color);
 
 	if (picture->pDrawable == NULL) {
 		int ret;
 
 		if (picture->pSourcePict->type == SourcePictTypeLinear)
-			return gen5_composite_linear_init(sna, picture, channel,
-							  x, y,
-							  w, h,
-							  dst_x, dst_y);
+			return gen4_channel_init_linear(sna, picture, channel,
+							x, y,
+							w, h,
+							dst_x, dst_y);
 
 		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
 		ret = -1;
@@ -1826,7 +1692,7 @@ reuse_source(struct sna *sna,
 	}
 
 	if (sna_picture_is_solid(mask, &color))
-		return gen5_composite_solid_init(sna, mc, color);
+		return gen4_channel_init_solid(sna, mc, color);
 
 	if (sc->is_solid)
 		return false;
@@ -1921,7 +1787,7 @@ gen5_render_composite(struct sna *sna,
 		DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
 		goto cleanup_dst;
 	case 0:
-		if (!gen5_composite_solid_init(sna, &tmp->src, 0))
+		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
 			goto cleanup_dst;
 		/* fall through to fixup */
 	case 1:
@@ -1972,7 +1838,7 @@ gen5_render_composite(struct sna *sna,
 				DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
 				goto cleanup_src;
 			case 0:
-				if (!gen5_composite_solid_init(sna, &tmp->mask, 0))
+				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
 					goto cleanup_src;
 				/* fall through to fixup */
 			case 1:
@@ -1983,14 +1849,13 @@ gen5_render_composite(struct sna *sna,
 
 		tmp->is_affine &= tmp->mask.is_affine;
 	}
-	gen4_choose_composite_emitter(tmp);
 
 	tmp->u.gen5.wm_kernel =
 		gen5_choose_composite_kernel(tmp->op,
 					     tmp->mask.bo != NULL,
 					     tmp->has_component_alpha,
 					     tmp->is_affine);
-	tmp->u.gen5.ve_id = gen4_choose_composite_vertex_buffer(tmp);
+	tmp->u.gen5.ve_id = gen4_choose_composite_emitter(tmp);
 
 	tmp->blt   = gen5_render_composite_blt;
 	tmp->box   = gen5_render_composite_box;
@@ -2162,7 +2027,7 @@ gen5_render_composite_spans(struct sna *sna,
 	case -1:
 		goto cleanup_dst;
 	case 0:
-		if (!gen5_composite_solid_init(sna, &tmp->base.src, 0))
+		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
 			goto cleanup_dst;
 		/* fall through to fixup */
 	case 1:
@@ -2176,10 +2041,8 @@ gen5_render_composite_spans(struct sna *sna,
 	tmp->base.has_component_alpha = false;
 	tmp->base.need_magic_ca_pass = false;
 
-	gen4_choose_spans_emitter(tmp);
-
+	tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(tmp);
 	tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
-	tmp->base.u.gen5.ve_id = gen4_choose_spans_vertex_buffer(&tmp->base);
 
 	tmp->box   = gen5_render_composite_spans_box;
 	tmp->boxes = gen5_render_composite_spans_boxes;
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index d484dde..77bd853 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -42,6 +42,7 @@
 
 #include "brw/brw.h"
 #include "gen6_render.h"
+#include "gen4_source.h"
 #include "gen4_vertex.h"
 
 #define NO_COMPOSITE 0
@@ -1602,144 +1603,6 @@ gen6_render_video(struct sna *sna,
 	return true;
 }
 
-static bool
-gen6_composite_solid_init(struct sna *sna,
-			  struct sna_composite_channel *channel,
-			  uint32_t color)
-{
-	DBG(("%s: color=%x\n", __FUNCTION__, color));
-
-	channel->filter = PictFilterNearest;
-	channel->repeat = RepeatNormal;
-	channel->is_affine = true;
-	channel->is_solid  = true;
-	channel->is_opaque = (color >> 24) == 0xff;
-	channel->transform = NULL;
-	channel->width  = 1;
-	channel->height = 1;
-	channel->card_format = GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
-
-	channel->bo = sna_render_get_solid(sna, color);
-
-	channel->scale[0]  = channel->scale[1]  = 1;
-	channel->offset[0] = channel->offset[1] = 0;
-	return channel->bo != NULL;
-}
-
-static bool
-gen6_composite_linear_init(struct sna *sna,
-			   PicturePtr picture,
-			   struct sna_composite_channel *channel,
-			   int x, int y,
-			   int w, int h,
-			   int dst_x, int dst_y)
-{
-	PictLinearGradient *linear =
-		(PictLinearGradient *)picture->pSourcePict;
-	pixman_fixed_t tx, ty;
-	float x0, y0, sf;
-	float dx, dy;
-
-	DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
-	     __FUNCTION__,
-	     pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y),
-	     pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y),
-	     x, y, dst_x, dst_y, w, h));
-
-	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
-		return 0;
-
-	if (!sna_transform_is_affine(picture->transform)) {
-		DBG(("%s: fallback due to projective transform\n",
-		     __FUNCTION__));
-		return sna_render_picture_fixup(sna, picture, channel,
-						x, y, w, h, dst_x, dst_y);
-	}
-
-	channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
-	if (!channel->bo)
-		return 0;
-
-	channel->filter = PictFilterNearest;
-	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
-	channel->width  = channel->bo->pitch / 4;
-	channel->height = 1;
-	channel->pict_format = PICT_a8r8g8b8;
-
-	channel->scale[0]  = channel->scale[1]  = 1;
-	channel->offset[0] = channel->offset[1] = 0;
-
-	if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
-		dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x);
-		dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y);
-
-		x0 = pixman_fixed_to_double(linear->p1.x);
-		y0 = pixman_fixed_to_double(linear->p1.y);
-
-		if (tx | ty) {
-			x0 -= pixman_fixed_to_double(tx);
-			y0 -= pixman_fixed_to_double(ty);
-		}
-	} else {
-		struct pixman_f_vector p1, p2;
-		struct pixman_f_transform m, inv;
-
-		pixman_f_transform_from_pixman_transform(&m, picture->transform);
-		DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
-		     __FUNCTION__,
-		     m.m[0][0], m.m[0][1], m.m[0][2],
-		     m.m[1][0], m.m[1][1], m.m[1][2],
-		     m.m[2][0], m.m[2][1], m.m[2][2]));
-		if (!pixman_f_transform_invert(&inv, &m))
-			return 0;
-
-		p1.v[0] = pixman_fixed_to_double(linear->p1.x);
-		p1.v[1] = pixman_fixed_to_double(linear->p1.y);
-		p1.v[2] = 1.;
-		pixman_f_transform_point(&inv, &p1);
-
-		p2.v[0] = pixman_fixed_to_double(linear->p2.x);
-		p2.v[1] = pixman_fixed_to_double(linear->p2.y);
-		p2.v[2] = 1.;
-		pixman_f_transform_point(&inv, &p2);
-
-		DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
-		     __FUNCTION__,
-		     p1.v[0], p1.v[1], p1.v[2],
-		     p2.v[0], p2.v[1], p2.v[2]));
-
-		dx = p2.v[0] - p1.v[0];
-		dy = p2.v[1] - p1.v[1];
-
-		x0 = p1.v[0];
-		y0 = p1.v[1];
-	}
-
-	sf = dx*dx + dy*dy;
-	dx /= sf;
-	dy /= sf;
-
-	channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx);
-	channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy);
-	channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y));
-
-	channel->embedded_transform.matrix[1][0] = 0;
-	channel->embedded_transform.matrix[1][1] = 0;
-	channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5);
-
-	channel->embedded_transform.matrix[2][0] = 0;
-	channel->embedded_transform.matrix[2][1] = 0;
-	channel->embedded_transform.matrix[2][2] = pixman_fixed_1;
-
-	channel->transform = &channel->embedded_transform;
-	channel->is_affine = 1;
-
-	DBG(("%s: dx=%f, dy=%f, offset=%f\n",
-	     __FUNCTION__, dx, dy, -dx*(x0-x+dst_x) + -dy*(y0-y+dst_y)));
-
-	return channel->bo != NULL;
-}
-
 static int
 gen6_composite_picture(struct sna *sna,
 		       PicturePtr picture,
@@ -1760,16 +1623,16 @@ gen6_composite_picture(struct sna *sna,
 	channel->card_format = -1;
 
 	if (sna_picture_is_solid(picture, &color))
-		return gen6_composite_solid_init(sna, channel, color);
+		return gen4_channel_init_solid(sna, channel, color);
 
 	if (picture->pDrawable == NULL) {
 		int ret;
 
 		if (picture->pSourcePict->type == SourcePictTypeLinear)
-			return gen6_composite_linear_init(sna, picture, channel,
-							  x, y,
-							  w, h,
-							  dst_x, dst_y);
+			return gen4_channel_init_linear(sna, picture, channel,
+							x, y,
+							w, h,
+							dst_x, dst_y);
 
 		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
 		ret = -1;
@@ -2135,7 +1998,7 @@ reuse_source(struct sna *sna,
 	}
 
 	if (sna_picture_is_solid(mask, &color))
-		return gen6_composite_solid_init(sna, mc, color);
+		return gen4_channel_init_solid(sna, mc, color);
 
 	if (sc->is_solid)
 		return false;
@@ -2242,7 +2105,7 @@ gen6_render_composite(struct sna *sna,
 	case -1:
 		goto cleanup_dst;
 	case 0:
-		if (!gen6_composite_solid_init(sna, &tmp->src, 0))
+		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
 			goto cleanup_dst;
 		/* fall through to fixup */
 	case 1:
@@ -2295,7 +2158,7 @@ gen6_render_composite(struct sna *sna,
 			case -1:
 				goto cleanup_src;
 			case 0:
-				if (!gen6_composite_solid_init(sna, &tmp->mask, 0))
+				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
 					goto cleanup_src;
 				/* fall through to fixup */
 			case 1:
@@ -2306,7 +2169,6 @@ gen6_render_composite(struct sna *sna,
 
 		tmp->is_affine &= tmp->mask.is_affine;
 	}
-	gen4_choose_composite_emitter(tmp);
 
 	tmp->u.gen6.flags =
 		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
@@ -2320,7 +2182,7 @@ gen6_render_composite(struct sna *sna,
 							    tmp->mask.bo != NULL,
 							    tmp->has_component_alpha,
 							    tmp->is_affine),
-			       gen4_choose_composite_vertex_buffer(tmp));
+			       gen4_choose_composite_emitter(tmp));
 
 	tmp->blt   = gen6_render_composite_blt;
 	tmp->box   = gen6_render_composite_box;
@@ -2498,7 +2360,7 @@ gen6_render_composite_spans(struct sna *sna,
 	case -1:
 		goto cleanup_dst;
 	case 0:
-		if (!gen6_composite_solid_init(sna, &tmp->base.src, 0))
+		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
 			goto cleanup_dst;
 		/* fall through to fixup */
 	case 1:
@@ -2510,8 +2372,6 @@ gen6_render_composite_spans(struct sna *sna,
 	tmp->base.is_affine = tmp->base.src.is_affine;
 	tmp->base.need_magic_ca_pass = false;
 
-	gen4_choose_spans_emitter(tmp);
-
 	tmp->base.u.gen6.flags =
 		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
 					      tmp->base.src.repeat,
@@ -2519,7 +2379,7 @@ gen6_render_composite_spans(struct sna *sna,
 					      SAMPLER_EXTEND_PAD),
 			       gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
 			       GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
-			       gen4_choose_spans_vertex_buffer(&tmp->base));
+			       gen4_choose_spans_emitter(tmp));
 
 	tmp->box   = gen6_render_composite_spans_box;
 	tmp->boxes = gen6_render_composite_spans_boxes;
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 8a1cfff..478a252 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -42,6 +42,7 @@
 
 #include "brw/brw.h"
 #include "gen7_render.h"
+#include "gen4_source.h"
 #include "gen4_vertex.h"
 
 #define NO_COMPOSITE 0
@@ -1727,144 +1728,6 @@ gen7_render_video(struct sna *sna,
 	return true;
 }
 
-static bool
-gen7_composite_solid_init(struct sna *sna,
-			  struct sna_composite_channel *channel,
-			  uint32_t color)
-{
-	DBG(("%s: color=%x\n", __FUNCTION__, color));
-
-	channel->filter = PictFilterNearest;
-	channel->repeat = RepeatNormal;
-	channel->is_affine = true;
-	channel->is_solid  = true;
-	channel->is_opaque = (color >> 24) == 0xff;
-	channel->transform = NULL;
-	channel->width  = 1;
-	channel->height = 1;
-	channel->card_format = GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
-
-	channel->bo = sna_render_get_solid(sna, color);
-
-	channel->scale[0]  = channel->scale[1]  = 1;
-	channel->offset[0] = channel->offset[1] = 0;
-	return channel->bo != NULL;
-}
-
-static bool
-gen7_composite_linear_init(struct sna *sna,
-			   PicturePtr picture,
-			   struct sna_composite_channel *channel,
-			   int x, int y,
-			   int w, int h,
-			   int dst_x, int dst_y)
-{
-	PictLinearGradient *linear =
-		(PictLinearGradient *)picture->pSourcePict;
-	pixman_fixed_t tx, ty;
-	float x0, y0, sf;
-	float dx, dy;
-
-	DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
-	     __FUNCTION__,
-	     pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y),
-	     pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y),
-	     x, y, dst_x, dst_y, w, h));
-
-	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
-		return 0;
-
-	if (!sna_transform_is_affine(picture->transform)) {
-		DBG(("%s: fallback due to projective transform\n",
-		     __FUNCTION__));
-		return sna_render_picture_fixup(sna, picture, channel,
-						x, y, w, h, dst_x, dst_y);
-	}
-
-	channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
-	if (!channel->bo)
-		return 0;
-
-	channel->filter = PictFilterNearest;
-	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
-	channel->width  = channel->bo->pitch / 4;
-	channel->height = 1;
-	channel->pict_format = PICT_a8r8g8b8;
-
-	channel->scale[0]  = channel->scale[1]  = 1;
-	channel->offset[0] = channel->offset[1] = 0;
-
-	if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
-		dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x);
-		dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y);
-
-		x0 = pixman_fixed_to_double(linear->p1.x);
-		y0 = pixman_fixed_to_double(linear->p1.y);
-
-		if (tx | ty) {
-			x0 -= pixman_fixed_to_double(tx);
-			y0 -= pixman_fixed_to_double(ty);
-		}
-	} else {
-		struct pixman_f_vector p1, p2;
-		struct pixman_f_transform m, inv;
-
-		pixman_f_transform_from_pixman_transform(&m, picture->transform);
-		DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
-		     __FUNCTION__,
-		     m.m[0][0], m.m[0][1], m.m[0][2],
-		     m.m[1][0], m.m[1][1], m.m[1][2],
-		     m.m[2][0], m.m[2][1], m.m[2][2]));
-		if (!pixman_f_transform_invert(&inv, &m))
-			return 0;
-
-		p1.v[0] = pixman_fixed_to_double(linear->p1.x);
-		p1.v[1] = pixman_fixed_to_double(linear->p1.y);
-		p1.v[2] = 1.;
-		pixman_f_transform_point(&inv, &p1);
-
-		p2.v[0] = pixman_fixed_to_double(linear->p2.x);
-		p2.v[1] = pixman_fixed_to_double(linear->p2.y);
-		p2.v[2] = 1.;
-		pixman_f_transform_point(&inv, &p2);
-
-		DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
-		     __FUNCTION__,
-		     p1.v[0], p1.v[1], p1.v[2],
-		     p2.v[0], p2.v[1], p2.v[2]));
-
-		dx = p2.v[0] - p1.v[0];
-		dy = p2.v[1] - p1.v[1];
-
-		x0 = p1.v[0];
-		y0 = p1.v[1];
-	}
-
-	sf = dx*dx + dy*dy;
-	dx /= sf;
-	dy /= sf;
-
-	channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx);
-	channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy);
-	channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y));
-
-	channel->embedded_transform.matrix[1][0] = 0;
-	channel->embedded_transform.matrix[1][1] = 0;
-	channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5);
-
-	channel->embedded_transform.matrix[2][0] = 0;
-	channel->embedded_transform.matrix[2][1] = 0;
-	channel->embedded_transform.matrix[2][2] = pixman_fixed_1;
-
-	channel->transform = &channel->embedded_transform;
-	channel->is_affine = 1;
-
-	DBG(("%s: dx=%f, dy=%f, offset=%f\n",
-	     __FUNCTION__, dx, dy, -dx*(x0-x+dst_x) + -dy*(y0-y+dst_y)));
-
-	return channel->bo != NULL;
-}
-
 static int
 gen7_composite_picture(struct sna *sna,
 		       PicturePtr picture,
@@ -1885,16 +1748,16 @@ gen7_composite_picture(struct sna *sna,
 	channel->card_format = -1;
 
 	if (sna_picture_is_solid(picture, &color))
-		return gen7_composite_solid_init(sna, channel, color);
+		return gen4_channel_init_solid(sna, channel, color);
 
 	if (picture->pDrawable == NULL) {
 		int ret;
 
 		if (picture->pSourcePict->type == SourcePictTypeLinear)
-			return gen7_composite_linear_init(sna, picture, channel,
-							  x, y,
-							  w, h,
-							  dst_x, dst_y);
+			return gen4_channel_init_linear(sna, picture, channel,
+							x, y,
+							w, h,
+							dst_x, dst_y);
 
 		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
 		ret = -1;
@@ -2259,7 +2122,7 @@ reuse_source(struct sna *sna,
 	}
 
 	if (sna_picture_is_solid(mask, &color))
-		return gen7_composite_solid_init(sna, mc, color);
+		return gen4_channel_init_solid(sna, mc, color);
 
 	if (sc->is_solid)
 		return false;
@@ -2366,7 +2229,7 @@ gen7_render_composite(struct sna *sna,
 	case -1:
 		goto cleanup_dst;
 	case 0:
-		if (!gen7_composite_solid_init(sna, &tmp->src, 0))
+		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
 			goto cleanup_dst;
 		/* fall through to fixup */
 	case 1:
@@ -2419,7 +2282,7 @@ gen7_render_composite(struct sna *sna,
 			case -1:
 				goto cleanup_src;
 			case 0:
-				if (!gen7_composite_solid_init(sna, &tmp->mask, 0))
+				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
 					goto cleanup_src;
 				/* fall through to fixup */
 			case 1:
@@ -2431,8 +2294,6 @@ gen7_render_composite(struct sna *sna,
 		tmp->is_affine &= tmp->mask.is_affine;
 	}
 
-	gen4_choose_composite_emitter(tmp);
-
 	tmp->u.gen7.flags =
 		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
 					      tmp->src.repeat,
@@ -2445,7 +2306,7 @@ gen7_render_composite(struct sna *sna,
 							    tmp->mask.bo != NULL,
 							    tmp->has_component_alpha,
 							    tmp->is_affine),
-			       gen4_choose_composite_vertex_buffer(tmp));
+			       gen4_choose_composite_emitter(tmp));
 
 	tmp->blt   = gen7_render_composite_blt;
 	tmp->box   = gen7_render_composite_box;
@@ -2603,7 +2464,7 @@ gen7_render_composite_spans(struct sna *sna,
 	case -1:
 		goto cleanup_dst;
 	case 0:
-		if (!gen7_composite_solid_init(sna, &tmp->base.src, 0))
+		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
 			goto cleanup_dst;
 		/* fall through to fixup */
 	case 1:
@@ -2615,8 +2476,6 @@ gen7_render_composite_spans(struct sna *sna,
 	tmp->base.is_affine = tmp->base.src.is_affine;
 	tmp->base.need_magic_ca_pass = false;
 
-	gen4_choose_spans_emitter(tmp);
-
 	tmp->base.u.gen7.flags =
 		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
 					      tmp->base.src.repeat,
@@ -2624,7 +2483,7 @@ gen7_render_composite_spans(struct sna *sna,
 					      SAMPLER_EXTEND_PAD),
 			       gen7_get_blend(tmp->base.op, false, tmp->base.dst.format),
 			       GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine,
-			       gen4_choose_spans_vertex_buffer(&tmp->base));
+			       gen4_choose_spans_emitter(tmp));
 
 	tmp->box   = gen7_render_composite_spans_box;
 	tmp->boxes = gen7_render_composite_spans_boxes;
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 55e3806..91b3568 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -69,10 +69,10 @@ struct sna_composite_op {
 
 		union {
 			struct {
+				float dx, dy, offset;
+			} linear;
+			struct {
 				uint32_t pixel;
-				float linear_dx;
-				float linear_dy;
-				float linear_offset;
 			} gen2;
 			struct gen3_shader_channel {
 				int type;
commit 0996ed85fd8bd79f41f28908733b85566f9e2b69
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 1 22:53:26 2013 +0000

    sna/gen2+: Precompute the affine transformation scale factors
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 687ae1f..c5ad07a 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -1870,6 +1870,8 @@ gen2_render_composite(struct sna *sna,
 			tmp->prim_emit = gen2_emit_composite_primitive_identity;
 		} else if (tmp->src.is_affine) {
 			assert(tmp->floats_per_rect == 12);
+			tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
+			tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
 			tmp->prim_emit = gen2_emit_composite_primitive_affine;
 		}
 	}
@@ -2300,8 +2302,11 @@ gen2_render_composite_spans(struct sna *sna,
 		tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
 		if (tmp->base.src.transform == NULL)
 			tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source;
-		else if (tmp->base.src.is_affine)
+		else if (tmp->base.src.is_affine) {
+			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
+			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
 			tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source;
+		}
 	}
 	tmp->base.mask.bo = NULL;
 	tmp->base.floats_per_rect = 3*tmp->base.floats_per_vertex;
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 6da3038..42e4cdd 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2977,8 +2977,11 @@ gen3_render_composite(struct sna *sna,
 					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
 				else
 					tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
-			} else if (tmp->src.is_affine)
+			} else if (tmp->src.is_affine) {
+				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
+				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
 				tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
+			}
 			break;
 		}
 	} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
@@ -3462,8 +3465,11 @@ gen3_render_composite_spans(struct sna *sna,
 	case SHADER_TEXTURE:
 		if (tmp->base.src.transform == NULL)
 			tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
-		else if (tmp->base.src.is_affine)
+		else if (tmp->base.src.is_affine) {
+			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
+			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
 			tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
+		}
 		break;
 	}
 
diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index 1f34fa6..ba88ca5 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -676,10 +676,10 @@ void gen4_choose_composite_emitter(struct sna_composite_op *tmp)
 				DBG(("%s: identity source, identity mask\n", __FUNCTION__));
 				tmp->prim_emit = emit_primitive_identity_source_mask;
 			} else if (tmp->src.is_affine) {
+				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
+				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
 				if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
 					DBG(("%s: simple src, identity mask\n", __FUNCTION__));
-					tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
-					tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
 					tmp->prim_emit = emit_primitive_simple_source_identity;
 				} else {
 					DBG(("%s: affine src, identity mask\n", __FUNCTION__));
@@ -697,10 +697,10 @@ void gen4_choose_composite_emitter(struct sna_composite_op *tmp)
 			DBG(("%s: identity src, no mask\n", __FUNCTION__));
 			tmp->prim_emit = emit_primitive_identity_source;
 		} else if (tmp->src.is_affine) {
+			tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
+			tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
 			if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
 				DBG(("%s: simple src, no mask\n", __FUNCTION__));
-				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
-				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
 				tmp->prim_emit = emit_primitive_simple_source;
 			} else {
 				DBG(("%s: affine src, no mask\n", __FUNCTION__));
@@ -906,11 +906,11 @@ void gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp)
 		tmp->prim_emit = emit_spans_identity;
 		tmp->base.floats_per_vertex = 4;
 	} else if (tmp->base.is_affine) {
-		if (!sna_affine_transform_is_rotation(tmp->base.src.transform)) {
-			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
-			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
+		tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
+		tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
+		if (!sna_affine_transform_is_rotation(tmp->base.src.transform))
 			tmp->prim_emit = emit_spans_simple;
-		} else
+		else
 			tmp->prim_emit = emit_spans_affine;
 		tmp->base.floats_per_vertex = 4;
 	} else
diff --git a/src/sna/sna.h b/src/sna/sna.h
index c43daaa..6fe9e5e 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -606,12 +606,13 @@ _sna_get_transformed_scaled(int x, int y,
 			    const PictTransform *transform, const float *sf,
 			    float *x_out, float *y_out)
 {
+	*x_out = sf[0] * (transform->matrix[0][0] * x +
+			  transform->matrix[0][1] * y +
+			  transform->matrix[0][2]);
 
-	int64_t result[3];
-
-	_sna_transform_point(transform, x, y, result);
-	*x_out = result[0] * sf[0] / (double)result[2];
-	*y_out = result[1] * sf[1] / (double)result[2];
+	*y_out = sf[1] * (transform->matrix[1][0] * x +
+			  transform->matrix[1][1] * y +
+			  transform->matrix[1][2]);
 }
 
 void
commit d36cae801f1dcb06d4f93f2f27cc9b9de73e89c9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 1 21:03:06 2013 +0000

    sna/gen4+: Tidy special handling of 2s2s vertex elements
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 405eb7d..893c5ee 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -876,51 +876,12 @@ gen4_emit_vertex_elements(struct sna *sna,
 	 */
 	struct gen4_render_state *render = &sna->render_state.gen4;
 	uint32_t src_format, dw;
-	int src_offset, dst_offset;
 	int id = op->u.gen4.ve_id;
 
 	if (render->ve_id == id)
 		return;
 	render->ve_id = id;
 
-	if (id == VERTEX_2s2s) {
-		DBG(("%s: setup COPY\n", __FUNCTION__));
-		assert(op->floats_per_rect == 6);
-
-		OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | ((2 * (1 + 2)) + 1 - 2));
-
-		/* x,y */
-		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
-			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
-			  0 << VE0_OFFSET_SHIFT);
-		OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
-			  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
-			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
-			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
-			  4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
-
-		/* s,t */
-		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
-			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
-			  4 << VE0_OFFSET_SHIFT);
-		OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
-			  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
-			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
-			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
-			  8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
-
-		/* magic */
-		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
-			  GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
-			  0 << VE0_OFFSET_SHIFT);
-		OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
-			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
-			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
-			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT |
-			  12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
-		return;
-	}
-
 	/* The VUE layout
 	 *    dword 0-3: position (x, y, 1.0, 1.0),
 	 *    dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
@@ -937,23 +898,26 @@ gen4_emit_vertex_elements(struct sna *sna,
 		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
 		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
 		  (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
-	src_offset = 4;
-	dst_offset = 8;
 
 	/* u0, v0, w0 */
 	/* u0, v0, w0 */
-	DBG(("%s: first channel %d floats, offset=%d\n", __FUNCTION__,
-	     id & 3, src_offset));
+	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
 	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
 	switch (id & 3) {
+	default:
+		assert(0);
+	case 0:
+		src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
+		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+		break;
 	case 1:
 		src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
 		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
 		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
 		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
 		break;
-	default:
-		assert(0);
 	case 2:
 		src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
 		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
@@ -969,17 +933,16 @@ gen4_emit_vertex_elements(struct sna *sna,
 	}
 	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
 		  src_format << VE0_FORMAT_SHIFT |
-		  src_offset << VE0_OFFSET_SHIFT);
-	OUT_BATCH(dw | dst_offset << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
-	src_offset += (id & 3) * sizeof(float);
-	dst_offset += 4;
+		  4 << VE0_OFFSET_SHIFT);
+	OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
 
 	/* u1, v1, w1 */
 	if (id >> 2) {
-		DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__,
-		     (id >> 2) & 3, src_offset));
+		unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
+		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
+		     id >> 2, src_offset));
 		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
-		switch ((id >> 2) & 3) {
+		switch (id >> 2) {
 		case 1:
 			src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
 			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
@@ -1004,7 +967,7 @@ gen4_emit_vertex_elements(struct sna *sna,
 		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
 			  src_format << VE0_FORMAT_SHIFT |
 			  src_offset << VE0_OFFSET_SHIFT);
-		OUT_BATCH(dw | dst_offset << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
+		OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
 	} else {
 		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
 			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
@@ -1013,7 +976,7 @@ gen4_emit_vertex_elements(struct sna *sna,
 			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
 			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
 			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
-			  dst_offset << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
+			  12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
 	}
 }
 
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 5be56dd..15f5f58 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -871,7 +871,6 @@ gen5_emit_vertex_elements(struct sna *sna,
 	int id = op->u.gen5.ve_id;
 	bool has_mask = id >> 2;
 	uint32_t format, dw;
-	int offset;
 
 	if (!DBG_NO_STATE_CACHE && render->ve_id == id)
 		return;
@@ -879,40 +878,6 @@ gen5_emit_vertex_elements(struct sna *sna,
 	DBG(("%s: changing %d -> %d\n", __FUNCTION__, render->ve_id, id));
 	render->ve_id = id;
 
-	if (id == VERTEX_2s2s) {
-		DBG(("%s: setup COPY\n", __FUNCTION__));
-		assert(op->floats_per_rect == 6);
-
-		OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS | ((2 * (1 + 2)) + 1 - 2));
-
-		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
-			  GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
-			  0 << VE0_OFFSET_SHIFT);
-		OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
-			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
-			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
-			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
-
-		/* x,y */
-		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
-			  GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
-			  0 << VE0_OFFSET_SHIFT);
-		OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
-			  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
-			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
-			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
-
-		/* s,t */
-		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
-			  GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
-			  4 << VE0_OFFSET_SHIFT);
-		OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
-			  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
-			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
-			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
-		return;
-	}
-
 	/* The VUE layout
 	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
 	 *    dword 4-7: position (x, y, 1.0, 1.0),
@@ -940,21 +905,26 @@ gen5_emit_vertex_elements(struct sna *sna,
 		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
 		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
 		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
-	offset = 4;
 
 	/* u0, v0, w0 */
-	DBG(("%s: id=%d, first channel %d floats, offset=%d\n", __FUNCTION__,
-	     id, id & 3, offset));
+	DBG(("%s: id=%d, first channel %d floats, offset=4b\n", __FUNCTION__,
+	     id, id & 3));
 	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
 	switch (id & 3) {
+	default:
+		assert(0);
+	case 0:
+		format = GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT;
+		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+		break;
 	case 1:
 		format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
 		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
 		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
 		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
 		break;
-	default:
-		assert(0);
 	case 2:
 		format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
 		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
@@ -969,16 +939,16 @@ gen5_emit_vertex_elements(struct sna *sna,
 		break;
 	}
 	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
-		  format | offset << VE0_OFFSET_SHIFT);
+		  format | 4 << VE0_OFFSET_SHIFT);
 	OUT_BATCH(dw);
 
 	/* u1, v1, w1 */
 	if (has_mask) {
-		offset += (id & 3) * sizeof(float);
-		DBG(("%s: id=%x, second channel %d floats, offset=%d\n", __FUNCTION__,
-		     id, (id >> 2) & 3, offset));
+		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
+		DBG(("%s: id=%x, second channel %d floats, offset=%db\n", __FUNCTION__,
+		     id, id >> 2, offset));
 		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
-		switch ((id >> 2) & 3) {
+		switch (id >> 2) {
 		case 1:
 			format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
 			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 142fc04..d484dde 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -731,7 +731,7 @@ gen6_emit_vertex_elements(struct sna *sna,
 	 *    texture coordinate 1 if (has_mask is true): same as above
 	 */
 	struct gen6_render_state *render = &sna->render_state.gen6;
-	uint32_t src_format, dw, offset;
+	uint32_t src_format, dw;
 	int id = GEN6_VERTEX(op->u.gen6.flags);
 	bool has_mask;
 
@@ -741,40 +741,6 @@ gen6_emit_vertex_elements(struct sna *sna,
 		return;
 	render->ve_id = id;
 
-	if (id == VERTEX_2s2s) {
-		DBG(("%s: setup COPY\n", __FUNCTION__));
-
-		OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
-			  ((2 * (1 + 2)) + 1 - 2));
-
-		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
-			  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
-			  0 << VE0_OFFSET_SHIFT);
-		OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
-			  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
-			  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
-			  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
-
-		/* x,y */
-		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
-			  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
-			  0 << VE0_OFFSET_SHIFT);
-		OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
-			  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
-			  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
-			  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
-
-		/* u0, v0, w0 */
-		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
-			  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
-			  4 << VE0_OFFSET_SHIFT);
-		OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
-			  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
-			  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
-			  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
-		return;
-	}
-
 	/* The VUE layout
 	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
 	 *    dword 4-7: position (x, y, 1.0, 1.0),
@@ -803,20 +769,25 @@ gen6_emit_vertex_elements(struct sna *sna,
 		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
 		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
 		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
-	offset = 4;
 
 	/* u0, v0, w0 */
-	DBG(("%s: first channel %d floats, offset=%d\n", __FUNCTION__, id & 3, offset));
+	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
 	dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
 	switch (id & 3) {
+	default:
+		assert(0);
+	case 0:
+		src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
+		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
+		break;
 	case 1:
 		src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
 		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
 		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
 		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
 		break;
-	default:
-		assert(0);
 	case 2:
 		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
 		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
@@ -832,15 +803,15 @@ gen6_emit_vertex_elements(struct sna *sna,
 	}
 	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
 		  src_format << VE0_FORMAT_SHIFT |
-		  offset << VE0_OFFSET_SHIFT);
+		  4 << VE0_OFFSET_SHIFT);
 	OUT_BATCH(dw);
-	offset += (id & 3) * sizeof(float);
 
 	/* u1, v1, w1 */
 	if (has_mask) {
-		DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset));
+		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
+		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
 		dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
-		switch ((id >> 2) & 3) {
+		switch (id >> 2) {
 		case 1:
 			src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
 			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index ca688d4..8a1cfff 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -862,7 +862,7 @@ gen7_emit_vertex_elements(struct sna *sna,
 	 *    texture coordinate 1 if (has_mask is true): same as above
 	 */
 	struct gen7_render_state *render = &sna->render_state.gen7;
-	uint32_t src_format, dw, offset;
+	uint32_t src_format, dw;
 	int id = GEN7_VERTEX(op->u.gen7.flags);
 	bool has_mask;
 
@@ -872,39 +872,6 @@ gen7_emit_vertex_elements(struct sna *sna,
 		return;
 	render->ve_id = id;
 
-	if (id == VERTEX_2s2s) {
-		DBG(("%s: setup COPY\n", __FUNCTION__));
-
-		OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
-			  ((2 * (1 + 2)) + 1 - 2));
-
-		OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
-			  GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
-			  0 << GEN7_VE0_OFFSET_SHIFT);
-		OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
-			  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT |
-			  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
-			  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);
-
-		/* x,y */
-		OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
-			  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
-			  0 << GEN7_VE0_OFFSET_SHIFT); /* offsets vb in bytes */
-		OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
-			  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
-			  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
-			  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
-
-		OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
-			  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
-			  4 << GEN7_VE0_OFFSET_SHIFT);	/* offset vb in bytes */
-		OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
-			  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
-			  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
-			  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
-		return;
-	}
-
 	/* The VUE layout
 	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
 	 *    dword 4-7: position (x, y, 1.0, 1.0),
@@ -933,20 +900,25 @@ gen7_emit_vertex_elements(struct sna *sna,
 		  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
 		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
 		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
-	offset = 4;
 
 	/* u0, v0, w0 */
-	DBG(("%s: first channel %d floats, offset=%d\n", __FUNCTION__, id & 3, offset));
+	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
 	dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
 	switch (id & 3) {
+	default:
+		assert(0);
+	case 0:
+		src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED;
+		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
+		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
+		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
+		break;
 	case 1:
 		src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
 		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
 		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
 		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
 		break;
-	default:
-		assert(0);
 	case 2:
 		src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
 		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
@@ -962,15 +934,15 @@ gen7_emit_vertex_elements(struct sna *sna,
 	}
 	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
 		  src_format << GEN7_VE0_FORMAT_SHIFT |
-		  offset << GEN7_VE0_OFFSET_SHIFT);
+		  4 << GEN7_VE0_OFFSET_SHIFT);
 	OUT_BATCH(dw);
-	offset += (id & 3) * sizeof(float);
 
 	/* u1, v1, w1 */
 	if (has_mask) {
-		DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset));
+		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
+		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
 		dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
-		switch ((id >> 2) & 3) {
+		switch (id >> 2) {
 		case 1:
 			src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
 			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
commit 8582c6f0bbe1bf01324b46933ff2f50c65f2a82d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 1 20:53:12 2013 +0000

    sna/gen6+: Remove vestigial CC viewport state
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index c152fa4..142fc04 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -434,7 +434,7 @@ gen6_emit_viewports(struct sna *sna)
 		  (4 - 2));
 	OUT_BATCH(0);
 	OUT_BATCH(0);
-	OUT_BATCH(sna->render_state.gen6.cc_vp);
+	OUT_BATCH(0);
 }
 
 static void
@@ -1033,16 +1033,6 @@ sampler_fill_init(struct gen6_sampler_state *ss)
 	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
 }
 
-static uint32_t gen6_create_cc_viewport(struct sna_static_stream *stream)
-{
-	struct gen6_cc_viewport vp;
-
-	vp.min_depth = -1.e35;
-	vp.max_depth = 1.e35;
-
-	return sna_static_stream_add(stream, &vp, sizeof(vp), 32);
-}
-
 static uint32_t
 gen6_tiling_bits(uint32_t tiling)
 {
@@ -3669,7 +3659,6 @@ static bool gen6_render_setup(struct sna *sna)
 		}
 	}
 
-	state->cc_vp = gen6_create_cc_viewport(&general);
 	state->cc_blend = gen6_composite_create_blend_state(&general);
 
 	state->general_bo = sna_static_stream_fini(sna, &general);
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 0cbbf80..ca688d4 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -624,9 +624,9 @@ gen7_emit_cc_invariant(struct sna *sna)
 	OUT_BATCH(0);
 #endif
 
-	assert(is_aligned(sna->render_state.gen7.cc_vp, 32));
+	/* XXX clear to be safe */
 	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
-	OUT_BATCH(sna->render_state.gen7.cc_vp);
+	OUT_BATCH(0);
 }
 
 static void
@@ -1165,16 +1165,6 @@ sampler_fill_init(struct gen7_sampler_state *ss)
 	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
 }
 
-static uint32_t gen7_create_cc_viewport(struct sna_static_stream *stream)
-{
-	struct gen7_cc_viewport vp;
-
-	vp.min_depth = -1.e35;
-	vp.max_depth = 1.e35;
-
-	return sna_static_stream_add(stream, &vp, sizeof(vp), 32);
-}
-
 static uint32_t
 gen7_tiling_bits(uint32_t tiling)
 {
@@ -3753,7 +3743,6 @@ static bool gen7_render_setup(struct sna *sna)
 		}
 	}
 
-	state->cc_vp = gen7_create_cc_viewport(&general);
 	state->cc_blend = gen7_composite_create_blend_state(&general);
 
 	state->general_bo = sna_static_stream_fini(sna, &general);
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index b957a9f..55e3806 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -403,7 +403,6 @@ struct gen6_render_state {
 	uint32_t wm_state;
 	uint32_t wm_kernel[GEN6_KERNEL_COUNT][3];
 
-	uint32_t cc_vp;
 	uint32_t cc_blend;
 
 	uint32_t drawrect_offset;
@@ -453,7 +452,6 @@ struct gen7_render_state {
 	uint32_t wm_state;
 	uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3];
 
-	uint32_t cc_vp;
 	uint32_t cc_blend;
 
 	uint32_t drawrect_offset;
commit 24264af2912f9abae5aff2a6fb5a50383d9e33be
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 1 20:39:23 2013 +0000

    sna: Fast path inplace addition of solid trapezoids
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index c8adcdb..687ae1f 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -1535,12 +1535,6 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
-untransformed(PicturePtr p)
-{
-	return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
 need_upload(PicturePtr p)
 {
 	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 03b4cd8..6da3038 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2583,12 +2583,6 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
-untransformed(PicturePtr p)
-{
-	return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
 need_upload(PicturePtr p)
 {
 	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 5b137f0..405eb7d 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1732,12 +1732,6 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
-untransformed(PicturePtr p)
-{
-	return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
 need_upload(PicturePtr p)
 {
 	return p->pDrawable && untransformed(p) && !is_gpu(p->pDrawable);
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 5b48537..5be56dd 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -1708,12 +1708,6 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
-untransformed(PicturePtr p)
-{
-	return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
 need_upload(PicturePtr p)
 {
 	return p->pDrawable && untransformed(p) && !is_gpu(p->pDrawable);
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 65b4dc4..c152fa4 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2028,12 +2028,6 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
-untransformed(PicturePtr p)
-{
-	return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
 need_upload(PicturePtr p)
 {
 	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 173a558..0cbbf80 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2151,12 +2151,6 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
-untransformed(PicturePtr p)
-{
-	return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
 need_upload(PicturePtr p)
 {
 	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index f4c2b1c..3cc79d3 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -4593,7 +4593,7 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
 	if (bo->domain != DOMAIN_CPU) {
 		struct drm_i915_gem_set_domain set_domain;
 
-		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
+		DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
 		     bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle)));
 
 		VG_CLEAR(set_domain);
@@ -4616,7 +4616,7 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
 	if (bo->domain != DOMAIN_GTT) {
 		struct drm_i915_gem_set_domain set_domain;
 
-		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
+		DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
 		     bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle)));
 
 		VG_CLEAR(set_domain);
diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h
index 7314889..2feb7ca 100644
--- a/src/sna/sna_render_inline.h
+++ b/src/sna/sna_render_inline.h
@@ -250,4 +250,11 @@ inline static bool dst_is_cpu(PixmapPtr pixmap)
 	return priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage);
 }
 
+inline static bool
+untransformed(PicturePtr p)
+{
+	return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+
 #endif /* SNA_RENDER_INLINE_H */
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 8cd987e..d4899e9 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -4936,6 +4936,45 @@ unbounded_pass:
 }
 
 static void
+pixmask_span_solid(struct sna *sna,
+		   struct sna_composite_spans_op *op,
+		   pixman_region16_t *clip,
+		   const BoxRec *box,
+		   int coverage)
+{
+	struct pixman_inplace *pi = (struct pixman_inplace *)op;
+	if (coverage != FAST_SAMPLES_XY) {
+		coverage = coverage * 256 / FAST_SAMPLES_XY;
+		coverage -= coverage >> 8;
+		*pi->bits = mul_4x8_8(pi->color, coverage);
+	} else
+		*pi->bits = pi->color;
+	pixman_image_composite(pi->op, pi->source, NULL, pi->image,
+			       box->x1, box->y1,
+			       0, 0,
+			       pi->dx + box->x1, pi->dy + box->y1,
+			       box->x2 - box->x1, box->y2 - box->y1);
+}
+static void
+pixmask_span_solid__clipped(struct sna *sna,
+			    struct sna_composite_spans_op *op,
+			    pixman_region16_t *clip,
+			    const BoxRec *box,
+			    int coverage)
+{
+	pixman_region16_t region;
+	int n;
+
+	pixman_region_init_rects(&region, box, 1);
+	RegionIntersect(&region, &region, clip);
+	n = REGION_NUM_RECTS(&region);
+	box = REGION_RECTS(&region);
+	while (n--)
+		pixmask_span_solid(sna, op, NULL, box++, coverage);
+	pixman_region_fini(&region);
+}
+
+static void
 pixmask_span(struct sna *sna,
 	     struct sna_composite_spans_op *op,
 	     pixman_region16_t *clip,
@@ -5102,6 +5141,29 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 			tor_render(NULL, &tor, (void*)&inplace,
 				   dst->pCompositeClip, span, false);
 			tor_fini(&tor);
+		} else if (sna_picture_is_solid(src, &color)) {
+			struct pixman_inplace pi;
+
+			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
+			pi.op = op;
+			pi.color = color;
+
+			pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8, 1, 1, NULL, 0);
+			pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
+			pi.bits = pixman_image_get_data(pi.source);
+
+			if (dst->pCompositeClip->data)
+				span = pixmask_span_solid__clipped;
+			else
+				span = pixmask_span_solid;
+
+			tor_render(NULL, &tor, (void*)&pi,
+				   dst->pCompositeClip, span,
+				   false);
+			tor_fini(&tor);
+
+			pixman_image_unref(pi.source);
+			pixman_image_unref(pi.image);
 		} else {
 			struct pixman_inplace pi;
 
@@ -5546,7 +5608,7 @@ sna_composite_trapezoids(CARD8 op,
 
 	force_fallback = FORCE_FALLBACK > 0;
 	if ((too_small(priv) || DAMAGE_IS_ALL(priv->cpu_damage)) &&
-	    !picture_is_gpu(src)) {
+	    !picture_is_gpu(src) && untransformed(src)) {
 		DBG(("%s: force fallbacks --too small, %dx%d? %d, all-cpu? %d, src-is-cpu? %d\n",
 		     __FUNCTION__,
 		     dst->pDrawable->width,
commit e9a9f9b02978cb2d73c38163827eb7141ebed16c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 1 16:40:28 2013 +0000

    sna: Micro-optimise glyph_valid()
    
    Note that this requires fixing up the glyph->info if the xserver didn't
    create a GlyphPicture.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c
index f068f22..6225397 100644
--- a/src/sna/sna_glyphs.c
+++ b/src/sna/sna_glyphs.c
@@ -84,6 +84,8 @@
 
 #define N_STACK_GLYPHS 512
 
+#define glyph_valid(g) *((uint32_t *)&(g)->info.width)
+
 #if HAS_DEBUG_FULL
 static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
 {
@@ -297,7 +299,7 @@ glyph_extents(int nlist,
 		while (n--) {
 			GlyphPtr glyph = *glyphs++;
 
-			if (glyph->info.width && glyph->info.height) {
+			if (glyph_valid(glyph)) {
 				int v;
 
 				v = x - glyph->info.x;
@@ -350,14 +352,20 @@ glyph_cache(ScreenPtr screen,
 	    struct sna_render *render,
 	    GlyphPtr glyph)
 {
-	PicturePtr glyph_picture = GetGlyphPicture(glyph, screen);
-	struct sna_glyph_cache *cache = &render->glyph[PICT_FORMAT_RGB(glyph_picture->format) != 0];
+	PicturePtr glyph_picture;
+	struct sna_glyph_cache *cache;
 	struct sna_glyph *priv;
 	int size, mask, pos, s;
 
 	if (NO_GLYPH_CACHE)
 		return false;
 
+	glyph_picture = GetGlyphPicture(glyph, screen);
+	if (unlikely(glyph_picture == NULL)) {
+		glyph->info.width = glyph->info.height = 0;
+		return false;
+	}
+
 	if (glyph->info.width > GLYPH_MAX_SIZE ||
 	    glyph->info.height > GLYPH_MAX_SIZE) {
 		PixmapPtr pixmap = (PixmapPtr)glyph_picture->pDrawable;
@@ -373,6 +381,7 @@ glyph_cache(ScreenPtr screen,
 		if (glyph->info.width <= size && glyph->info.height <= size)
 			break;
 
+	cache = &render->glyph[PICT_FORMAT_RGB(glyph_picture->format) != 0];
 	s = glyph_size_to_count(size);
 	mask = glyph_count_to_mask(s);
 	pos = (cache->count + s - 1) & mask;
@@ -528,7 +537,7 @@ glyphs_to_dst(struct sna *sna,
 			struct sna_glyph priv;
 			int i;
 
-			if (glyph->info.width == 0 || glyph->info.height == 0)
+			if (!glyph_valid(glyph))
 				goto next_glyph;
 
 			priv = *sna_glyph(glyph);
@@ -540,6 +549,10 @@ glyphs_to_dst(struct sna *sna,
 				if (!glyph_cache(screen, &sna->render, glyph)) {
 					/* no cache for this glyph */
 					priv.atlas = GetGlyphPicture(glyph, screen);
+					if (unlikely(priv.atlas == NULL)) {
+						glyph->info.width = glyph->info.height = 0;
+						goto next_glyph;
+					}
 					priv.coordinate.x = priv.coordinate.y = 0;
 				} else
 					priv = *sna_glyph(glyph);
@@ -671,7 +684,7 @@ glyphs_slow(struct sna *sna,
 			BoxPtr rects;
 			int nrect;
 
-			if (glyph->info.width == 0 || glyph->info.height == 0)
+			if (!glyph_valid(glyph))
 				goto next_glyph;
 
 			priv = *sna_glyph(glyph);
@@ -679,6 +692,10 @@ glyphs_slow(struct sna *sna,
 				if (!glyph_cache(screen, &sna->render, glyph)) {
 					/* no cache for this glyph */
 					priv.atlas = GetGlyphPicture(glyph, screen);
+					if (unlikely(priv.atlas == NULL)) {
+						glyph->info.width = glyph->info.height = 0;
+						goto next_glyph;
+					}
 					priv.coordinate.x = priv.coordinate.y = 0;
 				} else
 					priv = *sna_glyph(glyph);
@@ -780,7 +797,7 @@ __sna_glyph_get_image(GlyphPtr g, ScreenPtr s)
 	int dx, dy;
 
 	p = GetGlyphPicture(g, s);
-	if (p == NULL)
+	if (unlikely(p == NULL))
 		return NULL;
 
 	image = image_from_pict(p, FALSE, &dx, &dy);
@@ -917,7 +934,7 @@ glyphs_via_mask(struct sna *sna,
 					GlyphPtr g = *glyphs++;
 					const void *ptr;
 
-					if (g->info.width == 0 || g->info.height == 0)
+					if (!glyph_valid(g))
 						goto next_pglyph;
 
 					ptr = pixman_glyph_cache_lookup(cache, g, NULL);
@@ -968,7 +985,7 @@ next_pglyph:
 				pixman_image_t *glyph_image;
 				int16_t xi, yi;
 
-				if (g->info.width == 0 || g->info.height == 0)
+				if (!glyph_valid(g))
 					goto next_image;
 
 				/* If the mask has been cropped, it is likely
@@ -984,6 +1001,8 @@ next_pglyph:
 
 				glyph_image =
 					sna_glyph_get_image(g, dst->pDrawable->pScreen);
+				if (glyph_image == NULL)
+					goto next_image;
 
 				DBG(("%s: glyph to mask (%d, %d)x(%d, %d)\n",
 				     __FUNCTION__,
@@ -1058,7 +1077,7 @@ next_image:
 				PicturePtr this_atlas;
 				struct sna_composite_rectangles r;
 
-				if (glyph->info.width == 0 || glyph->info.height == 0)
+				if (!glyph_valid(glyph))
 					goto next_glyph;
 
 				priv = sna_glyph(glyph);
@@ -1076,6 +1095,10 @@ next_image:
 					} else {
 						/* no cache for this glyph */
 						this_atlas = GetGlyphPicture(glyph, screen);
+						if (unlikely(this_atlas == NULL)) {
+							glyph->info.width = glyph->info.height = 0;
+							goto next_glyph;
+						}
 						r.src.x = r.src.y = 0;
 					}
 				}
@@ -1195,7 +1218,7 @@ glyphs_format(int nlist, GlyphListPtr list, GlyphPtr * glyphs)
 		while (n--) {
 			GlyphPtr glyph = *glyphs++;
 
-			if (glyph->info.width == 0 || glyph->info.height == 0) {
+			if (!glyph_valid(glyph)) {
 				x += glyph->info.xOff;
 				y += glyph->info.yOff;
 				continue;
@@ -1392,7 +1415,7 @@ glyphs_fallback(CARD8 op,
 				GlyphPtr g = *glyphs++;
 				const void *ptr;
 
-				if (g->info.width == 0 || g->info.height == 0)
+				if (!glyph_valid(g))
 					goto next;
 
 				ptr = pixman_glyph_cache_lookup(cache, g, NULL);
@@ -1518,7 +1541,7 @@ out:
 				GlyphPtr g = *glyphs++;
 				pixman_image_t *glyph_image;
 
-				if (g->info.width == 0 || g->info.height == 0)
+				if (!glyph_valid(g))
 					goto next_glyph;
 
 				glyph_image = sna_glyph_get_image(g, screen);
@@ -1811,7 +1834,7 @@ glyphs_via_image(struct sna *sna,
 				GlyphPtr g = *glyphs++;
 				const void *ptr;
 
-				if (g->info.width == 0 || g->info.height == 0)
+				if (!glyph_valid(g))
 					goto next_pglyph;
 
 				ptr = pixman_glyph_cache_lookup(cache, g, NULL);
@@ -1862,7 +1885,7 @@ next_pglyph:
 				pixman_image_t *glyph_image;
 				int16_t xi, yi;
 
-				if (g->info.width == 0 || g->info.height == 0)
+				if (!glyph_valid(g))
 					goto next_image;
 
 				/* If the mask has been cropped, it is likely
@@ -1878,6 +1901,8 @@ next_pglyph:
 
 				glyph_image =
 					sna_glyph_get_image(g, dst->pDrawable->pScreen);
+				if (glyph_image == NULL)
+					goto next_image;
 
 				DBG(("%s: glyph to mask (%d, %d)x(%d, %d)\n",
 				     __FUNCTION__,
commit 372c14aae8f4fd2c5865b9d23cd825dcbc33765f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 1 15:49:12 2013 +0000

    sna: Remove some obsolete Options
    
    Throttling and delayed-flush are now redundant.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/intel_options.c b/src/intel_options.c
index f7ff595..fda2e8b 100644
--- a/src/intel_options.c
+++ b/src/intel_options.c
@@ -22,9 +22,7 @@ const OptionInfoRec intel_options[] = {
 	{OPTION_XVMC,	"XvMC",		OPTV_BOOLEAN,	{0},	1},
 #endif
 #ifdef USE_SNA
-	{OPTION_THROTTLE,	"Throttle",	OPTV_BOOLEAN,	{0},	1},
 	{OPTION_ZAPHOD,		"ZaphodHeads",	OPTV_STRING,	{0},	0},
-	{OPTION_DELAYED_FLUSH,	"DelayedFlush",	OPTV_BOOLEAN,	{0},	1},
 	{OPTION_TEAR_FREE,	"TearFree",	OPTV_BOOLEAN,	{0},	0},
 	{OPTION_CRTC_PIXMAPS,	"PerCrtcPixmaps", OPTV_BOOLEAN,	{0},	0},
 #endif
diff --git a/src/intel_options.h b/src/intel_options.h
index 953fc9c..8fa7a8f 100644
--- a/src/intel_options.h
+++ b/src/intel_options.h
@@ -29,9 +29,7 @@ enum intel_options {
 #define INTEL_XVMC 1
 #endif
 #ifdef USE_SNA
-	OPTION_THROTTLE,
 	OPTION_ZAPHOD,
-	OPTION_DELAYED_FLUSH,
 	OPTION_TEAR_FREE,
 	OPTION_CRTC_PIXMAPS,
 #endif
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 267c4ff..c43daaa 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -193,13 +193,11 @@ struct sna {
 	ScrnInfoPtr scrn;
 
 	unsigned flags;
-#define SNA_NO_THROTTLE		0x1
-#define SNA_NO_DELAYED_FLUSH	0x2
-#define SNA_NO_WAIT		0x4
-#define SNA_NO_FLIP		0x8
+#define SNA_NO_WAIT		0x1
+#define SNA_NO_FLIP		0x2
+#define SNA_TRIPLE_BUFFER	0x4
 #define SNA_TEAR_FREE		0x10
 #define SNA_FORCE_SHADOW	0x20
-#define SNA_TRIPLE_BUFFER	0x40
 
 	unsigned watch_flush;
 
@@ -234,7 +232,6 @@ struct sna {
 	unsigned int tiling;
 #define SNA_TILING_FB		0x1
 #define SNA_TILING_2D		0x2
-#define SNA_TILING_3D		0x4
 #define SNA_TILING_ALL (~0)
 
 	EntityInfoPtr pEnt;
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 7def353..227db7b 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -13860,9 +13860,6 @@ static bool sna_accel_do_flush(struct sna *sna)
 		return false;
 	}
 
-	if (sna->flags & SNA_NO_DELAYED_FLUSH)
-		return true;
-
 	interval = sna->vblank_interval ?: 20;
 	if (sna->timer_active & (1<<(FLUSH_TIMER))) {
 		int32_t delta = sna->timer_expire[FLUSH_TIMER] - TIME;
@@ -13885,9 +13882,6 @@ static bool sna_accel_do_flush(struct sna *sna)
 
 static bool sna_accel_do_throttle(struct sna *sna)
 {
-	if (sna->flags & SNA_NO_THROTTLE)
-		return false;
-
 	if (sna->timer_active & (1<<(THROTTLE_TIMER))) {
 		int32_t delta = sna->timer_expire[THROTTLE_TIMER] - TIME;
 		if (delta <= 3) {
@@ -14405,15 +14399,13 @@ void sna_accel_block_handler(struct sna *sna, struct timeval **tv)
 
 	if (sna_accel_do_flush(sna))
 		sna_accel_flush(sna);
-	assert(sna->flags & SNA_NO_DELAYED_FLUSH ||
-	       sna_accel_scanout(sna) == NULL ||
+	assert(sna_accel_scanout(sna) == NULL ||
 	       sna_accel_scanout(sna)->gpu_bo->exec == NULL ||
 	       sna->timer_active & (1<<(FLUSH_TIMER)));
 
 	if (sna_accel_do_throttle(sna))
 		sna_accel_throttle(sna);
-	assert(sna->flags & SNA_NO_THROTTLE ||
-	       !sna->kgem.need_retire ||
+	assert(!sna->kgem.need_retire ||
 	       sna->timer_active & (1<<(THROTTLE_TIMER)));
 
 	if (sna_accel_do_expire(sna))
diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c
index a8cc2a5..1ebbe9a 100644
--- a/src/sna/sna_driver.c
+++ b/src/sna/sna_driver.c
@@ -563,10 +563,6 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags)
 		sna->tiling &= ~SNA_TILING_FB;
 
 	sna->flags = 0;
-	if (!xf86ReturnOptValBool(sna->Options, OPTION_THROTTLE, TRUE))
-		sna->flags |= SNA_NO_THROTTLE;
-	if (!xf86ReturnOptValBool(sna->Options, OPTION_DELAYED_FLUSH, TRUE))
-		sna->flags |= SNA_NO_DELAYED_FLUSH;
 	if (!xf86ReturnOptValBool(sna->Options, OPTION_SWAPBUFFERS_WAIT, TRUE))
 		sna->flags |= SNA_NO_WAIT;
 	if (xf86ReturnOptValBool(sna->Options, OPTION_TRIPLE_BUFFER, TRUE))
@@ -583,12 +579,6 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags)
 		   sna->tiling & SNA_TILING_FB ? "tiled" : "linear");
 	xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Pixmaps %s\n",
 		   sna->tiling & SNA_TILING_2D ? "tiled" : "linear");
-	xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "3D buffers %s\n",
-		   sna->tiling & SNA_TILING_3D ? "tiled" : "linear");
-	xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Throttling %sabled\n",
-		   sna->flags & SNA_NO_THROTTLE ? "dis" : "en");
-	xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Delayed flush %sabled\n",
-		   sna->flags & SNA_NO_DELAYED_FLUSH ? "dis" : "en");
 	xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "\"Tear free\" %sabled\n",
 		   sna->flags & SNA_TEAR_FREE ? "en" : "dis");
 	xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Forcing per-crtc-pixmaps? %s\n",
commit 65924da91da4bb617df1bb0a7c3e9d4aa475b6b1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 1 11:40:15 2013 +0000

    sna: Tidy compat interfaces
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c
index ff0c776..a8cc2a5 100644
--- a/src/sna/sna_driver.c
+++ b/src/sna/sna_driver.c
@@ -78,11 +78,6 @@ DevPrivateKeyRec sna_gc_key;
 DevPrivateKeyRec sna_window_key;
 DevPrivateKeyRec sna_glyph_key;
 
-static Bool sna_enter_vt(VT_FUNC_ARGS_DECL);
-
-/* temporary */
-extern void xf86SetCursor(ScreenPtr screen, CursorPtr pCurs, int x, int y);
-
 static void
 sna_load_palette(ScrnInfoPtr scrn, int numColors, int *indices,
 		 LOCO * colors, VisualPtr pVisual)
@@ -149,6 +144,76 @@ sna_load_palette(ScrnInfoPtr scrn, int numColors, int *indices,
 	}
 }
 
+static void
+sna_set_fallback_mode(ScrnInfoPtr scrn)
+{
+	xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn);
+	xf86OutputPtr output;
+	xf86CrtcPtr crtc;
+	DisplayModePtr mode;
+	int n;
+
+	output = xf86CompatOutput(scrn);
+	crtc = xf86CompatCrtc(scrn);
+	if (output == NULL || crtc == NULL)
+		return;
+
+	for (n = 0; n < config->num_output; n++)
+		config->output[n]->crtc = NULL;
+	for (n = 0; n < config->num_crtc; n++)
+		config->crtc[n]->enabled = FALSE;
+
+	output->crtc = crtc;
+
+	mode = xf86OutputFindClosestMode(output, scrn->currentMode);
+	if (mode &&
+	    xf86CrtcSetModeTransform(crtc, mode, RR_Rotate_0, NULL, 0, 0)) {
+		crtc->desiredMode = *mode;
+		crtc->desiredMode.prev = crtc->desiredMode.next = NULL;
+		crtc->desiredMode.name = NULL;
+		crtc->desiredMode.PrivSize = 0;
+		crtc->desiredMode.PrivFlags = 0;
+		crtc->desiredMode.Private = NULL;
+		crtc->desiredRotation = RR_Rotate_0;
+		crtc->desiredTransformPresent = FALSE;
+		crtc->desiredX = 0;
+		crtc->desiredY = 0;
+		crtc->enabled = TRUE;
+	}
+
+	xf86DisableUnusedFunctions(scrn);
+#ifdef RANDR_12_INTERFACE
+	if (scrn->pScreen->root)
+		xf86RandR12TellChanged(scrn->pScreen);
+#endif
+}
+
+static Bool sna_become_master(struct sna *sna)
+{
+	ScrnInfoPtr scrn = sna->scrn;
+
+	DBG(("%s\n", __FUNCTION__));
+
+	if (drmSetMaster(sna->kgem.fd)) {
+		sleep(2); /* XXX wait for the current master to decease */
+		if (drmSetMaster(sna->kgem.fd)) {
+			xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+					"drmSetMaster failed: %s\n",
+					strerror(errno));
+			return FALSE;
+		}
+	}
+
+	if (!xf86SetDesiredModes(scrn)) {
+		xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+			   "failed to restore desired modes on VT switch\n");
+		sna_set_fallback_mode(scrn);
+	}
+
+	sna_mode_disable_unused(sna);
+	return TRUE;
+}
+
 /**
  * Adjust the screen pixmap for the current location of the front buffer.
  * This is done at EnterVT when buffers are bound as long as the resources
@@ -157,7 +222,6 @@ sna_load_palette(ScrnInfoPtr scrn, int numColors, int *indices,
  */
 static Bool sna_create_screen_resources(ScreenPtr screen)
 {
-	ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
 	struct sna *sna = to_sna_from_screen(screen);
 
 	DBG(("%s(%dx%d@%d)\n", __FUNCTION__,
@@ -196,7 +260,7 @@ static Bool sna_create_screen_resources(ScreenPtr screen)
 
 	sna_copy_fbcon(sna);
 
-	if (!sna_enter_vt(VT_FUNC_ARGS(0))) {
+	if (!sna_become_master(sna)) {
 		xf86DrvMsg(screen->myNum, X_ERROR,
 			   "[intel] Failed to become DRM master\n");
 		goto cleanup_front;
@@ -560,9 +624,11 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags)
 static void
 sna_block_handler(BLOCKHANDLER_ARGS_DECL)
 {
-	SCREEN_PTR(arg);
-	ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
-	struct sna *sna = to_sna(scrn);
+#ifndef XF86_SCRN_INTERFACE
+	struct sna *sna = to_sna(xf86Screens[arg]);
+#else
+	struct sna *sna = to_sna_from_screen(arg);
+#endif
 	struct timeval **tv = timeout;
 
 	DBG(("%s (tv=%ld.%06ld)\n", __FUNCTION__,
@@ -577,9 +643,11 @@ sna_block_handler(BLOCKHANDLER_ARGS_DECL)
 static void
 sna_wakeup_handler(WAKEUPHANDLER_ARGS_DECL)
 {
-	SCREEN_PTR(arg);
-	ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
-	struct sna *sna = to_sna(scrn);
+#ifndef XF86_SCRN_INTERFACE
+	struct sna *sna = to_sna(xf86Screens[arg]);
+#else
+	struct sna *sna = to_sna_from_screen(arg);
+#endif
 
 	DBG(("%s\n", __FUNCTION__));
 
@@ -978,79 +1046,15 @@ static void sna_free_screen(FREE_SCREEN_ARGS_DECL)
 	sna_close_drm_master(scrn);
 }
 
-static void
-sna_set_fallback_mode(ScrnInfoPtr scrn)
-{
-	xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn);
-	xf86OutputPtr output;
-	xf86CrtcPtr crtc;
-	DisplayModePtr mode;
-	int n;
-
-	output = xf86CompatOutput(scrn);
-	crtc = xf86CompatCrtc(scrn);
-	if (output == NULL || crtc == NULL)
-		return;
-
-	for (n = 0; n < config->num_output; n++)
-		config->output[n]->crtc = NULL;
-	for (n = 0; n < config->num_crtc; n++)
-		config->crtc[n]->enabled = FALSE;
-
-	output->crtc = crtc;
-
-	mode = xf86OutputFindClosestMode(output, scrn->currentMode);
-	if (mode &&
-	    xf86CrtcSetModeTransform(crtc, mode, RR_Rotate_0, NULL, 0, 0)) {
-		crtc->desiredMode = *mode;
-		crtc->desiredMode.prev = crtc->desiredMode.next = NULL;
-		crtc->desiredMode.name = NULL;
-		crtc->desiredMode.PrivSize = 0;
-		crtc->desiredMode.PrivFlags = 0;
-		crtc->desiredMode.Private = NULL;
-		crtc->desiredRotation = RR_Rotate_0;
-		crtc->desiredTransformPresent = FALSE;
-		crtc->desiredX = 0;
-		crtc->desiredY = 0;
-		crtc->enabled = TRUE;
-	}
-
-	xf86DisableUnusedFunctions(scrn);
-#ifdef RANDR_12_INTERFACE
-	if (scrn->pScreen->root)
-		xf86RandR12TellChanged(scrn->pScreen);
-#endif
-}
-
 /*
  * This gets called when gaining control of the VT, and from ScreenInit().
  */
 static Bool sna_enter_vt(VT_FUNC_ARGS_DECL)
 {
 	SCRN_INFO_PTR(arg);
-	struct sna *sna = to_sna(scrn);
 
 	DBG(("%s\n", __FUNCTION__));
-
-	if (drmSetMaster(sna->kgem.fd)) {
-		sleep(2); /* XXX wait for the current master to decease */
-		if (drmSetMaster(sna->kgem.fd)) {
-			xf86DrvMsg(scrn->scrnIndex, X_ERROR,
-					"drmSetMaster failed: %s\n",
-					strerror(errno));
-			return FALSE;
-		}
-	}
-
-	if (!xf86SetDesiredModes(scrn)) {
-		xf86DrvMsg(scrn->scrnIndex, X_WARNING,
-			   "failed to restore desired modes on VT switch\n");
-		sna_set_fallback_mode(scrn);
-	}
-
-	sna_mode_disable_unused(sna);
-
-	return TRUE;
+	return sna_become_master(to_sna(scrn));
 }
 
 static Bool sna_switch_mode(SWITCH_MODE_ARGS_DECL)
commit 0a35d9287397031c95ebd9dc53b68e33e7dcf092
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 1 11:12:02 2013 +0000

    sna/gen2: Always try to use the BLT pipeline first
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 78c3666..c8adcdb 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -46,9 +46,6 @@
 #define NO_FILL_ONE 0
 #define NO_FILL_BOXES 0
 
-#define PREFER_BLT_FILL 1
-#define PREFER_BLT_COPY 1
-
 #define MAX_3D_SIZE 2048
 #define MAX_3D_PITCH 8192
 
@@ -1517,49 +1514,6 @@ gen2_composite_set_target(struct sna *sna,
 }
 
 static bool
-try_blt(struct sna *sna,
-	PicturePtr dst,
-	PicturePtr src,
-	int width, int height)
-{
-	uint32_t color;
-
-	if (sna->kgem.mode != KGEM_RENDER) {
-		DBG(("%s: already performing BLT\n", __FUNCTION__));
-		return true;
-	}
-
-	if (too_large(width, height)) {
-		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
-		     __FUNCTION__, width, height));
-		return true;
-	}
-
-	if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
-		DBG(("%s: target too large for 3D pipe (%d, %d)\n",
-		     __FUNCTION__,
-		     dst->pDrawable->width, dst->pDrawable->height));
-		return true;
-	}
-
-	/* If it is a solid, try to use the BLT paths */
-	if (sna_picture_is_solid(src, &color))
-		return true;
-
-	if (!src->pDrawable)
-		return false;
-
-	if (too_large(src->pDrawable->width, src->pDrawable->height)) {
-		DBG(("%s: source too large for 3D pipe (%d, %d)\n",
-		     __FUNCTION__,
-		     src->pDrawable->width, src->pDrawable->height));
-		return true;
-	}
-
-	return !is_gpu(src->pDrawable);
-}
-
-static bool
 is_unhandled_gradient(PicturePtr picture)
 {
 	if (picture->pDrawable)
@@ -1789,13 +1743,8 @@ gen2_render_composite(struct sna *sna,
 		return false;
 	}
 
-	/* Try to use the BLT engine unless it implies a
-	 * 3D -> 2D context switch.
-	 */
 	if (mask == NULL &&
-	    try_blt(sna, dst, src, width, height) &&
-	    sna_blt_composite(sna,
-			      op, src, dst,
+	    sna_blt_composite(sna, op, src, dst,
 			      src_x, src_y,
 			      dst_x, dst_y,
 			      width, height,
@@ -2479,24 +2428,6 @@ gen2_render_fill_boxes_try_blt(struct sna *sna,
 				  pixel, box, n);
 }
 
-static inline bool prefer_blt_fill(struct sna *sna)
-{
-#if PREFER_BLT_FILL
-	return true;
-#else
-	return sna->kgem.mode != KGEM_RENDER;
-#endif
-}
-
-static inline bool prefer_blt_copy(struct sna *sna, unsigned flags)
-{
-#if PREFER_BLT_COPY
-	return true;
-#else
-	return sna->kgem.mode != KGEM_RENDER;
-#endif
-}
-
 static bool
 gen2_render_fill_boxes(struct sna *sna,
 		       CARD8 op,
@@ -2519,6 +2450,11 @@ gen2_render_fill_boxes(struct sna *sna,
 					      dst, dst_bo,
 					      box, n);
 #endif
+	if (gen2_render_fill_boxes_try_blt(sna, op, format, color,
+					   dst, dst_bo,
+					   box, n))
+		return true;
+
 
 	DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
 	     __FUNCTION__, op, (int)format,
@@ -2529,11 +2465,6 @@ gen2_render_fill_boxes(struct sna *sna,
 	    !gen2_check_dst_format(format)) {
 		DBG(("%s: try blt, too large or incompatible destination\n",
 		     __FUNCTION__));
-		if (gen2_render_fill_boxes_try_blt(sna, op, format, color,
-						   dst, dst_bo,
-						   box, n))
-			return true;
-
 		if (!gen2_check_dst_format(format))
 			return false;
 
@@ -2542,12 +2473,6 @@ gen2_render_fill_boxes(struct sna *sna,
 					     dst, dst_bo, box, n);
 	}
 
-	if (prefer_blt_fill(sna) &&
-	    gen2_render_fill_boxes_try_blt(sna, op, format, color,
-					   dst, dst_bo,
-					   box, n))
-		return true;
-
 	if (op == PictOpClear)
 		pixel = 0;
 	else if (!sna_get_pixel_from_rgba(&pixel,
@@ -2719,8 +2644,7 @@ gen2_render_fill(struct sna *sna, uint8_t alu,
 #endif
 
 	/* Prefer to use the BLT if already engaged */
-	if (prefer_blt_fill(sna) &&
-	    sna_blt_fill(sna, alu,
+	if (sna_blt_fill(sna, alu,
 			 dst_bo, dst->drawable.bitsPerPixel,
 			 color,
 			 tmp))
@@ -2729,10 +2653,7 @@ gen2_render_fill(struct sna *sna, uint8_t alu,
 	/* Must use the BLT if we can't RENDER... */
 	if (too_large(dst->drawable.width, dst->drawable.height) ||
 	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH)
-		return sna_blt_fill(sna, alu,
-				    dst_bo, dst->drawable.bitsPerPixel,
-				    color,
-				    tmp);
+		return false;
 
 	tmp->base.op = alu;
 	tmp->base.dst.pixmap = dst;
@@ -2797,16 +2718,14 @@ gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 #endif
 
 	/* Prefer to use the BLT if already engaged */
-	if (prefer_blt_fill(sna) &&
-	    gen2_render_fill_one_try_blt(sna, dst, bo, color,
+	if (gen2_render_fill_one_try_blt(sna, dst, bo, color,
 					 x1, y1, x2, y2, alu))
 		return true;
 
 	/* Must use the BLT if we can't RENDER... */
 	if (too_large(dst->drawable.width, dst->drawable.height) ||
 	    bo->pitch < 8 || bo->pitch > MAX_3D_PITCH)
-		return gen2_render_fill_one_try_blt(sna, dst, bo, color,
-						    x1, y1, x2, y2, alu);
+		return false;
 
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
 		kgem_submit(&sna->kgem);
@@ -2961,8 +2880,7 @@ gen2_render_copy_boxes(struct sna *sna, uint8_t alu,
 	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
 	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
 
-	if (prefer_blt_copy(sna, flags) &&
-	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy_boxes(sna, alu,
 			       src_bo, src_dx, src_dy,
 			       dst_bo, dst_dx, dst_dy,
@@ -3127,8 +3045,7 @@ gen2_render_copy(struct sna *sna, uint8_t alu,
 #endif
 
 	/* Prefer to use the BLT */
-	if (prefer_blt_copy(sna, 0) &&
-	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy(sna, alu,
 			 src_bo, dst_bo,
 			 dst->drawable.bitsPerPixel,
commit c1457fbd8a169ee19c8e625ea4e779180eb4b070
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 1 10:49:27 2013 +0000

    sna/gen2: Tidy a pair of vertex emitters
    
    Switch to the new inline scaled transforms.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 4bde17d..78c3666 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -911,33 +911,30 @@ gen2_emit_composite_primitive_affine(struct sna *sna,
 				     const struct sna_composite_rectangles *r)
 {
 	PictTransform *transform = op->src.transform;
-	int16_t dst_x = r->dst.x + op->dst.x;
-	int16_t dst_y = r->dst.y + op->dst.y;
 	int src_x = r->src.x + (int)op->src.offset[0];
 	int src_y = r->src.y + (int)op->src.offset[1];
-	float sx, sy;
+	float *v;
 
-	_sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
-					 transform,
-					 &sx, &sy);
+	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
+	sna->kgem.nbatch += 12;
 
-	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
-	VERTEX(sx * op->src.scale[0]);
-	VERTEX(sy * op->src.scale[1]);
+	v[8] = v[4] = r->dst.x + op->dst.x;
+	v[0] = v[4] + r->width;
 
-	_sna_get_transformed_coordinates(src_x, src_y + r->height,
-					 transform,
-					 &sx, &sy);
-	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
-	VERTEX(sx * op->src.scale[0]);
-	VERTEX(sy * op->src.scale[1]);
+	v[9] = r->dst.y + op->dst.y;
+	v[5] = v[1] = v[9] + r->height;
 
-	_sna_get_transformed_coordinates(src_x, src_y,
-					 transform,
-					 &sx, &sy);
-	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
-	VERTEX(sx * op->src.scale[0]);
-	VERTEX(sy * op->src.scale[1]);
+	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
+				    transform, op->src.scale,
+				    &v[2], &v[3]);
+
+	_sna_get_transformed_scaled(src_x, src_y + r->height,
+				    transform, op->src.scale,
+				    &v[6], &v[7]);
+
+	_sna_get_transformed_scaled(src_x, src_y,
+				    transform, op->src.scale,
+				    &v[10], &v[11]);
 }
 
 fastcall static void
@@ -2059,8 +2056,8 @@ gen2_emit_composite_spans_primitive_affine_source(struct sna *sna,
 {
 	PictTransform *transform = op->base.src.transform;
 	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
-	float x, y, *v;
-       
+	float *v;
+
 	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
 	sna->kgem.nbatch += 15;
 
@@ -2072,26 +2069,20 @@ gen2_emit_composite_spans_primitive_affine_source(struct sna *sna,
 	*((uint32_t *)v + 7) = alpha;
 	*((uint32_t *)v + 12) = alpha;
 
-	_sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2,
-					 (int)op->base.src.offset[1] + box->y2,
-					 transform,
-					 &x, &y);
-	v[3] = x * op->base.src.scale[0];
-	v[4] = y * op->base.src.scale[1];
-
-	_sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
-					 (int)op->base.src.offset[1] + box->y2,
-					 transform,
-					 &x, &y);
-	v[8] = x * op->base.src.scale[0];
-	v[9] = y * op->base.src.scale[1];
-
-	_sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
-					 (int)op->base.src.offset[1] + box->y1,
-					 transform,
-					 &x, &y);
-	v[13] = x * op->base.src.scale[0];
-	v[14] = y * op->base.src.scale[1];
+	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
+				    (int)op->base.src.offset[1] + box->y2,
+				    transform, op->base.src.scale,
+				    &v[3], &v[4]);
+
+	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
+				    (int)op->base.src.offset[1] + box->y2,
+				    transform, op->base.src.scale,
+				    &v[8], &v[9]);
+
+	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
+				    (int)op->base.src.offset[1] + box->y1,
+				    transform, op->base.src.scale,
+				    &v[13], &v[14]);
 }
 
 static void