xf86-video-intel: 6 commits - src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen5_render.h src/sna/kgem.c src/sna/kgem.h src/sna/sna_render.h src/sna/sna_trapezoids.c

Chris Wilson ickle at kemper.freedesktop.org
Sun Jan 6 06:05:47 PST 2013


 src/sna/gen4_render.c    |  214 +++++++++++++++++------------------------------
 src/sna/gen5_render.c    |   85 ++++++++++--------
 src/sna/gen5_render.h    |   63 ++++++-------
 src/sna/kgem.c           |   27 ++---
 src/sna/kgem.h           |    1 
 src/sna/sna_render.h     |    4 
 src/sna/sna_trapezoids.c |   78 +++++++++++++++--
 7 files changed, 243 insertions(+), 229 deletions(-)

New commits:
commit 4af910e8be92e0ca241ce1e93e322c712dcbe340
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Jan 6 13:43:55 2013 +0000

    sna/gen4+: Trim the redundant float from the fill vertices
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index e713d35..c3e452d 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -2431,16 +2431,13 @@ gen4_render_fill_rectangle(struct sna *sna,
 	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
 
 	OUT_VERTEX(x+w, y+h);
-	OUT_VERTEX_F(1);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(x, y+h);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(x, y);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(.5);
 }
 
 static bool
@@ -2514,10 +2511,10 @@ gen4_render_fill_boxes(struct sna *sna,
 	gen4_channel_init_solid(sna, &tmp.src, pixel);
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 3;
-	tmp.floats_per_rect = 9;
+	tmp.floats_per_vertex = 2;
+	tmp.floats_per_rect = 6;
 	tmp.u.gen4.wm_kernel = WM_KERNEL;
-	tmp.u.gen4.ve_id = 2;
+	tmp.u.gen4.ve_id = 1;
 	tmp.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
@@ -2620,10 +2617,10 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
 	op->base.mask.bo = NULL;
 
 	op->base.is_affine = true;
-	op->base.floats_per_vertex = 3;
-	op->base.floats_per_rect = 9;
+	op->base.floats_per_vertex = 2;
+	op->base.floats_per_rect = 6;
 	op->base.u.gen4.wm_kernel = WM_KERNEL;
-	op->base.u.gen4.ve_id = 2;
+	op->base.u.gen4.ve_id = 1;
 	op->base.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
@@ -2697,13 +2694,13 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	tmp.mask.bo = NULL;
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 3;
-	tmp.floats_per_rect = 9;
+	tmp.floats_per_vertex = 2;
+	tmp.floats_per_rect = 6;
 	tmp.has_component_alpha = false;
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen4.wm_kernel = WM_KERNEL;
-	tmp.u.gen4.ve_id = 2;
+	tmp.u.gen4.ve_id = 1;
 	tmp.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index e5c79aa..c86cc24 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2528,10 +2528,10 @@ gen5_render_fill_boxes(struct sna *sna,
 	tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 3;
-	tmp.floats_per_rect = 9;
+	tmp.floats_per_vertex = 2;
+	tmp.floats_per_rect = 6;
 	tmp.u.gen5.wm_kernel = WM_KERNEL;
-	tmp.u.gen5.ve_id = 2;
+	tmp.u.gen5.ve_id = 1;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
 		kgem_submit(&sna->kgem);
@@ -2552,16 +2552,13 @@ gen5_render_fill_boxes(struct sna *sna,
 			DBG(("	(%d, %d), (%d, %d)\n",
 			     box->x1, box->y1, box->x2, box->y2));
 			OUT_VERTEX(box->x2, box->y2);
-			OUT_VERTEX_F(1);
-			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(.5);
 
 			OUT_VERTEX(box->x1, box->y2);
-			OUT_VERTEX_F(0);
-			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(.5);
 
 			OUT_VERTEX(box->x1, box->y1);
-			OUT_VERTEX_F(0);
-			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(.5);
 
 			box++;
 		} while (--n_this_time);
@@ -2582,16 +2579,13 @@ gen5_render_fill_op_blt(struct sna *sna,
 	gen5_get_rectangles(sna, &op->base, 1, gen5_fill_bind_surfaces);
 
 	OUT_VERTEX(x+w, y+h);
-	OUT_VERTEX_F(1);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(x, y+h);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(x, y);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(.5);
 }
 
 fastcall static void
@@ -2605,16 +2599,13 @@ gen5_render_fill_op_box(struct sna *sna,
 	gen5_get_rectangles(sna, &op->base, 1, gen5_fill_bind_surfaces);
 
 	OUT_VERTEX(box->x2, box->y2);
-	OUT_VERTEX_F(1);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(box->x1, box->y2);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(box->x1, box->y1);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(.5);
 }
 
 fastcall static void
@@ -2635,16 +2626,13 @@ gen5_render_fill_op_boxes(struct sna *sna,
 
 		do {
 			OUT_VERTEX(box->x2, box->y2);
-			OUT_VERTEX_F(1);
-			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(.5);
 
 			OUT_VERTEX(box->x1, box->y2);
-			OUT_VERTEX_F(0);
-			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(.5);
 
 			OUT_VERTEX(box->x1, box->y1);
-			OUT_VERTEX_F(0);
-			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(.5);
 			box++;
 		} while (--nbox_this_time);
 	} while (nbox);
@@ -2710,10 +2698,10 @@ gen5_render_fill(struct sna *sna, uint8_t alu,
 	op->base.mask.repeat = SAMPLER_EXTEND_NONE;
 
 	op->base.is_affine = true;
-	op->base.floats_per_vertex = 3;
-	op->base.floats_per_rect = 9;
+	op->base.floats_per_vertex = 2;
+	op->base.floats_per_rect = 6;
 	op->base.u.gen5.wm_kernel = WM_KERNEL;
-	op->base.u.gen5.ve_id = 2;
+	op->base.u.gen5.ve_id = 1;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
 		kgem_submit(&sna->kgem);
@@ -2798,13 +2786,13 @@ gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	tmp.mask.repeat = SAMPLER_EXTEND_NONE;
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 3;
-	tmp.floats_per_rect = 9;
+	tmp.floats_per_vertex = 2;
+	tmp.floats_per_rect = 6;
 	tmp.has_component_alpha = 0;
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen5.wm_kernel = WM_KERNEL;
-	tmp.u.gen5.ve_id = 2;
+	tmp.u.gen5.ve_id = 1;
 
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
 		_kgem_submit(&sna->kgem);
@@ -2818,16 +2806,13 @@ gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 
 	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));
 	OUT_VERTEX(x2, y2);
-	OUT_VERTEX_F(1);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(x1, y2);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(x1, y1);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(.5);
 
 	gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
commit 3244e4b23397f54ca76876dd76ebea9a0abd357e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Jan 6 13:24:23 2013 +0000

    Revert "sna/gen4+: Backport tight vertex packing for simple renderblits"
    
    This reverts commit 8ff76fad1fadc5e309f9a12c30f883460a432049 and
    commit 48e4dc4bd4b2980f0f804f572d0e3fc1bb4bc21e.
    
    I forgot gen4 and gen5 do not have the 'non-normalized' bit in their
    sampler states.

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index ac099a5..e713d35 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -172,8 +172,6 @@ static const struct blendinfo {
 #define SAMPLER_OFFSET(sf, se, mf, me, k) \
 	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
 
-#define VERTEX_2s2s 0
-
 static void
 gen4_emit_pipelined_pointers(struct sna *sna,
 			     const struct sna_composite_op *op,
@@ -2131,6 +2129,28 @@ gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
 	gen4_emit_state(sna, op, offset | dirty);
 }
 
+static void
+gen4_render_copy_one(struct sna *sna,
+		     const struct sna_composite_op *op,
+		     int sx, int sy,
+		     int w, int h,
+		     int dx, int dy)
+{
+	gen4_get_rectangles(sna, op, 1, gen4_copy_bind_surfaces);
+
+	OUT_VERTEX(dx+w, dy+h);
+	OUT_VERTEX_F((sx+w)*op->src.scale[0]);
+	OUT_VERTEX_F((sy+h)*op->src.scale[1]);
+
+	OUT_VERTEX(dx, dy+h);
+	OUT_VERTEX_F(sx*op->src.scale[0]);
+	OUT_VERTEX_F((sy+h)*op->src.scale[1]);
+
+	OUT_VERTEX(dx, dy);
+	OUT_VERTEX_F(sx*op->src.scale[0]);
+	OUT_VERTEX_F(sy*op->src.scale[1]);
+}
+
 static bool
 gen4_render_copy_boxes(struct sna *sna, uint8_t alu,
 		       PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
@@ -2233,20 +2253,20 @@ fallback_blt:
 					       extents.x2 - extents.x1,
 					       extents.y2 - extents.y1))
 			goto fallback_tiled_dst;
-
-		src_dx += tmp.src.offset[0];
-		src_dy += tmp.src.offset[1];
 	} else {
 		tmp.src.bo = kgem_bo_reference(src_bo);
 		tmp.src.width  = src->drawable.width;
 		tmp.src.height = src->drawable.height;
+		tmp.src.offset[0] = tmp.src.offset[1] = 0;
+		tmp.src.scale[0] = 1.f/src->drawable.width;
+		tmp.src.scale[1] = 1.f/src->drawable.height;
 	}
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 2;
-	tmp.floats_per_rect = 6;
+	tmp.floats_per_vertex = 3;
+	tmp.floats_per_rect = 9;
 	tmp.u.gen4.wm_kernel = WM_KERNEL;
-	tmp.u.gen4.ve_id = VERTEX_2s2s;
+	tmp.u.gen4.ve_id = 2;
 	tmp.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
@@ -2259,33 +2279,19 @@ fallback_blt:
 	dst_dy += tmp.dst.y;
 	tmp.dst.x = tmp.dst.y = 0;
 
+	src_dx += tmp.src.offset[0];
+	src_dy += tmp.src.offset[1];
+
 	gen4_copy_bind_surfaces(sna, &tmp);
 	gen4_align_vertex(sna, &tmp);
 
 	do {
-		int n_this_time;
-
-		n_this_time = gen4_get_rectangles(sna, &tmp, n,
-						  gen4_copy_bind_surfaces);
-		n -= n_this_time;
-
-		do {
-			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
-			     box->x1 + src_dx, box->y1 + src_dy,
-			     box->x1 + dst_dx, box->y1 + dst_dy,
-			     box->x2 - box->x1, box->y2 - box->y1));
-			OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
-			OUT_VERTEX(box->x2 + src_dx, box->y2 + src_dy);
-
-			OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
-			OUT_VERTEX(box->x1 + src_dx, box->y2 + src_dy);
-
-			OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
-			OUT_VERTEX(box->x1 + src_dx, box->y1 + src_dy);
-
-			box++;
-		} while (--n_this_time);
-	} while (n);
+		gen4_render_copy_one(sna, &tmp,
+				     box->x1 + src_dx, box->y1 + src_dy,
+				     box->x2 - box->x1, box->y2 - box->y1,
+				     box->x1 + dst_dx, box->y1 + dst_dy);
+		box++;
+	} while (--n);
 
 	gen4_vertex_flush(sna);
 	sna_render_composite_redirect_done(sna, &tmp);
@@ -2319,19 +2325,7 @@ gen4_render_copy_blt(struct sna *sna,
 		     int16_t w,  int16_t h,
 		     int16_t dx, int16_t dy)
 {
-	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", __FUNCTION__,
-	     sx, sy, dx, dy, w, h));
-
-	gen4_get_rectangles(sna, &op->base, 1, gen4_copy_bind_surfaces);
-
-	OUT_VERTEX(dx+w, dy+h);
-	OUT_VERTEX(sx+w, sy+h);
-
-	OUT_VERTEX(dx, dy+h);
-	OUT_VERTEX(sx, sy+h);
-
-	OUT_VERTEX(dx, dy);
-	OUT_VERTEX(sx, sy);
+	gen4_render_copy_one(sna, &op->base, sx, sy, w, h, dx, dy);
 }
 
 static void
@@ -2339,8 +2333,6 @@ gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
 {
 	if (sna->render.vertex_offset)
 		gen4_vertex_flush(sna);
-
-	DBG(("%s()\n", __FUNCTION__));
 }
 
 static bool
@@ -2396,14 +2388,16 @@ fallback:
 		gen4_get_card_format(op->base.src.pict_format);
 	op->base.src.width  = src->drawable.width;
 	op->base.src.height = src->drawable.height;
+	op->base.src.scale[0] = 1.f/src->drawable.width;
+	op->base.src.scale[1] = 1.f/src->drawable.height;
 	op->base.src.filter = SAMPLER_FILTER_NEAREST;
 	op->base.src.repeat = SAMPLER_EXTEND_NONE;
 
 	op->base.is_affine = true;
-	op->base.floats_per_vertex = 2;
-	op->base.floats_per_rect = 6;
+	op->base.floats_per_vertex = 3;
+	op->base.floats_per_rect = 9;
 	op->base.u.gen4.wm_kernel = WM_KERNEL;
-	op->base.u.gen4.ve_id = VERTEX_2s2s;
+	op->base.u.gen4.ve_id = 2;
 	op->base.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
@@ -2429,6 +2423,26 @@ fallback:
 	return true;
 }
 
+static void
+gen4_render_fill_rectangle(struct sna *sna,
+			   const struct sna_composite_op *op,
+			   int x, int y, int w, int h)
+{
+	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
+
+	OUT_VERTEX(x+w, y+h);
+	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(1);
+
+	OUT_VERTEX(x, y+h);
+	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(1);
+
+	OUT_VERTEX(x, y);
+	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(0);
+}
+
 static bool
 gen4_render_fill_boxes(struct sna *sna,
 		       CARD8 op,
@@ -2500,10 +2514,10 @@ gen4_render_fill_boxes(struct sna *sna,
 	gen4_channel_init_solid(sna, &tmp.src, pixel);
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 2;
-	tmp.floats_per_rect = 6;
+	tmp.floats_per_vertex = 3;
+	tmp.floats_per_rect = 9;
 	tmp.u.gen4.wm_kernel = WM_KERNEL;
-	tmp.u.gen4.ve_id = VERTEX_2s2s;
+	tmp.u.gen4.ve_id = 2;
 	tmp.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
@@ -2515,27 +2529,12 @@ gen4_render_fill_boxes(struct sna *sna,
 	gen4_align_vertex(sna, &tmp);
 
 	do {
-		int n_this_time;
-
-		n_this_time = gen4_get_rectangles(sna, &tmp, n,
-						  gen4_bind_surfaces);
-		n -= n_this_time;
-
-		do {
-			DBG(("	(%d, %d), (%d, %d)\n",
-			     box->x1, box->y1, box->x2, box->y2));
-			OUT_VERTEX(box->x2, box->y2);
-			OUT_VERTEX(1, 1);
-
-			OUT_VERTEX(box->x1, box->y2);
-			OUT_VERTEX(0, 1);
-
-			OUT_VERTEX(box->x1, box->y1);
-			OUT_VERTEX(0, 0);
-
-			box++;
-		} while (--n_this_time);
-	} while (n);
+		gen4_render_fill_rectangle(sna, &tmp,
+					   box->x1, box->y1,
+					   box->x2 - box->x1,
+					   box->y2 - box->y1);
+		box++;
+	} while (--n);
 
 	gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
@@ -2543,22 +2542,10 @@ gen4_render_fill_boxes(struct sna *sna,
 }
 
 static void
-gen4_render_fill_op_blt(struct sna *sna,
-			const struct sna_fill_op *op,
+gen4_render_fill_op_blt(struct sna *sna, const struct sna_fill_op *op,
 			int16_t x, int16_t y, int16_t w, int16_t h)
 {
-	DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x,y,w,h));
-
-	gen4_get_rectangles(sna, &op->base, 1, gen4_bind_surfaces);
-
-	OUT_VERTEX(x+w, y+h);
-	OUT_VERTEX(1, 1);
-
-	OUT_VERTEX(x, y+h);
-	OUT_VERTEX(0, 1);
-
-	OUT_VERTEX(x, y);
-	OUT_VERTEX(0, 0);
+	gen4_render_fill_rectangle(sna, &op->base, x, y, w, h);
 }
 
 fastcall static void
@@ -2566,19 +2553,9 @@ gen4_render_fill_op_box(struct sna *sna,
 			const struct sna_fill_op *op,
 			const BoxRec *box)
 {
-	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
-	     box->x1, box->y1, box->x2, box->y2));
-
-	gen4_get_rectangles(sna, &op->base, 1, gen4_bind_surfaces);
-
-	OUT_VERTEX(box->x2, box->y2);
-	OUT_VERTEX(1, 1);
-
-	OUT_VERTEX(box->x1, box->y2);
-	OUT_VERTEX(0, 1);
-
-	OUT_VERTEX(box->x1, box->y1);
-	OUT_VERTEX(0, 0);
+	gen4_render_fill_rectangle(sna, &op->base,
+				   box->x1, box->y1,
+				   box->x2-box->x1, box->y2-box->y1);
 }
 
 fastcall static void
@@ -2587,28 +2564,12 @@ gen4_render_fill_op_boxes(struct sna *sna,
 			  const BoxRec *box,
 			  int nbox)
 {
-	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
-	     box->x1, box->y1, box->x2, box->y2, nbox));
-
 	do {
-		int nbox_this_time;
-
-		nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
-						     gen4_bind_surfaces);
-		nbox -= nbox_this_time;
-
-		do {
-			OUT_VERTEX(box->x2, box->y2);
-			OUT_VERTEX(1, 1);
-
-			OUT_VERTEX(box->x1, box->y2);
-			OUT_VERTEX(0, 1);
-
-			OUT_VERTEX(box->x1, box->y1);
-			OUT_VERTEX(0, 0);
-			box++;
-		} while (--nbox_this_time);
-	} while (nbox);
+		gen4_render_fill_rectangle(sna, &op->base,
+					   box->x1, box->y1,
+					   box->x2-box->x1, box->y2-box->y1);
+		box++;
+	} while (--nbox);
 }
 
 static void
@@ -2617,8 +2578,6 @@ gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
 	if (sna->render.vertex_offset)
 		gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
-
-	DBG(("%s()\n", __FUNCTION__));
 }
 
 static bool
@@ -2661,10 +2620,10 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
 	op->base.mask.bo = NULL;
 
 	op->base.is_affine = true;
-	op->base.floats_per_vertex = 2;
-	op->base.floats_per_rect = 6;
+	op->base.floats_per_vertex = 3;
+	op->base.floats_per_rect = 9;
 	op->base.u.gen4.wm_kernel = WM_KERNEL;
-	op->base.u.gen4.ve_id = VERTEX_2s2s;
+	op->base.u.gen4.ve_id = 2;
 	op->base.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
@@ -2738,13 +2697,13 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	tmp.mask.bo = NULL;
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 2;
-	tmp.floats_per_rect = 6;
+	tmp.floats_per_vertex = 3;
+	tmp.floats_per_rect = 9;
 	tmp.has_component_alpha = false;
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen4.wm_kernel = WM_KERNEL;
-	tmp.u.gen4.ve_id = VERTEX_2s2s;
+	tmp.u.gen4.ve_id = 2;
 	tmp.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
@@ -2755,17 +2714,7 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	gen4_bind_surfaces(sna, &tmp);
 	gen4_align_vertex(sna, &tmp);
 
-	gen4_get_rectangles(sna, &tmp, 1, gen4_bind_surfaces);
-
-	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));
-	OUT_VERTEX(x2, y2);
-	OUT_VERTEX(1, 1);
-
-	OUT_VERTEX(x1, y2);
-	OUT_VERTEX(0, 1);
-
-	OUT_VERTEX(x1, y1);
-	OUT_VERTEX(0, 0);
+	gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1);
 
 	gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 2e64b8e..e5c79aa 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -166,8 +166,6 @@ static const struct blendinfo {
 #define SAMPLER_OFFSET(sf, se, mf, me, k) \
 	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
 
-#define VERTEX_2s2s 0
-
 static bool
 gen5_emit_pipelined_pointers(struct sna *sna,
 			     const struct sna_composite_op *op,
@@ -807,7 +805,7 @@ gen5_emit_pipelined_pointers(struct sna *sna,
 	uint32_t key;
 
 	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
-	     __FUNCTION__, op->u.gen4.ve_id & 2,
+	     __FUNCTION__, op->u.gen5.ve_id & 2,
 	     op->src.filter, op->src.repeat,
 	     op->mask.filter, op->mask.repeat,
 	     kernel, blend, op->has_component_alpha, (int)op->dst.format));
@@ -1020,7 +1018,7 @@ static void gen5_bind_surfaces(struct sna *sna,
 			     op->src.card_format,
 			     false);
 	if (op->mask.bo) {
-		assert(op->u.gen4.ve_id >> 2);
+		assert(op->u.gen5.ve_id >> 2);
 		binding_table[2] =
 			gen5_bind_bo(sna,
 				     op->mask.bo,
@@ -2206,20 +2204,20 @@ fallback_blt:
 					       extents.x2 - extents.x1,
 					       extents.y2 - extents.y1))
 			goto fallback_tiled_dst;
-
-		src_dx += tmp.src.offset[0];
-		src_dy += tmp.src.offset[1];
 	} else {
 		tmp.src.bo = kgem_bo_reference(src_bo);
 		tmp.src.width  = src->drawable.width;
 		tmp.src.height = src->drawable.height;
+		tmp.src.offset[0] = tmp.src.offset[1] = 0;
+		tmp.src.scale[0] = 1.f/src->drawable.width;
+		tmp.src.scale[1] = 1.f/src->drawable.height;
 	}
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 2;
-	tmp.floats_per_rect = 6;
+	tmp.floats_per_vertex = 3;
+	tmp.floats_per_rect = 9;
 	tmp.u.gen5.wm_kernel = WM_KERNEL;
-	tmp.u.gen5.ve_id = VERTEX_2s2s;
+	tmp.u.gen5.ve_id = 2;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
 		kgem_submit(&sna->kgem);
@@ -2231,6 +2229,9 @@ fallback_blt:
 	dst_dy += tmp.dst.y;
 	tmp.dst.x = tmp.dst.y = 0;
 
+	src_dx += tmp.src.offset[0];
+	src_dy += tmp.src.offset[1];
+
 	gen5_copy_bind_surfaces(sna, &tmp);
 	gen5_align_vertex(sna, &tmp);
 
@@ -2247,13 +2248,16 @@ fallback_blt:
 			     box->x1 + dst_dx, box->y1 + dst_dy,
 			     box->x2 - box->x1, box->y2 - box->y1));
 			OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
-			OUT_VERTEX(box->x2 + src_dx, box->y2 + src_dy);
+			OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
+			OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
 
 			OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
-			OUT_VERTEX(box->x1 + src_dx, box->y2 + src_dy);
+			OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
+			OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
 
 			OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
-			OUT_VERTEX(box->x1 + src_dx, box->y1 + src_dy);
+			OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
+			OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);
 
 			box++;
 		} while (--n_this_time);
@@ -2297,13 +2301,16 @@ gen5_render_copy_blt(struct sna *sna,
 	gen5_get_rectangles(sna, &op->base, 1, gen5_copy_bind_surfaces);
 
 	OUT_VERTEX(dx+w, dy+h);
-	OUT_VERTEX(sx+w, sy+h);
+	OUT_VERTEX_F((sx+w)*op->base.src.scale[0]);
+	OUT_VERTEX_F((sy+h)*op->base.src.scale[1]);
 
 	OUT_VERTEX(dx, dy+h);
-	OUT_VERTEX(sx, sy+h);
+	OUT_VERTEX_F(sx*op->base.src.scale[0]);
+	OUT_VERTEX_F((sy+h)*op->base.src.scale[1]);
 
 	OUT_VERTEX(dx, dy);
-	OUT_VERTEX(sx, sy);
+	OUT_VERTEX_F(sx*op->base.src.scale[0]);
+	OUT_VERTEX_F(sy*op->base.src.scale[1]);
 }
 
 static void
@@ -2365,14 +2372,16 @@ fallback:
 		gen5_get_card_format(op->base.src.pict_format);
 	op->base.src.width  = src->drawable.width;
 	op->base.src.height = src->drawable.height;
+	op->base.src.scale[0] = 1.f/src->drawable.width;
+	op->base.src.scale[1] = 1.f/src->drawable.height;
 	op->base.src.filter = SAMPLER_FILTER_NEAREST;
 	op->base.src.repeat = SAMPLER_EXTEND_NONE;
 
 	op->base.is_affine = true;
-	op->base.floats_per_vertex = 2;
-	op->base.floats_per_rect = 6;
+	op->base.floats_per_vertex = 3;
+	op->base.floats_per_rect = 9;
 	op->base.u.gen5.wm_kernel = WM_KERNEL;
-	op->base.u.gen5.ve_id = VERTEX_2s2s;
+	op->base.u.gen5.ve_id = 2;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
 		kgem_submit(&sna->kgem);
@@ -2519,10 +2528,10 @@ gen5_render_fill_boxes(struct sna *sna,
 	tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 2;
-	tmp.floats_per_rect = 6;
+	tmp.floats_per_vertex = 3;
+	tmp.floats_per_rect = 9;
 	tmp.u.gen5.wm_kernel = WM_KERNEL;
-	tmp.u.gen5.ve_id = VERTEX_2s2s;
+	tmp.u.gen5.ve_id = 2;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
 		kgem_submit(&sna->kgem);
@@ -2543,13 +2552,16 @@ gen5_render_fill_boxes(struct sna *sna,
 			DBG(("	(%d, %d), (%d, %d)\n",
 			     box->x1, box->y1, box->x2, box->y2));
 			OUT_VERTEX(box->x2, box->y2);
-			OUT_VERTEX(1, 1);
+			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(1);
 
 			OUT_VERTEX(box->x1, box->y2);
-			OUT_VERTEX(0, 1);
+			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(1);
 
 			OUT_VERTEX(box->x1, box->y1);
-			OUT_VERTEX(0, 0);
+			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(0);
 
 			box++;
 		} while (--n_this_time);
@@ -2570,13 +2582,16 @@ gen5_render_fill_op_blt(struct sna *sna,
 	gen5_get_rectangles(sna, &op->base, 1, gen5_fill_bind_surfaces);
 
 	OUT_VERTEX(x+w, y+h);
-	OUT_VERTEX(1, 1);
+	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(1);
 
 	OUT_VERTEX(x, y+h);
-	OUT_VERTEX(0, 1);
+	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(1);
 
 	OUT_VERTEX(x, y);
-	OUT_VERTEX(0, 0);
+	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(0);
 }
 
 fastcall static void
@@ -2590,13 +2605,16 @@ gen5_render_fill_op_box(struct sna *sna,
 	gen5_get_rectangles(sna, &op->base, 1, gen5_fill_bind_surfaces);
 
 	OUT_VERTEX(box->x2, box->y2);
-	OUT_VERTEX(1, 1);
+	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(1);
 
 	OUT_VERTEX(box->x1, box->y2);
-	OUT_VERTEX(0, 1);
+	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(1);
 
 	OUT_VERTEX(box->x1, box->y1);
-	OUT_VERTEX(0, 0);
+	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(0);
 }
 
 fastcall static void
@@ -2617,13 +2635,16 @@ gen5_render_fill_op_boxes(struct sna *sna,
 
 		do {
 			OUT_VERTEX(box->x2, box->y2);
-			OUT_VERTEX(1, 1);
+			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(1);
 
 			OUT_VERTEX(box->x1, box->y2);
-			OUT_VERTEX(0, 1);
+			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(1);
 
 			OUT_VERTEX(box->x1, box->y1);
-			OUT_VERTEX(0, 0);
+			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(0);
 			box++;
 		} while (--nbox_this_time);
 	} while (nbox);
@@ -2689,10 +2710,10 @@ gen5_render_fill(struct sna *sna, uint8_t alu,
 	op->base.mask.repeat = SAMPLER_EXTEND_NONE;
 
 	op->base.is_affine = true;
-	op->base.floats_per_vertex = 2;
-	op->base.floats_per_rect = 6;
+	op->base.floats_per_vertex = 3;
+	op->base.floats_per_rect = 9;
 	op->base.u.gen5.wm_kernel = WM_KERNEL;
-	op->base.u.gen5.ve_id = VERTEX_2s2s;
+	op->base.u.gen5.ve_id = 2;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
 		kgem_submit(&sna->kgem);
@@ -2777,13 +2798,13 @@ gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	tmp.mask.repeat = SAMPLER_EXTEND_NONE;
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 2;
-	tmp.floats_per_rect = 6;
+	tmp.floats_per_vertex = 3;
+	tmp.floats_per_rect = 9;
 	tmp.has_component_alpha = 0;
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen5.wm_kernel = WM_KERNEL;
-	tmp.u.gen5.ve_id = VERTEX_2s2s;
+	tmp.u.gen5.ve_id = 2;
 
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
 		_kgem_submit(&sna->kgem);
@@ -2797,13 +2818,16 @@ gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 
 	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));
 	OUT_VERTEX(x2, y2);
-	OUT_VERTEX(1, 1);
+	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(1);
 
 	OUT_VERTEX(x1, y2);
-	OUT_VERTEX(0, 1);
+	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(1);
 
 	OUT_VERTEX(x1, y1);
-	OUT_VERTEX(0, 0);
+	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(0);
 
 	gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
@@ -2890,7 +2914,7 @@ static void gen5_render_reset(struct sna *sna)
 	sna->render_state.gen5.needs_invariant = true;
 	sna->render_state.gen5.ve_id = -1;
 	sna->render_state.gen5.last_primitive = -1;
-	sna->render_state.gen5.last_pipelined_pointers = -1;
+	sna->render_state.gen5.last_pipelined_pointers = 0;
 
 	sna->render_state.gen5.drawrect_offset = -1;
 	sna->render_state.gen5.drawrect_limit = -1;
diff --git a/src/sna/gen5_render.h b/src/sna/gen5_render.h
index 31caafc..0f6bae6 100644
--- a/src/sna/gen5_render.h
+++ b/src/sna/gen5_render.h
@@ -1989,50 +1989,43 @@ struct gen5_sampler_legacy_border_color {
    uint8_t color[4];
 };
 
-struct gen5_sampler_state
-{
-   
-   struct
-   {
-      unsigned int shadow_function:3; 
-      unsigned int lod_bias:11; 
-      unsigned int min_filter:3; 
-      unsigned int mag_filter:3; 
-      unsigned int mip_filter:2; 
-      unsigned int base_level:5; 
+struct gen5_sampler_state {
+   struct {
+      unsigned int shadow_function:3;
+      unsigned int lod_bias:11;
+      unsigned int min_filter:3;
+      unsigned int mag_filter:3;
+      unsigned int mip_filter:2;
+      unsigned int base_level:5;
       unsigned int pad:1;
-      unsigned int lod_preclamp:1; 
-      unsigned int border_color_mode:1; 
+      unsigned int lod_preclamp:1;
+      unsigned int border_color_mode:1;
       unsigned int pad0:1;
-      unsigned int disable:1; 
+      unsigned int disable:1;
    } ss0;
 
-   struct
-   {
-      unsigned int r_wrap_mode:3; 
-      unsigned int t_wrap_mode:3; 
-      unsigned int s_wrap_mode:3; 
+   struct {
+      unsigned int r_wrap_mode:3;
+      unsigned int t_wrap_mode:3;
+      unsigned int s_wrap_mode:3;
       unsigned int pad:3;
-      unsigned int max_lod:10; 
-      unsigned int min_lod:10; 
+      unsigned int max_lod:10;
+      unsigned int min_lod:10;
    } ss1;
 
-   
-   struct
-   {
+   struct {
       unsigned int pad:5;
-      unsigned int border_color_pointer:27; 
+      unsigned int border_color_pointer:27;
    } ss2;
-   
-   struct
-   {
-      unsigned int pad:19;
-      unsigned int max_aniso:3; 
-      unsigned int chroma_key_mode:1; 
-      unsigned int chroma_key_index:2; 
-      unsigned int chroma_key_enable:1; 
-      unsigned int monochrome_filter_width:3; 
-      unsigned int monochrome_filter_height:3; 
+
+   struct {
+      uint32_t pad:13;
+      uint32_t address_round:6;
+      uint32_t max_aniso:3;
+      uint32_t chroma_key_mode:1;
+      uint32_t chroma_key_index:2;
+      uint32_t chroma_key_enable:1;
+      uint32_t mbz:6;
    } ss3;
 };
 
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 91b3568..4174b6f 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -126,8 +126,8 @@ struct sna_composite_op {
 		} gen4;
 
 		struct {
-			int wm_kernel;
-			int ve_id;
+			int16_t wm_kernel;
+			int16_t ve_id;
 		} gen5;
 
 		struct {
commit d3be77f87916e38af717bafaf2000becd5180d76
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Jan 5 18:07:50 2013 +0000

    sna/trapezoids: filter out cancelling edges upon insertion to edge-list
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 6f0c43c..5bedcbf 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -800,6 +800,9 @@ merge_sorted_edges(struct edge *head_a, struct edge *head_b)
 	struct edge *head, **next, *prev;
 	int32_t x;
 
+	if (head_b == NULL)
+		return head_a;
+
 	prev = head_a->prev;
 	next = &head;
 	if (head_a->x.quo <= head_b->x.quo) {
@@ -872,11 +875,39 @@ sort_edges(struct edge  *list,
 	return remaining;
 }
 
+static struct edge *filter(struct edge *edges)
+{
+	struct edge *e;
+
+	e = edges;
+	do {
+		struct edge *n = e->next;
+		if (e->dir == -n->dir &&
+		    e->height_left == n->height_left &&
+		    *(uint64_t *)&e->x == *(uint64_t *)&n->x &&
+		    *(uint64_t *)&e->dxdy == *(uint64_t *)&n->dxdy) {
+			if (e->prev)
+				e->prev->next = n->next;
+			else
+				edges = n->next;
+			if (n->next)
+				n->next->prev = e->prev;
+			else
+				break;
+
+			e = n->next;
+		} else
+			e = e->next;
+	} while (e->next);
+
+	return edges;
+}
+
 static struct edge *
 merge_unsorted_edges (struct edge *head, struct edge *unsorted)
 {
 	sort_edges (unsorted, UINT_MAX, &unsorted);
-	return merge_sorted_edges (head, unsorted);
+	return merge_sorted_edges (head, filter(unsorted));
 }
 
 /* Test if the edges on the active list can be safely advanced by a
@@ -884,11 +915,10 @@ merge_unsorted_edges (struct edge *head, struct edge *unsorted)
 inline static bool
 can_full_step(struct active_list *active)
 {
-	const struct edge *e;
-
 	/* Recomputes the minimum height of all edges on the active
 	 * list if we have been dropping edges. */
 	if (active->min_height <= 0) {
+		const struct edge *e;
 		int min_height = INT_MAX;
 		int is_vertical = 1;
 
@@ -1922,6 +1952,9 @@ mono_merge_sorted_edges(struct mono_edge *head_a, struct mono_edge *head_b)
 	struct mono_edge *head, **next, *prev;
 	int32_t x;
 
+	if (head_b == NULL)
+		return head_a;
+
 	prev = head_a->prev;
 	next = &head;
 	if (head_a->x.quo <= head_b->x.quo) {
@@ -1995,11 +2028,39 @@ mono_sort_edges(struct mono_edge *list,
 	return remaining;
 }
 
+static struct mono_edge *mono_filter(struct mono_edge *edges)
+{
+	struct mono_edge *e;
+
+	e = edges;
+	do {
+		struct mono_edge *n = e->next;
+		if (e->dir == -n->dir &&
+		    e->height_left == n->height_left &&
+		    *(uint64_t *)&e->x == *(uint64_t *)&n->x &&
+		    *(uint64_t *)&e->dxdy == *(uint64_t *)&n->dxdy) {
+			if (e->prev)
+				e->prev->next = n->next;
+			else
+				edges = n->next;
+			if (n->next)
+				n->next->prev = e->prev;
+			else
+				break;
+
+			e = n->next;
+		} else
+			e = e->next;
+	} while (e->next);
+
+	return edges;
+}
+
 static struct mono_edge *
 mono_merge_unsorted_edges(struct mono_edge *head, struct mono_edge *unsorted)
 {
 	mono_sort_edges(unsorted, UINT_MAX, &unsorted);
-	return mono_merge_sorted_edges(head, unsorted);
+	return mono_merge_sorted_edges(head, mono_filter(unsorted));
 }
 
 #if 0
commit 2b4a2f52c47a24c297312d51f9a8299c9a54a697
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Jan 5 17:21:34 2013 +0000

    sna/trapezoids: filter out zero-length runs
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index d4899e9..6f0c43c 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -532,6 +532,9 @@ cell_list_add_subspan(struct cell_list *cells,
 	int ix1, fx1;
 	int ix2, fx2;
 
+	if (x1 == x2)
+		return;
+
 	FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
 	FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
 
@@ -892,7 +895,8 @@ can_full_step(struct active_list *active)
 		for (e = active->head.next; &active->tail != e; e = e->next) {
 			if (e->height_left < min_height)
 				min_height = e->height_left;
-			is_vertical &= e->dy == 0;
+			if (is_vertical)
+				is_vertical = e->dy == 0;
 		}
 
 		active->is_vertical = is_vertical;
@@ -929,7 +933,8 @@ fill_buckets(struct active_list *active,
 		*b = edge;
 		if (edge->height_left < min_height)
 			min_height = edge->height_left;
-		is_vertical &= edge->dy == 0;
+		if (is_vertical)
+			is_vertical = edge->dy == 0;
 		edge = next;
 	}
 
commit 59a7b8b32c694735942fd7e42c1382d91004b0b1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Jan 4 18:22:14 2013 +0000

    sna: Clear up the caches after handling a request allocation failure
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 989ab94..e3da032 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2120,6 +2120,7 @@ static void kgem_commit(struct kgem *kgem)
 		assert(list_is_empty(&rq->buffers));
 
 		gem_close(kgem->fd, rq->bo->handle);
+		kgem_cleanup_cache(kgem);
 	} else {
 		list_add_tail(&rq->list, &kgem->requests[rq->ring]);
 		kgem->need_throttle = kgem->need_retire = 1;
commit 3c31a9fc210221ba8e7922bec80c15ec39cab7bc
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Jan 4 18:11:12 2013 +0000

    sna: Embed the pre-allocation of the static request into the device
    
    So that in the case where we are driving multiple independent screens
    each having their own device, we do not share the global reserved
    request in the event of an allocation failure.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index ac7724a..989ab94 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -572,9 +572,7 @@ static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages)
 	return __kgem_bo_init(bo, handle, num_pages);
 }
 
-static struct kgem_request _kgem_static_request;
-
-static struct kgem_request *__kgem_request_alloc(void)
+static struct kgem_request *__kgem_request_alloc(struct kgem *kgem)
 {
 	struct kgem_request *rq;
 
@@ -584,7 +582,7 @@ static struct kgem_request *__kgem_request_alloc(void)
 	} else {
 		rq = malloc(sizeof(*rq));
 		if (rq == NULL)
-			rq = &_kgem_static_request;
+			rq = &kgem->static_request;
 	}
 
 	list_init(&rq->buffers);
@@ -1061,7 +1059,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 	DBG(("%s: half cpu cache %d pages\n", __FUNCTION__,
 	     kgem->half_cpu_cache_pages));
 
-	kgem->next_request = __kgem_request_alloc();
+	kgem->next_request = __kgem_request_alloc(kgem);
 
 	DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
 	     !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_cacheing),
@@ -1553,7 +1551,7 @@ inline static void kgem_bo_remove_from_active(struct kgem *kgem,
 
 	list_del(&bo->list);
 	assert(bo->rq != NULL);
-	if (bo->rq == &_kgem_static_request)
+	if (bo->rq == (void *)kgem)
 		list_del(&bo->request);
 	assert(list_is_empty(&bo->vma));
 }
@@ -1693,7 +1691,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 				DBG(("%s: handle=%d is snooped, tracking until free\n",
 				     __FUNCTION__, bo->handle));
 				list_add(&bo->request, &kgem->flushing);
-				bo->rq = &_kgem_static_request;
+				bo->rq = (void *)kgem;
 			}
 		}
 		if (bo->rq == NULL)
@@ -1770,7 +1768,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 			else
 				cache = &kgem->large;
 			list_add(&bo->list, cache);
-			bo->rq = &_kgem_static_request;
+			bo->rq = (void *)kgem;
 			return;
 		}
 
@@ -1850,7 +1848,7 @@ static bool kgem_retire__flushing(struct kgem *kgem)
 	bool retired = false;
 
 	list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
-		assert(bo->rq == &_kgem_static_request);
+		assert(bo->rq == (void *)kgem);
 		assert(bo->exec == NULL);
 
 		if (kgem_busy(kgem, bo->handle))
@@ -1913,7 +1911,7 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
 			DBG(("%s: moving %d to flushing\n",
 			     __FUNCTION__, bo->handle));
 			list_add(&bo->request, &kgem->flushing);
-			bo->rq = &_kgem_static_request;
+			bo->rq = (void *)kgem;
 		} else {
 			bo->domain = DOMAIN_NONE;
 			bo->rq = NULL;
@@ -1925,7 +1923,7 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
 		if (bo->snoop) {
 			if (bo->needs_flush) {
 				list_add(&bo->request, &kgem->flushing);
-				bo->rq = &_kgem_static_request;
+				bo->rq = (void *)kgem;
 			} else {
 				kgem_bo_move_to_snoop(kgem, bo);
 			}
@@ -2104,7 +2102,7 @@ static void kgem_commit(struct kgem *kgem)
 		kgem->scanout_busy |= bo->scanout;
 	}
 
-	if (rq == &_kgem_static_request) {
+	if (rq == &kgem->static_request) {
 		struct drm_i915_gem_set_domain set_domain;
 
 		DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));
@@ -2414,7 +2412,7 @@ void kgem_reset(struct kgem *kgem)
 			}
 		}
 
-		if (kgem->next_request != &_kgem_static_request)
+		if (kgem->next_request != &kgem->static_request)
 			free(kgem->next_request);
 	}
 
@@ -2430,7 +2428,7 @@ void kgem_reset(struct kgem *kgem)
 	kgem->flush = 0;
 	kgem->batch_flags = kgem->batch_flags_base;
 
-	kgem->next_request = __kgem_request_alloc();
+	kgem->next_request = __kgem_request_alloc(kgem);
 
 	kgem_sna_reset(kgem);
 }
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 72f8cb3..4819dd6 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -138,6 +138,7 @@ struct kgem {
 
 	struct list requests[2];
 	struct kgem_request *next_request;
+	struct kgem_request static_request;
 
 	struct {
 		struct list inactive[NUM_CACHE_BUCKETS];


More information about the xorg-commit mailing list