xf86-video-intel: 4 commits - src/sna/kgem.c src/sna/sna_blt.c src/sna/sna_render.h src/sna/sna_tiling.c

Chris Wilson ickle at kemper.freedesktop.org
Wed Nov 6 16:04:56 CET 2013


 src/sna/kgem.c       |   23 ++-
 src/sna/sna_blt.c    |  170 +++++++++++++++++++++++++--
 src/sna/sna_render.h |   11 +
 src/sna/sna_tiling.c |  311 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 498 insertions(+), 17 deletions(-)

New commits:
commit 7a9c1e153a9208e8cd7680e478fde18e051beaa9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Nov 6 15:04:24 2013 +0000

    sna: Add a DBG breadcrumb to kgem_bo_convert_to_gpu()
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index bb884c3..8283df8 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -6564,6 +6564,7 @@ kgem_replace_bo(struct kgem *kgem,
 
 bool kgem_bo_convert_to_gpu(struct kgem *kgem, struct kgem_bo *bo)
 {
+	DBG(("%s: converting handle=%d from CPU to GPU\n", __FUNCTION__, bo->handle));
 	assert(bo->tiling == I915_TILING_NONE);
 
 	if (kgem->has_llc)
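
sna's DBG breadcrumbs compile away entirely in non-debug builds. A
minimal sketch of that style of macro, assuming a hypothetical
SNA_DEBUG build switch (an illustration, not sna's exact definition):

#include <stdio.h>

#ifdef SNA_DEBUG			/* hypothetical build switch */
#define DBG(x) printf x			/* expands the bracketed arg list */
#else
#define DBG(x) do {} while (0)		/* compiled out entirely */
#endif

/* The double parentheses at the call site pass a whole printf
 * argument list through a single macro parameter:
 *
 *	DBG(("%s: converting handle=%d from CPU to GPU\n",
 *	     __FUNCTION__, bo->handle));
 */
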
commit ae380a960df6b3a9714d78eb6cb42249764488ba
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Nov 6 14:51:42 2013 +0000

    sna: Use tiling BLT fallback for BLT composite operations
    
    This avoids a circuitous route through the render pathways and
    multiple levels of tiling fallbacks to accomplish the same copy.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 39ff0f6..693fedc 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -1734,8 +1734,8 @@ blt_composite_copy_with_alpha(struct sna *sna,
 	x2 = x1 + r->width;
 	y2 = y1 + r->height;
 
-	src_x = r->src.x - x1;
-	src_y = r->src.y - y1;
+	src_x = r->src.x - x1 + op->u.blt.sx;
+	src_y = r->src.y - y1 + op->u.blt.sy;
 
 	/* clip against dst */
 	if (x1 < 0)
@@ -1811,7 +1811,9 @@ prepare_blt_copy(struct sna *sna,
 		if (!kgem_check_many_bo_fenced(&sna->kgem,
 					       op->dst.bo, bo, NULL)) {
 			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
-			return false;
+			return sna_tiling_blt_composite(sna, op, bo,
+							src->drawable.bitsPerPixel,
+							alpha_fixup);
 		}
 		_kgem_set_mode(&sna->kgem, KGEM_BLT);
 	}
@@ -2682,25 +2684,27 @@ sna_blt_composite__convert(struct sna *sna,
 			return false;
 	}
 
+	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
+	     __FUNCTION__,
+	     tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup));
+
+	tmp->u.blt.src_pixmap = NULL;
+	tmp->u.blt.sx = sx;
+	tmp->u.blt.sy = sy;
+
 	kgem_set_mode(&sna->kgem, KGEM_BLT, tmp->dst.bo);
 	if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		if (!kgem_check_many_bo_fenced(&sna->kgem,
 					       tmp->dst.bo, tmp->src.bo, NULL)) {
 			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
-			return false;
+			return sna_tiling_blt_composite(sna, tmp, tmp->src.bo,
+							PICT_FORMAT_BPP(tmp->src.pict_format),
+							alpha_fixup);
 		}
 		_kgem_set_mode(&sna->kgem, KGEM_BLT);
 	}
 
-	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
-	     __FUNCTION__,
-	     tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup));
-
-	tmp->u.blt.src_pixmap = NULL;
-	tmp->u.blt.sx = sx;
-	tmp->u.blt.sy = sy;
-
 	if (alpha_fixup) {
 		tmp->blt   = blt_composite_copy_with_alpha;
 		tmp->box   = blt_composite_copy_box_with_alpha;
@@ -3531,6 +3535,148 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 	return true;
 }
 
+bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
+				    struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+				    struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+				    int bpp, int alpha_fixup,
+				    const BoxRec *box, int nbox)
+{
+	struct kgem *kgem = &sna->kgem;
+	unsigned src_pitch, br13, cmd;
+
+#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
+	return false;
+#endif
+
+	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
+	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
+	    src_bo->tiling, dst_bo->tiling,
+	    src_bo->pitch, dst_bo->pitch));
+
+	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
+		DBG(("%s: cannot blt to src? %d or dst? %d\n",
+		     __FUNCTION__,
+		     kgem_bo_can_blt(kgem, src_bo),
+		     kgem_bo_can_blt(kgem, dst_bo)));
+		return false;
+	}
+
+	cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
+	src_pitch = src_bo->pitch;
+	if (kgem->gen >= 040 && src_bo->tiling) {
+		cmd |= BLT_SRC_TILED;
+		src_pitch >>= 2;
+	}
+	assert(src_pitch <= MAXSHORT);
+
+	br13 = dst_bo->pitch;
+	if (kgem->gen >= 040 && dst_bo->tiling) {
+		cmd |= BLT_DST_TILED;
+		br13 >>= 2;
+	}
+	assert(br13 <= MAXSHORT);
+
+	br13 |= copy_ROP[alu] << 16;
+	switch (bpp) {
+	default: assert(0);
+	case 32: br13 |= 1 << 25; /* RGB8888 */
+	case 16: br13 |= 1 << 24; /* RGB565 */
+	case 8: break;
+	}
+
+	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
+	if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
+		DBG(("%s: cannot fit src+dst into aperture\n", __FUNCTION__));
+		return false;
+	}
+
+	/* Compare first box against a previous fill */
+	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
+	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
+		if (kgem->gen >= 0100) {
+			if (kgem->nbatch >= 7 &&
+			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
+			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
+			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
+				DBG(("%s: deleting last fill\n", __FUNCTION__));
+				kgem->nbatch -= 7;
+				kgem->nreloc--;
+			}
+		} else {
+			if (kgem->nbatch >= 6 &&
+			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
+			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
+			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
+				DBG(("%s: deleting last fill\n", __FUNCTION__));
+				kgem->nbatch -= 6;
+				kgem->nreloc--;
+			}
+		}
+	}
+
+	while (nbox--) {
+		uint32_t *b;
+
+		if (!kgem_check_batch(kgem, 14) ||
+		    !kgem_check_reloc(kgem, 2)) {
+			_kgem_submit(kgem);
+			_kgem_set_mode(kgem, KGEM_BLT);
+		}
+
+		assert(sna->kgem.mode == KGEM_BLT);
+		b = kgem->batch + kgem->nbatch;
+		b[0] = cmd;
+		b[1] = br13;
+		b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
+		b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
+		if (sna->kgem.gen >= 0100) {
+			*(uint64_t *)(b+4) =
+				kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
+						 I915_GEM_DOMAIN_RENDER << 16 |
+						 I915_GEM_DOMAIN_RENDER |
+						 KGEM_RELOC_FENCED,
+						 0);
+			b[6] = src_pitch;
+			b[7] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
+			*(uint64_t *)(b+8) =
+				kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
+						 I915_GEM_DOMAIN_RENDER << 16 |
+						 KGEM_RELOC_FENCED,
+						 0);
+			b[10] = alpha_fixup;
+			b[11] = alpha_fixup;
+			b[12] = 0;
+			b[13] = 0;
+			kgem->nbatch += 14;
+		} else {
+			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+					      I915_GEM_DOMAIN_RENDER << 16 |
+					      I915_GEM_DOMAIN_RENDER |
+					      KGEM_RELOC_FENCED,
+					      0);
+			b[5] = src_pitch;
+			b[6] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
+			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+					      I915_GEM_DOMAIN_RENDER << 16 |
+					      KGEM_RELOC_FENCED,
+					      0);
+			b[8] = alpha_fixup;
+			b[9] = alpha_fixup;
+			b[10] = 0;
+			b[11] = 0;
+			kgem->nbatch += 12;
+		}
+		assert(kgem->nbatch < kgem->surface);
+		box++;
+	}
+
+	if (kgem->nexec > 1 && __kgem_ring_empty(kgem))
+		_kgem_submit(kgem);
+
+	sna->blt_state.fill_bo = 0;
+	return true;
+}
+
 static void box_extents(const BoxRec *box, int n, BoxRec *extents)
 {
 	*extents = *box;
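
The BR13 dword built above packs destination pitch, raster operation
and color depth into a single word, and the switch falls through
deliberately so that 32bpp sets both depth bits. A minimal sketch of
that packing, following the i915 BLT layout used above (pack_br13 is
a hypothetical helper for illustration, not driver API):

#include <stdbool.h>
#include <stdint.h>

static uint32_t pack_br13(uint32_t dst_pitch, uint8_t rop, int bpp,
			  bool dst_tiled, int gen)
{
	uint32_t br13;

	if (gen >= 040 && dst_tiled)
		dst_pitch >>= 2;	/* tiled pitches are in dwords on gen4+ */

	br13  = dst_pitch;		/* bits 15:0  - destination pitch */
	br13 |= (uint32_t)rop << 16;	/* bits 23:16 - raster operation */
	switch (bpp) {			/* bits 25:24 - color depth */
	case 32: br13 |= 1 << 25;	/* deliberate fall-through: */
	case 16: br13 |= 1 << 24;	/* 32bpp sets both depth bits */
	case 8:  break;
	}
	return br13;
}
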
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 38bde39..d64d652 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -592,6 +592,12 @@ bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
 			       struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
 			       int bpp, const BoxRec *box, int nbox);
 
+bool sna_tiling_blt_composite(struct sna *sna,
+			      struct sna_composite_op *op,
+			      struct kgem_bo *bo,
+			      int bpp,
+			      uint32_t alpha_fixup);
+
 bool sna_blt_composite(struct sna *sna,
 		       uint32_t op,
 		       PicturePtr src,
@@ -629,6 +635,11 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 			struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
 			int bpp,
 			const BoxRec *box, int n);
+bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
+				    struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+				    struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+				    int bpp, int alpha_fixup,
+				    const BoxRec *box, int nbox);
 bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
 				 PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
 				 PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c
index d23fb00..ae9c84b 100644
--- a/src/sna/sna_tiling.c
+++ b/src/sna/sna_tiling.c
@@ -689,6 +689,317 @@ done:
 	return ret;
 }
 
+fastcall static void
+tiling_blt(struct sna *sna,
+	   const struct sna_composite_op *op,
+	   const struct sna_composite_rectangles *r)
+{
+	int x1, x2, y1, y2;
+	int src_x, src_y;
+	BoxRec box;
+
+	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
+	     __FUNCTION__,
+	     r->src.x, r->src.y,
+	     r->dst.x, r->dst.y,
+	     r->width, r->height));
+
+	/* XXX higher layer should have clipped? */
+
+	x1 = r->dst.x + op->dst.x;
+	y1 = r->dst.y + op->dst.y;
+	x2 = x1 + r->width;
+	y2 = y1 + r->height;
+
+	src_x = r->src.x - x1 + op->u.blt.sx;
+	src_y = r->src.y - y1 + op->u.blt.sy;
+
+	/* clip against dst */
+	if (x1 < 0)
+		x1 = 0;
+	if (y1 < 0)
+		y1 = 0;
+
+	if (x2 > op->dst.width)
+		x2 = op->dst.width;
+
+	if (y2 > op->dst.height)
+		y2 = op->dst.height;
+
+	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
+
+	if (x2 <= x1 || y2 <= y1)
+		return;
+
+	box.x1 = x1; box.y1 = y1;
+	box.x2 = x2; box.y2 = y2;
+	sna_tiling_blt_copy_boxes(sna, GXcopy,
+				  op->src.bo, src_x, src_y,
+				  op->dst.bo, 0, 0,
+				  op->u.blt.bpp,
+				  &box, 1);
+}
+
+fastcall static void
+tiling_blt_box(struct sna *sna,
+	       const struct sna_composite_op *op,
+	       const BoxRec *box)
+{
+	DBG(("%s: box (%d, %d), (%d, %d)\n",
+	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+	sna_tiling_blt_copy_boxes(sna, GXcopy,
+				  op->src.bo, op->u.blt.sx, op->u.blt.sy,
+				  op->dst.bo, op->dst.x, op->dst.y,
+				  op->u.blt.bpp,
+				  box, 1);
+}
+
+static void
+tiling_blt_boxes(struct sna *sna,
+		 const struct sna_composite_op *op,
+		 const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+	sna_tiling_blt_copy_boxes(sna, GXcopy,
+				  op->src.bo, op->u.blt.sx, op->u.blt.sy,
+				  op->dst.bo, op->dst.x, op->dst.y,
+				  op->u.blt.bpp,
+				  box, nbox);
+}
+
+static bool
+sna_tiling_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
+				      struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+				      struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+				      int bpp, int alpha_fixup,
+				      const BoxRec *box, int nbox)
+{
+	RegionRec region, tile, this;
+	struct kgem_bo *bo;
+	int max_size, step;
+	bool ret = false;
+
+	if (wedged(sna) ||
+	    !kgem_bo_can_blt(&sna->kgem, src_bo) ||
+	    !kgem_bo_can_blt(&sna->kgem, dst_bo)) {
+		/* XXX */
+		DBG(("%s: tiling blt fail: src?=%d, dst?=%d\n",
+		     __FUNCTION__,
+		     kgem_bo_can_blt(&sna->kgem, src_bo),
+		     kgem_bo_can_blt(&sna->kgem, dst_bo)));
+		return false;
+	}
+
+	max_size = sna->kgem.aperture_high * PAGE_SIZE;
+	max_size -= MAX(kgem_bo_size(src_bo), kgem_bo_size(dst_bo));
+	if (max_size <= 0) {
+		DBG(("%s: tiles cannot fit into aperture\n", __FUNCTION__));
+		return false;
+	}
+	if (max_size > sna->kgem.max_copy_tile_size)
+		max_size = sna->kgem.max_copy_tile_size;
+
+	pixman_region_init_rects(&region, box, nbox);
+
+	/* Use a small step to accommodate enlargement through tile alignment */
+	step = sna->render.max_3d_size;
+	if (region.extents.x1 & (8*512 / bpp - 1) || region.extents.y1 & 63)
+		step /= 2;
+	while (step * step * 4 > max_size)
+		step /= 2;
+	if (sna->kgem.gen < 033)
+		step /= 2; /* accommodate severe fence restrictions */
+	if (step == 0) {
+		DBG(("%s: tiles cannot fit into aperture\n", __FUNCTION__));
+		return false;
+	}
+
+	DBG(("%s (alu=%d), tile.size=%d, box=%dx[(%d, %d), (%d, %d)])\n",
+	     __FUNCTION__, alu, step, nbox,
+	     region.extents.x1, region.extents.y1,
+	     region.extents.x2, region.extents.y2));
+
+	for (tile.extents.y1 = tile.extents.y2 = region.extents.y1;
+	     tile.extents.y2 < region.extents.y2;
+	     tile.extents.y1 = tile.extents.y2) {
+		int y2 = tile.extents.y1 + step;
+		if (y2 > region.extents.y2)
+			y2 = region.extents.y2;
+		tile.extents.y2 = y2;
+
+		for (tile.extents.x1 = tile.extents.x2 = region.extents.x1;
+		     tile.extents.x2 < region.extents.x2;
+		     tile.extents.x1 = tile.extents.x2) {
+			int w, h;
+			int x2 = tile.extents.x1 + step;
+			if (x2 > region.extents.x2)
+				x2 = region.extents.x2;
+			tile.extents.x2 = x2;
+
+			tile.data = NULL;
+
+			RegionNull(&this);
+			RegionIntersect(&this, &region, &tile);
+			if (RegionNil(&this))
+				continue;
+
+			w = this.extents.x2 - this.extents.x1;
+			h = this.extents.y2 - this.extents.y1;
+			bo = kgem_create_2d(&sna->kgem, w, h, bpp,
+					    kgem_choose_tiling(&sna->kgem,
+							       I915_TILING_X,
+							       w, h, bpp),
+					    CREATE_TEMPORARY);
+			if (bo) {
+				int16_t dx = this.extents.x1;
+				int16_t dy = this.extents.y1;
+
+				assert(bo->pitch <= 8192);
+				assert(bo->tiling != I915_TILING_Y);
+
+				if (!sna_blt_copy_boxes(sna, alu,
+							src_bo, src_dx, src_dy,
+							bo, -dx, -dy,
+							bpp, REGION_RECTS(&this), REGION_NUM_RECTS(&this)))
+					goto err;
+
+				if (!sna_blt_copy_boxes__with_alpha(sna, alu,
+								    bo, -dx, -dy,
+								    dst_bo, dst_dx, dst_dy,
+								    bpp, alpha_fixup,
+								    REGION_RECTS(&this), REGION_NUM_RECTS(&this)))
+					goto err;
+
+				kgem_bo_destroy(&sna->kgem, bo);
+			}
+			RegionUninit(&this);
+		}
+	}
+
+	ret = true;
+	goto done;
+err:
+	kgem_bo_destroy(&sna->kgem, bo);
+	RegionUninit(&this);
+done:
+	pixman_region_fini(&region);
+	return ret;
+}
+
+fastcall static void
+tiling_blt__with_alpha(struct sna *sna,
+		       const struct sna_composite_op *op,
+		       const struct sna_composite_rectangles *r)
+{
+	int x1, x2, y1, y2;
+	int src_x, src_y;
+	BoxRec box;
+
+	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
+	     __FUNCTION__,
+	     r->src.x, r->src.y,
+	     r->dst.x, r->dst.y,
+	     r->width, r->height));
+
+	/* XXX higher layer should have clipped? */
+
+	x1 = r->dst.x + op->dst.x;
+	y1 = r->dst.y + op->dst.y;
+	x2 = x1 + r->width;
+	y2 = y1 + r->height;
+
+	src_x = r->src.x - x1 + op->u.blt.sx;
+	src_y = r->src.y - y1 + op->u.blt.sy;
+
+	/* clip against dst */
+	if (x1 < 0)
+		x1 = 0;
+	if (y1 < 0)
+		y1 = 0;
+
+	if (x2 > op->dst.width)
+		x2 = op->dst.width;
+
+	if (y2 > op->dst.height)
+		y2 = op->dst.height;
+
+	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
+
+	if (x2 <= x1 || y2 <= y1)
+		return;
+
+	box.x1 = x1; box.y1 = y1;
+	box.x2 = x2; box.y2 = y2;
+	sna_tiling_blt_copy_boxes__with_alpha(sna, GXcopy,
+					      op->src.bo, src_x, src_y,
+					      op->dst.bo, 0, 0,
+					      op->u.blt.bpp, op->u.blt.pixel,
+					      &box, 1);
+}
+
+fastcall static void
+tiling_blt_box__with_alpha(struct sna *sna,
+			   const struct sna_composite_op *op,
+			   const BoxRec *box)
+{
+	DBG(("%s: box (%d, %d), (%d, %d)\n",
+	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+	sna_tiling_blt_copy_boxes__with_alpha(sna, GXcopy,
+					      op->src.bo, op->u.blt.sx, op->u.blt.sy,
+					      op->dst.bo, op->dst.x, op->dst.y,
+					      op->u.blt.bpp, op->u.blt.pixel,
+					      box, 1);
+}
+
+static void
+tiling_blt_boxes__with_alpha(struct sna *sna,
+			     const struct sna_composite_op *op,
+			     const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+	sna_tiling_blt_copy_boxes__with_alpha(sna, GXcopy,
+					      op->src.bo, op->u.blt.sx, op->u.blt.sy,
+					      op->dst.bo, op->dst.x, op->dst.y,
+					      op->u.blt.bpp, op->u.blt.pixel,
+					      box, nbox);
+}
+
+static void nop_done(struct sna *sna, const struct sna_composite_op *op)
+{
+	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
+	(void)op;
+}
+
+bool
+sna_tiling_blt_composite(struct sna *sna,
+			 struct sna_composite_op *op,
+			 struct kgem_bo *bo,
+			 int bpp,
+			 uint32_t alpha_fixup)
+{
+	assert(op->op == PictOpSrc);
+	assert(op->dst.bo);
+	assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo));
+	assert(kgem_bo_can_blt(&sna->kgem, bo));
+
+	op->src.bo = bo;
+	op->u.blt.bpp = bpp;
+	op->u.blt.pixel = alpha_fixup;
+
+	if (alpha_fixup) {
+		op->blt   = tiling_blt__with_alpha;
+		op->box   = tiling_blt_box__with_alpha;
+		op->boxes = tiling_blt_boxes__with_alpha;
+	} else {
+		op->blt   = tiling_blt;
+		op->box   = tiling_blt_box;
+		op->boxes = tiling_blt_boxes;
+	}
+	op->done  = nop_done;
+
+	return true;
+}
+
 bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
 			       struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
 			       struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
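
The core of sna_tiling_blt_copy_boxes__with_alpha above is a walk over
the region extents in step x step chunks, staging each chunk through a
temporary X-tiled bo. A stripped-down sketch of just the tile walk,
with the region bookkeeping elided (struct box stands in for BoxRec;
process_tile is a hypothetical callback):

#include <stdint.h>

struct box { int16_t x1, y1, x2, y2; };	/* stand-in for BoxRec */

static void walk_tiles(struct box extents, int step,
		       void (*process_tile)(const struct box *))
{
	struct box tile;

	for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
		tile.y2 = tile.y1 + step;
		if (tile.y2 > extents.y2)	/* clamp the final row */
			tile.y2 = extents.y2;

		for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
			tile.x2 = tile.x1 + step;
			if (tile.x2 > extents.x2)	/* clamp the final column */
				tile.x2 = extents.x2;

			/* the real code intersects the tile with the region,
			 * copies src -> temporary bo, then bo -> dst with
			 * the alpha fixup applied */
			process_tile(&tile);
		}
	}
}
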
commit 7578809ddcb244ad78ebf86359b7ee2a61e27ff6
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Nov 6 13:42:27 2013 +0000

    sna: Trim create flags if tiled sizes are too large
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index ac0f164..bb884c3 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2428,17 +2428,18 @@ static void kgem_finish_buffers(struct kgem *kgem)
 	struct kgem_buffer *bo, *next;
 
 	list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) {
-		DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s\n",
+		DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s, refcnt=%d\n",
 		     __FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL,
-		     bo->write, bo->mmapped == MMAPPED_CPU ? "cpu" : bo->mmapped == MMAPPED_GTT ? "gtt" : "no"));
+		     bo->write, bo->mmapped == MMAPPED_CPU ? "cpu" : bo->mmapped == MMAPPED_GTT ? "gtt" : "no",
+		     bo->base.refcnt));
 
 		assert(next->base.list.prev == &bo->base.list);
 		assert(bo->base.io);
 		assert(bo->base.refcnt >= 1);
 
 		if (bo->base.refcnt > 1 && !bo->base.exec) {
-			DBG(("%s: skipping unattached handle=%d, used=%d\n",
-			     __FUNCTION__, bo->base.handle, bo->used));
+			DBG(("%s: skipping unattached handle=%d, used=%d, refcnt=%d\n",
+			     __FUNCTION__, bo->base.handle, bo->used, bo->base.refcnt));
 			continue;
 		}
 
@@ -3891,8 +3892,19 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
 		DBG(("%s: tiled[%d] size=%d\n", __FUNCTION__, tiling, size));
 		if (size > 0 && size <= kgem->max_gpu_size)
 			flags |= KGEM_CAN_CREATE_GPU;
+		if (size > kgem->max_gpu_size)
+			flags &= ~KGEM_CAN_CREATE_GPU;
+		if (kgem->gen < 033) {
+			int fence_size = 1024 * 1024;
+			while (fence_size < size)
+				fence_size <<= 1;
+			if (fence_size > kgem->max_gpu_size)
+				flags &= ~KGEM_CAN_CREATE_GPU;
+		}
 		if (size > 0 && size <= PAGE_SIZE*kgem->aperture_mappable/4)
 			flags |= KGEM_CAN_CREATE_GTT;
+		if (size > PAGE_SIZE*kgem->aperture_mappable/4)
+			flags &= ~KGEM_CAN_CREATE_GTT;
 		if (size > kgem->large_object_size)
 			flags |= KGEM_CAN_CREATE_LARGE;
 		if (size > kgem->max_object_size) {
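
The hunk above both sets and clears the capability bits: flags may
already carry KGEM_CAN_CREATE_GPU/GTT from the earlier untiled checks,
so an oversized tiled size has to revoke them rather than merely fail
to set them. On pre-gen3 the comparison uses the bo's fence footprint
rather than its raw size, since fenced regions are power-of-two sized
with a 1 MiB minimum. A condensed sketch of that part, reusing the
constants from the diff (illustration only):

/* an object only barely over a power-of-two boundary costs nearly
 * twice its raw size in fence space */
static int gen2_fence_size(int size)
{
	int fence_size = 1024 * 1024;	/* 1 MiB minimum */
	while (fence_size < size)
		fence_size <<= 1;	/* round up to a power of two */
	return fence_size;		/* e.g. 5 MiB -> 8 MiB fence */
}

/* ...so the GPU capability is trimmed when the fence, not the raw
 * size, exceeds the limit:
 *
 *	if (gen2_fence_size(size) > kgem->max_gpu_size)
 *		flags &= ~KGEM_CAN_CREATE_GPU;
 */
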
commit 073465817f54507ab6b7f801c5dfab2c06f678c0
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Nov 6 13:41:39 2013 +0000

    sna: Fences are power-of-two sizes
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 0f9b443..ac0f164 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3918,7 +3918,7 @@ inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo)
 	else
 		size = 1024 * 1024 / PAGE_SIZE;
 	while (size < num_pages(bo))
-		size *= 2;
+		size <<= 1;
 
 	return size;
 }
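
kgem_bo_fenced_size applies the same power-of-two rule per bo (the
switch from size *= 2 to size <<= 1 is cosmetic; the rounding is
unchanged). As a worked example, assuming 4 KiB pages and the 1 MiB
minimum from the else branch shown (fenced_pages is a free-standing
stand-in, not the driver function): a bo of 1280 pages (5 MiB) starts
at 256 pages and doubles to 2048 pages, i.e. an 8 MiB fence.

static int fenced_pages(int num_pages)
{
	int size = 1024 * 1024 / 4096;	/* 1 MiB minimum = 256 pages */
	while (size < num_pages)
		size <<= 1;		/* 256 -> 512 -> 1024 -> 2048 */
	return size;			/* 1280 pages -> 2048 pages */
}
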

