xf86-video-intel: 5 commits - src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna_dri.c src/sna/sna_glyphs.c src/sna/sna.h src/sna/sna_render.c src/sna/sna_render_inline.h src/sna/sna_trapezoids.c

Chris Wilson ickle at kemper.freedesktop.org
Sat Jul 21 04:36:51 PDT 2012


 src/sna/gen2_render.c       |    2 
 src/sna/gen3_render.c       |    2 
 src/sna/gen4_render.c       |    4 
 src/sna/gen5_render.c       |    4 
 src/sna/gen6_render.c       |    4 
 src/sna/gen7_render.c       |    4 
 src/sna/kgem.c              |  441 ++++++++++++++++++++++----------------------
 src/sna/kgem.h              |   17 -
 src/sna/sna.h               |    4 
 src/sna/sna_accel.c         |  255 +++++++++++++++----------
 src/sna/sna_blt.c           |  155 ++++++++++-----
 src/sna/sna_dri.c           |    4 
 src/sna/sna_glyphs.c        |   34 +--
 src/sna/sna_render.c        |    6 
 src/sna/sna_render_inline.h |    3 
 src/sna/sna_trapezoids.c    |  107 +++++-----
 16 files changed, 587 insertions(+), 459 deletions(-)

New commits:
commit 6acc9e6a6e1de2a11597c810e02f793774cef2dd
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Jul 21 12:07:46 2012 +0100

    sna: Fix role reversal of __kgem_bo_size() and kgem_bo_size()!
    
    Reported-by: Jiri Slaby <jirislaby at gmail.com>
    References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
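
    For reference, the intended split after this fix is roughly as follows
    (a sketch reconstructed from the kgem.h hunk below; the tail of
    kgem_bo_size() is cut off in this mail, so the final return is an
    assumption):

        /* raw size in whole pages; not valid for io proxy buffers */
        static inline int __kgem_bo_size(struct kgem_bo *bo)
        {
                assert(!(bo->proxy && bo->io));
                return PAGE_SIZE * bo->size.pages.count;
        }

        /* public accessor: io (upload) buffers report their byte size */
        static inline int kgem_bo_size(struct kgem_bo *bo)
        {
                if (bo->io)
                        return __kgem_buffer_size(bo);
                return __kgem_bo_size(bo); /* assumed non-io path */
        }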

diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 9668976..ff592e0 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -421,13 +421,13 @@ static inline int __kgem_buffer_size(struct kgem_bo *bo)
 	return bo->size.bytes;
 }
 
-static inline int kgem_bo_size(struct kgem_bo *bo)
+static inline int __kgem_bo_size(struct kgem_bo *bo)
 {
 	assert(!(bo->proxy && bo->io));
 	return PAGE_SIZE * bo->size.pages.count;
 }
 
-static inline int __kgem_bo_size(struct kgem_bo *bo)
+static inline int kgem_bo_size(struct kgem_bo *bo)
 {
 	if (bo->io)
 		return __kgem_buffer_size(bo);
commit 286b0e1a48cab85191dfbb112c8dd14aeaa70956
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Jul 20 16:04:37 2012 +0100

    sna: Refresh experimental userptr vmap support
    
    Bring the code up to date with both kernel interface changes and internal
    adjustments following the creation of CPU buffers with set-cacheing.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
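
    A minimal sketch of the refreshed allocation path (condensed from the
    create_snoopable_buffer() vmap branch in the diff below, with the error
    labels and surrounding buffer bookkeeping elided; alloc is the size in
    pages):

        void *ptr;
        uint32_t handle;

        /* userptr pages must be page-aligned for the kernel to map them */
        if (posix_memalign(&ptr, PAGE_SIZE, alloc * PAGE_SIZE))
                return NULL;

        /* wrap the malloc'ed pages in a GEM handle via the vmap ioctl */
        handle = gem_vmap(kgem->fd, ptr, alloc * PAGE_SIZE, false);
        if (handle == 0) {
                free(ptr);
                return NULL;
        }

        __kgem_bo_init(&bo->base, handle, alloc);
        bo->base.vmap = true;
        bo->base.reusable = false;
        bo->base.map = MAKE_VMAP_MAP(ptr);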

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 577fa6c..5af0a9e 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -90,8 +90,8 @@ search_vmap_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 #define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
 #define MAKE_VMAP_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3))
 #define IS_VMAP_MAP(ptr) ((uintptr_t)(ptr) & 2)
+#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3)
 
-#if defined(USE_VMAP)
 #define LOCAL_I915_GEM_VMAP       0x32
 #define LOCAL_IOCTL_I915_GEM_VMAP DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_VMAP, struct local_i915_gem_vmap)
 struct local_i915_gem_vmap {
@@ -101,7 +101,6 @@ struct local_i915_gem_vmap {
 #define I915_VMAP_READ_ONLY 0x1
 	uint32_t handle;
 };
-#endif
 
 #define UNCACHED	0
 #define SNOOPED		1
@@ -196,6 +195,26 @@ static bool gem_set_cacheing(int fd, uint32_t handle, int cacheing)
 	return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHEING, &arg) == 0;
 }
 
+static uint32_t gem_vmap(int fd, void *ptr, int size, int read_only)
+{
+	struct local_i915_gem_vmap vmap;
+
+	VG_CLEAR(vmap);
+	vmap.user_ptr = (uintptr_t)ptr;
+	vmap.user_size = size;
+	vmap.flags = 0;
+	if (read_only)
+		vmap.flags |= I915_VMAP_READ_ONLY;
+
+	if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_VMAP, &vmap)) {
+		DBG(("%s: failed to map %p + %d bytes: %d\n",
+		     __FUNCTION__, ptr, size, errno));
+		return 0;
+	}
+
+	return vmap.handle;
+}
+
 static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
 {
 	if (flags & CREATE_NO_RETIRE) {
@@ -227,6 +246,7 @@ static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
 
 	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
 	     bo->handle, bytes(bo)));
+	assert(bo->proxy == NULL);
 
 retry_gtt:
 	VG_CLEAR(mmap_arg);
@@ -700,6 +720,7 @@ static bool test_has_vmap(struct kgem *kgem)
 {
 #if defined(USE_VMAP)
 	uint32_t handle;
+	void *ptr;
 
 	if (DBG_NO_VMAP)
 		return false;
@@ -808,7 +829,6 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 	list_init(&kgem->active_buffers);
 	list_init(&kgem->requests);
 	list_init(&kgem->flushing);
-	list_init(&kgem->sync_list);
 	list_init(&kgem->large);
 	list_init(&kgem->vmap);
 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
@@ -1187,8 +1207,10 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
 	if (IS_VMAP_MAP(bo->map)) {
 		assert(bo->rq == NULL);
 		assert(MAP(bo->map) != bo || bo->io);
-		if (bo != MAP(bo->map))
+		if (bo != MAP(bo->map)) {
+			DBG(("%s: freeing vmap base\n", __FUNCTION__));
 			free(MAP(bo->map));
+		}
 		bo->map = NULL;
 	}
 	if (bo->map)
@@ -1209,8 +1231,7 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
 inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
 					    struct kgem_bo *bo)
 {
-	DBG(("%s: moving %d from flush to inactive\n",
-	     __FUNCTION__, bo->handle));
+	DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle));
 
 	assert(bo->reusable);
 	assert(bo->rq == NULL);
@@ -1246,6 +1267,8 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
 inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
 						struct kgem_bo *bo)
 {
+	DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle));
+
 	list_del(&bo->list);
 	assert(bo->rq == NULL);
 	if (bo->map) {
@@ -1258,6 +1281,8 @@ inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
 inline static void kgem_bo_remove_from_active(struct kgem *kgem,
 					      struct kgem_bo *bo)
 {
+	DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle));
+
 	list_del(&bo->list);
 	if (bo->rq == &_kgem_static_request)
 		list_del(&bo->request);
@@ -1298,13 +1323,14 @@ static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo)
 static void kgem_bo_move_to_vmap(struct kgem *kgem, struct kgem_bo *bo)
 {
 	if (num_pages(bo) > kgem->max_cpu_size >> 13) {
+		DBG(("%s handle=%d discarding large CPU buffer (%d >%d pages)\n",
+		     __FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13));
 		kgem_bo_free(kgem, bo);
 		return;
 	}
 
 	assert(bo->tiling == I915_TILING_NONE);
 	assert(bo->rq == NULL);
-	assert(!bo->io);
 
 	DBG(("%s: moving %d to vmap\n", __FUNCTION__, bo->handle));
 	list_add(&bo->list, &kgem->vmap);
@@ -1328,6 +1354,7 @@ search_vmap_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
 	list_for_each_entry(bo, &kgem->vmap, list) {
 		assert(bo->refcnt == 0);
 		assert(bo->vmap);
+		assert(bo->proxy == NULL);
 		assert(bo->tiling == I915_TILING_NONE);
 		assert(bo->rq == NULL);
 
@@ -1369,6 +1396,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 	assert(list_is_empty(&bo->list));
 	assert(bo->refcnt == 0);
 	assert(!bo->purged);
+	assert(bo->proxy == NULL);
 
 	bo->binding.offset = 0;
 	kgem_bo_clear_scanout(kgem, bo);
@@ -1376,15 +1404,33 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 	if (DBG_NO_CACHE)
 		goto destroy;
 
+	if (bo->vmap && !bo->flush) {
+		DBG(("%s: handle=%d is vmapped\n", __FUNCTION__, bo->handle));
+		assert(!bo->flush);
+		assert(list_is_empty(&bo->list));
+		if (bo->rq == NULL) {
+			if (bo->needs_flush && kgem_busy(kgem, bo->handle)) {
+				DBG(("%s: handle=%d is vmapped, tracking until free\n",
+				     __FUNCTION__, bo->handle));
+				list_add(&bo->request, &kgem->flushing);
+				bo->rq = &_kgem_static_request;
+			}
+		}
+		if (bo->rq == NULL)
+			kgem_bo_move_to_vmap(kgem, bo);
+		return;
+	}
+
 	if (bo->io) {
 		struct kgem_bo *base;
 
+		assert(!bo->vmap);
 		base = malloc(sizeof(*base));
 		if (base) {
 			DBG(("%s: transferring io handle=%d to bo\n",
 			     __FUNCTION__, bo->handle));
 			/* transfer the handle to a minimum bo */
-			memcpy(base, bo, sizeof (*base));
+			memcpy(base, bo, sizeof(*base));
 			base->reusable = true;
 			base->io = false;
 			list_init(&base->list);
@@ -1395,21 +1441,6 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		}
 	}
 
-	if (bo->vmap) {
-		assert(!bo->flush);
-		DBG(("%s: handle=%d is vmapped, tracking until free\n",
-		     __FUNCTION__, bo->handle));
-		if (bo->rq == NULL) {
-			if (bo->needs_flush && kgem_busy(kgem, bo->handle)) {
-				list_add(&bo->request, &kgem->flushing);
-				bo->rq = &_kgem_static_request;
-			}
-		}
-		if (bo->rq == NULL)
-			kgem_bo_move_to_vmap(kgem, bo);
-		return;
-	}
-
 	if (!bo->reusable) {
 		DBG(("%s: handle=%d, not reusable\n",
 		     __FUNCTION__, bo->handle));
@@ -1808,6 +1839,8 @@ static void kgem_finish_buffers(struct kgem *kgem)
 					  &kgem->active_buffers);
 				continue;
 			}
+			DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n",
+			     __FUNCTION__, bo->used, (int)__MAP_TYPE(bo->base.map)));
 			goto decouple;
 		}
 
@@ -2127,24 +2160,15 @@ void _kgem_submit(struct kgem *kgem)
 			}
 #if !NDEBUG
 			if (ret < 0) {
-				int i;
-
+				ret = errno;
 				ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n",
 				       kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
 				       kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, errno);
 
-				i = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
-				if (i != -1) {
-					ret = write(i, kgem->batch, batch_end*sizeof(uint32_t));
-					close(i);
-					(void)ret;
-				}
-
 				for (i = 0; i < kgem->nexec; i++) {
-					struct kgem_request *rq = kgem->next_request;
 					struct kgem_bo *bo, *found = NULL;
 
-					list_for_each_entry(bo, &rq->buffers, request) {
+					list_for_each_entry(bo, &kgem->next_request->buffers, request) {
 						if (bo->handle == kgem->exec[i].handle) {
 							found = bo;
 							break;
@@ -2169,7 +2193,14 @@ void _kgem_submit(struct kgem *kgem)
 					       kgem->reloc[i].write_domain,
 					       (int)kgem->reloc[i].presumed_offset);
 				}
-				FatalError("SNA: failed to submit batchbuffer\n");
+
+				i = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
+				if (i != -1) {
+					i = write(i, kgem->batch, batch_end*sizeof(uint32_t));
+					(void)i;
+				}
+
+				FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
 			}
 #endif
 
@@ -2442,6 +2473,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
 		list_for_each_entry(bo, cache, vma) {
 			assert(IS_CPU_MAP(bo->map) == for_cpu);
 			assert(bucket(bo) == cache_bucket(num_pages));
+			assert(bo->proxy == NULL);
 
 			if (num_pages > num_pages(bo)) {
 				DBG(("inactive too small: %d < %d\n",
@@ -2481,6 +2513,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
 		assert(bo->refcnt == 0);
 		assert(bo->reusable);
 		assert(!!bo->rq == !!use_active);
+		assert(bo->proxy == NULL);
 
 		if (num_pages > num_pages(bo))
 			continue;
@@ -2547,6 +2580,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
 		DBG(("  %s: found handle=%d (num_pages=%d) in linear %s cache\n",
 		     __FUNCTION__, bo->handle, num_pages(bo),
 		     use_active ? "active" : "inactive"));
+		assert(list_is_empty(&bo->list));
 		assert(use_active || bo->domain != DOMAIN_GPU);
 		assert(!bo->needs_flush || use_active);
 		//assert(use_active || !kgem_busy(kgem, bo->handle));
@@ -2563,9 +2597,10 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
 
 		first->pitch = 0;
 		first->delta = 0;
-		DBG(("  %s: found handle=%d (num_pages=%d) in linear %s cache\n",
+		DBG(("  %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n",
 		     __FUNCTION__, first->handle, num_pages(first),
 		     use_active ? "active" : "inactive"));
+		assert(list_is_empty(&first->list));
 		assert(use_active || first->domain != DOMAIN_GPU);
 		assert(!first->needs_flush || use_active);
 		//assert(use_active || !kgem_busy(kgem, first->handle));
@@ -3677,10 +3712,12 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
 {
 	struct drm_i915_gem_mmap mmap_arg;
 
-	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, bo->handle, bytes(bo)));
+	DBG(("%s(handle=%d, size=%d, mapped? %d)\n",
+	     __FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map)));
 	assert(!bo->purged);
 	assert(list_is_empty(&bo->list));
 	assert(!bo->scanout);
+	assert(bo->proxy == NULL);
 
 	if (IS_CPU_MAP(bo->map))
 		return MAP(bo->map);
@@ -3743,27 +3780,6 @@ uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
 	return flink.name;
 }
 
-#if defined(USE_VMAP)
-static uint32_t gem_vmap(int fd, void *ptr, int size, int read_only)
-{
-	struct local_i915_gem_vmap vmap;
-
-	VG_CLEAR(vmap);
-	vmap.user_ptr = (uintptr_t)ptr;
-	vmap.user_size = size;
-	vmap.flags = 0;
-	if (read_only)
-		vmap.flags |= I915_VMAP_READ_ONLY;
-
-	if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_VMAP, &vmap)) {
-		DBG(("%s: failed to map %p + %d bytes: %d\n",
-		     __FUNCTION__, ptr, size, errno));
-		return 0;
-	}
-
-	return vmap.handle;
-}
-
 struct kgem_bo *kgem_create_map(struct kgem *kgem,
 				void *ptr, uint32_t size,
 				bool read_only)
@@ -3793,18 +3809,6 @@ struct kgem_bo *kgem_create_map(struct kgem *kgem,
 	     __FUNCTION__, ptr, size, NUM_PAGES(size), read_only, handle));
 	return bo;
 }
-#else
-static uint32_t gem_vmap(int fd, void *ptr, int size, int read_only)
-{
-	return 0;
-}
-struct kgem_bo *kgem_create_map(struct kgem *kgem,
-				void *ptr, uint32_t size,
-				bool read_only)
-{
-	return 0;
-}
-#endif
 
 void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
 {
@@ -3852,27 +3856,6 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
 	}
 }
 
-void kgem_bo_set_sync(struct kgem *kgem, struct kgem_bo *bo)
-{
-	assert(bo->vmap);
-	assert(!bo->reusable);
-	assert(list_is_empty(&bo->list));
-	list_add(&bo->list, &kgem->sync_list);
-	bo->flush = true;
-}
-
-void kgem_sync(struct kgem *kgem)
-{
-	struct kgem_bo *bo;
-
-	DBG(("%s\n", __FUNCTION__));
-
-	list_for_each_entry(bo, &kgem->sync_list, list) {
-		kgem_bo_submit(kgem, bo);
-		kgem_bo_sync__cpu(kgem, bo);
-	}
-}
-
 void kgem_clear_dirty(struct kgem *kgem)
 {
 	struct kgem_request *rq = kgem->next_request;
@@ -3914,16 +3897,33 @@ struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
 	return bo;
 }
 
-static struct kgem_buffer *buffer_alloc(int num_pages)
+static struct kgem_buffer *
+buffer_alloc(void)
+{
+	struct kgem_buffer *bo;
+
+	bo = malloc(sizeof(*bo));
+	if (bo == NULL)
+		return NULL;
+
+	bo->mem = NULL;
+	bo->need_io = false;
+	bo->mmapped = true;
+
+	return bo;
+}
+
+static struct kgem_buffer *
+buffer_alloc_with_data(int num_pages)
 {
 	struct kgem_buffer *bo;
 
 	bo = malloc(sizeof(*bo) + 2*UPLOAD_ALIGNMENT + num_pages * PAGE_SIZE);
-	if (bo) {
-		bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), UPLOAD_ALIGNMENT);
-		bo->mmapped = false;
-	}
+	if (bo == NULL)
+		return NULL;
 
+	bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), UPLOAD_ALIGNMENT);
+	bo->mmapped = false;
 	return bo;
 }
 
@@ -3936,6 +3936,28 @@ use_snoopable_buffer(struct kgem *kgem, uint32_t flags)
 	return true;
 }
 
+static void
+init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old)
+{
+	DBG(("%s: reusing handle=%d for buffer\n",
+	     __FUNCTION__, old->handle));
+
+	assert(old->proxy == NULL);
+
+	memcpy(&bo->base, old, sizeof(*old));
+	if (old->rq)
+		list_replace(&old->request, &bo->base.request);
+	else
+		list_init(&bo->base.request);
+	list_replace(&old->vma, &bo->base.vma);
+	list_init(&bo->base.list);
+	free(old);
+
+	assert(bo->base.tiling == I915_TILING_NONE);
+
+	bo->base.refcnt = 1;
+}
+
 static struct kgem_buffer *
 search_snoopable_buffer(struct kgem *kgem, unsigned alloc)
 {
@@ -3944,18 +3966,16 @@ search_snoopable_buffer(struct kgem *kgem, unsigned alloc)
 
 	old = search_vmap_cache(kgem, alloc, 0);
 	if (old) {
-		bo = malloc(sizeof(*bo));
-		if (bo == NULL)
-			return NULL;
+		if (!old->io) {
+			bo = buffer_alloc();
+			if (bo == NULL)
+				return NULL;
 
-		memcpy(&bo->base, old, sizeof(*old));
-		if (old->rq)
-			list_replace(&old->request, &bo->base.request);
-		else
-			list_init(&bo->base.request);
-		list_replace(&old->vma, &bo->base.vma);
-		list_init(&bo->base.list);
-		free(old);
+			init_buffer_from_bo(bo, old);
+		} else {
+			bo = (struct kgem_buffer *)old;
+			bo->base.refcnt = 1;
+		}
 
 		DBG(("%s: created CPU handle=%d for buffer, size %d\n",
 		     __FUNCTION__, bo->base.handle, num_pages(&bo->base)));
@@ -3963,51 +3983,32 @@ search_snoopable_buffer(struct kgem *kgem, unsigned alloc)
 		assert(bo->base.vmap);
 		assert(bo->base.tiling == I915_TILING_NONE);
 		assert(num_pages(&bo->base) >= alloc);
+		assert(bo->mmapped == true);
+		assert(bo->need_io == false);
 
 		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
-		if (bo->mem) {
-			bo->mmapped = true;
-			bo->need_io = false;
-			bo->base.io = true;
-			bo->base.refcnt = 1;
-
-			return bo;
-		} else
+		if (bo->mem == NULL) {
+			bo->base.refcnt = 0;
 			kgem_bo_free(kgem, &bo->base);
+			bo = NULL;
+		}
+
+		return bo;
 	}
 
 	return NULL;
 }
 
-static void
-init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old)
-{
-	DBG(("%s: reusing handle=%d for buffer\n",
-	     __FUNCTION__, old->handle));
-
-	memcpy(&bo->base, old, sizeof(*old));
-	if (old->rq)
-		list_replace(&old->request, &bo->base.request);
-	else
-		list_init(&bo->base.request);
-	list_replace(&old->vma, &bo->base.vma);
-	list_init(&bo->base.list);
-	free(old);
-	bo->base.refcnt = 1;
-
-	assert(bo->base.tiling == I915_TILING_NONE);
-}
-
 static struct kgem_buffer *
 create_snoopable_buffer(struct kgem *kgem, unsigned alloc)
 {
 	struct kgem_buffer *bo;
+	uint32_t handle;
 
 	if (kgem->has_cacheing) {
 		struct kgem_bo *old;
-		uint32_t handle;
 
-		bo = malloc(sizeof(*bo));
+		bo = buffer_alloc();
 		if (bo == NULL)
 			return NULL;
 
@@ -4015,62 +4016,73 @@ create_snoopable_buffer(struct kgem *kgem, unsigned alloc)
 					 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
 		if (old) {
 			init_buffer_from_bo(bo, old);
-			return bo;
-		}
+		} else {
+			handle = gem_create(kgem->fd, alloc);
+			if (handle == 0) {
+				free(bo);
+				return NULL;
+			}
 
-		handle = gem_create(kgem->fd, alloc);
-		if (handle == 0) {
-			free(bo);
-			return NULL;
-		}
+			if (!gem_set_cacheing(kgem->fd, handle, SNOOPED)) {
+				gem_close(kgem->fd, handle);
+				free(bo);
+				return NULL;
+			}
 
-		if (!gem_set_cacheing(kgem->fd, handle, SNOOPED)) {
-			gem_close(kgem->fd, handle);
-			free(bo);
-			return NULL;
+			debug_alloc(kgem, alloc);
+			__kgem_bo_init(&bo->base, handle, alloc);
+			DBG(("%s: created CPU handle=%d for buffer, size %d\n",
+			     __FUNCTION__, bo->base.handle, alloc));
 		}
 
-		debug_alloc(kgem, alloc);
-		__kgem_bo_init(&bo->base, handle, alloc);
-		DBG(("%s: created CPU handle=%d for buffer, size %d\n",
-		     __FUNCTION__, bo->base.handle, alloc));
+		assert(bo->base.refcnt == 1);
+		assert(bo->mmapped == true);
+		assert(bo->need_io == false);
 
 		bo->base.reusable = false;
 		bo->base.vmap = true;
 
 		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
-		if (bo->mem) {
-			bo->mmapped = true;
-			bo->need_io = false;
-			bo->base.io = true;
-			return bo;
-		} else {
+		if (bo->mem == NULL) {
 			bo->base.refcnt = 0; /* for valgrind */
 			kgem_bo_free(kgem, &bo->base);
+			bo = NULL;
 		}
+		return bo;
 	}
 
 	if (kgem->has_vmap) {
-		bo = buffer_alloc(alloc);
-		if (bo) {
-			uint32_t handle = gem_vmap(kgem->fd, bo->mem,
-						   alloc * PAGE_SIZE, false);
-			if (handle == 0 ||
-			    !__kgem_bo_init(&bo->base, handle, alloc)) {
-				free(bo);
-			} else {
-				DBG(("%s: created vmap handle=%d for buffer\n",
-				     __FUNCTION__, bo->base.handle));
+		bo = buffer_alloc();
+		if (bo == NULL)
+			return NULL;
 
-				bo->base.io = true;
-				bo->base.vmap = true;
-				bo->base.map = MAKE_VMAP_MAP(bo);
-				bo->mmapped = true;
-				bo->need_io = false;
+		//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
+		if (posix_memalign(&bo->mem, PAGE_SIZE, alloc *PAGE_SIZE)) {
+			free(bo);
+			return NULL;
+		}
 
-				return bo;
-			}
+		handle = gem_vmap(kgem->fd, bo->mem, alloc * PAGE_SIZE, false);
+		if (handle == 0) {
+			free(bo->mem);
+			free(bo);
+			return NULL;
 		}
+
+		debug_alloc(kgem, alloc);
+		__kgem_bo_init(&bo->base, handle, alloc);
+		DBG(("%s: created vmap handle=%d for buffer\n",
+		     __FUNCTION__, bo->base.handle));
+
+		assert(bo->mmapped == true);
+		assert(bo->need_io == false);
+
+		bo->base.refcnt = 1;
+		bo->base.vmap = true;
+		bo->base.reusable = false;
+		bo->base.map = MAKE_VMAP_MAP(bo->mem);
+
+		return bo;
 	}
 
 	return NULL;
@@ -4178,7 +4190,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 		alloc = PAGE_ALIGN(size);
 	alloc /= PAGE_SIZE;
 	if (kgem->has_llc) {
-		bo = malloc(sizeof(*bo));
+		bo = buffer_alloc();
 		if (bo == NULL)
 			return NULL;
 
@@ -4190,35 +4202,36 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 		if (old == NULL)
 			old = search_linear_cache(kgem, NUM_PAGES(size), CREATE_INACTIVE | CREATE_CPU_MAP);
 		if (old) {
+			DBG(("%s: found LLC handle=%d for buffer\n",
+			     __FUNCTION__, old->handle));
+
 			init_buffer_from_bo(bo, old);
 		} else {
 			uint32_t handle = gem_create(kgem->fd, alloc);
-			if (handle == 0 ||
-			    !__kgem_bo_init(&bo->base, handle, alloc)) {
+			if (handle == 0) {
 				free(bo);
 				return NULL;
 			}
-			DBG(("%s: created handle=%d for buffer\n",
+			__kgem_bo_init(&bo->base, handle, alloc);
+			DBG(("%s: created LLC handle=%d for buffer\n",
 			     __FUNCTION__, bo->base.handle));
 
 			debug_alloc(kgem, alloc);
 		}
 
+		assert(bo->mmapped);
+		assert(!bo->need_io);
+
 		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
 		if (bo->mem) {
 			if (flags & KGEM_BUFFER_WRITE)
 				kgem_bo_sync__cpu(kgem, &bo->base);
 
-			bo->need_io = false;
-			bo->base.io = true;
-			bo->mmapped = true;
-
 			alloc = num_pages(&bo->base);
 			goto init;
 		} else {
 			bo->base.refcnt = 0; /* for valgrind */
 			kgem_bo_free(kgem, &bo->base);
-			bo = NULL;
 		}
 	}
 
@@ -4271,25 +4284,23 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 			DBG(("%s: reusing handle=%d for buffer\n",
 			     __FUNCTION__, old->handle));
 
-			bo = malloc(sizeof(*bo));
+			bo = buffer_alloc();
 			if (bo == NULL)
 				return NULL;
 
 			init_buffer_from_bo(bo, old);
 			assert(num_pages(&bo->base) >= NUM_PAGES(size));
 
+			assert(bo->mmapped);
+			assert(bo->base.refcnt == 1);
+
 			bo->mem = kgem_bo_map(kgem, &bo->base);
 			if (bo->mem) {
-				bo->need_io = false;
-				bo->base.io = true;
-				bo->mmapped = true;
-
 				alloc = num_pages(&bo->base);
 				goto init;
 			} else {
 				bo->base.refcnt = 0;
 				kgem_bo_free(kgem, &bo->base);
-				bo = NULL;
 			}
 		}
 	}
@@ -4301,6 +4312,8 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 	if (use_snoopable_buffer(kgem, flags)) {
 		bo = search_snoopable_buffer(kgem, alloc);
 		if (bo) {
+			if (flags & KGEM_BUFFER_WRITE)
+				kgem_bo_sync__cpu(kgem, &bo->base);
 			flags &= ~KGEM_BUFFER_INPLACE;
 			alloc = num_pages(&bo->base);
 			goto init;
@@ -4326,13 +4339,12 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 		DBG(("%s: reusing ordinary handle %d for io\n",
 		     __FUNCTION__, old->handle));
 		alloc = num_pages(old);
-		bo = buffer_alloc(alloc);
+		bo = buffer_alloc_with_data(alloc);
 		if (bo == NULL)
 			return NULL;
 
 		init_buffer_from_bo(bo, old);
 		bo->need_io = flags & KGEM_BUFFER_WRITE;
-		bo->base.io = true;
 	} else {
 		if (use_snoopable_buffer(kgem, flags)) {
 			bo = create_snoopable_buffer(kgem, alloc);
@@ -4340,7 +4352,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 				goto init;
 		}
 
-		bo = malloc(sizeof(*bo));
+		bo = buffer_alloc();
 		if (bo == NULL)
 			return NULL;
 
@@ -4349,59 +4361,59 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 		if (old) {
 			DBG(("%s: reusing cpu map handle=%d for buffer\n",
 			     __FUNCTION__, old->handle));
+
 			alloc = num_pages(old);
 			init_buffer_from_bo(bo, old);
 		} else {
 			uint32_t handle = gem_create(kgem->fd, alloc);
-			if (handle == 0 ||
-			    !__kgem_bo_init(&bo->base, handle, alloc)) {
+			if (handle == 0) {
 				free(bo);
 				return NULL;
 			}
+
 			DBG(("%s: created handle=%d for buffer\n",
 			     __FUNCTION__, bo->base.handle));
 
+			__kgem_bo_init(&bo->base, handle, alloc);
 			debug_alloc(kgem, alloc * PAGE_SIZE);
 		}
 
+		assert(bo->mmapped);
+		assert(!bo->need_io);
+		assert(bo->base.refcnt == 1);
+
 		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
 		if (bo->mem != NULL) {
 			if (flags & KGEM_BUFFER_WRITE)
 				kgem_bo_sync__cpu(kgem, &bo->base);
-
-			bo->need_io = false;
-			bo->base.io = true;
-			bo->mmapped = true;
 			goto init;
 		}
 
 		DBG(("%s: failing back to new pwrite buffer\n", __FUNCTION__));
 		old = &bo->base;
-		bo = buffer_alloc(alloc);
+		bo = buffer_alloc_with_data(alloc);
 		if (bo == NULL) {
 			free(old);
 			return NULL;
 		}
 
-		memcpy(&bo->base, old, sizeof(*old));
-		free(old);
+		init_buffer_from_bo(bo, old);
 
 		assert(bo->mem);
 		assert(!bo->mmapped);
+		assert(bo->base.refcnt == 1);
 
-		list_init(&bo->base.request);
-		list_init(&bo->base.vma);
-		list_init(&bo->base.list);
-		bo->base.refcnt = 1;
 		bo->need_io = flags & KGEM_BUFFER_WRITE;
-		bo->base.io = true;
 	}
 init:
+	bo->base.io = true;
 	bo->base.reusable = false;
+	assert(bo->base.refcnt == 1);
 	assert(num_pages(&bo->base) == alloc);
-	assert(bo->base.io);
 	assert(!bo->need_io || !bo->base.needs_flush);
 	assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
+	assert(bo->mem);
+	assert(!bo->mmapped || bo->base.map != NULL);
 
 	bo->used = size;
 	bo->write = flags & KGEM_BUFFER_WRITE_INPLACE;
@@ -4410,8 +4422,8 @@ init:
 	assert(list_is_empty(&bo->base.list));
 	list_add(&bo->base.list, &kgem->batch_buffers);
 
-	DBG(("%s(pages=%d) new handle=%d\n",
-	     __FUNCTION__, alloc, bo->base.handle));
+	DBG(("%s(pages=%d) new handle=%d, used=%d, write=%d\n",
+	     __FUNCTION__, alloc, bo->base.handle, bo->used, bo->write));
 
 done:
 	bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT);
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 165e7b9..9668976 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -127,7 +127,6 @@ struct kgem {
 	struct list vmap;
 	struct list batch_buffers, active_buffers;
 	struct list requests;
-	struct list sync_list;
 	struct kgem_request *next_request;
 
 	struct {
@@ -407,7 +406,6 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo);
 void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo);
 void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
 void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo);
-void kgem_bo_set_sync(struct kgem *kgem, struct kgem_bo *bo);
 uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
 
 bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
@@ -547,8 +545,6 @@ static inline void kgem_bo_mark_dirty(struct kgem *kgem, struct kgem_bo *bo)
 	list_move(&bo->request, &kgem->next_request->buffers);
 }
 
-void kgem_sync(struct kgem *kgem);
-
 #define KGEM_BUFFER_WRITE	0x1
 #define KGEM_BUFFER_INPLACE	0x2
 #define KGEM_BUFFER_LAST	0x4
diff --git a/src/sna/sna.h b/src/sna/sna.h
index f274de9..91db995 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -121,6 +121,7 @@ struct sna_pixmap {
 	uint16_t source_count;
 	uint8_t pinned :1;
 	uint8_t mapped :1;
+	uint8_t shm :1;
 	uint8_t clear :1;
 	uint8_t undamaged :1;
 	uint8_t create :3;
@@ -199,7 +200,7 @@ struct sna {
 
 	int vblank_interval;
 
-	struct list dirty_pixmaps;
+	struct list flush_pixmaps;
 	struct list active_pixmaps;
 	struct list inactive_clock[2];
 
@@ -415,6 +416,7 @@ PixmapPtr sna_pixmap_create_upload(ScreenPtr screen,
 				   unsigned flags);
 PixmapPtr sna_pixmap_create_unattached(ScreenPtr screen,
 				       int width, int height, int depth);
+void sna_pixmap_destroy(PixmapPtr pixmap);
 
 #define MOVE_WRITE 0x1
 #define MOVE_READ 0x2
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 5466f38..dee8c02 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -60,7 +60,7 @@
 #define USE_INPLACE 1
 #define USE_WIDE_SPANS 0 /* -1 force CPU, 1 force GPU */
 #define USE_ZERO_SPANS 1 /* -1 force CPU, 1 force GPU */
-#define USE_SHM_VMAP 0
+#define USE_SHM_VMAP 1
 
 #define MIGRATE_ALL 0
 #define DBG_NO_CPU_UPLOAD 0
@@ -387,6 +387,7 @@ static void sna_pixmap_free_gpu(struct sna *sna, struct sna_pixmap *priv)
 	}
 
 	if (priv->mapped) {
+		assert(!priv->shm);
 		priv->pixmap->devPrivate.ptr = NULL;
 		priv->mapped = false;
 	}
@@ -404,17 +405,13 @@ sna_pixmap_alloc_cpu(struct sna *sna,
 		     bool from_gpu)
 {
 	/* Restore after a GTT mapping? */
+	assert(!priv->shm);
 	if (priv->ptr)
 		goto done;
 
 	DBG(("%s: pixmap=%ld\n", __FUNCTION__, pixmap->drawable.serialNumber));
 	assert(priv->stride);
 
-#ifdef DEBUG_MEMORY
-	sna->debug_memory.shadow_pixels_allocs++;
-	sna->debug_memory.shadow_pixels_bytes += priv->stride * pixmap->drawable.height;
-#endif
-
 	if (priv->create & KGEM_CAN_CREATE_CPU) {
 		DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__,
 		     pixmap->drawable.width, pixmap->drawable.height));
@@ -453,14 +450,9 @@ done:
 
 static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
 {
-	assert(priv->stride);
 	assert(priv->cpu_damage == NULL);
 	assert(list_is_empty(&priv->list));
 
-#ifdef DEBUG_MEMORY
-	sna->debug_memory.shadow_pixels_allocs--;
-	sna->debug_memory.shadow_pixels_bytes -= priv->stride * priv->pixmap->drawable.height;
-#endif
 	if (priv->cpu_bo) {
 		DBG(("%s: discarding CPU buffer, handle=%d, size=%d\n",
 		     __FUNCTION__, priv->cpu_bo->handle, kgem_bo_size(priv->cpu_bo)));
@@ -482,39 +474,6 @@ static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
 		priv->pixmap->devPrivate.ptr = NULL;
 }
 
-static bool sna_destroy_private(PixmapPtr pixmap)
-{
-	struct sna *sna = to_sna_from_pixmap(pixmap);
-	struct sna_pixmap *priv = sna_pixmap(pixmap);
-
-	if (priv == NULL)
-		return true;
-
-	list_del(&priv->list);
-	list_del(&priv->inactive);
-
-	assert_pixmap_damage(pixmap);
-
-	sna_damage_destroy(&priv->gpu_damage);
-	sna_damage_destroy(&priv->cpu_damage);
-
-	/* Always release the gpu bo back to the lower levels of caching */
-	if (priv->gpu_bo)
-		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
-
-	if (priv->ptr)
-		sna_pixmap_free_cpu(sna, priv);
-
-	if (!sna->freed_pixmap && priv->header) {
-		sna->freed_pixmap = pixmap;
-		assert(priv->ptr == NULL);
-		return false;
-	}
-
-	free(priv);
-	return true;
-}
-
 static inline uint32_t default_tiling(PixmapPtr pixmap,
 				      uint32_t tiling)
 {
@@ -619,6 +578,7 @@ struct kgem_bo *sna_pixmap_change_tiling(PixmapPtr pixmap, uint32_t tiling)
 	kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
 
 	if (priv->mapped) {
+		assert(!priv->shm);
 		pixmap->devPrivate.ptr = NULL;
 		priv->mapped = false;
 	}
@@ -781,12 +741,25 @@ sna_pixmap_create_shm(ScreenPtr screen,
 	struct sna_pixmap *priv;
 	PixmapPtr pixmap;
 
-	DBG(("%s(%d, %d, %d)\n", __FUNCTION__,
-	     width, height, depth));
+	DBG(("%s(%d, %d, %d)\n", __FUNCTION__, width, height, depth));
+
+	if (wedged(sna)) {
+		pixmap = sna_pixmap_create_unattached(screen, 0, 0, depth);
+		if (pixmap == NULL)
+			return NULL;
+
+		if (!screen->ModifyPixmapHeader(pixmap, width, height, depth,
+						bpp, pitch, addr)) {
+			screen->DestroyPixmap(pixmap);
+			return NULL;
+		}
+
+		return pixmap;
+	}
 
 	if (sna->freed_pixmap) {
 		pixmap = sna->freed_pixmap;
-		sna->freed_pixmap = NULL;
+		sna->freed_pixmap = pixmap->devPrivate.ptr;
 
 		pixmap->usage_hint = -1;
 		pixmap->refcnt = 1;
@@ -828,10 +801,11 @@ sna_pixmap_create_shm(ScreenPtr screen,
 		return GetScratchPixmapHeader(screen, width, height, depth,
 					      bpp, pitch, addr);
 	}
-	kgem_bo_set_sync(&sna->kgem, priv->cpu_bo);
-	sna_accel_watch_flush(sna, 1);
+	priv->cpu_bo->flush = true;
 	priv->cpu_bo->pitch = pitch;
+	sna_accel_watch_flush(sna, 1);
 
+	priv->shm = true;
 	priv->header = true;
 	sna_damage_all(&priv->cpu_damage, width, height);
 
@@ -876,7 +850,7 @@ sna_pixmap_create_scratch(ScreenPtr screen,
 	/* you promise never to access this via the cpu... */
 	if (sna->freed_pixmap) {
 		pixmap = sna->freed_pixmap;
-		sna->freed_pixmap = NULL;
+		sna->freed_pixmap = pixmap->devPrivate.ptr;
 
 		pixmap->usage_hint = CREATE_PIXMAP_USAGE_SCRATCH;
 		pixmap->refcnt = 1;
@@ -1031,16 +1005,53 @@ fallback:
 
 static Bool sna_destroy_pixmap(PixmapPtr pixmap)
 {
+	struct sna *sna;
+	struct sna_pixmap *priv;
+
 	if (--pixmap->refcnt)
 		return TRUE;
 
-	if (!sna_destroy_private(pixmap))
+	priv = sna_pixmap(pixmap);
+	if (priv == NULL) {
+		FreePixmap(pixmap);
 		return TRUE;
+	}
+
+	assert_pixmap_damage(pixmap);
+
+	list_del(&priv->list);
+	list_del(&priv->inactive);
+
+	sna_damage_destroy(&priv->gpu_damage);
+	sna_damage_destroy(&priv->cpu_damage);
+
+	sna = to_sna_from_pixmap(pixmap);
+
+	/* Always release the gpu bo back to the lower levels of caching */
+	if (priv->gpu_bo)
+		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
+
+	sna_pixmap_free_cpu(sna, priv);
+
+	if (priv->header) {
+		pixmap->devPrivate.ptr = sna->freed_pixmap;
+		sna->freed_pixmap = pixmap;
+	} else {
+		free(priv);
+		FreePixmap(pixmap);
+	}
 
-	FreePixmap(pixmap);
 	return TRUE;
 }
 
+void sna_pixmap_destroy(PixmapPtr pixmap)
+{
+	assert(pixmap->refcnt == 1);
+	assert(sna_pixmap(pixmap) == NULL || sna_pixmap(pixmap)->header == true);
+
+	sna_destroy_pixmap(pixmap);
+}
+
 static inline bool pixmap_inplace(struct sna *sna,
 				  PixmapPtr pixmap,
 				  struct sna_pixmap *priv)
@@ -1121,6 +1132,12 @@ static inline bool operate_inplace(struct sna_pixmap *priv, unsigned flags)
 	return priv->stride != 0;
 }
 
+static inline void add_flush_pixmap(struct sna *sna, struct sna_pixmap *priv)
+{
+	list_move(&priv->list, &sna->flush_pixmaps);
+	sna->kgem.flush |= 1;
+}
+
 bool
 _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 {
@@ -1153,6 +1170,7 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 
 		if (priv->create & KGEM_CAN_CREATE_GPU &&
 		    pixmap_inplace(sna, pixmap, priv)) {
+			assert(!priv->shm);
 			DBG(("%s: write inplace\n", __FUNCTION__));
 			if (priv->gpu_bo) {
 				if (kgem_bo_is_busy(priv->gpu_bo) &&
@@ -1190,6 +1208,7 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 			priv->cpu = false;
 			list_del(&priv->list);
 			if (priv->cpu_bo) {
+				assert(priv->shm);
 				assert(!priv->cpu_bo->flush);
 				sna_pixmap_free_cpu(sna, priv);
 			}
@@ -1201,6 +1220,7 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 skip_inplace_map:
 		sna_damage_destroy(&priv->gpu_damage);
 		if (priv->cpu_bo && !priv->cpu_bo->flush && kgem_bo_is_busy(priv->cpu_bo)) {
+			assert(!priv->shm);
 			if (priv->cpu_bo->exec == NULL)
 				kgem_retire(&sna->kgem);
 
@@ -1257,13 +1277,16 @@ skip_inplace_map:
 	}
 
 	if (priv->mapped) {
+		assert(!priv->shm);
 		pixmap->devPrivate.ptr = NULL;
 		priv->mapped = false;
 	}
 
 	if (priv->clear) {
-		if (priv->cpu_bo && !priv->cpu_bo->flush && kgem_bo_is_busy(priv->cpu_bo))
+		if (priv->cpu_bo && !priv->cpu_bo->flush && kgem_bo_is_busy(priv->cpu_bo)) {
+			assert(!priv->shm);
 			sna_pixmap_free_cpu(sna, priv);
+		}
 		sna_damage_destroy(&priv->gpu_damage);
 	}
 
@@ -1334,8 +1357,8 @@ skip_inplace_map:
 		priv->undamaged = false;
 
 		if (priv->flush) {
-			list_move(&priv->list, &sna->dirty_pixmaps);
-			sna->kgem.flush |= 1;
+			assert(!priv->shm);
+			add_flush_pixmap(sna, priv);
 		}
 	}
 
@@ -1622,6 +1645,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 					return false;
 				}
 
+				assert(!priv->shm);
 				sna_pixmap_free_cpu(sna, priv);
 			}
 		}
@@ -1706,6 +1730,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 	}
 
 	if (priv->mapped) {
+		assert(!priv->shm);
 		pixmap->devPrivate.ptr = NULL;
 		priv->mapped = false;
 	}
@@ -1974,8 +1999,8 @@ done:
 			priv->undamaged = false;
 		}
 		if (priv->flush) {
-			list_move(&priv->list, &sna->dirty_pixmaps);
-			sna->kgem.flush |= 1;
+			assert(!priv->shm);
+			add_flush_pixmap(sna, priv);
 		}
 	}
 
@@ -2167,6 +2192,7 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
 	assert(priv->gpu_bo->proxy == NULL);
 
 	if (priv->mapped) {
+		assert(!priv->shm);
 		pixmap->devPrivate.ptr = NULL;
 		priv->mapped = false;
 	}
@@ -2284,6 +2310,11 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
 		RegionUninit(&i);
 	}
 
+	if (priv->shm) {
+		assert(!priv->flush);
+		add_flush_pixmap(sna, priv);
+	}
+
 done:
 	if (flags & MOVE_WRITE) {
 		priv->clear = false;
@@ -2338,6 +2369,8 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box,
 
 	if (priv->flush)
 		flags |= PREFER_GPU;
+	if (priv->shm)
+		flags = 0;
 	if (priv->cpu && (flags & (IGNORE_CPU | FORCE_GPU)) == 0)
 		flags = 0;
 
@@ -2517,6 +2550,11 @@ use_cpu_bo:
 	else
 		*damage = &priv->cpu_damage;
 
+	if (priv->shm) {
+		assert(!priv->flush);
+		add_flush_pixmap(to_sna_from_pixmap(pixmap), priv);
+	}
+
 	DBG(("%s: using CPU bo with damage? %d\n",
 	     __FUNCTION__, *damage != NULL));
 	return priv->cpu_bo;
@@ -2540,7 +2578,7 @@ sna_pixmap_create_upload(ScreenPtr screen,
 
 	if (sna->freed_pixmap) {
 		pixmap = sna->freed_pixmap;
-		sna->freed_pixmap = NULL;
+		sna->freed_pixmap = pixmap->devPrivate.ptr;
 
 		pixmap->drawable.serialNumber = NEXT_SERIAL_NUMBER;
 		pixmap->refcnt = 1;
@@ -2751,12 +2789,16 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
 	priv->cpu_damage = NULL;
 	priv->undamaged = true;
 
+	if (priv->shm) {
+		assert(!priv->flush);
+		add_flush_pixmap(sna, priv);
+	}
+
 	/* For large bo, try to keep only a single copy around */
 	if (priv->create & KGEM_CAN_CREATE_LARGE)
 		sna_damage_all(&priv->gpu_damage,
 			       pixmap->drawable.width,
 			       pixmap->drawable.height);
-
 done:
 	list_del(&priv->list);
 
@@ -2767,6 +2809,7 @@ done:
 		priv->undamaged = false;
 		if (priv->ptr) {
 			assert(priv->cpu_bo == NULL || !priv->cpu_bo->flush);
+			assert(!priv->shm);
 			sna_pixmap_free_cpu(sna, priv);
 		}
 	}
@@ -3035,6 +3078,11 @@ static bool upload_inplace(struct sna *sna,
 			   struct sna_pixmap *priv,
 			   RegionRec *region)
 {
+	if (priv->shm) {
+		DBG(("%s: no, SHM Pixmap\n", __FUNCTION__));
+		return false;
+	}
+
 	if (priv->create & KGEM_CAN_CREATE_LARGE) {
 		if (priv->gpu_bo) {
 			DBG(("%s: yes, large buffer and already have GPU bo\n",
@@ -3226,6 +3274,7 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 					priv->undamaged = false;
 				}
 				assert(!priv->cpu_bo->flush);
+				assert(!priv->shm);
 				sna_pixmap_free_cpu(sna, priv);
 			}
 		}
@@ -3235,6 +3284,7 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	}
 
 	if (priv->mapped) {
+		assert(!priv->shm);
 		pixmap->devPrivate.ptr = NULL;
 		priv->mapped = false;
 	}
@@ -3296,8 +3346,8 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 			}
 		}
 		if (priv->flush) {
-			list_move(&priv->list, &sna->dirty_pixmaps);
-			sna->kgem.flush |= 1;
+			assert(!priv->shm);
+			add_flush_pixmap(sna, priv);
 		}
 	}
 	priv->cpu = true;
@@ -4150,6 +4200,11 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 				goto fallback;
 			}
 
+			if (src_priv->shm) {
+				assert(!src_priv->flush);
+				add_flush_pixmap(sna, src_priv);
+			}
+
 			if (damage)
 				sna_damage_add(damage, region);
 			return;
@@ -4219,6 +4274,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 			if (src_priv) {
 				/* Fixup the shadow pointer as necessary */
 				if (src_priv->mapped) {
+					assert(!src_priv->shm);
 					src_pixmap->devPrivate.ptr = NULL;
 					src_priv->mapped = false;
 				}
@@ -12433,6 +12489,7 @@ sna_accel_flush_callback(CallbackListPtr *list,
 			 pointer user_data, pointer call_data)
 {
 	struct sna *sna = user_data;
+	struct sna_pixmap *priv;
 
 	/* XXX we should be able to reduce the frequency of flushes further
 	 * by checking for outgoing damage events or sync replies. Tricky,
@@ -12442,32 +12499,27 @@ sna_accel_flush_callback(CallbackListPtr *list,
 		return;
 
 	/* flush any pending damage from shadow copies to tfp clients */
-	if (!list_is_empty(&sna->dirty_pixmaps)) {
-		struct list preserve;
+	while (!list_is_empty(&sna->flush_pixmaps)) {
+		bool ret;
 
-		list_init(&preserve);
+		priv = list_first_entry(&sna->flush_pixmaps,
+					struct sna_pixmap, list);
 
-		do {
-			struct sna_pixmap *priv;
-
-			priv = list_first_entry(&sna->dirty_pixmaps,
-						struct sna_pixmap, list);
-			if (!sna_pixmap_move_to_gpu(priv->pixmap, MOVE_READ))
-				list_move(&priv->list, &preserve);
-
-		} while (!list_is_empty(&sna->dirty_pixmaps));
-
-		if (!list_is_empty(&preserve)) {
-			sna->dirty_pixmaps.next = preserve.next;
-			preserve.next->prev = &sna->dirty_pixmaps;
-			preserve.prev->next = &sna->dirty_pixmaps;
-			sna->dirty_pixmaps.prev = preserve.prev;
+		list_del(&priv->list);
+		if (priv->shm) {
+			DBG(("%s: syncing SHM pixmap=%ld\n", __FUNCTION__,
+			     priv->pixmap->drawable.serialNumber));
+			ret = sna_pixmap_move_to_cpu(priv->pixmap,
+						     MOVE_READ | MOVE_WRITE);
+		} else {
+			DBG(("%s: flushing DRI pixmap=%ld\n", __FUNCTION__,
+			     priv->pixmap->drawable.serialNumber));
+			ret = sna_pixmap_move_to_gpu(priv->pixmap, MOVE_READ);
 		}
+		(void)ret;
 	}
 
 	kgem_submit(&sna->kgem);
-	kgem_sync(&sna->kgem);
-
 	sna->kgem.flush = false;
 }
 
@@ -12778,6 +12830,7 @@ static void sna_accel_inactive(struct sna *sna)
 			list_del(&priv->list);
 
 			assert(priv->cpu_bo == NULL || !priv->cpu_bo->flush);
+			assert(!priv->shm);
 			sna_pixmap_free_cpu(sna, priv);
 			priv->undamaged = false;
 			priv->cpu = false;
@@ -12828,14 +12881,12 @@ static bool sna_accel_do_debug_memory(struct sna *sna)
 
 static void sna_accel_debug_memory(struct sna *sna)
 {
-	ErrorF("Allocated shadow pixels: %d, %ld bytes, as CPU bo: %d, %ld bytes\n",
-	       sna->debug_memory.shadow_pixels_allocs,
-	       (long)sna->debug_memory.shadow_pixels_bytes,
-	       sna->debug_memory.cpu_bo_allocs,
-	       (long)sna->debug_memory.cpu_bo_bytes);
 	ErrorF("Allocated bo: %d, %ld bytes\n",
 	       sna->kgem.debug_memory.bo_allocs,
 	       (long)sna->kgem.debug_memory.bo_bytes);
+	ErrorF("Allocated CPU bo: %d, %ld bytes\n",
+	       sna->debug_memory.cpu_bo_allocs,
+	       (long)sna->debug_memory.cpu_bo_bytes);
 }
 
 #else
@@ -12951,7 +13002,7 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna)
 
 	sna_font_key = AllocateFontPrivateIndex();
 
-	list_init(&sna->dirty_pixmaps);
+	list_init(&sna->flush_pixmaps);
 	list_init(&sna->active_pixmaps);
 	list_init(&sna->inactive_clock[0]);
 	list_init(&sna->inactive_clock[1]);
@@ -13086,11 +13137,12 @@ void sna_accel_close(struct sna *sna)
 	sna_gradients_close(sna);
 	sna_glyphs_close(sna);
 
-	if (sna->freed_pixmap) {
-		assert(sna->freed_pixmap->refcnt == 0);
-		free(sna_pixmap(sna->freed_pixmap));
-		FreePixmap(sna->freed_pixmap);
-		sna->freed_pixmap = NULL;
+	while (sna->freed_pixmap) {
+		PixmapPtr pixmap = sna->freed_pixmap;
+		sna->freed_pixmap = pixmap->devPrivate.ptr;
+		assert(pixmap->refcnt == 0);
+		free(sna_pixmap(pixmap));
+		FreePixmap(pixmap);
 	}
 
 	DeleteCallback(&FlushCallback, sna_accel_flush_callback, sna);
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index d7d095a..4263bf7 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -2134,8 +2134,8 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 
 				assert(box->x1 + src_dx >= 0);
 				assert(box->y1 + src_dy >= 0);
-				assert(box->x1 + src_dx <= MAX_SHORT);
-				assert(box->y1 + src_dy <= MAX_SHORT);
+				assert(box->x1 + src_dx <= INT16_MAX);
+				assert(box->y1 + src_dy <= INT16_MAX);
 
 				assert(box->x1 >= 0);
 				assert(box->y1 >= 0);
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index da2f358..cfb9d98 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -155,6 +155,10 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna,
 	struct sna_pixmap *priv;
 	int tiling;
 
+	priv = sna_pixmap(pixmap);
+	if (priv == NULL || priv->shm)
+		return NULL;
+
 	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
 	if (priv == NULL)
 		return NULL;
diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c
index 2822368..46fbf8d 100644
--- a/src/sna/sna_glyphs.c
+++ b/src/sna/sna_glyphs.c
@@ -863,7 +863,6 @@ glyphs_via_mask(struct sna *sna,
 		     __FUNCTION__, (unsigned long)format->format,
 		     format->depth, (uint32_t)width*height*format->depth));
 
-upload:
 		pixmap = sna_pixmap_create_upload(screen,
 						  width, height,
 						  format->depth,
@@ -876,10 +875,8 @@ upload:
 						 width, height,
 						 pixmap->devPrivate.ptr,
 						 pixmap->devKind);
-		if (mask_image == NULL) {
-			screen->DestroyPixmap(pixmap);
-			return false;
-		}
+		if (mask_image == NULL)
+			goto err_pixmap;
 
 		memset(pixmap->devPrivate.ptr, 0, pixmap->devKind*height);
 #if HAS_PIXMAN_GLYPHS
@@ -897,10 +894,8 @@ upload:
 				count += list[n].len;
 			if (count > N_STACK_GLYPHS) {
 				pglyphs = malloc (count * sizeof(pixman_glyph_t));
-				if (pglyphs == NULL) {
-					screen->DestroyPixmap(pixmap);
-					return false;
-				}
+				if (pglyphs == NULL)
+					goto err_pixmap;
 			}
 
 			count = 0;
@@ -1021,9 +1016,8 @@ next_image:
 		mask = CreatePicture(0, &pixmap->drawable,
 				     format, CPComponentAlpha,
 				     &component_alpha, serverClient, &error);
-		screen->DestroyPixmap(pixmap);
 		if (!mask)
-			return false;
+			goto err_pixmap;
 
 		ValidatePicture(mask);
 	} else {
@@ -1036,15 +1030,12 @@ next_image:
 		mask = CreatePicture(0, &pixmap->drawable,
 				     format, CPComponentAlpha,
 				     &component_alpha, serverClient, &error);
-		screen->DestroyPixmap(pixmap);
 		if (!mask)
-			return false;
+			goto err_pixmap;
 
 		ValidatePicture(mask);
-		if (!clear_pixmap(sna, pixmap)) {
-			FreePicture(mask, 0);
-			goto upload;
-		}
+		if (!clear_pixmap(sna, pixmap))
+			goto err_mask;
 
 		memset(&tmp, 0, sizeof(tmp));
 		glyph_atlas = NULL;
@@ -1106,8 +1097,7 @@ next_image:
 					if (!ok) {
 						DBG(("%s: fallback -- can not handle PictOpAdd of glyph onto mask!\n",
 						     __FUNCTION__));
-						FreePicture(mask, 0);
-						return false;
+						goto err_mask;
 					}
 
 					glyph_atlas = this_atlas;
@@ -1143,9 +1133,11 @@ next_glyph:
 		      0, 0,
 		      box.x1, box.y1,
 		      width, height);
-
+err_mask:
 	FreePicture(mask, 0);
-	return true;
+err_pixmap:
+	sna_pixmap_destroy(pixmap);
+	return TRUE;
 }
 
 static PictFormatPtr
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 1db8958..a8b5a06 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -370,6 +370,12 @@ use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box, bool blt)
 		}
 	}
 
+	if (priv->shm) {
+		assert(!priv->flush);
+		list_move(&priv->list, &sna->flush_pixmaps);
+		sna->kgem.flush |= 1;
+	}
+
 	DBG(("%s for box=(%d, %d), (%d, %d)\n",
 	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
 	++priv->source_count;
diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h
index fff5436..0024f99 100644
--- a/src/sna/sna_render_inline.h
+++ b/src/sna/sna_render_inline.h
@@ -75,7 +75,8 @@ is_gpu(DrawablePtr drawable)
 	if (priv == NULL || priv->clear)
 		return false;
 
-	if (DAMAGE_IS_ALL(priv->gpu_damage) || (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo) && !priv->gpu_bo->proxy))
+	if (DAMAGE_IS_ALL(priv->gpu_damage) ||
+	    (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo) && !priv->gpu_bo->proxy))
 		return true;
 
 	return priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo);
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index efb53dd..e63981f 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -2479,7 +2479,7 @@ trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 				pixman_image_unref(image);
 			}
 			if (format != PIXMAN_a8) {
-				screen->DestroyPixmap(scratch);
+				sna_pixmap_destroy(scratch);
 				return;
 			}
 		} else {
@@ -2505,17 +2505,16 @@ trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 		mask = CreatePicture(0, &scratch->drawable,
 				     PictureMatchFormat(screen, depth, format),
 				     0, 0, serverClient, &error);
-		screen->DestroyPixmap(scratch);
-		if (!mask)
-			return;
-
-		CompositePicture(op, src, mask, dst,
-				 xSrc + bounds.x1 - dst_x,
-				 ySrc + bounds.y1 - dst_y,
-				 0, 0,
-				 bounds.x1, bounds.y1,
-				 width, height);
-		FreePicture(mask, 0);
+		if (mask) {
+			CompositePicture(op, src, mask, dst,
+					 xSrc + bounds.x1 - dst_x,
+					 ySrc + bounds.y1 - dst_y,
+					 0, 0,
+					 bounds.x1, bounds.y1,
+					 width, height);
+			FreePicture(mask, 0);
+		}
+		sna_pixmap_destroy(scratch);
 	} else {
 		if (dst->polyEdge == PolyEdgeSharp)
 			maskFormat = PictureMatchFormat(screen, 1, PICT_a1);
@@ -3630,7 +3629,6 @@ composite_unaligned_boxes_fallback(CARD8 op,
 		mask = CreatePicture(0, &scratch->drawable,
 				     PictureMatchFormat(screen, 8, PICT_a8),
 				     0, 0, serverClient, &error);
-		screen->DestroyPixmap(scratch);
 		if (mask) {
 			CompositePicture(op, src, mask, dst,
 					 src_x + extents.x1 - dst_x,
@@ -3641,6 +3639,7 @@ composite_unaligned_boxes_fallback(CARD8 op,
 					 extents.y2 - extents.y1);
 			FreePicture(mask, 0);
 		}
+		sna_pixmap_destroy(scratch);
 	}
 
 	return true;
@@ -4260,7 +4259,7 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
 
 	if (tor_init(&tor, &extents, 2*ntrap)) {
-		screen->DestroyPixmap(scratch);
+		sna_pixmap_destroy(scratch);
 		return true;
 	}
 
@@ -4294,7 +4293,6 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 	mask = CreatePicture(0, &scratch->drawable,
 			     PictureMatchFormat(screen, 8, PICT_a8),
 			     0, 0, serverClient, &error);
-	screen->DestroyPixmap(scratch);
 	if (mask) {
 		CompositePicture(op, src, mask, dst,
 				 src_x + dst_x - pixman_fixed_to_int(traps[0].left.p1.x),
@@ -4304,6 +4302,7 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 				 extents.x2, extents.y2);
 		FreePicture(mask, 0);
 	}
+	sna_pixmap_destroy(scratch);
 
 	return true;
 }
@@ -5323,7 +5322,7 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
 
 	if (tor_init(&tor, &extents, 2*ntrap)) {
-		screen->DestroyPixmap(scratch);
+		sna_pixmap_destroy(scratch);
 		return true;
 	}
 
@@ -5355,7 +5354,6 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 	mask = CreatePicture(0, &scratch->drawable,
 			     PictureMatchFormat(screen, 8, PICT_a8),
 			     0, 0, serverClient, &error);
-	screen->DestroyPixmap(scratch);
 	if (mask) {
 		RegionRec region;
 
@@ -5393,6 +5391,7 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 done:
 		FreePicture(mask, 0);
 	}
+	sna_pixmap_destroy(scratch);
 
 	return true;
 }
@@ -5823,7 +5822,7 @@ trap_mask_converter(PicturePtr picture,
 	dx *= FAST_SAMPLES_X;
 	dy *= FAST_SAMPLES_Y;
 	if (tor_init(&tor, &extents, 2*ntrap)) {
-		screen->DestroyPixmap(scratch);
+		sna_pixmap_destroy(scratch);
 		return true;
 	}
 
@@ -5871,8 +5870,7 @@ trap_mask_converter(PicturePtr picture,
 			       pixmap, priv->gpu_bo, x, y,
 			       &extents, 1, 0);
 	mark_damaged(pixmap, priv, &extents ,x, y);
-
-	screen->DestroyPixmap(scratch);
+	sna_pixmap_destroy(scratch);
 	return true;
 }
 
@@ -5950,7 +5948,7 @@ trap_upload(PicturePtr picture,
 			       &extents, 1, 0);
 	mark_damaged(pixmap, priv, &extents, x, y);
 
-	screen->DestroyPixmap(scratch);
+	sna_pixmap_destroy(scratch);
 	return true;
 }
 
@@ -6362,7 +6360,7 @@ triangles_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
 
 	if (tor_init(&tor, &extents, 3*count)) {
-		screen->DestroyPixmap(scratch);
+		sna_pixmap_destroy(scratch);
 		return true;
 	}
 
@@ -6390,7 +6388,6 @@ triangles_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 	mask = CreatePicture(0, &scratch->drawable,
 			     PictureMatchFormat(screen, 8, PICT_a8),
 			     0, 0, serverClient, &error);
-	screen->DestroyPixmap(scratch);
 	if (mask) {
 		CompositePicture(op, src, mask, dst,
 				 src_x + dst_x - pixman_fixed_to_int(tri[0].p1.x),
@@ -6401,6 +6398,7 @@ triangles_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 		FreePicture(mask, 0);
 	}
 	tor_fini(&tor);
+	sna_pixmap_destroy(scratch);
 
 	return true;
 }
@@ -6478,17 +6476,16 @@ triangles_fallback(CARD8 op,
 		mask = CreatePicture(0, &scratch->drawable,
 				     PictureMatchFormat(screen, depth, format),
 				     0, 0, serverClient, &error);
-		screen->DestroyPixmap(scratch);
-		if (!mask)
-			return;
-
-		CompositePicture(op, src, mask, dst,
-				 xSrc + bounds.x1 - dst_x,
-				 ySrc + bounds.y1 - dst_y,
-				 0, 0,
-				 bounds.x1, bounds.y1,
-				 width, height);
-		FreePicture(mask, 0);
+		if (mask) {
+			CompositePicture(op, src, mask, dst,
+					 xSrc + bounds.x1 - dst_x,
+					 ySrc + bounds.y1 - dst_y,
+					 0, 0,
+					 bounds.x1, bounds.y1,
+					 width, height);
+			FreePicture(mask, 0);
+		}
+		sna_pixmap_destroy(scratch);
 	} else {
 		if (dst->polyEdge == PolyEdgeSharp)
 			maskFormat = PictureMatchFormat(screen, 1, PICT_a1);
@@ -6746,17 +6743,16 @@ tristrip_fallback(CARD8 op,
 		mask = CreatePicture(0, &scratch->drawable,
 				     PictureMatchFormat(screen, depth, format),
 				     0, 0, serverClient, &error);
-		screen->DestroyPixmap(scratch);
-		if (!mask)
-			return;
-
-		CompositePicture(op, src, mask, dst,
-				 xSrc + bounds.x1 - dst_x,
-				 ySrc + bounds.y1 - dst_y,
-				 0, 0,
-				 bounds.x1, bounds.y1,
-				 width, height);
-		FreePicture(mask, 0);
+		if (mask) {
+			CompositePicture(op, src, mask, dst,
+					 xSrc + bounds.x1 - dst_x,
+					 ySrc + bounds.y1 - dst_y,
+					 0, 0,
+					 bounds.x1, bounds.y1,
+					 width, height);
+			FreePicture(mask, 0);
+		}
+		sna_pixmap_destroy(scratch);
 	} else {
 		xTriangle tri;
 		xPointFixed *p[3] = { &tri.p1, &tri.p2, &tri.p3 };
@@ -6881,17 +6877,16 @@ trifan_fallback(CARD8 op,
 		mask = CreatePicture(0, &scratch->drawable,
 				     PictureMatchFormat(screen, depth, format),
 				     0, 0, serverClient, &error);
-		screen->DestroyPixmap(scratch);
-		if (!mask)
-			return;
-
-		CompositePicture(op, src, mask, dst,
-				 xSrc + bounds.x1 - dst_x,
-				 ySrc + bounds.y1 - dst_y,
-				 0, 0,
-				 bounds.x1, bounds.y1,
-				 width, height);
-		FreePicture(mask, 0);
+		if (mask) {
+			CompositePicture(op, src, mask, dst,
+					 xSrc + bounds.x1 - dst_x,
+					 ySrc + bounds.y1 - dst_y,
+					 0, 0,
+					 bounds.x1, bounds.y1,
+					 width, height);
+			FreePicture(mask, 0);
+		}
+		sna_pixmap_destroy(scratch);
 	} else {
 		xTriangle tri;
 		xPointFixed *p[3] = { &tri.p1, &tri.p2, &tri.p3 };
commit 93c794eb3f80bef64f1619986a7c950229dc7a47
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Jul 20 20:34:53 2012 +0100

    sna: Micro-optimise copying boxes with the blitter
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
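
    The fast path added below relies on a BoxRec being four int16s laid out
    (on little-endian) exactly like the blitter's packed (y << 16 | x)
    coordinates, so both destination corners can be written with one 64-bit
    store and the source origin derived from the packed destination with
    add2(). Illustrative values, not taken from the patch:

        /* box = { x1 = 10, y1 = 20, x2 = 110, y2 = 120 } */
        *(uint64_t *)&b[2] = *(uint64_t *)box;  /* b[2] = 20 << 16 | 10 */
        b[5] = add2(b[2], src_dx, src_dy);      /* ((20+src_dy) << 16) | (10+src_dx) */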

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index ff8e3eb..d7d095a 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -2028,6 +2028,13 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
 	return true;
 }
 
+static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
+{
+	x += v & 0xffff;
+	y += v >> 16;
+	return (uint16_t)y << 16 | x;
+}
+
 bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 			struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
 			struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
@@ -2104,56 +2111,110 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 		_kgem_set_mode(kgem, KGEM_BLT);
 	}
 
-	do {
-		int nbox_this_time;
-
-		nbox_this_time = nbox;
-		if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
-			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
-		if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
-			nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
-		assert(nbox_this_time);
-		nbox -= nbox_this_time;
-
+	if ((dst_dx | dst_dy) == 0) {
+		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
 		do {
-			uint32_t *b = kgem->batch + kgem->nbatch;
-
-			DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
-			     __FUNCTION__,
-			     box->x1, box->y1,
-			     box->x2 - box->x1, box->y2 - box->y1));
-
-			assert(box->x1 + src_dx >= 0);
-			assert(box->y1 + src_dy >= 0);
-
-			assert(box->x1 + dst_dx >= 0);
-			assert(box->y1 + dst_dy >= 0);
-
-			b[0] = cmd;
-			b[1] = br13;
-			b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
-			b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
-			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
-					      I915_GEM_DOMAIN_RENDER << 16 |
-					      I915_GEM_DOMAIN_RENDER |
-					      KGEM_RELOC_FENCED,
-					      0);
-			b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
-			b[6] = src_pitch;
-			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
-					      I915_GEM_DOMAIN_RENDER << 16 |
-					      KGEM_RELOC_FENCED,
-					      0);
-			kgem->nbatch += 8;
-			box++;
-		} while (--nbox_this_time);
+			int nbox_this_time;
+
+			nbox_this_time = nbox;
+			if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+			assert(nbox_this_time);
+			nbox -= nbox_this_time;
+
+			do {
+				uint32_t *b = kgem->batch + kgem->nbatch;
+
+				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
+				     __FUNCTION__,
+				     box->x1, box->y1,
+				     box->x2 - box->x1, box->y2 - box->y1));
+
+				assert(box->x1 + src_dx >= 0);
+				assert(box->y1 + src_dy >= 0);
+				assert(box->x1 + src_dx <= MAX_SHORT);
+				assert(box->y1 + src_dy <= MAX_SHORT);
+
+				assert(box->x1 >= 0);
+				assert(box->y1 >= 0);
+
+				*(uint64_t *)&b[0] = hdr;
+				*(uint64_t *)&b[2] = *(uint64_t *)box;
+				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+						      I915_GEM_DOMAIN_RENDER << 16 |
+						      I915_GEM_DOMAIN_RENDER |
+						      KGEM_RELOC_FENCED,
+						      0);
+				b[5] = add2(b[2], src_dx, src_dy);
+				b[6] = src_pitch;
+				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+						      I915_GEM_DOMAIN_RENDER << 16 |
+						      KGEM_RELOC_FENCED,
+						      0);
+				kgem->nbatch += 8;
+				box++;
+			} while (--nbox_this_time);
+
+			if (!nbox)
+				break;
 
-		if (!nbox)
-			break;
+			_kgem_submit(kgem);
+			_kgem_set_mode(kgem, KGEM_BLT);
+		} while (1);
+	} else {
+		do {
+			int nbox_this_time;
+
+			nbox_this_time = nbox;
+			if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+			assert(nbox_this_time);
+			nbox -= nbox_this_time;
+
+			do {
+				uint32_t *b = kgem->batch + kgem->nbatch;
+
+				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
+				     __FUNCTION__,
+				     box->x1, box->y1,
+				     box->x2 - box->x1, box->y2 - box->y1));
+
+				assert(box->x1 + src_dx >= 0);
+				assert(box->y1 + src_dy >= 0);
+
+				assert(box->x1 + dst_dx >= 0);
+				assert(box->y1 + dst_dy >= 0);
+
+				b[0] = cmd;
+				b[1] = br13;
+				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
+				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
+				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+						      I915_GEM_DOMAIN_RENDER << 16 |
+						      I915_GEM_DOMAIN_RENDER |
+						      KGEM_RELOC_FENCED,
+						      0);
+				b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
+				b[6] = src_pitch;
+				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+						      I915_GEM_DOMAIN_RENDER << 16 |
+						      KGEM_RELOC_FENCED,
+						      0);
+				kgem->nbatch += 8;
+				box++;
+			} while (--nbox_this_time);
+
+			if (!nbox)
+				break;
 
-		_kgem_submit(kgem);
-		_kgem_set_mode(kgem, KGEM_BLT);
-	} while (1);
+			_kgem_submit(kgem);
+			_kgem_set_mode(kgem, KGEM_BLT);
+		} while (1);
+	}
 
 	if (kgem->gen >= 60 && kgem_check_batch(kgem, 3)) {
 		uint32_t *b = kgem->batch + kgem->nbatch;
commit a0d95a9c2d3a27eafbe459e2aefe772c006e596f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Jul 20 20:34:23 2012 +0100

    sna: Only update a buffer when it becomes dirty
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
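
Marking a buffer dirty now also sets needs_flush and moves the bo onto the
current request's buffer list, and the early return added to
kgem_bo_mark_dirty() makes that bookkeeping run only on the clean-to-dirty
transition, so callers can mark the destination unconditionally per draw. A
minimal sketch of that transition-only pattern (simplified struct and a printf
standing in for the list move; not the driver's types):

#include <stdbool.h>
#include <stdio.h>

struct buffer {
	bool dirty;
	bool needs_flush;
};

static void mark_dirty(struct buffer *bo)
{
	if (bo->dirty)
		return;			/* already tracked: nothing to redo */

	bo->needs_flush = bo->dirty = true;
	printf("buffer moved onto the active request list\n");
}

int main(void)
{
	struct buffer bo = { false, false };

	mark_dirty(&bo);	/* one-time bookkeeping happens here */
	mark_dirty(&bo);	/* no-op: the buffer is already dirty */
	return 0;
}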

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 04c351c..b65454d 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -547,7 +547,7 @@ static void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op)
 	assert(sna->render_state.gen2.vertex_offset == 0);
 
 	if (sna->render_state.gen2.target == op->dst.bo->unique_id) {
-		kgem_bo_mark_dirty(op->dst.bo);
+		kgem_bo_mark_dirty(&sna->kgem, op->dst.bo);
 		return;
 	}
 
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 1f6c1aa..18c5d85 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -1373,7 +1373,7 @@ static void gen3_emit_target(struct sna *sna,
 
 		state->current_dst = bo->unique_id;
 	}
-	kgem_bo_mark_dirty(bo);
+	kgem_bo_mark_dirty(&sna->kgem, bo);
 }
 
 static void gen3_emit_composite_state(struct sna *sna,
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 6fcce71..de6c8c4 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -732,7 +732,7 @@ gen4_bind_bo(struct sna *sna,
 	/* After the first bind, we manage the cache domains within the batch */
 	if (is_dst) {
 		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
-		kgem_bo_mark_dirty(bo);
+		kgem_bo_mark_dirty(&sna->kgem, bo);
 	} else
 		domains = I915_GEM_DOMAIN_SAMPLER << 16;
 
@@ -1457,7 +1457,7 @@ gen4_emit_state(struct sna *sna,
 		     kgem_bo_is_dirty(op->mask.bo)));
 		OUT_BATCH(MI_FLUSH);
 		kgem_clear_dirty(&sna->kgem);
-		kgem_bo_mark_dirty(op->dst.bo);
+		kgem_bo_mark_dirty(&sna->kgem, op->dst.bo);
 	}
 }
 
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index d776e77..db7eb7b 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -726,7 +726,7 @@ gen5_bind_bo(struct sna *sna,
 	/* After the first bind, we manage the cache domains within the batch */
 	if (is_dst) {
 		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
-		kgem_bo_mark_dirty(bo);
+		kgem_bo_mark_dirty(&sna->kgem, bo);
 	} else
 		domains = I915_GEM_DOMAIN_SAMPLER << 16;
 
@@ -1472,7 +1472,7 @@ gen5_emit_state(struct sna *sna,
 	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
 		OUT_BATCH(MI_FLUSH);
 		kgem_clear_dirty(&sna->kgem);
-		kgem_bo_mark_dirty(op->dst.bo);
+		kgem_bo_mark_dirty(&sna->kgem, op->dst.bo);
 	}
 }
 
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index d4783e0..c292da1 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -914,7 +914,7 @@ gen6_emit_state(struct sna *sna,
 	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
 		gen6_emit_flush(sna);
 		kgem_clear_dirty(&sna->kgem);
-		kgem_bo_mark_dirty(op->dst.bo);
+		kgem_bo_mark_dirty(&sna->kgem, op->dst.bo);
 		need_stall = false;
 	}
 	if (need_stall) {
@@ -1246,7 +1246,7 @@ gen6_bind_bo(struct sna *sna,
 	/* After the first bind, we manage the cache domains within the batch */
 	if (is_dst) {
 		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
-		kgem_bo_mark_dirty(bo);
+		kgem_bo_mark_dirty(&sna->kgem, bo);
 	} else
 		domains = I915_GEM_DOMAIN_SAMPLER << 16;
 
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index c041d66..ae0aa9d 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -1048,7 +1048,7 @@ gen7_emit_state(struct sna *sna,
 	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
 		gen7_emit_pipe_invalidate(sna, need_stall);
 		kgem_clear_dirty(&sna->kgem);
-		kgem_bo_mark_dirty(op->dst.bo);
+		kgem_bo_mark_dirty(&sna->kgem, op->dst.bo);
 		need_stall = false;
 	}
 	if (need_stall)
@@ -1355,7 +1355,7 @@ gen7_bind_bo(struct sna *sna,
 	/* After the first bind, we manage the cache domains within the batch */
 	if (is_dst) {
 		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
-		kgem_bo_mark_dirty(bo);
+		kgem_bo_mark_dirty(&sna->kgem, bo);
 	} else
 		domains = I915_GEM_DOMAIN_SAMPLER << 16;
 
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 66a23bf..577fa6c 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -349,9 +349,10 @@ void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
 		assert(list_is_empty(&bo->vma));
 		bo->rq = NULL;
 		list_del(&bo->request);
+
+		bo->needs_flush = false;
 	}
 
-	bo->needs_flush = false;
 	bo->domain = DOMAIN_NONE;
 }
 
@@ -3494,12 +3495,8 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 		kgem->reloc[index].target_handle = bo->handle;
 		kgem->reloc[index].presumed_offset = bo->presumed_offset;
 
-		if (read_write_domain & 0x7fff) {
-			DBG(("%s: marking handle=%d dirty\n",
-			     __FUNCTION__, bo->handle));
-			bo->needs_flush = bo->dirty = true;
-			list_move(&bo->request, &kgem->next_request->buffers);
-		}
+		if (read_write_domain & 0x7ff)
+			kgem_bo_mark_dirty(kgem, bo);
 
 		delta += bo->presumed_offset;
 	} else {
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 533a919..165e7b9 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -536,10 +536,15 @@ static inline bool kgem_bo_is_dirty(struct kgem_bo *bo)
 	return bo->dirty;
 }
 
-static inline void kgem_bo_mark_dirty(struct kgem_bo *bo)
+static inline void kgem_bo_mark_dirty(struct kgem *kgem, struct kgem_bo *bo)
 {
+	if (bo->dirty)
+		return;
+
 	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
-	bo->dirty = true;
+
+	bo->needs_flush = bo->dirty = true;
+	list_move(&bo->request, &kgem->next_request->buffers);
 }
 
 void kgem_sync(struct kgem *kgem);
commit c52d265b83b033fb2a275fcc9a8a8d146e3afdf6
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Jul 20 19:38:38 2012 +0100

    sna: Tweak CPU bo promotion rules for CopyArea
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
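
With this change move_to_gpu() samples source_count once per call and compares
it against SOURCE_BIAS: a CPU bo with the flush flag set is promoted after
SOURCE_BIAS uses, a pixmap that would be untiled on the GPU stays on the CPU
path, a CPU-bo-backed pixmap is promoted once the count exceeds SOURCE_BIAS,
and a pixmap with no CPU bo keeps the area-weighted test visible at the end of
the hunk. A standalone sketch of that area-weighted rule (SOURCE_BIAS value
assumed for illustration; names simplified):

#include <stdbool.h>
#include <stdio.h>

#define SOURCE_BIAS 4	/* assumed value, for illustration only */

/* Each copy "votes" with the area it reads; promote to the GPU once the
 * accumulated weight reaches a few whole-pixmap copies. */
static bool promote(int count, int box_w, int box_h, int pix_w, int pix_h)
{
	return count * box_w * box_h >= (SOURCE_BIAS + 2) * pix_w * pix_h;
}

int main(void)
{
	/* repeatedly copying a 256x256 region out of a 1024x768 pixmap */
	for (int count = 1; count <= 100; count++)
		if (promote(count, 256, 256, 1024, 768)) {
			printf("promote to GPU after %d copies\n", count);
			break;
		}
	return 0;
}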

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 4ef52d7..5466f38 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -3733,35 +3733,40 @@ move_to_gpu(PixmapPtr pixmap, struct sna_pixmap *priv,
 {
 	int w = box->x2 - box->x1;
 	int h = box->y2 - box->y1;
+	int count;
 
 	if (DAMAGE_IS_ALL(priv->gpu_damage))
 		return true;
 
-	if (DAMAGE_IS_ALL(priv->cpu_damage))
-		return false;
-
 	if (priv->gpu_bo) {
 		if (alu != GXcopy)
 			return true;
 
 		if (!priv->cpu)
 			return true;
+
+		if (priv->gpu_bo->tiling)
+			return true;
 	} else {
 		if ((priv->create & KGEM_CAN_CREATE_GPU) == 0)
 			return false;
 	}
 
+	count = priv->source_count++;
 	if (priv->cpu_bo) {
-		if (sna_pixmap_choose_tiling(pixmap, DEFAULT_TILING) == I915_TILING_NONE)
+		if (priv->cpu_bo->flush && count > SOURCE_BIAS)
+			return true;
+
+		if (sna_pixmap_choose_tiling(pixmap,
+					     DEFAULT_TILING) == I915_TILING_NONE)
 			return false;
 
 		if (priv->cpu)
 			return false;
 
-		return (priv->source_count++-SOURCE_BIAS) * w*h >=
-			(int)pixmap->drawable.width * pixmap->drawable.height;
+		return count > SOURCE_BIAS;
 	} else {
-		return ++priv->source_count * w*h >= (SOURCE_BIAS+2) * (int)pixmap->drawable.width * pixmap->drawable.height;
+		return count * w*h >= (SOURCE_BIAS+2) * (int)pixmap->drawable.width * pixmap->drawable.height;
 	}
 }
 

