[PATCH 4/4] drm/ttm: move bo move logic inside driver to avoid callback
j.glisse at gmail.com
j.glisse at gmail.com
Thu Apr 5 11:44:26 PDT 2012
From: Jerome Glisse <jglisse at redhat.com>
Move buffer object move logic inside the driver move callback so we
don't need the complex move_notify and invalidate_caches callbacks in
the error path. This simplifies drivers at the expense of some code
duplication among them.
Signed-off-by: Jerome Glisse <jglisse at redhat.com>
---
drivers/gpu/drm/nouveau/nouveau_bo.c | 318 ++++++++++++++++++++------------
drivers/gpu/drm/radeon/radeon_ttm.c | 277 +++++++++++++++++-----------
drivers/gpu/drm/ttm/ttm_bo.c | 87 ++--------
drivers/gpu/drm/ttm/ttm_tt.c | 2 +
drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 67 +++++++-
include/drm/ttm/ttm_bo_driver.h | 37 ++--
6 files changed, 472 insertions(+), 316 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 6f18c3b..2ddeb0f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -39,6 +39,18 @@
#include <linux/log2.h>
#include <linux/slab.h>
+/* gcc should kill that code */
+#if 1
+#define ASSERT(expr) \
+ if (!(expr)) { \
+ printk("radeon: assertion failed! %s[%d]: %s\n", \
+ __func__, __LINE__, #expr); \
+ panic("radeon: %s", __func__); \
+ }
+#else
+#define ASSERT(expr) do {} while (0)
+#endif
+
static void
nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
{
@@ -376,13 +388,6 @@ nouveau_ttm_tt_create(struct ttm_bo_device *bdev,
}
static int
-nouveau_bo_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
-{
- /* We'll do this from user space. */
- return 0;
-}
-
-static int
nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
struct ttm_mem_type_manager *man)
{
@@ -467,7 +472,6 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
static int
nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
struct nouveau_bo *nvbo, bool evict,
- bool no_wait_reserve, bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
{
struct nouveau_fence *fence = NULL;
@@ -687,8 +691,7 @@ nouveau_vma_getmap(struct nouveau_channel *chan, struct nouveau_bo *nvbo,
}
static int
-nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
- bool no_wait_reserve, bool no_wait_gpu,
+nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
struct ttm_mem_reg *new_mem)
{
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
@@ -727,9 +730,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
else
ret = nvc0_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
if (ret == 0) {
- ret = nouveau_bo_move_accel_cleanup(chan, nvbo, evict,
- no_wait_reserve,
- no_wait_gpu, new_mem);
+ ret = nouveau_bo_move_accel_cleanup(chan, nvbo, evict, new_mem);
}
out:
@@ -738,73 +739,6 @@ out:
return ret;
}
-static int
-nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait_reserve, bool no_wait_gpu,
- struct ttm_mem_reg *new_mem)
-{
- u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
- struct ttm_placement placement;
- struct ttm_mem_reg tmp_mem;
- int ret;
-
- placement.fpfn = placement.lpfn = 0;
- placement.num_placement = placement.num_busy_placement = 1;
- placement.placement = placement.busy_placement = &placement_memtype;
-
- tmp_mem = *new_mem;
- tmp_mem.mm_node = NULL;
- ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_reserve, no_wait_gpu);
- if (ret)
- return ret;
-
- ret = ttm_tt_bind(bo->ttm, &tmp_mem);
- if (ret)
- goto out;
-
- ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, no_wait_gpu, &tmp_mem);
- if (ret)
- goto out;
-
- ret = ttm_bo_move_ttm(bo, new_mem);
-out:
- ttm_bo_mem_put(bo, &tmp_mem);
- return ret;
-}
-
-static int
-nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait_reserve, bool no_wait_gpu,
- struct ttm_mem_reg *new_mem)
-{
- u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
- struct ttm_placement placement;
- struct ttm_mem_reg tmp_mem;
- int ret;
-
- placement.fpfn = placement.lpfn = 0;
- placement.num_placement = placement.num_busy_placement = 1;
- placement.placement = placement.busy_placement = &placement_memtype;
-
- tmp_mem = *new_mem;
- tmp_mem.mm_node = NULL;
- ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_reserve, no_wait_gpu);
- if (ret)
- return ret;
-
- ret = ttm_bo_move_ttm(bo, &tmp_mem);
- if (ret)
- goto out;
-
- ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, no_wait_gpu, new_mem);
- if (ret)
- goto out;
-
-out:
- ttm_bo_mem_put(bo, &tmp_mem);
- return ret;
-}
-
static void
nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem)
{
@@ -865,59 +799,215 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
}
static int
-nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait_reserve, bool no_wait_gpu,
- struct ttm_mem_reg *new_mem)
+nouveau_bo_move_vram_vram(struct ttm_buffer_object *bo,
+ bool evict,
+ struct ttm_mem_reg *new_mem)
{
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
- struct nouveau_bo *nvbo = nouveau_bo(bo);
- struct ttm_mem_reg *old_mem = &bo->mem;
- struct nouveau_tile_reg *new_tile = NULL;
- int ret = 0;
+ int r;
- if (dev_priv->card_type < NV_50) {
- ret = nouveau_bo_vm_bind(bo, new_mem, &new_tile);
- if (ret)
- return ret;
+ if (!dev_priv->channel) {
+ /* Software copy if the card isn't up and running yet. */
+ goto memcpy;
}
- /* Fake bo copy. */
- if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
- BUG_ON(bo->mem.mm_node != NULL);
- bo->mem = *new_mem;
- new_mem->mm_node = NULL;
- goto out;
+ r = nouveau_bo_move_m2mf(bo, evict, new_mem);
+ if (!r) {
+ return 0;
+ }
+
+memcpy:
+ /* fallback to memcpy */
+ return ttm_bo_move_memcpy(bo, new_mem);
+}
+
+static int
+nouveau_bo_move_ram_vram(struct ttm_buffer_object *bo,
+ bool evict, bool interruptible,
+ bool no_wait_reserve, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem)
+{
+ struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct ttm_mem_reg *old_mem = &bo->mem;
+ struct ttm_mem_reg *dst_mem = new_mem;
+ struct ttm_mem_reg tmp_mem = { 0 };
+ int r;
+
+ nouveau_bo_move_ntfy(bo, new_mem);
+
+ /* new memory placement needs ttm */
+ if (bo->ttm == NULL) {
+ if (new_mem->mem_type == TTM_PL_VRAM) {
+ /* it's new bo bound to vram */
+ BUG_ON(bo->mem.mm_node != NULL);
+ bo->mem = *new_mem;
+ new_mem->mm_node = NULL;
+ return 0;
+ }
+
+ r = ttm_bo_add_tt(bo, false);
+ if (r) {
+ return r;
+ }
}
- /* Software copy if the card isn't up and running yet. */
if (!dev_priv->channel) {
- ret = ttm_bo_move_memcpy(bo, new_mem);
- goto out;
+ /* Software copy if the card isn't up and running yet. */
+ goto memcpy;
}
- /* Hardware assisted copy. */
- if (new_mem->mem_type == TTM_PL_SYSTEM)
- ret = nouveau_bo_move_flipd(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
- else if (old_mem->mem_type == TTM_PL_SYSTEM)
- ret = nouveau_bo_move_flips(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
- else
- ret = nouveau_bo_move_m2mf(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
+ if (old_mem->mem_type == TTM_PL_SYSTEM || new_mem->mem_type == TTM_PL_SYSTEM) {
+ /* we need to use a temporary gtt memory to perform the blit */
+ struct ttm_placement placement;
+ u32 placements;
+
+ tmp_mem = *new_mem;
+ tmp_mem.mm_node = NULL;
+ placement.fpfn = 0;
+ placement.lpfn = 0;
+ placement.num_placement = 1;
+ placement.placement = &placements;
+ placement.num_busy_placement = 1;
+ placement.busy_placement = &placements;
+ placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
+ r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_reserve, no_wait_gpu);
+ if (r) {
+ return r;
+ }
+ if (old_mem->mem_type == TTM_PL_SYSTEM) {
+ r = ttm_bo_move_ttm(bo, &tmp_mem);
+ if (r) {
+ goto out;
+ }
+ } else {
+ r = ttm_tt_bind(bo->ttm, &tmp_mem);
+ if (r) {
+ goto out;
+ }
+ dst_mem = &tmp_mem;
+ }
+ }
- if (!ret)
+ r = nouveau_bo_move_m2mf(bo, evict, new_mem);
+ if (r) {
+memcpy:
+ /* fallback to memcpy */
+ r = ttm_bo_move_memcpy(bo, dst_mem);
+ }
+ if (r) {
goto out;
+ }
- /* Fallback to software copy. */
- ret = ttm_bo_move_memcpy(bo, new_mem);
+ if (new_mem->mem_type == TTM_PL_SYSTEM) {
+ /* dst mem is temporary gtt mem, move to final system placement */
+ ttm_bo_move_ttm(bo, new_mem);
+ }
out:
+ if (r && old_mem->mem_type == TTM_PL_VRAM && bo->ttm) {
+ ttm_tt_destroy(bo->ttm);
+ bo->ttm = NULL;
+ }
+ ttm_bo_mem_put(bo, &tmp_mem);
+ return r;
+}
+
+static int
+nouveau_bo_move_sys_gtt(struct ttm_buffer_object *bo,
+ struct ttm_mem_reg *new_mem)
+{
+ int r;
+
+ /* new memory placement needs ttm */
+ if (bo->ttm == NULL) {
+ r = ttm_bo_add_tt(bo, 1);
+ if (r) {
+ return r;
+ }
+ }
+
+ r = ttm_tt_set_placement_caching(bo->ttm, new_mem->placement);
+ if (r) {
+ return r;
+ }
+
+ if (new_mem->mem_type == TTM_PL_TT) {
+ r = ttm_tt_bind(bo->ttm, new_mem);
+ if (r) {
+ return r;
+ }
+ }
+
+ bo->mem = *new_mem;
+ new_mem->mm_node = NULL;
+ return 0;
+}
+
+static int
+nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
+ bool no_wait_reserve, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem)
+{
+ struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct ttm_mem_reg *old_mem = &bo->mem;
+ struct nouveau_bo *nvbo = nouveau_bo(bo);
+ struct nouveau_tile_reg *new_tile = NULL;
+ int r = -EINVAL;
+
+ /* this rely on the following */
+ ASSERT(TTM_PL_SYSTEM == 0);
+ ASSERT(TTM_PL_TT == 1);
+ ASSERT(TTM_PL_VRAM == 2);
+
+ /* bo is evicted before destruction */
+ if (new_mem == NULL && evict) {
+ nouveau_bo_move_ntfy(bo, NULL);
+ return 0;
+ }
+
if (dev_priv->card_type < NV_50) {
- if (ret)
+ r = nouveau_bo_vm_bind(bo, new_mem, &new_tile);
+ if (r) {
+ return r;
+ }
+ }
+
+ nouveau_bo_move_ntfy(bo, new_mem);
+
+ switch ((old_mem->mem_type & 3) | ((new_mem->mem_type & 3) << 2)) {
+ case 0: /* SYSTEM -> SYSTEM */
+ case 1: /* TT -> SYSTEM */
+ case 4: /* SYSTEM -> TT */
+ case 5: /* TT -> TT */
+ r = nouveau_bo_move_sys_gtt(bo, new_mem);
+ break;
+ case 2: /* VRAM -> SYSTEM */
+ case 6: /* VRAM -> TT */
+ case 8: /* SYSTEM -> VRAM */
+ case 9: /* TT -> VRAM */
+ r = nouveau_bo_move_ram_vram(bo, evict, intr,
+ no_wait_reserve, no_wait_gpu,
+ new_mem);
+ break;
+ case 10: /* VRAM -> VRAM */
+ r = nouveau_bo_move_vram_vram(bo, evict, new_mem);
+ break;
+ default:
+ DRM_ERROR("invalid move src %d / dst %d\n",
+ old_mem->mem_type, new_mem->mem_type);
+ return -EINVAL;
+ }
+
+ if (dev_priv->card_type < NV_50) {
+ if (r) {
nouveau_bo_vm_cleanup(bo, NULL, &new_tile);
- else
+ nouveau_bo_move_ntfy(bo, old_mem);
+ } else {
nouveau_bo_vm_cleanup(bo, new_tile, &nvbo->tile);
+ }
}
- return ret;
+ return r;
}
static int
@@ -1142,10 +1232,8 @@ struct ttm_bo_driver nouveau_bo_driver = {
.ttm_tt_create = &nouveau_ttm_tt_create,
.ttm_tt_populate = &nouveau_ttm_tt_populate,
.ttm_tt_unpopulate = &nouveau_ttm_tt_unpopulate,
- .invalidate_caches = nouveau_bo_invalidate_caches,
.init_mem_type = nouveau_bo_init_mem_type,
.evict_flags = nouveau_bo_evict_flags,
- .move_notify = nouveau_bo_move_ntfy,
.move = nouveau_bo_move,
.verify_access = nouveau_bo_verify_access,
.sync_obj_signaled = __nouveau_fence_signalled,
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 65b4d2f..7a5a739 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -43,6 +43,18 @@
#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
+/* gcc should kill that code */
+#if 1
+#define ASSERT(expr) \
+ if (!(expr)) { \
+ printk("radeon: assertion failed! %s[%d]: %s\n", \
+ __func__, __LINE__, #expr); \
+ panic("radeon: %s", __func__); \
+ }
+#else
+#define ASSERT(expr) do {} while (0)
+#endif
+
static int radeon_ttm_debugfs_init(struct radeon_device *rdev);
static struct radeon_device *radeon_get_rdev(struct ttm_bo_device *bdev)
@@ -114,11 +126,6 @@ static void radeon_ttm_global_fini(struct radeon_device *rdev)
}
}
-static int radeon_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
-{
- return 0;
-}
-
static int radeon_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
struct ttm_mem_type_manager *man)
{
@@ -215,10 +222,9 @@ static void radeon_move_null(struct ttm_buffer_object *bo,
new_mem->mm_node = NULL;
}
-static int radeon_move_blit(struct ttm_buffer_object *bo,
- bool evict, int no_wait_reserve, bool no_wait_gpu,
- struct ttm_mem_reg *new_mem,
- struct ttm_mem_reg *old_mem)
+static int radeon_move_blit(struct ttm_buffer_object *bo, bool evict,
+ struct ttm_mem_reg *new_mem,
+ struct ttm_mem_reg *old_mem)
{
struct radeon_device *rdev;
uint64_t old_start, new_start;
@@ -301,135 +307,198 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
return r;
}
-static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
- bool evict, bool interruptible,
- bool no_wait_reserve, bool no_wait_gpu,
- struct ttm_mem_reg *new_mem)
+static int radeon_bo_move_vram_vram(struct ttm_buffer_object *bo,
+ bool evict, struct ttm_mem_reg *new_mem)
{
- struct radeon_device *rdev;
struct ttm_mem_reg *old_mem = &bo->mem;
- struct ttm_mem_reg tmp_mem;
- u32 placements;
- struct ttm_placement placement;
+ struct radeon_device *rdev;
int r;
rdev = radeon_get_rdev(bo->bdev);
- tmp_mem = *new_mem;
- tmp_mem.mm_node = NULL;
- placement.fpfn = 0;
- placement.lpfn = 0;
- placement.num_placement = 1;
- placement.placement = &placements;
- placement.num_busy_placement = 1;
- placement.busy_placement = &placements;
- placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
- r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
- interruptible, no_wait_reserve, no_wait_gpu);
- if (unlikely(r)) {
- return r;
- }
- r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
- if (unlikely(r)) {
- goto out_cleanup;
+ if (!rdev->ring[radeon_copy_ring_index(rdev)].ready ||
+ rdev->asic->copy.copy == NULL) {
+ /* use memcpy */
+ goto memcpy;
}
- r = ttm_tt_bind(bo->ttm, &tmp_mem);
- if (unlikely(r)) {
- goto out_cleanup;
+ r = radeon_move_blit(bo, evict, new_mem, old_mem);
+ if (!r) {
+ /* blit succesfull */
+ radeon_bo_move_notify(bo, new_mem);
+ return 0;
}
- r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem, old_mem);
- if (unlikely(r)) {
- goto out_cleanup;
+
+memcpy:
+ /* fallback to memcpy */
+ r = ttm_bo_move_memcpy(bo, new_mem);
+ if (!r) {
+ radeon_bo_move_notify(bo, new_mem);
}
- r = ttm_bo_move_ttm(bo, new_mem);
-out_cleanup:
- ttm_bo_mem_put(bo, &tmp_mem);
return r;
}
-static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
- bool evict, bool interruptible,
- bool no_wait_reserve, bool no_wait_gpu,
- struct ttm_mem_reg *new_mem)
+static int radeon_bo_move_ram_vram(struct ttm_buffer_object *bo,
+ bool evict, bool interruptible,
+ bool no_wait_reserve, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem)
{
- struct radeon_device *rdev;
struct ttm_mem_reg *old_mem = &bo->mem;
- struct ttm_mem_reg tmp_mem;
- struct ttm_placement placement;
- u32 placements;
+ struct ttm_mem_reg *dst_mem = new_mem;
+ struct ttm_mem_reg tmp_mem = { 0 };
+ struct radeon_device *rdev;
int r;
rdev = radeon_get_rdev(bo->bdev);
- tmp_mem = *new_mem;
- tmp_mem.mm_node = NULL;
- placement.fpfn = 0;
- placement.lpfn = 0;
- placement.num_placement = 1;
- placement.placement = &placements;
- placement.num_busy_placement = 1;
- placement.busy_placement = &placements;
- placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
- r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_reserve, no_wait_gpu);
- if (unlikely(r)) {
- return r;
+
+ /* new memory placement needs ttm */
+ if (bo->ttm == NULL) {
+ if (new_mem->mem_type == TTM_PL_VRAM) {
+ /* it's new bo bound to vram */
+ radeon_move_null(bo, new_mem);
+ return 0;
+ }
+
+ r = ttm_bo_add_tt(bo, false);
+ if (r) {
+ return r;
+ }
}
- r = ttm_bo_move_ttm(bo, &tmp_mem);
- if (unlikely(r)) {
- goto out_cleanup;
+
+ if (!rdev->ring[radeon_copy_ring_index(rdev)].ready ||
+ rdev->asic->copy.copy == NULL) {
+ /* use memcpy */
+ goto memcpy;
}
- r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, new_mem, old_mem);
- if (unlikely(r)) {
- goto out_cleanup;
+
+ if (old_mem->mem_type == TTM_PL_SYSTEM || new_mem->mem_type == TTM_PL_SYSTEM) {
+ /* we need to use a temporary gtt memory to perform the blit */
+ struct ttm_placement placement;
+ u32 placements;
+
+ tmp_mem = *new_mem;
+ tmp_mem.mm_node = NULL;
+ placement.fpfn = 0;
+ placement.lpfn = 0;
+ placement.num_placement = 1;
+ placement.placement = &placements;
+ placement.num_busy_placement = 1;
+ placement.busy_placement = &placements;
+ placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
+ r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_reserve, no_wait_gpu);
+ if (r) {
+ return r;
+ }
+ if (old_mem->mem_type == TTM_PL_SYSTEM) {
+ r = ttm_bo_move_ttm(bo, &tmp_mem);
+ if (r) {
+ goto out;
+ }
+ } else {
+ r = ttm_tt_bind(bo->ttm, &tmp_mem);
+ if (r) {
+ goto out;
+ }
+ dst_mem = &tmp_mem;
+ }
+ }
+
+ r = radeon_move_blit(bo, evict, dst_mem, old_mem);
+ if (r) {
+memcpy:
+ /* fallback to memcpy */
+ r = ttm_bo_move_memcpy(bo, dst_mem);
+ }
+ if (r) {
+ goto out;
+ }
+ radeon_bo_move_notify(bo, new_mem);
+
+ if (new_mem->mem_type == TTM_PL_SYSTEM) {
+ /* dst mem is temporary gtt mem, move to final system placement */
+ ttm_bo_move_ttm(bo, new_mem);
+ }
+
+out:
+ if (r && old_mem->mem_type == TTM_PL_VRAM && bo->ttm) {
+ ttm_tt_destroy(bo->ttm);
+ bo->ttm = NULL;
}
-out_cleanup:
ttm_bo_mem_put(bo, &tmp_mem);
return r;
}
+static int radeon_bo_move_sys_gtt(struct ttm_buffer_object *bo,
+ struct ttm_mem_reg *new_mem)
+{
+ int r;
+
+ /* new memory placement needs ttm */
+ if (bo->ttm == NULL) {
+ r = ttm_bo_add_tt(bo, 1);
+ if (r) {
+ return r;
+ }
+ }
+
+ r = ttm_tt_set_placement_caching(bo->ttm, new_mem->placement);
+ if (r) {
+ return r;
+ }
+
+ if (new_mem->mem_type == TTM_PL_TT) {
+ r = ttm_tt_bind(bo->ttm, new_mem);
+ if (r) {
+ return r;
+ }
+ }
+
+ radeon_move_null(bo, new_mem);
+ radeon_bo_move_notify(bo, new_mem);
+ return 0;
+}
+
static int radeon_bo_move(struct ttm_buffer_object *bo,
bool evict, bool interruptible,
bool no_wait_reserve, bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
{
- struct radeon_device *rdev;
struct ttm_mem_reg *old_mem = &bo->mem;
- int r;
+ int r = -EINVAL;
- rdev = radeon_get_rdev(bo->bdev);
- if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
- radeon_move_null(bo, new_mem);
- return 0;
- }
- if ((old_mem->mem_type == TTM_PL_TT &&
- new_mem->mem_type == TTM_PL_SYSTEM) ||
- (old_mem->mem_type == TTM_PL_SYSTEM &&
- new_mem->mem_type == TTM_PL_TT)) {
- /* bind is enough */
- radeon_move_null(bo, new_mem);
- return 0;
- }
- if (!rdev->ring[radeon_copy_ring_index(rdev)].ready ||
- rdev->asic->copy.copy == NULL) {
- /* use memcpy */
- goto memcpy;
- }
+ /* this rely on the following */
+ ASSERT(TTM_PL_SYSTEM == 0);
+ ASSERT(TTM_PL_TT == 1);
+ ASSERT(TTM_PL_VRAM == 2);
- if (old_mem->mem_type == TTM_PL_VRAM &&
- new_mem->mem_type == TTM_PL_SYSTEM) {
- r = radeon_move_vram_ram(bo, evict, interruptible,
- no_wait_reserve, no_wait_gpu, new_mem);
- } else if (old_mem->mem_type == TTM_PL_SYSTEM &&
- new_mem->mem_type == TTM_PL_VRAM) {
- r = radeon_move_ram_vram(bo, evict, interruptible,
- no_wait_reserve, no_wait_gpu, new_mem);
- } else {
- r = radeon_move_blit(bo, evict, no_wait_reserve, no_wait_gpu, new_mem, old_mem);
+ /* bo is evicted before destruction */
+ if (new_mem == NULL && evict) {
+ radeon_bo_move_notify(bo, NULL);
+ return 0;
}
- if (r) {
-memcpy:
- r = ttm_bo_move_memcpy(bo, new_mem);
+ switch ((old_mem->mem_type & 3) | ((new_mem->mem_type & 3) << 2)) {
+ case 0: /* SYSTEM -> SYSTEM */
+ case 1: /* TT -> SYSTEM */
+ case 4: /* SYSTEM -> TT */
+ case 5: /* TT -> TT */
+ r = radeon_bo_move_sys_gtt(bo, new_mem);
+ break;
+ case 2: /* VRAM -> SYSTEM */
+ case 6: /* VRAM -> TT */
+ case 8: /* SYSTEM -> VRAM */
+ case 9: /* TT -> VRAM */
+ r = radeon_bo_move_ram_vram(bo, evict, interruptible,
+ no_wait_reserve, no_wait_gpu,
+ new_mem);
+ break;
+ case 10: /* VRAM -> VRAM */
+ r = radeon_bo_move_vram_vram(bo, evict, new_mem);
+ break;
+ default:
+ DRM_ERROR("invalid move src %d / dst %d\n",
+ old_mem->mem_type, new_mem->mem_type);
+ return -EINVAL;
}
return r;
}
@@ -687,7 +756,6 @@ static struct ttm_bo_driver radeon_bo_driver = {
.ttm_tt_create = &radeon_ttm_tt_create,
.ttm_tt_populate = &radeon_ttm_tt_populate,
.ttm_tt_unpopulate = &radeon_ttm_tt_unpopulate,
- .invalidate_caches = &radeon_invalidate_caches,
.init_mem_type = &radeon_init_mem_type,
.evict_flags = &radeon_evict_flags,
.move = &radeon_bo_move,
@@ -697,7 +765,6 @@ static struct ttm_bo_driver radeon_bo_driver = {
.sync_obj_flush = &radeon_sync_obj_flush,
.sync_obj_unref = &radeon_sync_obj_unref,
.sync_obj_ref = &radeon_sync_obj_ref,
- .move_notify = &radeon_bo_move_notify,
.fault_reserve_notify = &radeon_bo_fault_reserve_notify,
.io_mem_reserve = &radeon_ttm_io_mem_reserve,
.io_mem_free = &radeon_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f2aa2e2..1e58a54 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -320,7 +320,7 @@ EXPORT_SYMBOL(ttm_bo_unreserve);
/*
* Call bo->mutex locked.
*/
-static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
+int ttm_bo_add_tt(struct ttm_buffer_object *bo, bool zero_alloc)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_bo_global *glob = bo->glob;
@@ -351,6 +351,7 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
return ret;
}
+EXPORT_SYMBOL(ttm_bo_add_tt);
static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem,
@@ -361,99 +362,38 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
bool old_is_pci = ttm_mem_reg_is_pci(bdev, &bo->mem);
bool new_is_pci = ttm_mem_reg_is_pci(bdev, mem);
struct ttm_mem_type_manager *old_man = &bdev->man[bo->mem.mem_type];
- struct ttm_mem_type_manager *new_man = &bdev->man[mem->mem_type];
int ret = 0;
if (old_is_pci || new_is_pci ||
((mem->placement & bo->mem.placement & TTM_PL_MASK_CACHING) == 0)) {
ret = ttm_mem_io_lock(old_man, true);
- if (unlikely(ret != 0))
- goto out_err;
+ if (unlikely(ret != 0)) {
+ return ret;
+ }
ttm_bo_unmap_virtual_locked(bo);
ttm_mem_io_unlock(old_man);
}
- /*
- * Create and bind a ttm if required.
- */
-
- if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) {
- if (bo->ttm == NULL) {
- bool zero = !(old_man->flags & TTM_MEMTYPE_FLAG_FIXED);
- ret = ttm_bo_add_ttm(bo, zero);
- if (ret)
- goto out_err;
- }
-
- ret = ttm_tt_set_placement_caching(bo->ttm, mem->placement);
- if (ret)
- goto out_err;
-
- if (mem->mem_type != TTM_PL_SYSTEM) {
- ret = ttm_tt_bind(bo->ttm, mem);
- if (ret)
- goto out_err;
- }
-
- if (bo->mem.mem_type == TTM_PL_SYSTEM) {
- if (bdev->driver->move_notify)
- bdev->driver->move_notify(bo, mem);
- bo->mem = *mem;
- mem->mm_node = NULL;
- goto moved;
- }
- }
-
- if (bdev->driver->move_notify)
- bdev->driver->move_notify(bo, mem);
-
- if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
- !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
- ret = ttm_bo_move_ttm(bo, mem);
- else if (bdev->driver->move)
- ret = bdev->driver->move(bo, evict, interruptible,
- no_wait_reserve, no_wait_gpu, mem);
- else
- ret = ttm_bo_move_memcpy(bo, mem);
+ ret = bdev->driver->move(bo, evict, interruptible,
+ no_wait_reserve, no_wait_gpu, mem);
if (ret) {
- if (bdev->driver->move_notify) {
- struct ttm_mem_reg tmp_mem = *mem;
- *mem = bo->mem;
- bo->mem = tmp_mem;
- bdev->driver->move_notify(bo, mem);
- bo->mem = *mem;
- }
-
- goto out_err;
+ return ret;
}
-moved:
if (bo->evicted) {
- ret = bdev->driver->invalidate_caches(bdev, bo->mem.placement);
- if (ret)
- pr_err("Can not flush read caches\n");
bo->evicted = false;
}
if (bo->mem.mm_node) {
bo->offset = (bo->mem.start << PAGE_SHIFT) +
- bdev->man[bo->mem.mem_type].gpu_offset;
+ bdev->man[bo->mem.mem_type].gpu_offset;
bo->cur_placement = bo->mem.placement;
- } else
+ } else {
bo->offset = 0;
-
- return 0;
-
-out_err:
- new_man = &bdev->man[bo->mem.mem_type];
- if ((new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm) {
- ttm_tt_unbind(bo->ttm);
- ttm_tt_destroy(bo->ttm);
- bo->ttm = NULL;
}
- return ret;
+ return 0;
}
/**
@@ -466,8 +406,7 @@ out_err:
static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
{
- if (bo->bdev->driver->move_notify)
- bo->bdev->driver->move_notify(bo, NULL);
+ bo->bdev->driver->move(bo, true, true, false, false, NULL);
if (bo->ttm) {
ttm_tt_unbind(bo->ttm);
@@ -1142,7 +1081,7 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
* We might need to add a TTM.
*/
if (bo->mem.mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
- ret = ttm_bo_add_ttm(bo, true);
+ ret = ttm_bo_add_tt(bo, true);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 8aafeef..000e3d6 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -181,6 +181,7 @@ void ttm_tt_destroy(struct ttm_tt *ttm)
ttm->swap_storage = NULL;
ttm->func->destroy(ttm);
}
+EXPORT_SYMBOL(ttm_tt_destroy);
int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev,
unsigned long size, uint32_t page_flags,
@@ -259,6 +260,7 @@ void ttm_tt_unbind(struct ttm_tt *ttm)
ttm->state = tt_unbound;
}
}
+EXPORT_SYMBOL(ttm_tt_unbind);
int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
{
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 1e2c0fb..fa5e380 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -335,21 +335,82 @@ static int vmw_sync_obj_wait(void *sync_obj, void *sync_arg,
VMW_FENCE_WAIT_TIMEOUT);
}
+static int vmw_bo_move(struct ttm_buffer_object *bo,
+ bool evict, bool interruptible,
+ bool no_wait_reserve, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem)
+{
+ struct ttm_mem_type_manager *old_man = &bo->bdev->man[bo->mem.mem_type];
+ struct ttm_mem_type_manager *new_man = &bo->bdev->man[new_mem->mem_type];
+ int ret;
+
+ /* bo is evicted before destruction */
+ if (new_mem == NULL && evict) {
+ return 0;
+ }
+
+ /*
+ * Create and bind a ttm if required.
+ */
+ if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) {
+ if (bo->ttm == NULL) {
+ bool zero = !(old_man->flags & TTM_MEMTYPE_FLAG_FIXED);
+ ret = ttm_bo_add_tt(bo, zero);
+ if (ret)
+ goto out_err;
+ }
+
+ ret = ttm_tt_set_placement_caching(bo->ttm, new_mem->placement);
+ if (ret) {
+ goto out_err;
+ }
+
+ if (new_mem->mem_type != TTM_PL_SYSTEM) {
+ ret = ttm_tt_bind(bo->ttm, new_mem);
+ if (ret)
+ goto out_err;
+ }
+
+ if (bo->mem.mem_type == TTM_PL_SYSTEM) {
+ bo->mem = *new_mem;
+ new_mem->mm_node = NULL;
+ return 0;
+ }
+ }
+
+ if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
+ !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) {
+ ret = ttm_bo_move_ttm(bo, new_mem);
+ } else {
+ ret = ttm_bo_move_memcpy(bo, new_mem);
+ }
+ if (ret) {
+ goto out_err;
+ }
+ return 0;
+
+out_err:
+ new_man = &bo->bdev->man[bo->mem.mem_type];
+ if ((new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm) {
+ ttm_tt_destroy(bo->ttm);
+ bo->ttm = NULL;
+ }
+ return ret;
+}
+
struct ttm_bo_driver vmw_bo_driver = {
.ttm_tt_create = &vmw_ttm_tt_create,
.ttm_tt_populate = &ttm_pool_populate,
.ttm_tt_unpopulate = &ttm_pool_unpopulate,
- .invalidate_caches = vmw_invalidate_caches,
.init_mem_type = vmw_init_mem_type,
.evict_flags = vmw_evict_flags,
- .move = NULL,
+ .move = vmw_bo_move,
.verify_access = vmw_verify_access,
.sync_obj_signaled = vmw_sync_obj_signaled,
.sync_obj_wait = vmw_sync_obj_wait,
.sync_obj_flush = vmw_sync_obj_flush,
.sync_obj_unref = vmw_sync_obj_unref,
.sync_obj_ref = vmw_sync_obj_ref,
- .move_notify = NULL,
.swap_notify = NULL,
.fault_reserve_notify = &vmw_ttm_fault_reserve_notify,
.io_mem_reserve = &vmw_ttm_io_mem_reserve,
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 47f6f9d..a30be54 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -353,21 +353,9 @@ struct ttm_bo_driver {
*/
void (*ttm_tt_unpopulate)(struct ttm_tt *ttm);
- /**
- * struct ttm_bo_driver member invalidate_caches
- *
- * @bdev: the buffer object device.
- * @flags: new placement of the rebound buffer object.
- *
- * A previosly evicted buffer has been rebound in a
- * potentially new location. Tell the driver that it might
- * consider invalidating read (texture) caches on the next command
- * submission as a consequence.
- */
-
- int (*invalidate_caches) (struct ttm_bo_device *bdev, uint32_t flags);
int (*init_mem_type) (struct ttm_bo_device *bdev, uint32_t type,
struct ttm_mem_type_manager *man);
+
/**
* struct ttm_bo_driver member evict_flags:
*
@@ -377,9 +365,9 @@ struct ttm_bo_driver {
* These will be placed in proposed_flags so that when the move is
* finished, they'll end up in bo->mem.flags
*/
-
void(*evict_flags) (struct ttm_buffer_object *bo,
struct ttm_placement *placement);
+
/**
* struct ttm_bo_driver member move:
*
@@ -430,10 +418,6 @@ struct ttm_bo_driver {
void (*sync_obj_unref) (void **sync_obj);
void *(*sync_obj_ref) (void *sync_obj);
- /* hook to notify driver about a driver move so it
- * can do tiling things */
- void (*move_notify)(struct ttm_buffer_object *bo,
- struct ttm_mem_reg *new_mem);
/* notify the driver we are taking a fault on this BO
* and have reserved it */
int (*fault_reserve_notify)(struct ttm_buffer_object *bo);
@@ -628,7 +612,7 @@ extern void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma);
extern int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
/**
- * ttm_ttm_destroy:
+ * ttm_tt_destroy:
*
* @ttm: The struct ttm_tt.
*
@@ -688,6 +672,21 @@ extern int ttm_tt_swapout(struct ttm_tt *ttm,
*/
/**
+ * ttm_bo_add_tt
+ *
+ * @bo: Pointer to a struct ttm_buffer_object. the data of which
+ * @zero: Allocate page cleared to zero or not.
+ *
+ * Allocate tt the buffer object pointed to by @bo.
+ *
+ * Returns:
+ * -EINVAL: Invalid buffer object type.
+ * -ENOMEM: Could not allocate tt for the buffer object.
+ */
+extern int ttm_bo_add_tt(struct ttm_buffer_object *bo, bool zero_alloc);
+
+
+/**
* ttm_mem_reg_is_pci
*
* @bdev: Pointer to a struct ttm_bo_device.
--
1.7.7.6
More information about the dri-devel
mailing list