[PATCH 43/51] drm/i915: Move vma pinning under vm->mutex

Fri Jun 29 22:04:18 UTC 2018

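Use the local GTT lock (vm->mutex) for pinning the VMA (including
insertion and removal), as opposed to relying on the BKL struct_mutex.

Since page-table pages may now be allocated by callers that hold only
vm->mutex, the global stash of WC pages gains its own lock
(i915->mm.wc_lock). vm->mutex is dropped around page-table allocation
and around pinning the object's backing pages; after reacquiring it we
recheck whether another thread completed the work in the meantime.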

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h               |  5 ++
 drivers/gpu/drm/i915/i915_gem.c               | 42 ++++++++--
 drivers/gpu/drm/i915/i915_gem_evict.c         | 12 +--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 51 +++++++-----
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 80 ++++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_render_state.c  | 17 ++--
 drivers/gpu/drm/i915/i915_gem_shrinker.c      |  2 +
 drivers/gpu/drm/i915/i915_gem_stolen.c        |  8 +-
 drivers/gpu/drm/i915/i915_vma.c               | 59 +++++++++-----
 drivers/gpu/drm/i915/i915_vma.h               | 39 ++++++---
 drivers/gpu/drm/i915/intel_engine_cs.c        | 13 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  3 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  2 +-
 13 files changed, 234 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d33ef06d8862..5d5a25795e27 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -953,6 +953,11 @@ struct i915_gem_mm {
 	 */
 	struct pagevec wc_stash;
 
+	/**
+	 * Lock for the small stash of WC pages.
+	 */
+	struct mutex wc_lock;
+
 	/**
 	 * tmpfs instance used for shmem backed objects
 	 */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ad6fb5c01c51..7a348cfae340 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -67,6 +67,7 @@ static int
 insert_mappable_node(struct i915_ggtt *ggtt,
                      struct drm_mm_node *node, u32 size)
 {
+	lockdep_assert_held(&ggtt->vm.mutex);
 	memset(node, 0, sizeof(*node));
 	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
 					   size, 0, I915_COLOR_UNEVICTABLE,
@@ -77,6 +78,14 @@ insert_mappable_node(struct i915_ggtt *ggtt,
 static void
 remove_mappable_node(struct drm_mm_node *node)
 {
+	/*
+	 * The node is not necessarily embedded in an i915_vma (pread/pwrite
+	 * pass a standalone drm_mm_node), so find the owning address space
+	 * through the drm_mm the node was inserted into.
+	 */
+	lockdep_assert_held(&container_of(node->mm,
+					  struct i915_address_space,
+					  mm)->mutex);
 	drm_mm_remove_node(node);
 }
 
@@ -445,7 +449,9 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 		list_move_tail(&vma->obj_link, &still_in_list);
 		spin_unlock(&obj->vma.lock);
 
+		mutex_lock(&vma->vm->mutex);
 		ret = i915_vma_unbind(vma);
+		mutex_unlock(&vma->vm->mutex);
 
 		spin_lock(&obj->vma.lock);
 	}
@@ -1194,7 +1200,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		}
 	}
 	if (IS_ERR(vma)) {
+		mutex_lock(&ggtt->vm.mutex);
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (ret)
 			goto out_unlock;
 		GEM_BUG_ON(!node.allocated);
@@ -1247,7 +1255,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	if (node.allocated) {
 		wmb();
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
+		mutex_lock(&ggtt->vm.mutex);
 		remove_mappable_node(&node);
+		mutex_unlock(&ggtt->vm.mutex);
 	} else {
 		i915_vma_unpin(vma);
 	}
@@ -1395,7 +1405,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		}
 	}
 	if (IS_ERR(vma)) {
+		mutex_lock(&ggtt->vm.mutex);
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (ret)
 			goto out_rpm;
 		GEM_BUG_ON(!node.allocated);
@@ -1455,7 +1467,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	if (node.allocated) {
 		wmb();
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
+		mutex_lock(&ggtt->vm.mutex);
 		remove_mappable_node(&node);
+		mutex_unlock(&ggtt->vm.mutex);
 	} else {
 		i915_vma_unpin(vma);
 	}
@@ -2107,7 +2121,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 err_fence:
 	i915_vma_unpin_fence(vma);
 err_unpin:
-	__i915_vma_unpin(vma);
+	i915_vma_unpin(vma);
 err_unlock:
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
@@ -3605,7 +3619,9 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		    i915_gem_valid_gtt_space(vma, cache_level))
 			continue;
 
+		mutex_lock(&vma->vm->mutex);
 		ret = i915_vma_unbind(vma);
+		mutex_unlock(&vma->vm->mutex);
 		if (ret)
 			return ret;
 
@@ -3987,8 +4003,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	if (flags & PIN_MAPPABLE &&
 	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
 		/* If the required space is larger than the available
@@ -4025,14 +4039,18 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	if (unlikely(IS_ERR(vma)))
 		return vma;
 
+	mutex_lock(&vm->mutex);
+
 	if (i915_vma_misplaced(vma, size, alignment, flags)) {
 		if (flags & PIN_NONBLOCK) {
+			ret = -ENOSPC;
+
 			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
-				return ERR_PTR(-ENOSPC);
+				goto err_unlock;
 
 			if (flags & PIN_MAPPABLE &&
 			    vma->fence_size > dev_priv->ggtt.mappable_end / 2)
-				return ERR_PTR(-ENOSPC);
+				goto err_unlock;
 		}
 
 		WARN(i915_vma_is_pinned(vma),
@@ -4042,16 +4060,22 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 		     i915_ggtt_offset(vma), alignment,
 		     !!(flags & PIN_MAPPABLE),
 		     i915_vma_is_map_and_fenceable(vma));
+
 		ret = i915_vma_unbind(vma);
 		if (ret)
-			return ERR_PTR(ret);
+			goto err_unlock;
 	}
 
-	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+	ret = __i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
 	if (ret)
-		return ERR_PTR(ret);
+		goto err_unlock;
 
+	mutex_unlock(&vm->mutex);
 	return vma;
+
+err_unlock:
+	mutex_unlock(&vm->mutex);
+	return ERR_PTR(ret);
 }
 
 static __always_inline unsigned int __busy_read_flag(unsigned int id)
@@ -4971,7 +4995,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 		 * from the GTT to prevent such accidents and reclaim the
 		 * space.
 		 */
+		mutex_lock(&state->vm->mutex);
 		err = i915_vma_unbind(state);
+		mutex_unlock(&state->vm->mutex);
 		if (err)
 			goto err_active;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 8bc6fc66cea2..98635af0c9f7 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -132,7 +132,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	struct i915_vma *active;
 	int ret;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	trace_i915_gem_evict(vm, min_size, alignment, flags);
 
 	/*
@@ -247,7 +247,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	 */
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		if (drm_mm_scan_remove_block(&scan, &vma->node))
-			__i915_vma_pin(vma);
+			____i915_vma_pin(vma);
 		else
 			list_del(&vma->evict_link);
 	}
@@ -291,7 +291,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 	bool check_color;
 	int ret = 0;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
 
@@ -371,7 +371,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 		 * unbinding one vma from freeing (by dropping its active
 		 * reference) another in our eviction list.
 		 */
-		__i915_vma_pin(vma);
+		____i915_vma_pin(vma);
 		list_add(&vma->evict_link, &eviction_list);
 	}
 
@@ -402,7 +402,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	struct i915_vma *vma, *next;
 	int ret;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	trace_i915_gem_evict_vm(vm);
 
 	/* Switch back to the default context in order to unpin
@@ -421,7 +421,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 		if (i915_vma_is_pinned(vma))
 			continue;
 
-		__i915_vma_pin(vma);
+		____i915_vma_pin(vma);
 		list_add(&vma->evict_link, &eviction_list);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 77fa86670e9b..89a38bd0ed98 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -405,7 +405,7 @@ eb_pin_vma(struct i915_execbuffer *eb,
 	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
 		pin_flags |= PIN_GLOBAL;
 
-	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
+	if (unlikely(__i915_vma_pin(vma, 0, 0, pin_flags)))
 		return false;
 
 	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
@@ -426,11 +426,8 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 {
 	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
 
-	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) {
-		mutex_lock(&vma->vm->mutex);
+	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
 		__i915_vma_unpin_fence(vma);
-		mutex_unlock(&vma->vm->mutex);
-	}
 
 	__i915_vma_unpin(vma);
 }
@@ -619,9 +616,9 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 	}
 
-	err = i915_vma_pin(vma,
-			   entry->pad_to_size, entry->alignment,
-			   pin_flags);
+	err = __i915_vma_pin(vma,
+			     entry->pad_to_size, entry->alignment,
+			     pin_flags);
 	if (err)
 		return err;
 
@@ -633,7 +630,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		err = i915_vma_pin_fence(vma);
 		if (unlikely(err)) {
-			i915_vma_unpin(vma);
+			__i915_vma_unpin(vma);
 			return err;
 		}
 
@@ -763,6 +760,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 
 	batch = eb_batch_index(eb);
 
+	mutex_lock(&eb->vm->mutex);
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
 		struct i915_lut_handle *lut;
@@ -772,6 +770,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		if (likely(vma))
 			goto add_vma;
 
+		mutex_unlock(&eb->vm->mutex);
+
 		obj = i915_gem_object_lookup(eb->file, handle);
 		if (unlikely(!obj)) {
 			err = -ENOENT;
@@ -796,6 +796,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 			goto err_obj;
 		}
 
+		mutex_lock(&eb->vm->mutex);
+
 		/* transfer ref to ctx */
 		if (!vma->open_count++)
 			i915_vma_reopen(vma);
@@ -806,8 +808,10 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 
 add_vma:
 		err = eb_add_vma(eb, i, batch, vma);
-		if (unlikely(err))
+		if (unlikely(err)) {
+			mutex_unlock(&eb->vm->mutex);
 			goto err_vma;
+		}
 
 		GEM_BUG_ON(vma != eb->vma[i]);
 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
@@ -816,7 +820,9 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	}
 
 	eb->args->flags |= __EXEC_VALIDATED;
-	return eb_reserve(eb);
+	err = eb_reserve(eb);
+	mutex_unlock(&eb->vm->mutex);
+	return err;
 
 err_obj:
 	i915_gem_object_put(obj);
@@ -850,6 +856,7 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 	const unsigned int count = eb->buffer_count;
 	unsigned int i;
 
+	mutex_lock(&eb->vm->mutex);
 	for (i = 0; i < count; i++) {
 		struct i915_vma *vma = eb->vma[i];
 		unsigned int flags = eb->flags[i];
@@ -861,12 +868,20 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 		vma->exec_flags = NULL;
 		eb->vma[i] = NULL;
 
+		if (unlikely(vma->vm != eb->vm))
+			mutex_lock_nested(&vma->vm->mutex,
+					  SINGLE_DEPTH_NESTING);
+
 		if (flags & __EXEC_OBJECT_HAS_PIN)
 			__eb_unreserve_vma(vma, flags);
 
 		if (flags & __EXEC_OBJECT_HAS_REF)
 			i915_vma_put(vma);
+
+		if (unlikely(vma->vm != eb->vm))
+			mutex_unlock(&vma->vm->mutex);
 	}
+	mutex_unlock(&eb->vm->mutex);
 }
 
 static void eb_reset_vmas(const struct i915_execbuffer *eb)
@@ -1824,22 +1839,14 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 	}
 
 	for (i = 0; i < count; i++) {
-		unsigned int flags = eb->flags[i];
-		struct i915_vma *vma = eb->vma[i];
-
-		err = i915_vma_move_to_active(vma, eb->request, flags);
+		err = i915_vma_move_to_active(eb->vma[i],
+					      eb->request,
+					      eb->flags[i]);
 		if (unlikely(err)) {
 			i915_request_skip(eb->request, err);
 			return err;
 		}
-
-		__eb_unreserve_vma(vma, flags);
-		vma->exec_flags = NULL;
-
-		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
-			i915_vma_put(vma);
 	}
-	eb->exec = NULL;
 
 	/* Unconditionally flush any chipset caches (for streaming writes). */
 	i915_gem_chipset_flush(eb->i915);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 34837f20efab..bd8a3a0ee632 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -383,6 +383,7 @@ static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
 {
 	struct pagevec *pvec = &vm->free_pages;
 	struct pagevec stash;
+	struct page *page;
 
 	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
 		i915_gem_shrink_all(vm->i915);
@@ -393,13 +394,14 @@ static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
 	if (!vm->pt_kmap_wc)
 		return alloc_page(gfp);
 
-	/* A placeholder for a specific mutex to guard the WC stash */
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	mutex_lock(&vm->i915->mm.wc_lock);
 
 	/* Look in our global stash of WC pages... */
 	pvec = &vm->i915->mm.wc_stash;
-	if (likely(pvec->nr))
-		return pvec->pages[--pvec->nr];
+	if (likely(pvec->nr)) {
+		page = pvec->pages[--pvec->nr];
+		goto unlock;
+	}
 
 	/*
 	 * Otherwise batch allocate pages to amoritize cost of set_pages_wc.
@@ -434,7 +436,14 @@ static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
 		pagevec_release(&stash);
 	}
 
-	return likely(pvec->nr) ? pvec->pages[--pvec->nr] : NULL;
+	page = NULL;
+	if (likely(pvec->nr))
+		page = pvec->pages[--pvec->nr];
+
+unlock:
+	mutex_unlock(&vm->i915->mm.wc_lock);
+
+	return page;
 }
 
 static void vm_free_pages_release(struct i915_address_space *vm,
@@ -446,30 +455,37 @@ static void vm_free_pages_release(struct i915_address_space *vm,
 
 	if (vm->pt_kmap_wc) {
 		struct pagevec *stash = &vm->i915->mm.wc_stash;
+		struct mutex *lock = &vm->i915->mm.wc_lock;
 
 		/* When we use WC, first fill up the global stash and then
 		 * only if full immediately free the overflow.
 		 */
 
-		lockdep_assert_held(&vm->i915->drm.struct_mutex);
+		mutex_lock(lock);
 		if (pagevec_space(stash)) {
 			do {
 				stash->pages[stash->nr++] =
 					pvec->pages[--pvec->nr];
-				if (!pvec->nr)
+				if (!pvec->nr) {
+					mutex_unlock(lock);
 					return;
+				}
 			} while (pagevec_space(stash));
 
-			/* As we have made some room in the VM's free_pages,
+			/*
+			 * As we have made some room in the VM's free_pages,
 			 * we can wait for it to fill again. Unless we are
 			 * inside i915_address_space_fini() and must
 			 * immediately release the pages!
 			 */
-			if (!immediate)
+			if (!immediate) {
+				mutex_unlock(lock);
 				return;
+			}
 		}
 
 		set_pages_array_wb(pvec->pages, pvec->nr);
+		mutex_unlock(lock);
 	}
 
 	__pagevec_release(pvec);
@@ -1332,17 +1348,32 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 		if (pt == vm->scratch_pt) {
 			pd->used_pdes++;
 
+			mutex_unlock(&vm->mutex);
 			pt = alloc_pt(vm);
+			mutex_lock(&vm->mutex);
 			if (IS_ERR(pt)) {
 				pd->used_pdes--;
 				goto unwind;
 			}
 
-			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
-				gen8_initialize_pt(vm, pt);
+			if (pd->page_table[pde] == vm->scratch_pt) {
+				if (count < GEN8_PTES ||
+				    intel_vgpu_active(vm->i915))
+					gen8_initialize_pt(vm, pt);
 
-			gen8_ppgtt_set_pde(vm, pd, pt, pde);
-			GEM_BUG_ON(pd->used_pdes > I915_PDES);
+				gen8_ppgtt_set_pde(vm, pd, pt, pde);
+				GEM_BUG_ON(pd->used_pdes > I915_PDES);
+			} else {
+				/*
+				 * Someone else installed a page table while
+				 * we dropped the lock: discard ours, undo our
+				 * speculative accounting and use theirs so
+				 * we never touch the freed allocation below.
+				 */
+				free_pt(vm, pt);
+				pd->used_pdes--;
+				pt = pd->page_table[pde];
+			}
 		}
 
 		pt->used_ptes += count;
@@ -1367,17 +1390,31 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 		if (pd == vm->scratch_pd) {
 			pdp->used_pdpes++;
 
+			mutex_unlock(&vm->mutex);
 			pd = alloc_pd(vm);
+			mutex_lock(&vm->mutex);
 			if (IS_ERR(pd)) {
 				pdp->used_pdpes--;
 				goto unwind;
 			}
 
-			gen8_initialize_pd(vm, pd);
-			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
-			GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
+			if (pdp->page_directory[pdpe] == vm->scratch_pd) {
+				gen8_initialize_pd(vm, pd);
+				gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
+				GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
 
-			mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
+				mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
+			} else {
+				/*
+				 * Someone else installed a page directory
+				 * while we dropped the lock: discard ours,
+				 * undo our speculative accounting and descend
+				 * into theirs rather than the freed one.
+				 */
+				free_pd(vm, pd);
+				pdp->used_pdpes--;
+				pd = pdp->page_directory[pdpe];
+			}
 		}
 
 		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
@@ -1418,12 +1447,24 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 		if (pml4->pdps[pml4e] == vm->scratch_pdp) {
+			mutex_unlock(&vm->mutex);
 			pdp = alloc_pdp(vm);
+			mutex_lock(&vm->mutex);
 			if (IS_ERR(pdp))
 				goto unwind;
 
-			gen8_initialize_pdp(vm, pdp);
-			gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
+			if (pml4->pdps[pml4e] == vm->scratch_pdp) {
+				gen8_initialize_pdp(vm, pdp);
+				gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
+			} else {
+				/*
+				 * Lost the race to populate this entry while
+				 * the lock was dropped: free our copy and
+				 * continue with the installed pdp.
+				 */
+				free_pdp(vm, pdp);
+				pdp = pml4->pdps[pml4e];
+			}
 		}
 
 		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
@@ -2961,8 +2996,10 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	i915_gem_fini_aliasing_ppgtt(dev_priv);
 
+	mutex_lock(&ggtt->vm.mutex);
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
 		WARN_ON(i915_vma_unbind(vma));
+	mutex_unlock(&ggtt->vm.mutex);
 
 	if (drm_mm_node_allocated(&ggtt->error_capture))
 		drm_mm_remove_node(&ggtt->error_capture);
@@ -3576,6 +3613,9 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	int ret;
 
+	mutex_init(&dev_priv->mm.wc_lock);
+	pagevec_init(&dev_priv->mm.wc_stash);
+
 	INIT_LIST_HEAD(&dev_priv->vm_list);
 
 	/* Note that we use page colouring to enforce a guard page at the
@@ -4002,7 +4042,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	u64 offset;
 	int err;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	GEM_BUG_ON(!size);
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 90baf9086d0a..904581fdbde7 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -179,6 +179,7 @@ static int render_state_setup(struct intel_render_state *so,
 
 int i915_gem_render_state_emit(struct i915_request *rq)
 {
+	struct i915_ggtt *ggtt = &rq->i915->ggtt;
 	struct intel_engine_cs *engine = rq->engine;
 	struct intel_render_state so = {}; /* keep the compiler happy */
 	int err;
@@ -194,15 +195,19 @@ int i915_gem_render_state_emit(struct i915_request *rq)
 	if (IS_ERR(so.obj))
 		return PTR_ERR(so.obj);
 
-	so.vma = i915_vma_instance(so.obj, &engine->i915->ggtt.vm, NULL);
+	so.vma = i915_vma_instance(so.obj, &ggtt->vm, NULL);
 	if (IS_ERR(so.vma)) {
 		err = PTR_ERR(so.vma);
 		goto err_obj;
 	}
 
-	err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	mutex_lock(&ggtt->vm.mutex);
+
+	err = __i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
-		goto err_vma;
+		goto err_vma_locked;
+
+	mutex_unlock(&ggtt->vm.mutex);
 
 	err = render_state_setup(&so, rq->i915);
 	if (err)
@@ -224,9 +229,11 @@ int i915_gem_render_state_emit(struct i915_request *rq)
 
 	err = i915_vma_move_to_active(so.vma, rq, 0);
 err_unpin:
-	i915_vma_unpin(so.vma);
-err_vma:
+	mutex_lock(&ggtt->vm.mutex);
+	__i915_vma_unpin(so.vma);
+err_vma_locked:
 	i915_vma_close(so.vma);
+	mutex_unlock(&ggtt->vm.mutex);
 err_obj:
 	__i915_gem_object_release_unless_active(so.obj);
 	return err;
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 52c061b728ca..006a72b0d6a6 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -480,6 +480,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	intel_runtime_pm_put(i915);
 
 	/* We also want to clear any cached iomaps as they wrap vmap */
+	mutex_lock(&i915->ggtt.vm.mutex);
 	list_for_each_entry_safe(vma, next,
 				 &i915->ggtt.vm.bound_list, vm_link) {
 		unsigned long count = vma->node.size >> PAGE_SHIFT;
@@ -490,6 +491,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		if (i915_vma_unbind(vma) == 0)
 			freed_pages += count;
 	}
+	mutex_unlock(&i915->ggtt.vm.mutex);
 
 out:
 	shrinker_unlock(i915, unlock);
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index f65ee28d5d81..917bd92d76d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -600,8 +600,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	if (!drm_mm_initialized(&dev_priv->mm.stolen))
 		return NULL;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
 			 &stolen_offset, &gtt_offset, &size);
 
@@ -648,7 +646,10 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 		goto err_pages;
 	}
 
-	/* To simplify the initialisation sequence between KMS and GTT,
+	mutex_lock(&vma->vm->mutex);
+
+	/*
+	 * To simplify the initialisation sequence between KMS and GTT,
 	 * we allow construction of the stolen object prior to
 	 * setting up the GTT space. The actual reservation will occur
 	 * later.
@@ -667,7 +668,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	vma->flags |= I915_VMA_GLOBAL_BIND;
 	__i915_vma_set_map_and_fenceable(vma);
 
-	mutex_lock(&ggtt->vm.mutex);
 	list_add_tail(&vma->vm_link, &ggtt->vm.bound_list);
 	mutex_unlock(&ggtt->vm.mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 08b6c2082b70..d688f2ba920c 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -274,7 +274,7 @@ vma_lookup(struct drm_i915_gem_object *obj,
  * Once created, the VMA is kept until either the object is freed, or the
  * address space is closed.
  *
- * Must be called with struct_mutex held.
+ * Must be called with struct_mutex held (callers need not hold vm->mutex).
  *
  * Returns the vma, or an error pointer.
  */
@@ -316,6 +316,7 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	u32 vma_flags;
 	int ret;
 
+	lockdep_assert_held(&vma->vm->mutex);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(vma->size > vma->node.size);
 
@@ -359,8 +360,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
 	/* Access through the GTT requires the device to be awake. */
 	assert_rpm_wakelock_held(vma->vm->i915);
+	mutex_lock(&vma->vm->mutex);
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 	if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
 		err = -ENODEV;
 		goto err;
@@ -382,18 +383,20 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 		vma->iomap = ptr;
 	}
 
-	__i915_vma_pin(vma);
+	____i915_vma_pin(vma);
 
-	err = i915_vma_pin_fence(vma);
+	err = __i915_vma_pin_fence(vma);
 	if (err)
 		goto err_unpin;
 
 	i915_vma_set_ggtt_write(vma);
+	mutex_unlock(&vma->vm->mutex);
 	return ptr;
 
 err_unpin:
 	__i915_vma_unpin(vma);
 err:
+	mutex_unlock(&vma->vm->mutex);
 	return IO_ERR_PTR(err);
 }
 
@@ -558,6 +561,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	u64 start, end;
 	int ret;
 
+	lockdep_assert_held(&vma->vm->mutex);
+
 	GEM_BUG_ON(i915_vma_is_closed(vma));
 	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
@@ -584,7 +589,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
 
-	/* If binding the object/GGTT view requires more space than the entire
+	/*
+	 * If binding the object/GGTT view requires more space than the entire
 	 * aperture has, reject it early before evicting everything in a vain
 	 * attempt to find space.
 	 */
@@ -596,11 +602,28 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	}
 
 	if (vma->obj) {
-		ret = i915_gem_object_pin_pages(vma->obj);
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		mutex_unlock(&vma->vm->mutex);
+		ret = i915_gem_object_pin_pages(obj);
+		mutex_lock(&vma->vm->mutex);
 		if (ret)
 			return ret;
 
-		cache_level = vma->obj->cache_level;
+		/*
+		 * Check that someone else didn't complete the job on our
+		 * behalf while we dropped the lock.
+		 */
+		if (drm_mm_node_allocated(&vma->node)) {
+			i915_gem_object_unpin_pages(obj);
+
+			if (i915_vma_misplaced(vma, size, alignment, flags))
+				return -EAGAIN;
+
+			return 0;
+		}
+
+		cache_level = obj->cache_level;
 	} else {
 		cache_level = 0;
 	}
@@ -670,9 +693,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
 
-	mutex_lock(&vma->vm->mutex);
+	lockdep_assert_held(&vma->vm->mutex);
 	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
-	mutex_unlock(&vma->vm->mutex);
 
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
@@ -700,15 +722,15 @@ i915_vma_remove(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
 
+	lockdep_assert_held(&vma->vm->mutex);
+
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 
 	vma->ops->clear_pages(vma);
 
-	mutex_lock(&vma->vm->mutex);
 	drm_mm_remove_node(&vma->node);
 	list_del(&vma->vm_link);
-	mutex_unlock(&vma->vm->mutex);
 
 	/*
 	 * Since the unbound list is global, only move to that list if
@@ -738,7 +760,7 @@ int __i915_vma_do_pin(struct i915_vma *vma,
 	const unsigned int bound = vma->flags;
 	int ret;
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vma->vm->mutex);
 	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
 	GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
 
@@ -841,6 +863,8 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 
 void i915_vma_destroy(struct i915_vma *vma)
 {
+	struct i915_address_space *vm = vma->vm;
+
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 
 	GEM_BUG_ON(i915_vma_is_active(vma));
@@ -849,7 +873,10 @@ void i915_vma_destroy(struct i915_vma *vma)
 	if (i915_vma_is_closed(vma))
 		list_del(&vma->closed_link);
 
+	mutex_lock(&vm->mutex);
 	WARN_ON(i915_vma_unbind(vma));
+	mutex_unlock(&vm->mutex);
+
 	__i915_vma_destroy(vma);
 }
 
@@ -1023,7 +1050,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 {
 	int ret;
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vma->vm->mutex);
 
 	/*
 	 * First wait upon any activity as retiring the request may
@@ -1046,7 +1073,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 * we currently hold, therefore it cannot free this object
 		 * before we are finished).
 		 */
-		__i915_vma_pin(vma);
+		____i915_vma_pin(vma);
 
 		ret = i915_gem_active_retire(&vma->last_active,
 					     &vma->vm->i915->drm.struct_mutex);
@@ -1079,8 +1106,6 @@ int i915_vma_unbind(struct i915_vma *vma)
 		return 0;
 
 	if (i915_vma_is_map_and_fenceable(vma)) {
-		mutex_lock(&vma->vm->mutex);
-
 		/*
 		 * Check that we have flushed all writes through the GGTT
 		 * before the unbind, other due to non-strict nature of those
@@ -1096,8 +1121,6 @@ int i915_vma_unbind(struct i915_vma *vma)
 
 		__i915_vma_iounmap(vma);
 		vma->flags &= ~I915_VMA_CAN_FENCE;
-
-		mutex_unlock(&vma->vm->mutex);
 	}
 	GEM_BUG_ON(vma->fence);
 	GEM_BUG_ON(i915_vma_has_userfault(vma));
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index e47f66f85134..9575457d36c2 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -277,11 +277,22 @@ void i915_vma_close(struct i915_vma *vma);
 void i915_vma_reopen(struct i915_vma *vma);
 void i915_vma_destroy(struct i915_vma *vma);
 
+static inline void ____i915_vma_pin(struct i915_vma *vma)
+{
+	lockdep_assert_held(&vma->vm->mutex);
+
+	vma->flags++;
+	GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW);
+}
+
 int __i915_vma_do_pin(struct i915_vma *vma,
 		      u64 size, u64 alignment, u64 flags);
+
 static inline int __must_check
-i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+__i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
+	lockdep_assert_held(&vma->vm->mutex);
+
 	BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
 	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
 	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
@@ -298,32 +309,42 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	return __i915_vma_do_pin(vma, size, alignment, flags);
 }
 
-static inline int i915_vma_pin_count(const struct i915_vma *vma)
+static inline int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	return vma->flags & I915_VMA_PIN_MASK;
+	int err;
+
+	mutex_lock(&vma->vm->mutex);
+	err = __i915_vma_pin(vma, size, alignment, flags);
+	mutex_unlock(&vma->vm->mutex);
+
+	return err;
 }
 
-static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
+static inline int i915_vma_pin_count(const struct i915_vma *vma)
 {
-	return i915_vma_pin_count(vma);
+	return READ_ONCE(vma->flags) & I915_VMA_PIN_MASK;
 }
 
-static inline void __i915_vma_pin(struct i915_vma *vma)
+static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
 {
-	vma->flags++;
-	GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW);
+	return i915_vma_pin_count(vma);
 }
 
 static inline void __i915_vma_unpin(struct i915_vma *vma)
 {
+	lockdep_assert_held(&vma->vm->mutex);
+
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 	vma->flags--;
 }
 
 static inline void i915_vma_unpin(struct i915_vma *vma)
 {
-	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+	mutex_lock(&vma->vm->mutex);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	__i915_vma_unpin(vma);
+	mutex_unlock(&vma->vm->mutex);
 }
 
 static inline bool i915_vma_is_bound(const struct i915_vma *vma,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 5f358e176a82..84d1d38a600f 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -490,6 +490,7 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 int intel_engine_create_scratch(struct intel_engine_cs *engine,
 				unsigned int size)
 {
+	struct i915_ggtt *ggtt = &engine->i915->ggtt;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	int ret;
@@ -504,20 +505,21 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine,
 		return PTR_ERR(obj);
 	}
 
-	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
+	vma = i915_vma_instance(obj, &ggtt->vm, NULL);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
-		goto err_unref;
+		goto err_put_obj;
 	}
 
+	/* i915_vma_pin() acquires and releases vm->mutex internally. */
 	ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH);
 	if (ret)
-		goto err_unref;
+		goto err_put_obj;
 
 	engine->scratch = vma;
 	return 0;
 
-err_unref:
+err_put_obj:
 	i915_gem_object_put(obj);
 	return ret;
 }
@@ -558,6 +560,7 @@ static void cleanup_status_page(struct intel_engine_cs *engine)
 
 static int init_status_page(struct intel_engine_cs *engine)
 {
+	struct i915_ggtt *ggtt = &engine->i915->ggtt;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	unsigned int flags;
@@ -574,7 +577,7 @@ static int init_status_page(struct intel_engine_cs *engine)
 	if (ret)
 		goto err;
 
-	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
+	vma = i915_vma_instance(obj, &ggtt->vm, NULL);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6d7ce0b6b025..44a0d3010059 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1289,6 +1289,7 @@ static struct i915_vma *
 alloc_context_vma(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
+	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	int err;
@@ -1339,7 +1340,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
 		i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
 	}
 
-	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	vma = i915_vma_instance(obj, &ggtt->vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err_obj;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 0bc63fb03edd..7017adae7cc2 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1514,7 +1514,7 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 		track_vma_bind(vma);
-		__i915_vma_pin(vma);
+		____i915_vma_pin(vma);
 
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	}
-- 
2.18.0