[Intel-gfx] [PATCH 05/27] drm/i915: Pull i915_vma_pin under the vm->mutex

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Sep 27 08:47:59 UTC 2019


On 25/09/2019 11:01, Chris Wilson wrote:
> Replace the struct_mutex requirement for pinning the i915_vma with the
> local vm->mutex instead. Note that the vm->mutex is tainted by the
> shrinker (we require unbinding from inside fs-reclaim) and so we cannot
> allocate while holding that mutex. Instead we have to preallocate
> workers to do the allocation and apply the PTE updates after we have
> reserved their slot in the drm_mm (using fences to order the PTE writes
> with the GPU work and with later unbind).
> 
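Just to check my understanding of the new locking rule -- since vm->mutex is
tainted by fs-reclaim, everything that can allocate has to happen before we
take it, and the PTE writes themselves are deferred to a preallocated worker.
Roughly like the sketch below (bind_work, apply_ptes and sketch_pin are names
I made up for illustration, not the helpers the patch actually adds):

	/* Illustrative sketch only, assuming the usual i915 headers. */
	struct bind_work {
		struct work_struct work;
		struct i915_vma *vma;
	};

	static void apply_ptes(struct work_struct *wrk)
	{
		struct bind_work *bw = container_of(wrk, struct bind_work, work);

		/*
		 * Write the PTEs for the node reserved earlier; no vm->mutex
		 * needed here. In the patch proper the worker also carries a
		 * fence so GPU work and later unbind can order against it.
		 */
		bw->vma->vm->insert_entries(bw->vma->vm, bw->vma,
					    bw->vma->obj->cache_level, 0);
		kfree(bw);
	}

	static int sketch_pin(struct i915_vma *vma, u64 size, u64 offset)
	{
		struct bind_work *bw;
		int err;

		/* Allocate before taking vm->mutex; the mutex is reclaim tainted. */
		bw = kmalloc(sizeof(*bw), GFP_KERNEL);
		if (!bw)
			return -ENOMEM;
		INIT_WORK(&bw->work, apply_ptes);
		bw->vma = vma;

		/* Only the drm_mm bookkeeping happens under the mutex. */
		mutex_lock(&vma->vm->mutex);
		err = i915_gem_gtt_reserve(vma->vm, &vma->node, size, offset,
					   vma->obj->cache_level, 0);
		mutex_unlock(&vma->vm->mutex);
		if (err) {
			kfree(bw);
			return err;
		}

		queue_work(system_unbound_wq, &bw->work);
		return 0;
	}
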
> In adding the asynchronous vma binding, one subtle requirement is to
> avoid coupling the binding fence into the backing object->resv. That is,
> the asynchronous binding only applies to the vma timeline itself and not
> to the pages as that is a more global timeline (the binding of one vma
> does not need to be ordered with another vma, nor does the implicit GEM
> fencing depend on a vma, only on writes to the backing store). Keeping
> the vma binding distinct from the backing store timelines is verified by
> a number of async gem_exec_fence and gem_exec_schedule tests. The way we
> do this is quite simple: we keep the fence for the vma binding separate
> and only wait on it as required, and never add it to the obj->resv
> itself.
> 
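So if I follow, the bind fence only ever lives on the vma's own timeline via
the new i915_active_set_exclusive() added below, and is deliberately never
installed into the object's dma_resv. Something like this sketch (work->dma
is just a stand-in for whatever fence the async bind carries):

	/* Track the async bind only on the vma timeline. */
	i915_active_set_exclusive(&vma->active, &work->dma);

	/*
	 * Deliberately *not* done -- this would couple the bind into the
	 * global object timeline and order unrelated vma binds against it:
	 *
	 *	dma_resv_add_excl_fence(vma->obj->base.resv, &work->dma);
	 */
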
> Another consequence of reducing the locking around the vma is that the
> destruction of the vma is no longer globally serialised by struct_mutex.
> A natural solution would be to add a kref to i915_vma, but that requires
> decoupling the reference cycles, possibly by introducing a new
> i915_mm_pages object that is owned by both obj->mm and vma->pages.
> However, we have not taken that route due to the overshadowing lmem/ttm
> discussions, and instead play a series of complicated games with
> trylocks to (hopefully) ensure that only one destruction path is called!
> 
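Just to restate the trylock idea so I am sure I follow: whichever path wins
the trylock owns the teardown and the loser simply backs off, along the lines
of this sketch (using the new i915_gem_object_trylock() helper added below;
the real paths are of course more involved):

	/* Sketch only: one destruction path wins, the other gives way. */
	if (!i915_gem_object_trylock(obj))
		return; /* the competing path holds the lock and will clean up */

	/* ... tear down the vma, knowing no one else is doing the same ... */

	i915_gem_object_unlock(obj);
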
> v2: Add some commentary, and some helpers to reduce patch churn.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/display/intel_display.c  |  29 +-
>   drivers/gpu/drm/i915/display/intel_fbdev.c    |   8 +-
>   drivers/gpu/drm/i915/display/intel_overlay.c  |  11 +-
>   .../gpu/drm/i915/gem/i915_gem_client_blt.c    |   2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  20 +-
>   drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  13 +-
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  43 +-
>   drivers/gpu/drm/i915/gem/i915_gem_mman.c      |  20 +-
>   drivers/gpu/drm/i915/gem/i915_gem_object.c    |  33 +-
>   drivers/gpu/drm/i915/gem/i915_gem_object.h    |   5 +
>   drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  73 +--
>   drivers/gpu/drm/i915/gem/i915_gem_stolen.c    |   8 +-
>   drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  27 +-
>   drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |  27 +-
>   .../gpu/drm/i915/gem/selftests/huge_pages.c   |   9 +-
>   .../drm/i915/gem/selftests/i915_gem_context.c |  12 +-
>   .../drm/i915/gem/selftests/i915_gem_mman.c    |   4 -
>   .../drm/i915/gem/selftests/igt_gem_utils.c    |   7 +-
>   drivers/gpu/drm/i915/gt/intel_gt.c            |   5 +-
>   drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   4 +-
>   drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  19 +-
>   drivers/gpu/drm/i915/gvt/aperture_gm.c        |  12 +-
>   drivers/gpu/drm/i915/i915_active.c            |  95 +++-
>   drivers/gpu/drm/i915/i915_active.h            |   7 +
>   drivers/gpu/drm/i915/i915_active_types.h      |   5 +
>   drivers/gpu/drm/i915/i915_drv.c               |   2 -
>   drivers/gpu/drm/i915/i915_gem.c               |  83 ++-
>   drivers/gpu/drm/i915/i915_gem_evict.c         |  28 +-
>   drivers/gpu/drm/i915/i915_gem_fence_reg.c     |   9 +-
>   drivers/gpu/drm/i915/i915_gem_gtt.c           | 109 ++--
>   drivers/gpu/drm/i915/i915_gem_gtt.h           |  45 +-
>   drivers/gpu/drm/i915/i915_perf.c              |  32 +-
>   drivers/gpu/drm/i915/i915_vma.c               | 524 ++++++++++++------
>   drivers/gpu/drm/i915/i915_vma.h               |  84 +--
>   drivers/gpu/drm/i915/selftests/i915_gem.c     |   2 -
>   .../gpu/drm/i915/selftests/i915_gem_evict.c   |  36 +-
>   drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  58 +-
>   drivers/gpu/drm/i915/selftests/i915_request.c |   7 +
>   drivers/gpu/drm/i915/selftests/i915_vma.c     |   6 +-
>   39 files changed, 823 insertions(+), 700 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> index 5ecf54270181..12abcd1e471b 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -2079,7 +2079,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>   	unsigned int pinctl;
>   	u32 alignment;
>   
> -	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
>   	if (WARN_ON(!i915_gem_object_is_framebuffer(obj)))
>   		return ERR_PTR(-EINVAL);
>   
> @@ -2163,8 +2162,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>   
>   void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
>   {
> -	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> -
>   	i915_gem_object_lock(vma->obj);
>   	if (flags & PLANE_HAS_FENCE)
>   		i915_vma_unpin_fence(vma);
> @@ -3065,12 +3062,10 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
>   		return false;
>   	}
>   
> -	mutex_lock(&dev->struct_mutex);
>   	obj = i915_gem_object_create_stolen_for_preallocated(dev_priv,
>   							     base_aligned,
>   							     base_aligned,
>   							     size_aligned);
> -	mutex_unlock(&dev->struct_mutex);
>   	if (!obj)
>   		return false;
>   
> @@ -3232,13 +3227,11 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
>   	intel_state->color_plane[0].stride =
>   		intel_fb_pitch(fb, 0, intel_state->base.rotation);
>   
> -	mutex_lock(&dev->struct_mutex);
>   	intel_state->vma =
>   		intel_pin_and_fence_fb_obj(fb,
>   					   &intel_state->view,
>   					   intel_plane_uses_fence(intel_state),
>   					   &intel_state->flags);
> -	mutex_unlock(&dev->struct_mutex);
>   	if (IS_ERR(intel_state->vma)) {
>   		DRM_ERROR("failed to pin boot fb on pipe %d: %li\n",
>   			  intel_crtc->pipe, PTR_ERR(intel_state->vma));
> @@ -14368,8 +14361,6 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
>    * bits.  Some older platforms need special physical address handling for
>    * cursor planes.
>    *
> - * Must be called with struct_mutex held.
> - *
>    * Returns 0 on success, negative error code on failure.
>    */
>   int
> @@ -14426,15 +14417,8 @@ intel_prepare_plane_fb(struct drm_plane *plane,
>   	if (ret)
>   		return ret;
>   
> -	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
> -	if (ret) {
> -		i915_gem_object_unpin_pages(obj);
> -		return ret;
> -	}
> -
>   	ret = intel_plane_pin_fb(to_intel_plane_state(new_state));
>   
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
>   	i915_gem_object_unpin_pages(obj);
>   	if (ret)
>   		return ret;
> @@ -14483,8 +14467,6 @@ intel_prepare_plane_fb(struct drm_plane *plane,
>    * @old_state: the state from the previous modeset
>    *
>    * Cleans up a framebuffer that has just been removed from a plane.
> - *
> - * Must be called with struct_mutex held.
>    */
>   void
>   intel_cleanup_plane_fb(struct drm_plane *plane,
> @@ -14500,9 +14482,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
>   	}
>   
>   	/* Should only be called after a successful intel_prepare_plane_fb()! */
> -	mutex_lock(&dev_priv->drm.struct_mutex);
>   	intel_plane_unpin_fb(to_intel_plane_state(old_state));
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
>   }
>   
>   int
> @@ -14705,7 +14685,6 @@ intel_legacy_cursor_update(struct drm_plane *plane,
>   			   u32 src_w, u32 src_h,
>   			   struct drm_modeset_acquire_ctx *ctx)
>   {
> -	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
>   	struct drm_plane_state *old_plane_state, *new_plane_state;
>   	struct intel_plane *intel_plane = to_intel_plane(plane);
>   	struct intel_crtc_state *crtc_state =
> @@ -14771,13 +14750,9 @@ intel_legacy_cursor_update(struct drm_plane *plane,
>   	if (ret)
>   		goto out_free;
>   
> -	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
> -	if (ret)
> -		goto out_free;
> -
>   	ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state));
>   	if (ret)
> -		goto out_unlock;
> +		goto out_free;
>   
>   	intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP);
>   	intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb),
> @@ -14807,8 +14782,6 @@ intel_legacy_cursor_update(struct drm_plane *plane,
>   
>   	intel_plane_unpin_fb(to_intel_plane_state(old_plane_state));
>   
> -out_unlock:
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
>   out_free:
>   	if (new_crtc_state)
>   		intel_crtc_destroy_state(crtc, &new_crtc_state->base);
> diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
> index 68338669f054..97cde017670a 100644
> --- a/drivers/gpu/drm/i915/display/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
> @@ -204,7 +204,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
>   		sizes->fb_height = intel_fb->base.height;
>   	}
>   
> -	mutex_lock(&dev->struct_mutex);
>   	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
>   
>   	/* Pin the GGTT vma for our access via info->screen_base.
> @@ -266,7 +265,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
>   	ifbdev->vma_flags = flags;
>   
>   	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
> -	mutex_unlock(&dev->struct_mutex);
>   	vga_switcheroo_client_fb_set(pdev, info);
>   	return 0;
>   
> @@ -274,7 +272,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
>   	intel_unpin_fb_vma(vma, flags);
>   out_unlock:
>   	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
> -	mutex_unlock(&dev->struct_mutex);
>   	return ret;
>   }
>   
> @@ -291,11 +288,8 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev)
>   
>   	drm_fb_helper_fini(&ifbdev->helper);
>   
> -	if (ifbdev->vma) {
> -		mutex_lock(&ifbdev->helper.dev->struct_mutex);
> +	if (ifbdev->vma)
>   		intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags);
> -		mutex_unlock(&ifbdev->helper.dev->struct_mutex);
> -	}
>   
>   	if (ifbdev->fb)
>   		drm_framebuffer_remove(&ifbdev->fb->base);
> diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
> index 5efef9babadb..3f4ac1ee7668 100644
> --- a/drivers/gpu/drm/i915/display/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/display/intel_overlay.c
> @@ -1303,15 +1303,11 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
>   	struct i915_vma *vma;
>   	int err;
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> -
>   	obj = i915_gem_object_create_stolen(i915, PAGE_SIZE);
>   	if (obj == NULL)
>   		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
> -	if (IS_ERR(obj)) {
> -		err = PTR_ERR(obj);
> -		goto err_unlock;
> -	}
> +	if (IS_ERR(obj))
> +		return PTR_ERR(obj);
>   
>   	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
>   	if (IS_ERR(vma)) {
> @@ -1332,13 +1328,10 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
>   	}
>   
>   	overlay->reg_bo = obj;
> -	mutex_unlock(&i915->drm.struct_mutex);
>   	return 0;
>   
>   err_put_bo:
>   	i915_gem_object_put(obj);
> -err_unlock:
> -	mutex_unlock(&i915->drm.struct_mutex);
>   	return err;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> index 7f61a8024133..c1fca5728e6e 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> @@ -211,7 +211,7 @@ static void clear_pages_worker(struct work_struct *work)
>   	 * keep track of the GPU activity within this vma/request, and
>   	 * propagate the signal from the request to w->dma.
>   	 */
> -	err = i915_active_add_request(&vma->active, rq);
> +	err = __i915_vma_move_to_active(vma, rq);
>   	if (err)
>   		goto out_request;
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index f7ba0935ed67..95f8e66e45db 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -313,8 +313,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
>   
>   	release_hw_id(ctx);
> -	if (ctx->vm)
> -		i915_vm_put(ctx->vm);
>   
>   	free_engines(rcu_access_pointer(ctx->engines));
>   	mutex_destroy(&ctx->engines_mutex);
> @@ -379,9 +377,13 @@ void i915_gem_context_release(struct kref *ref)
>   
>   static void context_close(struct i915_gem_context *ctx)
>   {
> +	i915_gem_context_set_closed(ctx);
> +
> +	if (ctx->vm)
> +		i915_vm_close(ctx->vm);
> +
>   	mutex_lock(&ctx->mutex);
>   
> -	i915_gem_context_set_closed(ctx);
>   	ctx->file_priv = ERR_PTR(-EBADF);
>   
>   	/*
> @@ -474,7 +476,7 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
>   
>   	GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
>   
> -	ctx->vm = i915_vm_get(vm);
> +	ctx->vm = i915_vm_open(vm);
>   	context_apply_all(ctx, __apply_ppgtt, vm);
>   
>   	return old;
> @@ -488,7 +490,7 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
>   
>   	vm = __set_ppgtt(ctx, vm);
>   	if (vm)
> -		i915_vm_put(vm);
> +		i915_vm_close(vm);
>   }
>   
>   static void __set_timeline(struct intel_timeline **dst,
> @@ -953,7 +955,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
>   	if (ret < 0)
>   		goto err_unlock;
>   
> -	i915_vm_get(vm);
> +	i915_vm_open(vm);
>   
>   	args->size = 0;
>   	args->value = ret;
> @@ -973,7 +975,7 @@ static void set_ppgtt_barrier(void *data)
>   	if (INTEL_GEN(old->i915) < 8)
>   		gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old));
>   
> -	i915_vm_put(old);
> +	i915_vm_close(old);
>   }
>   
>   static int emit_ppgtt_update(struct i915_request *rq, void *data)
> @@ -1090,8 +1092,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
>   				   set_ppgtt_barrier,
>   				   old);
>   	if (err) {
> -		i915_vm_put(__set_ppgtt(ctx, old));
> -		i915_vm_put(old);
> +		i915_vm_close(__set_ppgtt(ctx, old));
> +		i915_vm_close(old);
>   	}
>   
>   unlock:
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index 55c3ab59e3aa..9937b4c341f1 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -288,7 +288,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>   			if (!drm_mm_node_allocated(&vma->node))
>   				continue;
>   
> -			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
> +			/* Wait for an earlier async bind, need to rewrite it */
> +			ret = i915_vma_sync(vma);
> +			if (ret)
> +				return ret;
> +
> +			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE, NULL);
>   			if (ret)
>   				return ret;
>   		}
> @@ -391,16 +396,11 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
>   	if (ret)
>   		goto out;
>   
> -	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
> -	if (ret)
> -		goto out;
> -
>   	ret = i915_gem_object_lock_interruptible(obj);
>   	if (ret == 0) {
>   		ret = i915_gem_object_set_cache_level(obj, level);
>   		i915_gem_object_unlock(obj);
>   	}
> -	mutex_unlock(&i915->drm.struct_mutex);
>   
>   out:
>   	i915_gem_object_put(obj);
> @@ -485,6 +485,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
>   		if (!drm_mm_node_allocated(&vma->node))
>   			continue;
>   
> +		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
>   		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
>   	}
>   	mutex_unlock(&i915->ggtt.vm.mutex);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index c049199a1df5..88a881be12ec 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -698,7 +698,9 @@ static int eb_reserve(struct i915_execbuffer *eb)
>   
>   		case 1:
>   			/* Too fragmented, unbind everything and retry */
> +			mutex_lock(&eb->context->vm->mutex);
>   			err = i915_gem_evict_vm(eb->context->vm);
> +			mutex_unlock(&eb->context->vm->mutex);
>   			if (err)
>   				return err;
>   			break;
> @@ -972,7 +974,9 @@ static void reloc_cache_reset(struct reloc_cache *cache)
>   			ggtt->vm.clear_range(&ggtt->vm,
>   					     cache->node.start,
>   					     cache->node.size);
> +			mutex_lock(&ggtt->vm.mutex);
>   			drm_mm_remove_node(&cache->node);
> +			mutex_unlock(&ggtt->vm.mutex);
>   		} else {
>   			i915_vma_unpin((struct i915_vma *)cache->node.mm);
>   		}
> @@ -1047,11 +1051,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
>   					       PIN_NOEVICT);
>   		if (IS_ERR(vma)) {
>   			memset(&cache->node, 0, sizeof(cache->node));
> +			mutex_lock(&ggtt->vm.mutex);
>   			err = drm_mm_insert_node_in_range
>   				(&ggtt->vm.mm, &cache->node,
>   				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
>   				 0, ggtt->mappable_end,
>   				 DRM_MM_INSERT_LOW);
> +			mutex_unlock(&ggtt->vm.mutex);
>   			if (err) /* no inactive aperture space, use cpu reloc */
>   				return NULL;
>   		} else {
> @@ -1416,7 +1422,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
>   		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
>   		    IS_GEN(eb->i915, 6)) {
>   			err = i915_vma_bind(target, target->obj->cache_level,
> -					    PIN_GLOBAL);
> +					    PIN_GLOBAL, NULL);
>   			if (WARN_ONCE(err,
>   				      "Unexpected failure to bind target VMA!"))
>   				return err;
> @@ -2140,35 +2146,6 @@ static struct i915_request *eb_throttle(struct intel_context *ce)
>   	return i915_request_get(rq);
>   }
>   
> -static int
> -__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
> -{
> -	int err;
> -
> -	if (likely(atomic_inc_not_zero(&ce->pin_count)))
> -		return 0;
> -
> -	err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
> -	if (err)
> -		return err;
> -
> -	err = __intel_context_do_pin(ce);
> -	mutex_unlock(&eb->i915->drm.struct_mutex);
> -
> -	return err;
> -}
> -
> -static void
> -__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
> -{
> -	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
> -		return;
> -
> -	mutex_lock(&eb->i915->drm.struct_mutex);
> -	intel_context_unpin(ce);
> -	mutex_unlock(&eb->i915->drm.struct_mutex);
> -}
> -
>   static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
>   {
>   	struct intel_timeline *tl;
> @@ -2188,7 +2165,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
>   	 * GGTT space, so do this first before we reserve a seqno for
>   	 * ourselves.
>   	 */
> -	err = __eb_pin_context(eb, ce);
> +	err = intel_context_pin(ce);
>   	if (err)
>   		return err;
>   
> @@ -2232,7 +2209,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
>   	intel_context_exit(ce);
>   	intel_context_timeline_unlock(tl);
>   err_unpin:
> -	__eb_unpin_context(eb, ce);
> +	intel_context_unpin(ce);
>   	return err;
>   }
>   
> @@ -2245,7 +2222,7 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
>   	intel_context_exit(ce);
>   	mutex_unlock(&tl->mutex);
>   
> -	__eb_unpin_context(eb, ce);
> +	intel_context_unpin(ce);
>   }
>   
>   static unsigned int
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> index 860b751c51f1..c44614895487 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> @@ -249,16 +249,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>   	if (ret)
>   		goto err_rpm;
>   
> -	ret = i915_mutex_lock_interruptible(dev);
> -	if (ret)
> -		goto err_reset;
> -
> -	/* Access to snoopable pages through the GTT is incoherent. */
> -	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
> -		ret = -EFAULT;
> -		goto err_unlock;
> -	}
> -
>   	/* Now pin it into the GTT as needed */
>   	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
>   				       PIN_MAPPABLE |
> @@ -291,7 +281,13 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>   	}
>   	if (IS_ERR(vma)) {
>   		ret = PTR_ERR(vma);
> -		goto err_unlock;
> +		goto err_reset;
> +	}
> +
> +	/* Access to snoopable pages through the GTT is incoherent. */
> +	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
> +		ret = -EFAULT;
> +		goto err_unpin;
>   	}
>   
>   	ret = i915_vma_pin_fence(vma);
> @@ -329,8 +325,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>   	i915_vma_unpin_fence(vma);
>   err_unpin:
>   	__i915_vma_unpin(vma);
> -err_unlock:
> -	mutex_unlock(&dev->struct_mutex);
>   err_reset:
>   	intel_gt_reset_unlock(ggtt->vm.gt, srcu);
>   err_rpm:
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 0ef60dae23a7..dbf9be9a79f4 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -155,21 +155,30 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
>   
>   	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
>   	llist_for_each_entry_safe(obj, on, freed, freed) {
> -		struct i915_vma *vma, *vn;
> -
>   		trace_i915_gem_object_destroy(obj);
>   
> -		mutex_lock(&i915->drm.struct_mutex);
> -
> -		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
> -			GEM_BUG_ON(i915_vma_is_active(vma));
> -			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
> -			i915_vma_destroy(vma);
> +		if (!list_empty(&obj->vma.list)) {
> +			struct i915_vma *vma;
> +
> +			/*
> +			 * Note that the vma keeps an object reference while
> +			 * it is active, so it *should* not sleep while we
> +			 * destroy it. Our debug code errs and insists it *might*.
> +			 * For the moment, play along.
> +			 */
> +			spin_lock(&obj->vma.lock);
> +			while ((vma = list_first_entry_or_null(&obj->vma.list,
> +							       struct i915_vma,
> +							       obj_link))) {
> +				GEM_BUG_ON(vma->obj != obj);
> +				spin_unlock(&obj->vma.lock);
> +
> +				i915_vma_destroy(vma);
> +
> +				spin_lock(&obj->vma.lock);
> +			}
> +			spin_unlock(&obj->vma.lock);
>   		}
> -		GEM_BUG_ON(!list_empty(&obj->vma.list));
> -		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
> -
> -		mutex_unlock(&i915->drm.struct_mutex);
>   
>   		GEM_BUG_ON(atomic_read(&obj->bind_count));
>   		GEM_BUG_ON(obj->userfault_count);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index 29b9eddc4c7f..a78af25dce36 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -106,6 +106,11 @@ static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
>   	dma_resv_lock(obj->base.resv, NULL);
>   }
>   
> +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
> +{
> +	return dma_resv_trylock(obj->base.resv);
> +}
> +
>   static inline int
>   i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
>   {
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> index d2c05d752909..fd3ce6da8497 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> @@ -16,40 +16,6 @@
>   
>   #include "i915_trace.h"
>   
> -static bool shrinker_lock(struct drm_i915_private *i915,
> -			  unsigned int flags,
> -			  bool *unlock)
> -{
> -	struct mutex *m = &i915->drm.struct_mutex;
> -
> -	switch (mutex_trylock_recursive(m)) {
> -	case MUTEX_TRYLOCK_RECURSIVE:
> -		*unlock = false;
> -		return true;
> -
> -	case MUTEX_TRYLOCK_FAILED:
> -		*unlock = false;
> -		if (flags & I915_SHRINK_ACTIVE &&
> -		    mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
> -			*unlock = true;
> -		return *unlock;
> -
> -	case MUTEX_TRYLOCK_SUCCESS:
> -		*unlock = true;
> -		return true;
> -	}
> -
> -	BUG();
> -}
> -
> -static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
> -{
> -	if (!unlock)
> -		return;
> -
> -	mutex_unlock(&i915->drm.struct_mutex);
> -}
> -
>   static bool swap_available(void)
>   {
>   	return get_nr_swap_pages() > 0;
> @@ -155,10 +121,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
>   	intel_wakeref_t wakeref = 0;
>   	unsigned long count = 0;
>   	unsigned long scanned = 0;
> -	bool unlock;
> -
> -	if (!shrinker_lock(i915, shrink, &unlock))
> -		return 0;
>   
>   	/*
>   	 * When shrinking the active list, we should also consider active
> @@ -268,8 +230,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
>   	if (shrink & I915_SHRINK_BOUND)
>   		intel_runtime_pm_put(&i915->runtime_pm, wakeref);
>   
> -	shrinker_unlock(i915, unlock);
> -
>   	if (nr_scanned)
>   		*nr_scanned += scanned;
>   	return count;
> @@ -339,19 +299,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>   	struct drm_i915_private *i915 =
>   		container_of(shrinker, struct drm_i915_private, mm.shrinker);
>   	unsigned long freed;
> -	bool unlock;
>   
>   	sc->nr_scanned = 0;
>   
> -	if (!shrinker_lock(i915, 0, &unlock))
> -		return SHRINK_STOP;
> -
>   	freed = i915_gem_shrink(i915,
>   				sc->nr_to_scan,
>   				&sc->nr_scanned,
>   				I915_SHRINK_BOUND |
> -				I915_SHRINK_UNBOUND |
> -				I915_SHRINK_WRITEBACK);
> +				I915_SHRINK_UNBOUND);
>   	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
>   		intel_wakeref_t wakeref;
>   
> @@ -366,8 +321,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>   		}
>   	}
>   
> -	shrinker_unlock(i915, unlock);
> -
>   	return sc->nr_scanned ? freed : SHRINK_STOP;
>   }
>   
> @@ -384,6 +337,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
>   	freed_pages = 0;
>   	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
>   		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
> +					       I915_SHRINK_ACTIVE |
>   					       I915_SHRINK_BOUND |
>   					       I915_SHRINK_UNBOUND |
>   					       I915_SHRINK_WRITEBACK);
> @@ -419,10 +373,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>   	struct i915_vma *vma, *next;
>   	unsigned long freed_pages = 0;
>   	intel_wakeref_t wakeref;
> -	bool unlock;
> -
> -	if (!shrinker_lock(i915, 0, &unlock))
> -		return NOTIFY_DONE;
>   
>   	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
>   		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
> @@ -439,15 +389,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>   		if (!vma->iomap || i915_vma_is_active(vma))
>   			continue;
>   
> -		mutex_unlock(&i915->ggtt.vm.mutex);
> -		if (i915_vma_unbind(vma) == 0)
> +		if (__i915_vma_unbind(vma) == 0)
>   			freed_pages += count;
> -		mutex_lock(&i915->ggtt.vm.mutex);
>   	}
>   	mutex_unlock(&i915->ggtt.vm.mutex);
>   
> -	shrinker_unlock(i915, unlock);
> -
>   	*(unsigned long *)ptr += freed_pages;
>   	return NOTIFY_DONE;
>   }
> @@ -490,22 +436,9 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
>   
>   	fs_reclaim_acquire(GFP_KERNEL);
>   
> -	/*
> -	 * As we invariably rely on the struct_mutex within the shrinker,
> -	 * but have a complicated recursion dance, taint all the mutexes used
> -	 * within the shrinker with the struct_mutex. For completeness, we
> -	 * taint with all subclass of struct_mutex, even though we should
> -	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
> -	 * deadlocks from using struct_mutex inside @mutex.
> -	 */
> -	mutex_acquire(&i915->drm.struct_mutex.dep_map,
> -		      I915_MM_SHRINKER, 0, _RET_IP_);
> -
>   	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
>   	mutex_release(&mutex->dep_map, 0, _RET_IP_);
>   
> -	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
> -
>   	fs_reclaim_release(GFP_KERNEL);
>   
>   	if (unlock)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index e45eb8721850..fad98a921cde 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -624,8 +624,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
>   	if (!drm_mm_initialized(&dev_priv->mm.stolen))
>   		return NULL;
>   
> -	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> -
>   	DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
>   			 &stolen_offset, &gtt_offset, &size);
>   
> @@ -677,21 +675,25 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
>   	 * setting up the GTT space. The actual reservation will occur
>   	 * later.
>   	 */
> +	mutex_lock(&ggtt->vm.mutex);
>   	ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
>   				   size, gtt_offset, obj->cache_level,
>   				   0);
>   	if (ret) {
>   		DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n");
> +		mutex_unlock(&ggtt->vm.mutex);
>   		goto err_pages;
>   	}
>   
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   
> +	GEM_BUG_ON(vma->pages);
>   	vma->pages = obj->mm.pages;
> +	atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
> +
>   	set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
>   	__i915_vma_set_map_and_fenceable(vma);
>   
> -	mutex_lock(&ggtt->vm.mutex);
>   	list_add_tail(&vma->vm_link, &ggtt->vm.bound_list);
>   	mutex_unlock(&ggtt->vm.mutex);
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
> index e5d1ae8d4dba..dc2a83ce44d5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
> @@ -181,22 +181,25 @@ static int
>   i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
>   			      int tiling_mode, unsigned int stride)
>   {
> +	struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
>   	struct i915_vma *vma;
> -	int ret;
> +	int ret = 0;
>   
>   	if (tiling_mode == I915_TILING_NONE)
>   		return 0;
>   
> +	mutex_lock(&ggtt->vm.mutex);
>   	for_each_ggtt_vma(vma, obj) {
>   		if (i915_vma_fence_prepare(vma, tiling_mode, stride))
>   			continue;
>   
> -		ret = i915_vma_unbind(vma);
> +		ret = __i915_vma_unbind(vma);
>   		if (ret)
> -			return ret;
> +			break;
>   	}
> +	mutex_unlock(&ggtt->vm.mutex);
>   
> -	return 0;
> +	return ret;
>   }
>   
>   int
> @@ -212,7 +215,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
>   
>   	GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride));
>   	GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE));
> -	lockdep_assert_held(&i915->drm.struct_mutex);
>   
>   	if ((tiling | stride) == obj->tiling_and_stride)
>   		return 0;
> @@ -233,16 +235,18 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
>   	 * whilst executing a fenced command for an untiled object.
>   	 */
>   
> -	err = i915_gem_object_fence_prepare(obj, tiling, stride);
> -	if (err)
> -		return err;
> -
>   	i915_gem_object_lock(obj);
>   	if (i915_gem_object_is_framebuffer(obj)) {
>   		i915_gem_object_unlock(obj);
>   		return -EBUSY;
>   	}
>   
> +	err = i915_gem_object_fence_prepare(obj, tiling, stride);
> +	if (err) {
> +		i915_gem_object_unlock(obj);
> +		return err;
> +	}
> +
>   	/* If the memory has unknown (i.e. varying) swizzling, we pin the
>   	 * pages to prevent them being swapped out and causing corruption
>   	 * due to the change in swizzling.
> @@ -368,12 +372,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
>   		}
>   	}
>   
> -	err = mutex_lock_interruptible(&dev->struct_mutex);
> -	if (err)
> -		goto err;
> -
>   	err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
> -	mutex_unlock(&dev->struct_mutex);
>   
>   	/* We have to maintain this existing ABI... */
>   	args->stride = i915_gem_object_get_stride(obj);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> index 11b231c187c5..27573ef903b4 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> @@ -92,7 +92,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>   	struct i915_mmu_notifier *mn =
>   		container_of(_mn, struct i915_mmu_notifier, mn);
>   	struct interval_tree_node *it;
> -	struct mutex *unlock = NULL;
>   	unsigned long end;
>   	int ret = 0;
>   
> @@ -129,33 +128,13 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>   		}
>   		spin_unlock(&mn->lock);
>   
> -		if (!unlock) {
> -			unlock = &mn->mm->i915->drm.struct_mutex;
> -
> -			switch (mutex_trylock_recursive(unlock)) {
> -			default:
> -			case MUTEX_TRYLOCK_FAILED:
> -				if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
> -					i915_gem_object_put(obj);
> -					return -EINTR;
> -				}
> -				/* fall through */
> -			case MUTEX_TRYLOCK_SUCCESS:
> -				break;
> -
> -			case MUTEX_TRYLOCK_RECURSIVE:
> -				unlock = ERR_PTR(-EEXIST);
> -				break;
> -			}
> -		}
> -
>   		ret = i915_gem_object_unbind(obj,
>   					     I915_GEM_OBJECT_UNBIND_ACTIVE);
>   		if (ret == 0)
>   			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
>   		i915_gem_object_put(obj);
>   		if (ret)
> -			goto unlock;
> +			return ret;
>   
>   		spin_lock(&mn->lock);
>   
> @@ -168,10 +147,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>   	}
>   	spin_unlock(&mn->lock);
>   
> -unlock:
> -	if (!IS_ERR_OR_NULL(unlock))
> -		mutex_unlock(unlock);
> -
>   	return ret;
>   
>   }
> diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> index c5cea4379216..98b2a6ccfcc1 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> @@ -333,7 +333,12 @@ static int igt_check_page_sizes(struct i915_vma *vma)
>   	struct drm_i915_private *i915 = vma->vm->i915;
>   	unsigned int supported = INTEL_INFO(i915)->page_sizes;
>   	struct drm_i915_gem_object *obj = vma->obj;
> -	int err = 0;
> +	int err;
> +
> +	/* We have to wait for the async bind to complete before our asserts */
> +	err = i915_vma_sync(vma);
> +	if (err)
> +		return err;
>   
>   	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
>   		pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
> @@ -1390,7 +1395,7 @@ static int igt_ppgtt_pin_update(void *arg)
>   			goto out_unpin;
>   		}
>   
> -		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE);
> +		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL);
>   		if (err)
>   			goto out_unpin;
>   
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index dc25bcc3e372..6ef3d745dc8d 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -747,10 +747,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
>   	if (err)
>   		goto skip_request;
>   
> -	i915_vma_unpin(batch);
> -	i915_vma_close(batch);
> -	i915_vma_put(batch);
> -
> +	i915_vma_unpin_and_release(&batch, 0);
>   	i915_vma_unpin(vma);
>   
>   	*rq_out = i915_request_get(rq);
> @@ -764,8 +761,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
>   err_request:
>   	i915_request_add(rq);
>   err_batch:
> -	i915_vma_unpin(batch);
> -	i915_vma_put(batch);
> +	i915_vma_unpin_and_release(&batch, 0);
>   err_vma:
>   	i915_vma_unpin(vma);
>   
> @@ -1309,9 +1305,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
>   	if (err)
>   		goto skip_request;
>   
> -	i915_vma_unpin(vma);
> -	i915_vma_close(vma);
> -	i915_vma_put(vma);
> +	i915_vma_unpin_and_release(&vma, 0);
>   
>   	i915_request_add(rq);
>   
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> index aefe557527f8..36aca1c172e7 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> @@ -322,7 +322,6 @@ static int igt_partial_tiling(void *arg)
>   		goto out;
>   	}
>   
> -	mutex_lock(&i915->drm.struct_mutex);
>   	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
>   
>   	if (1) {
> @@ -415,7 +414,6 @@ next_tiling: ;
>   
>   out_unlock:
>   	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
> -	mutex_unlock(&i915->drm.struct_mutex);
>   	i915_gem_object_unpin_pages(obj);
>   out:
>   	i915_gem_object_put(obj);
> @@ -458,7 +456,6 @@ static int igt_smoke_tiling(void *arg)
>   		goto out;
>   	}
>   
> -	mutex_lock(&i915->drm.struct_mutex);
>   	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
>   
>   	count = 0;
> @@ -508,7 +505,6 @@ static int igt_smoke_tiling(void *arg)
>   	pr_info("%s: Completed %lu trials\n", __func__, count);
>   
>   	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
> -	mutex_unlock(&i915->drm.struct_mutex);
>   	i915_gem_object_unpin_pages(obj);
>   out:
>   	i915_gem_object_put(obj);
> diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
> index ee5dc13a30b3..6718da20f35d 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
> @@ -154,9 +154,7 @@ int igt_gpu_fill_dw(struct intel_context *ce,
>   
>   	i915_request_add(rq);
>   
> -	i915_vma_unpin(batch);
> -	i915_vma_close(batch);
> -	i915_vma_put(batch);
> +	i915_vma_unpin_and_release(&batch, 0);
>   
>   	return 0;
>   
> @@ -165,7 +163,6 @@ int igt_gpu_fill_dw(struct intel_context *ce,
>   err_request:
>   	i915_request_add(rq);
>   err_batch:
> -	i915_vma_unpin(batch);
> -	i915_vma_put(batch);
> +	i915_vma_unpin_and_release(&batch, 0);
>   	return err;
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index eef9bdae8ebb..95e67db52668 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -301,11 +301,12 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
>   
>   	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
>   		struct intel_uncore *uncore = gt->uncore;
> +		unsigned long flags;
>   
> -		spin_lock_irq(&uncore->lock);
> +		spin_lock_irqsave(&uncore->lock, flags);
>   		intel_uncore_posting_read_fw(uncore,
>   					     RING_HEAD(RENDER_RING_BASE));
> -		spin_unlock_irq(&uncore->lock);
> +		spin_unlock_irqrestore(&uncore->lock, flags);
>   	}
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index 0747b8c9f768..ec32996254c0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -1338,15 +1338,13 @@ void intel_ring_free(struct kref *ref)
>   {
>   	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
>   
> -	i915_vma_close(ring->vma);
>   	i915_vma_put(ring->vma);
> -
>   	kfree(ring);
>   }
>   
>   static void __ring_context_fini(struct intel_context *ce)
>   {
> -	i915_gem_object_put(ce->state->obj);
> +	i915_vma_put(ce->state);
>   }
>   
>   static void ring_context_destroy(struct kref *ref)
> diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index a0098fc35921..e53eea1050f8 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -1127,15 +1127,14 @@ static int evict_vma(void *data)
>   {
>   	struct evict_vma *arg = data;
>   	struct i915_address_space *vm = arg->vma->vm;
> -	struct drm_i915_private *i915 = vm->i915;
>   	struct drm_mm_node evict = arg->vma->node;
>   	int err;
>   
>   	complete(&arg->completion);
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> +	mutex_lock(&vm->mutex);
>   	err = i915_gem_evict_for_node(vm, &evict, 0);
> -	mutex_unlock(&i915->drm.struct_mutex);
> +	mutex_unlock(&vm->mutex);
>   
>   	return err;
>   }
> @@ -1143,39 +1142,33 @@ static int evict_vma(void *data)
>   static int evict_fence(void *data)
>   {
>   	struct evict_vma *arg = data;
> -	struct drm_i915_private *i915 = arg->vma->vm->i915;
>   	int err;
>   
>   	complete(&arg->completion);
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> -
>   	/* Mark the fence register as dirty to force the mmio update. */
>   	err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512);
>   	if (err) {
>   		pr_err("Invalid Y-tiling settings; err:%d\n", err);
> -		goto out_unlock;
> +		return err;
>   	}
>   
>   	err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE);
>   	if (err) {
>   		pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err);
> -		goto out_unlock;
> +		return err;
>   	}
>   
>   	err = i915_vma_pin_fence(arg->vma);
>   	i915_vma_unpin(arg->vma);
>   	if (err) {
>   		pr_err("Unable to pin Y-tiled fence; err:%d\n", err);
> -		goto out_unlock;
> +		return err;
>   	}
>   
>   	i915_vma_unpin_fence(arg->vma);
>   
> -out_unlock:
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
> -	return err;
> +	return 0;
>   }
>   
>   static int __igt_reset_evict_vma(struct intel_gt *gt,
> diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
> index 5ff2437b2998..d996bbc7ea59 100644
> --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
> +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
> @@ -61,14 +61,14 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
>   		flags = PIN_MAPPABLE;
>   	}
>   
> -	mutex_lock(&dev_priv->drm.struct_mutex);
> +	mutex_lock(&dev_priv->ggtt.vm.mutex);
>   	mmio_hw_access_pre(dev_priv);
>   	ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node,
>   				  size, I915_GTT_PAGE_SIZE,
>   				  I915_COLOR_UNEVICTABLE,
>   				  start, end, flags);
>   	mmio_hw_access_post(dev_priv);
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
> +	mutex_unlock(&dev_priv->ggtt.vm.mutex);
>   	if (ret)
>   		gvt_err("fail to alloc %s gm space from host\n",
>   			high_gm ? "high" : "low");
> @@ -98,9 +98,9 @@ static int alloc_vgpu_gm(struct intel_vgpu *vgpu)
>   
>   	return 0;
>   out_free_aperture:
> -	mutex_lock(&dev_priv->drm.struct_mutex);
> +	mutex_lock(&dev_priv->ggtt.vm.mutex);
>   	drm_mm_remove_node(&vgpu->gm.low_gm_node);
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
> +	mutex_unlock(&dev_priv->ggtt.vm.mutex);
>   	return ret;
>   }
>   
> @@ -108,10 +108,10 @@ static void free_vgpu_gm(struct intel_vgpu *vgpu)
>   {
>   	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
>   
> -	mutex_lock(&dev_priv->drm.struct_mutex);
> +	mutex_lock(&dev_priv->ggtt.vm.mutex);
>   	drm_mm_remove_node(&vgpu->gm.low_gm_node);
>   	drm_mm_remove_node(&vgpu->gm.high_gm_node);
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
> +	mutex_unlock(&dev_priv->ggtt.vm.mutex);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index d5aac6ff803a..0791736a08fd 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -146,6 +146,7 @@ __active_retire(struct i915_active *ref)
>   	if (!retire)
>   		return;
>   
> +	GEM_BUG_ON(rcu_access_pointer(ref->excl));
>   	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
>   		GEM_BUG_ON(i915_active_request_isset(&it->base));
>   		kmem_cache_free(global.slab_cache, it);
> @@ -245,6 +246,8 @@ void __i915_active_init(struct drm_i915_private *i915,
>   	ref->flags = 0;
>   	ref->active = active;
>   	ref->retire = retire;
> +
> +	ref->excl = NULL;
>   	ref->tree = RB_ROOT;
>   	ref->cache = NULL;
>   	init_llist_head(&ref->preallocated_barriers);
> @@ -341,6 +344,46 @@ int i915_active_ref(struct i915_active *ref,
>   	return err;
>   }
>   
> +static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb)
> +{
> +	struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb);
> +
> +	RCU_INIT_POINTER(ref->excl, NULL);
> +	dma_fence_put(f);
> +
> +	active_retire(ref);
> +}
> +
> +void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
> +{
> +	/* We expect the caller to manage the exclusive timeline ordering */
> +	GEM_BUG_ON(i915_active_is_idle(ref));
> +
> +	dma_fence_get(f);
> +
> +	rcu_read_lock();
> +	if (rcu_access_pointer(ref->excl)) {
> +		struct dma_fence *old;
> +
> +		old = dma_fence_get_rcu_safe(&ref->excl);
> +		if (old) {
> +			if (dma_fence_remove_callback(old, &ref->excl_cb))
> +				atomic_dec(&ref->count);
> +			dma_fence_put(old);
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +	atomic_inc(&ref->count);
> +	rcu_assign_pointer(ref->excl, f);
> +
> +	if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) {
> +		RCU_INIT_POINTER(ref->excl, NULL);
> +		atomic_dec(&ref->count);
> +		dma_fence_put(f);
> +	}
> +}
> +
>   int i915_active_acquire(struct i915_active *ref)
>   {
>   	int err;
> @@ -399,6 +442,25 @@ void i915_active_ungrab(struct i915_active *ref)
>   	__active_ungrab(ref);
>   }
>   
> +static int excl_wait(struct i915_active *ref)
> +{
> +	struct dma_fence *old;
> +	int err = 0;
> +
> +	if (!rcu_access_pointer(ref->excl))
> +		return 0;
> +
> +	rcu_read_lock();
> +	old = dma_fence_get_rcu_safe(&ref->excl);
> +	rcu_read_unlock();
> +	if (old) {
> +		err = dma_fence_wait(old, true);
> +		dma_fence_put(old);
> +	}
> +
> +	return err;
> +}
> +
>   int i915_active_wait(struct i915_active *ref)
>   {
>   	struct active_node *it, *n;
> @@ -419,6 +481,10 @@ int i915_active_wait(struct i915_active *ref)
>   		return 0;
>   	}
>   
> +	err = excl_wait(ref);
> +	if (err)
> +		goto out;
> +
>   	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
>   		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
>   			err = -EBUSY;
> @@ -430,6 +496,7 @@ int i915_active_wait(struct i915_active *ref)
>   			break;
>   	}
>   
> +out:
>   	__active_retire(ref);
>   	if (err)
>   		return err;
> @@ -454,26 +521,22 @@ int i915_request_await_active_request(struct i915_request *rq,
>   
>   int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
>   {
> -	struct active_node *it, *n;
> -	int err;
> -
> -	if (RB_EMPTY_ROOT(&ref->tree))
> -		return 0;
> +	int err = 0;
>   
> -	/* await allocates and so we need to avoid hitting the shrinker */
> -	err = i915_active_acquire(ref);
> -	if (err)
> -		return err;
> +	if (rcu_access_pointer(ref->excl)) {
> +		struct dma_fence *fence;
>   
> -	mutex_lock(&ref->mutex);
> -	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> -		err = i915_request_await_active_request(rq, &it->base);
> -		if (err)
> -			break;
> +		rcu_read_lock();
> +		fence = dma_fence_get_rcu_safe(&ref->excl);
> +		rcu_read_unlock();
> +		if (fence) {
> +			err = i915_request_await_dma_fence(rq, fence);
> +			dma_fence_put(fence);
> +		}
>   	}
> -	mutex_unlock(&ref->mutex);
>   
> -	i915_active_release(ref);
> +	/* In the future we may choose to await on all fences */
> +
>   	return err;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index 949c6835335b..90034f61b7c2 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -379,6 +379,13 @@ i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
>   	return i915_active_ref(ref, i915_request_timeline(rq), rq);
>   }
>   
> +void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
> +
> +static inline bool i915_active_has_exclusive(struct i915_active *ref)
> +{
> +	return rcu_access_pointer(ref->excl);
> +}
> +
>   int i915_active_wait(struct i915_active *ref);
>   
>   int i915_request_await_active(struct i915_request *rq,
> diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> index 1854e7d168c1..86e7a232ea3c 100644
> --- a/drivers/gpu/drm/i915/i915_active_types.h
> +++ b/drivers/gpu/drm/i915/i915_active_types.h
> @@ -8,6 +8,7 @@
>   #define _I915_ACTIVE_TYPES_H_
>   
>   #include <linux/atomic.h>
> +#include <linux/dma-fence.h>
>   #include <linux/llist.h>
>   #include <linux/mutex.h>
>   #include <linux/rbtree.h>
> @@ -51,6 +52,10 @@ struct i915_active {
>   	struct mutex mutex;
>   	atomic_t count;
>   
> +	/* Preallocated "exclusive" node */
> +	struct dma_fence __rcu *excl;
> +	struct dma_fence_cb excl_cb;
> +
>   	unsigned long flags;
>   #define I915_ACTIVE_GRAB_BIT 0
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index a9ee73b61f4d..ab00ba6f695b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1881,10 +1881,8 @@ static int i915_drm_resume(struct drm_device *dev)
>   	if (ret)
>   		DRM_ERROR("failed to re-enable GGTT\n");
>   
> -	mutex_lock(&dev_priv->drm.struct_mutex);
>   	i915_gem_restore_gtt_mappings(dev_priv);
>   	i915_gem_restore_fences(dev_priv);
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
>   
>   	intel_csr_ucode_resume(dev_priv);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b8f7fe311b0a..934ea776a46a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -62,20 +62,31 @@
>   #include "intel_pm.h"
>   
>   static int
> -insert_mappable_node(struct i915_ggtt *ggtt,
> -                     struct drm_mm_node *node, u32 size)
> +insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
>   {
> +	int err;
> +
> +	err = mutex_lock_interruptible(&ggtt->vm.mutex);
> +	if (err)
> +		return err;
> +
>   	memset(node, 0, sizeof(*node));
> -	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
> -					   size, 0, I915_COLOR_UNEVICTABLE,
> -					   0, ggtt->mappable_end,
> -					   DRM_MM_INSERT_LOW);
> +	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
> +					  size, 0, I915_COLOR_UNEVICTABLE,
> +					  0, ggtt->mappable_end,
> +					  DRM_MM_INSERT_LOW);
> +
> +	mutex_unlock(&ggtt->vm.mutex);
> +
> +	return err;
>   }
>   
>   static void
> -remove_mappable_node(struct drm_mm_node *node)
> +remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node)
>   {
> +	mutex_lock(&ggtt->vm.mutex);
>   	drm_mm_remove_node(node);
> +	mutex_unlock(&ggtt->vm.mutex);
>   }
>   
>   int
> @@ -87,7 +98,8 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>   	struct i915_vma *vma;
>   	u64 pinned;
>   
> -	mutex_lock(&ggtt->vm.mutex);
> +	if (mutex_lock_interruptible(&ggtt->vm.mutex))
> +		return -EINTR;
>   
>   	pinned = ggtt->vm.reserved;
>   	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
> @@ -109,20 +121,24 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
>   	LIST_HEAD(still_in_list);
>   	int ret = 0;
>   
> -	lockdep_assert_held(&obj->base.dev->struct_mutex);
> -
>   	spin_lock(&obj->vma.lock);
>   	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
>   						       struct i915_vma,
>   						       obj_link))) {
> +		struct i915_address_space *vm = vma->vm;
> +
> +		ret = -EBUSY;
> +		if (!i915_vm_tryopen(vm))
> +			break;
> +
>   		list_move_tail(&vma->obj_link, &still_in_list);
>   		spin_unlock(&obj->vma.lock);
>   
> -		ret = -EBUSY;
>   		if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
>   		    !i915_vma_is_active(vma))
>   			ret = i915_vma_unbind(vma);
>   
> +		i915_vm_close(vm);
>   		spin_lock(&obj->vma.lock);
>   	}
>   	list_splice(&still_in_list, &obj->vma.list);
> @@ -338,10 +354,6 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>   	u64 remain, offset;
>   	int ret;
>   
> -	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
> -	if (ret)
> -		return ret;
> -
>   	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
>   	vma = ERR_PTR(-ENODEV);
>   	if (!i915_gem_object_is_tiled(obj))
> @@ -355,12 +367,10 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>   	} else {
>   		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
>   		if (ret)
> -			goto out_unlock;
> +			goto out_rpm;
>   		GEM_BUG_ON(!drm_mm_node_allocated(&node));
>   	}
>   
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
>   	ret = i915_gem_object_lock_interruptible(obj);
>   	if (ret)
>   		goto out_unpin;
> @@ -414,17 +424,14 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>   
>   	i915_gem_object_unlock_fence(obj, fence);
>   out_unpin:
> -	mutex_lock(&i915->drm.struct_mutex);
>   	if (drm_mm_node_allocated(&node)) {
>   		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
> -		remove_mappable_node(&node);
> +		remove_mappable_node(ggtt, &node);
>   	} else {
>   		i915_vma_unpin(vma);
>   	}
> -out_unlock:
> +out_rpm:
>   	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
>   	return ret;
>   }
>   
> @@ -531,10 +538,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>   	void __user *user_data;
>   	int ret;
>   
> -	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
> -	if (ret)
> -		return ret;
> -
>   	if (i915_gem_object_has_struct_page(obj)) {
>   		/*
>   		 * Avoid waking the device up if we can fallback, as
> @@ -544,10 +547,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>   		 * using the cache bypass of indirect GGTT access.
>   		 */
>   		wakeref = intel_runtime_pm_get_if_in_use(rpm);
> -		if (!wakeref) {
> -			ret = -EFAULT;
> -			goto out_unlock;
> -		}
> +		if (!wakeref)
> +			return -EFAULT;
>   	} else {
>   		/* No backing pages, no fallback, we must force GGTT access */
>   		wakeref = intel_runtime_pm_get(rpm);
> @@ -569,8 +570,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>   		GEM_BUG_ON(!drm_mm_node_allocated(&node));
>   	}
>   
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
>   	ret = i915_gem_object_lock_interruptible(obj);
>   	if (ret)
>   		goto out_unpin;
> @@ -634,18 +633,15 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>   
>   	i915_gem_object_unlock_fence(obj, fence);
>   out_unpin:
> -	mutex_lock(&i915->drm.struct_mutex);
>   	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
>   	if (drm_mm_node_allocated(&node)) {
>   		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
> -		remove_mappable_node(&node);
> +		remove_mappable_node(ggtt, &node);
>   	} else {
>   		i915_vma_unpin(vma);
>   	}
>   out_rpm:
>   	intel_runtime_pm_put(rpm, wakeref);
> -out_unlock:
> -	mutex_unlock(&i915->drm.struct_mutex);
>   	return ret;
>   }
>   
> @@ -968,8 +964,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>   	struct i915_vma *vma;
>   	int ret;
>   
> -	lockdep_assert_held(&obj->base.dev->struct_mutex);
> -
>   	if (flags & PIN_MAPPABLE &&
>   	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
>   		/* If the required space is larger than the available
> @@ -1016,13 +1010,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>   				return ERR_PTR(-ENOSPC);
>   		}
>   
> -		WARN(i915_vma_is_pinned(vma),
> -		     "bo is already pinned in ggtt with incorrect alignment:"
> -		     " offset=%08x, req.alignment=%llx,"
> -		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
> -		     i915_ggtt_offset(vma), alignment,
> -		     !!(flags & PIN_MAPPABLE),
> -		     i915_vma_is_map_and_fenceable(vma));
>   		ret = i915_vma_unbind(vma);
>   		if (ret)
>   			return ERR_PTR(ret);
> @@ -1441,8 +1428,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   	}
>   
>   	if (ret == -EIO) {
> -		mutex_lock(&dev_priv->drm.struct_mutex);
> -
>   		/*
>   		 * Allow engines or uC initialisation to fail by marking the GPU
>   		 * as wedged. But we only want to do this when the GPU is angry,
> @@ -1459,8 +1444,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   		i915_gem_restore_gtt_mappings(dev_priv);
>   		i915_gem_restore_fences(dev_priv);
>   		intel_init_clock_gating(dev_priv);
> -
> -		mutex_unlock(&dev_priv->drm.struct_mutex);
>   	}
>   
>   	i915_gem_drain_freed_objects(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 8c1e04f402bc..0552bf93eea3 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -47,8 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
>   	 * bound by their active reference.
>   	 */
>   	return i915_gem_wait_for_idle(i915,
> -				      I915_WAIT_INTERRUPTIBLE |
> -				      I915_WAIT_LOCKED,
> +				      I915_WAIT_INTERRUPTIBLE,
>   				      MAX_SCHEDULE_TIMEOUT);
>   }
>   
> @@ -104,7 +103,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   	struct i915_vma *active;
>   	int ret;
>   
> -	lockdep_assert_held(&vm->i915->drm.struct_mutex);
> +	lockdep_assert_held(&vm->mutex);
>   	trace_i915_gem_evict(vm, min_size, alignment, flags);
>   
>   	/*
> @@ -127,15 +126,6 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   				    min_size, alignment, color,
>   				    start, end, mode);
>   
> -	/*
> -	 * Retire before we search the active list. Although we have
> -	 * reasonable accuracy in our retirement lists, we may have
> -	 * a stray pin (preventing eviction) that can only be resolved by
> -	 * retiring.
> -	 */
> -	if (!(flags & PIN_NONBLOCK))
> -		i915_retire_requests(dev_priv);
> -
>   search_again:
>   	active = NULL;
>   	INIT_LIST_HEAD(&eviction_list);
> @@ -235,12 +225,12 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
>   		__i915_vma_unpin(vma);
>   		if (ret == 0)
> -			ret = i915_vma_unbind(vma);
> +			ret = __i915_vma_unbind(vma);
>   	}
>   
>   	while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) {
>   		vma = container_of(node, struct i915_vma, node);
> -		ret = i915_vma_unbind(vma);
> +		ret = __i915_vma_unbind(vma);
>   	}
>   
>   	return ret;
> @@ -268,7 +258,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
>   	struct i915_vma *vma, *next;
>   	int ret = 0;
>   
> -	lockdep_assert_held(&vm->i915->drm.struct_mutex);
> +	lockdep_assert_held(&vm->mutex);
>   	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
>   	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
>   
> @@ -349,7 +339,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
>   	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
>   		__i915_vma_unpin(vma);
>   		if (ret == 0)
> -			ret = i915_vma_unbind(vma);
> +			ret = __i915_vma_unbind(vma);
>   	}
>   
>   	return ret;
> @@ -373,7 +363,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
>   	struct i915_vma *vma, *next;
>   	int ret;
>   
> -	lockdep_assert_held(&vm->i915->drm.struct_mutex);
> +	lockdep_assert_held(&vm->mutex);
>   	trace_i915_gem_evict_vm(vm);
>   
>   	/* Switch back to the default context in order to unpin
> @@ -388,7 +378,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
>   	}
>   
>   	INIT_LIST_HEAD(&eviction_list);
> -	mutex_lock(&vm->mutex);
>   	list_for_each_entry(vma, &vm->bound_list, vm_link) {
>   		if (i915_vma_is_pinned(vma))
>   			continue;
> @@ -396,13 +385,12 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
>   		__i915_vma_pin(vma);
>   		list_add(&vma->evict_link, &eviction_list);
>   	}
> -	mutex_unlock(&vm->mutex);
>   
>   	ret = 0;
>   	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
>   		__i915_vma_unpin(vma);
>   		if (ret == 0)
> -			ret = i915_vma_unbind(vma);
> +			ret = __i915_vma_unbind(vma);
>   	}
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index 615a9f4ef30c..487b7261f7ed 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -230,14 +230,15 @@ static int fence_update(struct i915_fence_reg *fence,
>   			 i915_gem_object_get_tiling(vma->obj)))
>   			return -EINVAL;
>   
> -		ret = i915_active_wait(&vma->active);
> +		ret = i915_vma_sync(vma);
>   		if (ret)
>   			return ret;
>   	}
>   
>   	old = xchg(&fence->vma, NULL);
>   	if (old) {
> -		ret = i915_active_wait(&old->active);
> +		/* XXX Ideally we would move the waiting to outside the mutex */
> +		ret = i915_vma_sync(old);
>   		if (ret) {
>   			fence->vma = old;
>   			return ret;
> @@ -331,13 +332,15 @@ static struct i915_fence_reg *fence_find(struct drm_i915_private *i915)
>   	return ERR_PTR(-EDEADLK);
>   }
>   
> -static int __i915_vma_pin_fence(struct i915_vma *vma)
> +int __i915_vma_pin_fence(struct i915_vma *vma)
>   {
>   	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
>   	struct i915_fence_reg *fence;
>   	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
>   	int err;
>   
> +	lockdep_assert_held(&vma->vm->mutex);
> +
>   	/* Just update our place in the LRU if our fence is getting reused. */
>   	if (vma->fence) {
>   		fence = vma->fence;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 483eeff8730e..a351b98afe03 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -150,16 +150,18 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
>   
>   static int ppgtt_bind_vma(struct i915_vma *vma,
>   			  enum i915_cache_level cache_level,
> -			  u32 unused)
> +			  u32 flags)
>   {
>   	u32 pte_flags;
>   	int err;
>   
> -	if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
> +	if (flags & I915_VMA_ALLOC) {
>   		err = vma->vm->allocate_va_range(vma->vm,
>   						 vma->node.start, vma->size);
>   		if (err)
>   			return err;
> +
> +		set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
>   	}
>   
>   	/* Applicable to VLV, and gen8+ */
> @@ -167,6 +169,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
>   	if (i915_gem_object_is_readonly(vma->obj))
>   		pte_flags |= PTE_READ_ONLY;
>   
> +	GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
>   	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
>   	wmb();
>   
> @@ -175,7 +178,8 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
>   
>   static void ppgtt_unbind_vma(struct i915_vma *vma)
>   {
> -	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
> +	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
> +		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
>   }
>   
>   static int ppgtt_set_pages(struct i915_vma *vma)
> @@ -503,15 +507,26 @@ static void i915_address_space_fini(struct i915_address_space *vm)
>   	mutex_destroy(&vm->mutex);
>   }
>   
> -static void ppgtt_destroy_vma(struct i915_address_space *vm)
> +void __i915_vm_close(struct i915_address_space *vm)
>   {
>   	struct i915_vma *vma, *vn;
>   
> -	mutex_lock(&vm->i915->drm.struct_mutex);
> -	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link)
> +	mutex_lock(&vm->mutex);
> +	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
> +		struct drm_i915_gem_object *obj = vma->obj;
> +
> +		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
> +		if (!kref_get_unless_zero(&obj->base.refcount))
> +			continue;
> +
> +		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
> +		WARN_ON(__i915_vma_unbind(vma));
>   		i915_vma_destroy(vma);
> +
> +		i915_gem_object_put(obj);
> +	}
>   	GEM_BUG_ON(!list_empty(&vm->bound_list));
> -	mutex_unlock(&vm->i915->drm.struct_mutex);
> +	mutex_unlock(&vm->mutex);
>   }
>   
>   static void __i915_vm_release(struct work_struct *work)
> @@ -519,8 +534,6 @@ static void __i915_vm_release(struct work_struct *work)
>   	struct i915_address_space *vm =
>   		container_of(work, struct i915_address_space, rcu.work);
>   
> -	ppgtt_destroy_vma(vm);
> -
>   	vm->cleanup(vm);
>   	i915_address_space_fini(vm);
>   
> @@ -535,7 +548,6 @@ void i915_vm_release(struct kref *kref)
>   	GEM_BUG_ON(i915_is_ggtt(vm));
>   	trace_i915_ppgtt_release(vm);
>   
> -	vm->closed = true;
>   	queue_rcu_work(vm->i915->wq, &vm->rcu);
>   }
>   
> @@ -543,6 +555,7 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
>   {
>   	kref_init(&vm->ref);
>   	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
> +	atomic_set(&vm->open, 1);
>   
>   	/*
>   	 * The vm->mutex must be reclaim safe (for use in the shrinker).
> @@ -1771,12 +1784,8 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
>   static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
>   {
>   	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
> -	struct drm_i915_private *i915 = vm->i915;
>   
> -	/* FIXME remove the struct_mutex to bring the locking under control */
> -	mutex_lock(&i915->drm.struct_mutex);
>   	i915_vma_destroy(ppgtt->vma);
> -	mutex_unlock(&i915->drm.struct_mutex);
>   
>   	gen6_ppgtt_free_pd(ppgtt);
>   	free_scratch(vm);
> @@ -1865,7 +1874,8 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
>   
>   	i915_active_init(i915, &vma->active, NULL, NULL);
>   
> -	vma->vm = &ggtt->vm;
> +	mutex_init(&vma->pages_mutex);
> +	vma->vm = i915_vm_get(&ggtt->vm);
>   	vma->ops = &pd_vma_ops;
>   	vma->private = ppgtt;
>   
> @@ -1885,7 +1895,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base)
>   	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
>   	int err = 0;
>   
> -	GEM_BUG_ON(ppgtt->base.vm.closed);
> +	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
>   
>   	/*
>   	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
> @@ -2463,14 +2473,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
>   	if (flags & I915_VMA_LOCAL_BIND) {
>   		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
>   
> -		if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
> +		if (flags & I915_VMA_ALLOC) {
>   			ret = alias->vm.allocate_va_range(&alias->vm,
>   							  vma->node.start,
>   							  vma->size);
>   			if (ret)
>   				return ret;
> +
> +			set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
>   		}
>   
> +		GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
> +				     __i915_vma_flags(vma)));
>   		alias->vm.insert_entries(&alias->vm, vma,
>   					 cache_level, pte_flags);
>   	}
> @@ -2499,7 +2513,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
>   			vm->clear_range(vm, vma->node.start, vma->size);
>   	}
>   
> -	if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
> +	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
>   		struct i915_address_space *vm =
>   			&i915_vm_to_ggtt(vma->vm)->alias->vm;
>   
> @@ -2602,22 +2616,16 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
>   
>   static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
>   {
> -	struct drm_i915_private *i915 = ggtt->vm.i915;
>   	struct i915_ppgtt *ppgtt;
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> -
>   	ppgtt = fetch_and_zero(&ggtt->alias);
>   	if (!ppgtt)
> -		goto out;
> +		return;
>   
>   	i915_vm_put(&ppgtt->vm);
>   
>   	ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
>   	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
> -
> -out:
> -	mutex_unlock(&i915->drm.struct_mutex);
>   }
>   
>   static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
> @@ -2734,32 +2742,28 @@ int i915_init_ggtt(struct drm_i915_private *i915)
>   
>   static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
>   {
> -	struct drm_i915_private *i915 = ggtt->vm.i915;
>   	struct i915_vma *vma, *vn;
>   
> -	ggtt->vm.closed = true;
> +	atomic_set(&ggtt->vm.open, 0);
>   
>   	rcu_barrier(); /* flush the RCU'ed__i915_vm_release */
> -	flush_workqueue(i915->wq);
> +	flush_workqueue(ggtt->vm.i915->wq);
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> +	mutex_lock(&ggtt->vm.mutex);
>   
>   	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
> -		WARN_ON(i915_vma_unbind(vma));
> +		WARN_ON(__i915_vma_unbind(vma));
>   
>   	if (drm_mm_node_allocated(&ggtt->error_capture))
>   		drm_mm_remove_node(&ggtt->error_capture);
>   
>   	ggtt_release_guc_top(ggtt);
> -
> -	if (drm_mm_initialized(&ggtt->vm.mm)) {
> -		intel_vgt_deballoon(ggtt);
> -		i915_address_space_fini(&ggtt->vm);
> -	}
> +	intel_vgt_deballoon(ggtt);
>   
>   	ggtt->vm.cleanup(&ggtt->vm);
>   
> -	mutex_unlock(&i915->drm.struct_mutex);
> +	mutex_unlock(&ggtt->vm.mutex);
> +	i915_address_space_fini(&ggtt->vm);
>   
>   	arch_phys_wc_del(ggtt->mtrr);
>   	io_mapping_fini(&ggtt->iomap);
> @@ -3188,9 +3192,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
>   static int ggtt_init_hw(struct i915_ggtt *ggtt)
>   {
>   	struct drm_i915_private *i915 = ggtt->vm.i915;
> -	int ret = 0;
> -
> -	mutex_lock(&i915->drm.struct_mutex);
>   
>   	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
>   
> @@ -3206,18 +3207,14 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt)
>   				ggtt->gmadr.start,
>   				ggtt->mappable_end)) {
>   		ggtt->vm.cleanup(&ggtt->vm);
> -		ret = -EIO;
> -		goto out;
> +		return -EIO;
>   	}
>   
>   	ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
>   
>   	i915_ggtt_init_fences(ggtt);
>   
> -out:
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
> -	return ret;
> +	return 0;
>   }
>   
>   /**
> @@ -3289,6 +3286,7 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
>   {
>   	struct i915_vma *vma, *vn;
>   	bool flush = false;
> +	int open;
>   
>   	intel_gt_check_and_clear_faults(ggtt->vm.gt);
>   
> @@ -3296,7 +3294,9 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
>   
>   	/* First fill our portion of the GTT with scratch pages */
>   	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
> -	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
> +
> +	/* Skip rewriting PTE on VMA unbind. */
> +	open = atomic_xchg(&ggtt->vm.open, 0);
>   
>   	/* clflush objects bound into the GGTT and rebind them. */
>   	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
> @@ -3305,24 +3305,20 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
>   		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
>   			continue;
>   
> -		mutex_unlock(&ggtt->vm.mutex);
> -
> -		if (!i915_vma_unbind(vma))
> -			goto lock;
> +		if (!__i915_vma_unbind(vma))
> +			continue;
>   
> +		clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
>   		WARN_ON(i915_vma_bind(vma,
>   				      obj ? obj->cache_level : 0,
> -				      PIN_UPDATE));
> +				      PIN_GLOBAL, NULL));
>   		if (obj) { /* only used during resume => exclusive access */
>   			flush |= fetch_and_zero(&obj->write_domain);
>   			obj->read_domains |= I915_GEM_DOMAIN_GTT;
>   		}
> -
> -lock:
> -		mutex_lock(&ggtt->vm.mutex);
>   	}
>   
> -	ggtt->vm.closed = false;
> +	atomic_set(&ggtt->vm.open, open);
>   	ggtt->invalidate(ggtt);
>   
>   	mutex_unlock(&ggtt->vm.mutex);
> @@ -3714,7 +3710,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
>   	u64 offset;
>   	int err;
>   
> -	lockdep_assert_held(&vm->i915->drm.struct_mutex);
> +	lockdep_assert_held(&vm->mutex);
> +
>   	GEM_BUG_ON(!size);
>   	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
>   	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index d8dfa3811e54..03418dede503 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -307,7 +307,14 @@ struct i915_address_space {
>   
>   	unsigned int bind_alloc;
>   
> -	bool closed;
> +	/*
> +	 * Each active user context has its own address space (in full-ppgtt).
> +	 * Since the vm may be shared between multiple contexts, we count how
> +	 * many contexts keep us "open". Once open hits zero, we are closed
> +	 * and do not allow any new attachments, and proceed to shutdown our
> +	 * vma and page directories.
> +	 */
> +	atomic_t open;
>   
>   	struct mutex mutex; /* protects vma and our lists */
>   #define VM_CLASS_GGTT 0
> @@ -581,6 +588,35 @@ static inline void i915_vm_put(struct i915_address_space *vm)
>   	kref_put(&vm->ref, i915_vm_release);
>   }
>   
> +static inline struct i915_address_space *
> +i915_vm_open(struct i915_address_space *vm)
> +{
> +	GEM_BUG_ON(!atomic_read(&vm->open));
> +	atomic_inc(&vm->open);
> +	return i915_vm_get(vm);
> +}
> +
> +static inline bool
> +i915_vm_tryopen(struct i915_address_space *vm)
> +{
> +	if (atomic_add_unless(&vm->open, 1, 0))
> +		return i915_vm_get(vm);
> +
> +	return false;
> +}
> +
> +void __i915_vm_close(struct i915_address_space *vm);
> +
> +static inline void
> +i915_vm_close(struct i915_address_space *vm)
> +{
> +	GEM_BUG_ON(!atomic_read(&vm->open));
> +	if (atomic_dec_and_test(&vm->open))
> +		__i915_vm_close(vm);
> +
> +	i915_vm_put(vm);
> +}
> +
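
The open vs ref split reads sensibly to me. To check my understanding, the
intended usage is roughly the below (hypothetical caller, not from the patch;
my_context is made up):

	struct my_context {
		struct i915_address_space *vm;
	};

	static int my_context_set_vm(struct my_context *ctx,
				     struct i915_address_space *vm)
	{
		/*
		 * A user that may create new bindings keeps the vm "open";
		 * i915_vm_open() also takes a kref so the pointer stays
		 * valid for as long as we hold it.
		 */
		ctx->vm = i915_vm_open(vm);
		return 0;
	}

	static void my_context_clear_vm(struct my_context *ctx)
	{
		/*
		 * The last closer runs __i915_vm_close() and tears down the
		 * remaining vma; anything that only needs the struct to stay
		 * alive (e.g. a lingering vma) keeps just the kref via
		 * i915_vm_get()/i915_vm_put().
		 */
		i915_vm_close(ctx->vm);
		ctx->vm = NULL;
	}

i.e. open counts "users allowed to add bindings", ref counts "pointers".
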
>   int gen6_ppgtt_pin(struct i915_ppgtt *base);
>   void gen6_ppgtt_unpin(struct i915_ppgtt *base);
>   void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
> @@ -613,10 +649,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
>   #define PIN_OFFSET_BIAS		BIT_ULL(6)
>   #define PIN_OFFSET_FIXED	BIT_ULL(7)
>   
> -#define PIN_MBZ			BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */
> -#define PIN_GLOBAL		BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */
> -#define PIN_USER		BIT_ULL(10) /* I915_VMA_LOCAL_BIND */
> -#define PIN_UPDATE		BIT_ULL(11)
> +#define PIN_UPDATE		BIT_ULL(9)
> +#define PIN_GLOBAL		BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
> +#define PIN_USER		BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
>   
>   #define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)
>   
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 524f6710b7aa..80055501eccb 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1204,15 +1204,10 @@ static int i915_oa_read(struct i915_perf_stream *stream,
>   static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
>   {
>   	struct i915_gem_engines_iter it;
> -	struct drm_i915_private *i915 = stream->dev_priv;
>   	struct i915_gem_context *ctx = stream->ctx;
>   	struct intel_context *ce;
>   	int err;
>   
> -	err = i915_mutex_lock_interruptible(&i915->drm);
> -	if (err)
> -		return ERR_PTR(err);
> -
>   	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
>   		if (ce->engine->class != RENDER_CLASS)
>   			continue;
> @@ -1229,10 +1224,6 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
>   	}
>   	i915_gem_context_unlock_engines(ctx);
>   
> -	mutex_unlock(&i915->drm.struct_mutex);
> -	if (err)
> -		return ERR_PTR(err);
> -
>   	return stream->pinned_ctx;
>   }
>   
> @@ -1331,32 +1322,22 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>    */
>   static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
>   {
> -	struct drm_i915_private *dev_priv = stream->dev_priv;
>   	struct intel_context *ce;
>   
>   	stream->specific_ctx_id = INVALID_CTX_ID;
>   	stream->specific_ctx_id_mask = 0;
>   
>   	ce = fetch_and_zero(&stream->pinned_ctx);
> -	if (ce) {
> -		mutex_lock(&dev_priv->drm.struct_mutex);
> +	if (ce)
>   		intel_context_unpin(ce);
> -		mutex_unlock(&dev_priv->drm.struct_mutex);
> -	}
>   }
>   
>   static void
>   free_oa_buffer(struct i915_perf_stream *stream)
>   {
> -	struct drm_i915_private *i915 = stream->dev_priv;
> -
> -	mutex_lock(&i915->drm.struct_mutex);
> -
>   	i915_vma_unpin_and_release(&stream->oa_buffer.vma,
>   				   I915_VMA_RELEASE_MAP);
>   
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
>   	stream->oa_buffer.vaddr = NULL;
>   }
>   
> @@ -1511,18 +1492,13 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
>   	if (WARN_ON(stream->oa_buffer.vma))
>   		return -ENODEV;
>   
> -	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
> -	if (ret)
> -		return ret;
> -
>   	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
>   	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
>   
>   	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
>   	if (IS_ERR(bo)) {
>   		DRM_ERROR("Failed to allocate OA buffer\n");
> -		ret = PTR_ERR(bo);
> -		goto unlock;
> +		return PTR_ERR(bo);
>   	}
>   
>   	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
> @@ -1546,7 +1522,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
>   			 i915_ggtt_offset(stream->oa_buffer.vma),
>   			 stream->oa_buffer.vaddr);
>   
> -	goto unlock;
> +	return 0;
>   
>   err_unpin:
>   	__i915_vma_unpin(vma);
> @@ -1557,8 +1533,6 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
>   	stream->oa_buffer.vaddr = NULL;
>   	stream->oa_buffer.vma = NULL;
>   
> -unlock:
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
>   	return ret;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index d097f77890ba..defeb9bb1daa 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -32,6 +32,7 @@
>   
>   #include "i915_drv.h"
>   #include "i915_globals.h"
> +#include "i915_sw_fence_work.h"
>   #include "i915_trace.h"
>   #include "i915_vma.h"
>   
> @@ -110,7 +111,8 @@ vma_create(struct drm_i915_gem_object *obj,
>   	if (vma == NULL)
>   		return ERR_PTR(-ENOMEM);
>   
> -	vma->vm = vm;
> +	mutex_init(&vma->pages_mutex);
> +	vma->vm = i915_vm_get(vm);
>   	vma->ops = &vm->vma_ops;
>   	vma->obj = obj;
>   	vma->resv = obj->base.resv;
> @@ -261,8 +263,6 @@ vma_lookup(struct drm_i915_gem_object *obj,
>    * Once created, the VMA is kept until either the object is freed, or the
>    * address space is closed.
>    *
> - * Must be called with struct_mutex held.
> - *
>    * Returns the vma, or an error pointer.
>    */
>   struct i915_vma *
> @@ -273,7 +273,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>   	struct i915_vma *vma;
>   
>   	GEM_BUG_ON(view && !i915_is_ggtt(vm));
> -	GEM_BUG_ON(vm->closed);
> +	GEM_BUG_ON(!atomic_read(&vm->open));
>   
>   	spin_lock(&obj->vma.lock);
>   	vma = vma_lookup(obj, vm, view);
> @@ -287,18 +287,63 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>   	return vma;
>   }
>   
> +struct i915_vma_work {
> +	struct dma_fence_work base;
> +	struct i915_vma *vma;
> +	enum i915_cache_level cache_level;
> +	unsigned int flags;
> +};
> +
> +static int __vma_bind(struct dma_fence_work *work)
> +{
> +	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
> +	struct i915_vma *vma = vw->vma;
> +	int err;
> +
> +	err = vma->ops->bind_vma(vma, vw->cache_level, vw->flags);
> +	if (err)
> +		atomic_or(I915_VMA_ERROR, &vma->flags);
> +
> +	if (vma->obj)
> +		__i915_gem_object_unpin_pages(vma->obj);
> +
> +	return err;
> +}
> +
> +static const struct dma_fence_work_ops bind_ops = {
> +	.name = "bind",
> +	.work = __vma_bind,
> +};
> +
> +struct i915_vma_work *i915_vma_work(void)
> +{
> +	struct i915_vma_work *vw;
> +
> +	vw = kzalloc(sizeof(*vw), GFP_KERNEL);
> +	if (!vw)
> +		return NULL;
> +
> +	dma_fence_work_init(&vw->base, &bind_ops);
> +	vw->base.dma.error = -EAGAIN; /* disable the worker by default */
> +
> +	return vw;
> +}
> +
>   /**
>    * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
>    * @vma: VMA to map
>    * @cache_level: mapping cache level
>    * @flags: flags like global or local mapping
> + * @work: preallocated worker for allocating and binding the PTE
>    *
>    * DMA addresses are taken from the scatter-gather table of this object (or of
>    * this VMA in case of non-default GGTT views) and PTE entries set up.
>    * Note that DMA addresses are also the only part of the SG table we care about.
>    */
> -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
> -		  u32 flags)
> +int i915_vma_bind(struct i915_vma *vma,
> +		  enum i915_cache_level cache_level,
> +		  u32 flags,
> +		  struct i915_vma_work *work)
>   {
>   	u32 bind_flags;
>   	u32 vma_flags;
> @@ -315,11 +360,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>   	if (GEM_DEBUG_WARN_ON(!flags))
>   		return -EINVAL;
>   
> -	bind_flags = 0;
> -	if (flags & PIN_GLOBAL)
> -		bind_flags |= I915_VMA_GLOBAL_BIND;
> -	if (flags & PIN_USER)
> -		bind_flags |= I915_VMA_LOCAL_BIND;
> +	bind_flags = flags;
> +	bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
>   
>   	vma_flags = atomic_read(&vma->flags);
>   	vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
> @@ -333,9 +375,32 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>   	GEM_BUG_ON(!vma->pages);
>   
>   	trace_i915_vma_bind(vma, bind_flags);
> -	ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
> -	if (ret)
> -		return ret;
> +	if (work && (bind_flags & ~vma_flags) & vma->vm->bind_alloc) {
> +		work->vma = vma;
> +		work->cache_level = cache_level;
> +		work->flags = bind_flags | I915_VMA_ALLOC;
> +
> +		/*
> +		 * Note we only want to chain up to the migration fence on
> +		 * the pages (not the object itself). As we don't track that,
> +		 * yet, we have to use the exclusive fence instead.
> +		 *
> +		 * Also note that we do not want to track the async vma as
> +		 * part of the obj->resv->excl_fence as it only affects
> +		 * execution and not content or object's backing store lifetime.
> +		 */
> +		GEM_BUG_ON(i915_active_has_exclusive(&vma->active));
> +		i915_active_set_exclusive(&vma->active, &work->base.dma);
> +		work->base.dma.error = 0; /* enable the queue_work() */
> +
> +		if (vma->obj)
> +			__i915_gem_object_pin_pages(vma->obj);
> +	} else {
> +		GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_alloc);
> +		ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
> +		if (ret)
> +			return ret;
> +	}
>   
>   	atomic_or(bind_flags, &vma->flags);
>   	return 0;
> @@ -348,9 +413,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>   
>   	/* Access through the GTT requires the device to be awake. */
>   	assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm);
> -
> -	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> -	if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
> +	if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
>   		err = -ENODEV;
>   		goto err;
>   	}
> @@ -358,7 +421,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>   	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
>   	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
>   
> -	ptr = vma->iomap;
> +	ptr = READ_ONCE(vma->iomap);
>   	if (ptr == NULL) {
>   		ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
>   					vma->node.start,
> @@ -368,7 +431,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>   			goto err;
>   		}
>   
> -		vma->iomap = ptr;
> +		if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) {
> +			io_mapping_unmap(ptr);
> +			ptr = vma->iomap;
> +		}
>   	}
>   
>   	__i915_vma_pin(vma);
> @@ -388,18 +454,12 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>   
>   void i915_vma_flush_writes(struct i915_vma *vma)
>   {
> -	if (!i915_vma_has_ggtt_write(vma))
> -		return;
> -
> -	intel_gt_flush_ggtt_writes(vma->vm->gt);
> -
> -	i915_vma_unset_ggtt_write(vma);
> +	if (i915_vma_unset_ggtt_write(vma))
> +		intel_gt_flush_ggtt_writes(vma->vm->gt);
>   }
>   
>   void i915_vma_unpin_iomap(struct i915_vma *vma)
>   {
> -	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> -
>   	GEM_BUG_ON(vma->iomap == NULL);
>   
>   	i915_vma_flush_writes(vma);
> @@ -435,6 +495,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
>   	if (!drm_mm_node_allocated(&vma->node))
>   		return false;
>   
> +	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
> +		return true;
> +
>   	if (vma->node.size < size)
>   		return true;
>   
> @@ -535,7 +598,6 @@ static void assert_bind_count(const struct drm_i915_gem_object *obj)
>   static int
>   i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   {
> -	struct drm_i915_private *dev_priv = vma->vm->i915;
>   	unsigned long color;
>   	u64 start, end;
>   	int ret;
> @@ -561,7 +623,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   
>   	end = vma->vm->total;
>   	if (flags & PIN_MAPPABLE)
> -		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
> +		end = min_t(u64, end, i915_vm_to_ggtt(vma->vm)->mappable_end);
>   	if (flags & PIN_ZONE_4G)
>   		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
>   	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
> @@ -578,34 +640,20 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   	}
>   
>   	color = 0;
> -	if (vma->obj) {
> -		ret = i915_gem_object_pin_pages(vma->obj);
> -		if (ret)
> -			return ret;
> -
> -		if (i915_vm_has_cache_coloring(vma->vm))
> -			color = vma->obj->cache_level;
> -	}
> -
> -	GEM_BUG_ON(vma->pages);
> -
> -	ret = vma->ops->set_pages(vma);
> -	if (ret)
> -		goto err_unpin;
> +	if (vma->obj && i915_vm_has_cache_coloring(vma->vm))
> +		color = vma->obj->cache_level;
>   
>   	if (flags & PIN_OFFSET_FIXED) {
>   		u64 offset = flags & PIN_OFFSET_MASK;
>   		if (!IS_ALIGNED(offset, alignment) ||
> -		    range_overflows(offset, size, end)) {
> -			ret = -EINVAL;
> -			goto err_clear;
> -		}
> +		    range_overflows(offset, size, end))
> +			return -EINVAL;
>   
>   		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
>   					   size, offset, color,
>   					   flags);
>   		if (ret)
> -			goto err_clear;
> +			return ret;
>   	} else {
>   		/*
>   		 * We only support huge gtt pages through the 48b PPGTT,
> @@ -644,7 +692,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   					  size, alignment, color,
>   					  start, end, flags);
>   		if (ret)
> -			goto err_clear;
> +			return ret;
>   
>   		GEM_BUG_ON(vma->node.start < start);
>   		GEM_BUG_ON(vma->node.start + vma->node.size > end);
> @@ -652,23 +700,15 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
>   
> -	mutex_lock(&vma->vm->mutex);
>   	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
> -	mutex_unlock(&vma->vm->mutex);
>   
>   	if (vma->obj) {
> +		atomic_inc(&vma->obj->mm.pages_pin_count);
>   		atomic_inc(&vma->obj->bind_count);
>   		assert_bind_count(vma->obj);
>   	}
>   
>   	return 0;
> -
> -err_clear:
> -	vma->ops->clear_pages(vma);
> -err_unpin:
> -	if (vma->obj)
> -		i915_gem_object_unpin_pages(vma->obj);
> -	return ret;
>   }
>   
>   static void
> @@ -677,12 +717,7 @@ i915_vma_remove(struct i915_vma *vma)
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
>   
> -	vma->ops->clear_pages(vma);
> -
> -	mutex_lock(&vma->vm->mutex);
> -	drm_mm_remove_node(&vma->node);
>   	list_del(&vma->vm_link);
> -	mutex_unlock(&vma->vm->mutex);
>   
>   	/*
>   	 * Since the unbound list is global, only move to that list if
> @@ -701,51 +736,211 @@ i915_vma_remove(struct i915_vma *vma)
>   		i915_gem_object_unpin_pages(obj);
>   		assert_bind_count(obj);
>   	}
> +
> +	drm_mm_remove_node(&vma->node);
>   }
>   
> -int __i915_vma_do_pin(struct i915_vma *vma,
> -		      u64 size, u64 alignment, u64 flags)
> +static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)

What does QAD stand for?

>   {
> -	const unsigned int bound = atomic_read(&vma->flags);
> -	int ret;
> +	unsigned int bound;
> +	bool pinned = true;
>   
> -	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> -	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
> -	GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
> +	bound = atomic_read(&vma->flags);
> +	do {
> +		if (unlikely(flags & ~bound))
> +			return false;
>   
> -	if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
> -		ret = -EBUSY;
> -		goto err_unpin;
> +		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
> +			return false;
> +
> +		if (!(bound & I915_VMA_PIN_MASK))
> +			goto unpinned;
> +
> +		GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0);
> +	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
> +
> +	return true;
> +
> +unpinned:
> +	/*
> +	 * If pin_count==0, but we are bound, check under the lock to avoid
> +	 * racing with a concurrent i915_vma_unbind().
> +	 */
> +	mutex_lock(&vma->vm->mutex);
> +	do {
> +		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
> +			pinned = false;
> +			break;
> +		}
> +
> +		if (unlikely(flags & ~bound)) {
> +			pinned = false;
> +			break;
> +		}
> +	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
> +	mutex_unlock(&vma->vm->mutex);
> +
> +	return pinned;
> +}
> +
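
Having read the body: if I follow, this is "take an extra pin only if the vma
is already bound with the requested flags", with the lockless cmpxchg fast
path for pin_count > 0 and a recheck under vm->mutex when pin_count was zero,
so we cannot race an __i915_vma_unbind() already in progress. If that is
right, a comment on the function would help future readers, perhaps something
along the lines of (the expansion of the acronym is my guess):

	/*
	 * try_qad_pin - "quick and dirty"(?) pin: grab a pin on a vma that
	 * is already bound with at least @flags, without touching vm->mutex
	 * unless pin_count was zero. Returns false if the caller must fall
	 * back to the full i915_vma_pin() slow path.
	 */
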
> +static int vma_get_pages(struct i915_vma *vma)
> +{
> +	int err = 0;
> +
> +	if (atomic_add_unless(&vma->pages_count, 1, 0))
> +		return 0;
> +
> +	/* Allocations ahoy! */
> +	if (mutex_lock_interruptible(&vma->pages_mutex))
> +		return -EINTR;
> +
> +	if (!atomic_read(&vma->pages_count)) {
> +		if (vma->obj) {
> +			err = i915_gem_object_pin_pages(vma->obj);
> +			if (err)
> +				goto unlock;
> +		}
> +
> +		err = vma->ops->set_pages(vma);
> +		if (err)
> +			goto unlock;
>   	}
> +	atomic_inc(&vma->pages_count);
>   
> -	if ((bound & I915_VMA_BIND_MASK) == 0) {
> -		ret = i915_vma_insert(vma, size, alignment, flags);
> -		if (ret)
> -			goto err_unpin;
> +unlock:
> +	mutex_unlock(&vma->pages_mutex);
> +
> +	return err;
> +}
> +
> +static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
> +{
> +	/* We allocate under vma_get_pages, so beware the shrinker */
> +	mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING);
> +	GEM_BUG_ON(atomic_read(&vma->pages_count) < count);
> +	if (atomic_sub_return(count, &vma->pages_count) == 0) {
> +		vma->ops->clear_pages(vma);
> +		GEM_BUG_ON(vma->pages);
> +		if (vma->obj)
> +			i915_gem_object_unpin_pages(vma->obj);
>   	}
> -	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> +	mutex_unlock(&vma->pages_mutex);
> +}
>   
> -	ret = i915_vma_bind(vma, vma->obj ? vma->obj->cache_level : 0, flags);
> -	if (ret)
> -		goto err_remove;
> +static void vma_put_pages(struct i915_vma *vma)
> +{
> +	if (atomic_add_unless(&vma->pages_count, -1, 1))
> +		return;
> +
> +	__vma_put_pages(vma, 1);
> +}
> +
> +static void vma_unbind_pages(struct i915_vma *vma)
> +{
> +	unsigned int count;
> +
> +	lockdep_assert_held(&vma->vm->mutex);
> +
> +	/* The upper portion of pages_count is the number of bindings */
> +	count = atomic_read(&vma->pages_count);
> +	count >>= I915_VMA_PAGES_BIAS;
> +	GEM_BUG_ON(!count);
> +
> +	__vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
> +}
> +
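
The packed pages_count took me a while, so writing down my decode: the low
I915_VMA_PAGES_BIAS (24) bits count vma_get_pages() references and the bits
above count bindings, with each binding also pinning one page reference
(I915_VMA_PAGES_ACTIVE == BIT(24) | 1). Worked through i915_vma_pin() below,
assuming I have it right:

	vma_get_pages(vma);	/* pages_count = 0x0000001 */
	... bind succeeds ...	/* += I915_VMA_PAGES_ACTIVE -> 0x1000002 */
	vma_put_pages(vma);	/* caller's ref dropped -> 0x1000001 */

so a bound vma idles at one binding plus the page reference that binding owns,
and vma_unbind_pages() then drops exactly count | count << 24 == 0x1000001,
releasing the backing pages.
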
> +int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> +{
> +	struct i915_vma_work *work = NULL;
> +	unsigned int bound;
> +	int err;
> +
> +	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
> +	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
> +
> +	GEM_BUG_ON(flags & PIN_UPDATE);
> +	GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL)));
> +
> +	/* First try and grab the pin without rebinding the vma */
> +	if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK))
> +		return 0;
> +
> +	err = vma_get_pages(vma);
> +	if (err)
> +		return err;
> +
> +	if (flags & vma->vm->bind_alloc) {
> +		work = i915_vma_work();
> +		if (!work) {
> +			err = -ENOMEM;
> +			goto err_pages;
> +		}
> +	}
> +
> +	/* No more allocations allowed once we hold vm->mutex */
> +	err = mutex_lock_interruptible(&vma->vm->mutex);
> +	if (err)
> +		goto err_fence;
> +
> +	bound = atomic_read(&vma->flags);
> +	if (unlikely(bound & I915_VMA_ERROR)) {
> +		err = -ENOMEM;
> +		goto err_unlock;
> +	}
> +
> +	if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
> +		err = -EAGAIN; /* pins are meant to be fairly temporary */
> +		goto err_unlock;
> +	}
> +
> +	if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) {
> +		__i915_vma_pin(vma);
> +		goto err_unlock;
> +	}
> +
> +	err = i915_active_acquire(&vma->active);
> +	if (err)
> +		goto err_unlock;
> +
> +	if (!(bound & I915_VMA_BIND_MASK)) {
> +		err = i915_vma_insert(vma, size, alignment, flags);
> +		if (err)
> +			goto err_active;
> +
> +		if (i915_is_ggtt(vma->vm))
> +			__i915_vma_set_map_and_fenceable(vma);
> +	}
>   
> -	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_BIND_MASK));
> +	GEM_BUG_ON(!vma->pages);
> +	err = i915_vma_bind(vma,
> +			    vma->obj ? vma->obj->cache_level : 0,
> +			    flags, work);
> +	if (err)
> +		goto err_remove;
>   
> -	if ((bound ^ atomic_read(&vma->flags)) & I915_VMA_GLOBAL_BIND)
> -		__i915_vma_set_map_and_fenceable(vma);
> +	/* There should only be at most 2 active bindings (user, global) */
> +	GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound);
> +	atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
> +	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
>   
> +	__i915_vma_pin(vma);
> +	GEM_BUG_ON(!i915_vma_is_pinned(vma));
> +	GEM_BUG_ON(!i915_vma_is_bound(vma, flags));
>   	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
> -	return 0;
>   
>   err_remove:
> -	if ((bound & I915_VMA_BIND_MASK) == 0) {
> +	if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
>   		i915_vma_remove(vma);
> -		GEM_BUG_ON(vma->pages);
> -		GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_BIND_MASK);
> -	}
> -err_unpin:
> -	__i915_vma_unpin(vma);
> -	return ret;
> +err_active:
> +	i915_active_release(&vma->active);
> +err_unlock:
> +	mutex_unlock(&vma->vm->mutex);
> +err_fence:
> +	if (work)
> +		dma_fence_work_commit(&work->base);
> +err_pages:
> +	vma_put_pages(vma);
> +	return err;
>   }
>   
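
And just to confirm I have the ordering of the new pin path right: everything
that can allocate (object pages, vma->pages, the bind worker when needed) is
acquired before vm->mutex, the drm_mm reservation and i915_vma_bind() happen
under it, and the preallocated worker is only committed after the mutex is
dropped. In outline (my paraphrase of the above, not a literal quote):

	vma_get_pages(vma);		/* may allocate / pin obj pages */
	work = i915_vma_work();		/* preallocate the PTE alloc worker */
	mutex_lock(&vma->vm->mutex);	/* no allocations from here on */
	i915_vma_insert(vma, ...);	/* drm_mm node reservation */
	i915_vma_bind(vma, ..., work);	/* arms the worker (dma.error = 0) */
	mutex_unlock(&vma->vm->mutex);
	dma_fence_work_commit(&work->base); /* queue, or no-op on -EAGAIN */
	vma_put_pages(vma);		/* binding keeps its own page ref */

If that is the intent it might be worth a comment block above i915_vma_pin(),
since the success path falling through the error labels is easy to misread.
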
>   void i915_vma_close(struct i915_vma *vma)
> @@ -776,9 +971,6 @@ static void __i915_vma_remove_closed(struct i915_vma *vma)
>   {
>   	struct drm_i915_private *i915 = vma->vm->i915;
>   
> -	if (!i915_vma_is_closed(vma))
> -		return;
> -
>   	spin_lock_irq(&i915->gt.closed_lock);
>   	list_del_init(&vma->closed_link);
>   	spin_unlock_irq(&i915->gt.closed_lock);
> @@ -786,40 +978,35 @@ static void __i915_vma_remove_closed(struct i915_vma *vma)
>   
>   void i915_vma_reopen(struct i915_vma *vma)
>   {
> -	__i915_vma_remove_closed(vma);
> +	if (i915_vma_is_closed(vma))
> +		__i915_vma_remove_closed(vma);
>   }
>   
> -static void __i915_vma_destroy(struct i915_vma *vma)
> +void i915_vma_destroy(struct i915_vma *vma)
>   {
> -	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
> -	GEM_BUG_ON(vma->fence);
> +	if (drm_mm_node_allocated(&vma->node)) {
> +		mutex_lock(&vma->vm->mutex);
> +		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
> +		WARN_ON(__i915_vma_unbind(vma));
> +		mutex_unlock(&vma->vm->mutex);
> +		GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
> +	}
> +	GEM_BUG_ON(i915_vma_is_active(vma));
>   
>   	if (vma->obj) {
>   		struct drm_i915_gem_object *obj = vma->obj;
>   
>   		spin_lock(&obj->vma.lock);
>   		list_del(&vma->obj_link);
> -		rb_erase(&vma->obj_node, &vma->obj->vma.tree);
> +		rb_erase(&vma->obj_node, &obj->vma.tree);
>   		spin_unlock(&obj->vma.lock);
>   	}
>   
> -	i915_active_fini(&vma->active);
> -
> -	i915_vma_free(vma);
> -}
> -
> -void i915_vma_destroy(struct i915_vma *vma)
> -{
> -	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> -
> -	GEM_BUG_ON(i915_vma_is_pinned(vma));
> -
>   	__i915_vma_remove_closed(vma);
> +	i915_vm_put(vma->vm);
>   
> -	WARN_ON(i915_vma_unbind(vma));
> -	GEM_BUG_ON(i915_vma_is_active(vma));
> -
> -	__i915_vma_destroy(vma);
> +	i915_active_fini(&vma->active);
> +	i915_vma_free(vma);
>   }
>   
>   void i915_vma_parked(struct drm_i915_private *i915)
> @@ -828,12 +1015,32 @@ void i915_vma_parked(struct drm_i915_private *i915)
>   
>   	spin_lock_irq(&i915->gt.closed_lock);
>   	list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) {
> -		list_del_init(&vma->closed_link);
> +		struct drm_i915_gem_object *obj = vma->obj;
> +		struct i915_address_space *vm = vma->vm;
> +
> +		/* XXX All to avoid keeping a reference on i915_vma itself */
> +
> +		if (!kref_get_unless_zero(&obj->base.refcount))
> +			continue;
> +
> +		if (!i915_vm_tryopen(vm)) {
> +			i915_gem_object_put(obj);
> +			obj = NULL;
> +		}
> +
>   		spin_unlock_irq(&i915->gt.closed_lock);
>   
> -		i915_vma_destroy(vma);
> +		if (obj) {
> +			i915_vma_destroy(vma);
> +			i915_gem_object_put(obj);
> +		}
>   
> +		i915_vm_close(vm);
> +
> +		/* Restart after dropping lock */
>   		spin_lock_irq(&i915->gt.closed_lock);
> +		next = list_first_entry(&i915->gt.closed_vma,
> +					typeof(*next), closed_link);
>   	}
>   	spin_unlock_irq(&i915->gt.closed_lock);
>   }
> @@ -873,6 +1080,20 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
>   		list_del(&vma->obj->userfault_link);
>   }
>   
> +int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
> +{
> +	int err;
> +
> +	GEM_BUG_ON(!i915_vma_is_pinned(vma));
> +
> +	/* Wait for the vma to be bound before we start! */
> +	err = i915_request_await_active(rq, &vma->active);
> +	if (err)
> +		return err;
> +
> +	return i915_active_add_request(&vma->active, rq);
> +}
> +
>   int i915_vma_move_to_active(struct i915_vma *vma,
>   			    struct i915_request *rq,
>   			    unsigned int flags)
> @@ -880,19 +1101,9 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   	struct drm_i915_gem_object *obj = vma->obj;
>   	int err;
>   
> -	assert_vma_held(vma);
>   	assert_object_held(obj);
> -	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   
> -	/*
> -	 * Add a reference if we're newly entering the active list.
> -	 * The order in which we add operations to the retirement queue is
> -	 * vital here: mark_active adds to the start of the callback list,
> -	 * such that subsequent callbacks are called first. Therefore we
> -	 * add the active reference first and queue for it to be dropped
> -	 * *last*.
> -	 */
> -	err = i915_active_add_request(&vma->active, rq);
> +	err = __i915_vma_move_to_active(vma, rq);
>   	if (unlikely(err))
>   		return err;
>   
> @@ -918,38 +1129,23 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   	return 0;
>   }
>   
> -int i915_vma_unbind(struct i915_vma *vma)
> +int __i915_vma_unbind(struct i915_vma *vma)
>   {
>   	int ret;
>   
> -	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> +	lockdep_assert_held(&vma->vm->mutex);
>   
>   	/*
>   	 * First wait upon any activity as retiring the request may
>   	 * have side-effects such as unpinning or even unbinding this vma.
> +	 *
> +	 * XXX Actually waiting under the vm->mutex is a hindrance and
> +	 * should be pipelined wherever possible. In cases where that is
> +	 * unavoidable, we should lift the wait to before the mutex.
>   	 */
> -	might_sleep();
> -	if (i915_vma_is_active(vma)) {
> -		/*
> -		 * When a closed VMA is retired, it is unbound - eek.
> -		 * In order to prevent it from being recursively closed,
> -		 * take a pin on the vma so that the second unbind is
> -		 * aborted.
> -		 *
> -		 * Even more scary is that the retire callback may free
> -		 * the object (last active vma). To prevent the explosion
> -		 * we defer the actual object free to a worker that can
> -		 * only proceed once it acquires the struct_mutex (which
> -		 * we currently hold, therefore it cannot free this object
> -		 * before we are finished).
> -		 */
> -		__i915_vma_pin(vma);
> -		ret = i915_active_wait(&vma->active);
> -		__i915_vma_unpin(vma);
> -		if (ret)
> -			return ret;
> -	}
> -	GEM_BUG_ON(i915_vma_is_active(vma));
> +	ret = i915_vma_sync(vma);
> +	if (ret)
> +		return ret;
>   
>   	if (i915_vma_is_pinned(vma)) {
>   		vma_print_allocator(vma, "is pinned");
> @@ -970,16 +1166,12 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
>   
>   		/* release the fence reg _after_ flushing */
> -		mutex_lock(&vma->vm->mutex);
>   		ret = i915_vma_revoke_fence(vma);
> -		mutex_unlock(&vma->vm->mutex);
>   		if (ret)
>   			return ret;
>   
>   		/* Force a pagefault for domain tracking on next user access */
> -		mutex_lock(&vma->vm->mutex);
>   		i915_vma_revoke_mmap(vma);
> -		mutex_unlock(&vma->vm->mutex);
>   
>   		__i915_vma_iounmap(vma);
>   		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
> @@ -987,17 +1179,33 @@ int i915_vma_unbind(struct i915_vma *vma)
>   	GEM_BUG_ON(vma->fence);
>   	GEM_BUG_ON(i915_vma_has_userfault(vma));
>   
> -	if (likely(!vma->vm->closed)) {
> +	if (likely(atomic_read(&vma->vm->open))) {
>   		trace_i915_vma_unbind(vma);
>   		vma->ops->unbind_vma(vma);
>   	}
> -	atomic_and(~I915_VMA_BIND_MASK, &vma->flags);
> +	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR), &vma->flags);
>   
> +	vma_unbind_pages(vma);
>   	i915_vma_remove(vma);
>   
>   	return 0;
>   }
>   
> +int i915_vma_unbind(struct i915_vma *vma)
> +{
> +	struct i915_address_space *vm = vma->vm;
> +	int err;
> +
> +	err = mutex_lock_interruptible(&vm->mutex);
> +	if (err)
> +		return err;
> +
> +	err = __i915_vma_unbind(vma);
> +	mutex_unlock(&vm->mutex);
> +
> +	return err;
> +}
> +
>   struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma)
>   {
>   	i915_gem_object_make_unshrinkable(vma->obj);
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index e49b199f7de7..858908e3d1cc 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -96,25 +96,28 @@ struct i915_vma {
>   	 * exclusive cachelines of a single page, so a maximum of 64 possible
>   	 * users.
>   	 */
> -#define I915_VMA_PIN_MASK 0xff
> -#define I915_VMA_PIN_OVERFLOW_BIT 8
> -#define I915_VMA_PIN_OVERFLOW	((int)BIT(I915_VMA_PIN_OVERFLOW_BIT))
> +#define I915_VMA_PIN_MASK 0x3ff
> +#define I915_VMA_OVERFLOW 0x200
>   
>   	/** Flags and address space this VMA is bound to */
> -#define I915_VMA_GLOBAL_BIND_BIT 9
> -#define I915_VMA_LOCAL_BIND_BIT 10
> +#define I915_VMA_GLOBAL_BIND_BIT 10
> +#define I915_VMA_LOCAL_BIND_BIT  11
>   
>   #define I915_VMA_GLOBAL_BIND	((int)BIT(I915_VMA_GLOBAL_BIND_BIT))
>   #define I915_VMA_LOCAL_BIND	((int)BIT(I915_VMA_LOCAL_BIND_BIT))
>   
> -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | \
> -			    I915_VMA_LOCAL_BIND | \
> -			    I915_VMA_PIN_OVERFLOW)
> +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)
>   
> -#define I915_VMA_GGTT_BIT	11
> -#define I915_VMA_CAN_FENCE_BIT	12
> -#define I915_VMA_USERFAULT_BIT	13
> -#define I915_VMA_GGTT_WRITE_BIT	14
> +#define I915_VMA_ALLOC_BIT	12
> +#define I915_VMA_ALLOC		((int)BIT(I915_VMA_ALLOC_BIT))
> +
> +#define I915_VMA_ERROR_BIT	13
> +#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
> +
> +#define I915_VMA_GGTT_BIT	14
> +#define I915_VMA_CAN_FENCE_BIT	15
> +#define I915_VMA_USERFAULT_BIT	16
> +#define I915_VMA_GGTT_WRITE_BIT	17
>   
>   #define I915_VMA_GGTT		((int)BIT(I915_VMA_GGTT_BIT))
>   #define I915_VMA_CAN_FENCE	((int)BIT(I915_VMA_CAN_FENCE_BIT))
> @@ -123,6 +126,11 @@ struct i915_vma {
>   
>   	struct i915_active active;
>   
> +#define I915_VMA_PAGES_BIAS 24
> +#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1)
> +	atomic_t pages_count; /* number of active binds to the pages */
> +	struct mutex pages_mutex; /* protect acquire/release of backing pages */
> +
>   	/**
>   	 * Support different GGTT views into the same object.
>   	 * This means there can be multiple VMA mappings per object and per VM.
> @@ -169,6 +177,8 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma)
>   	return !i915_active_is_idle(&vma->active);
>   }
>   
> +int __must_check __i915_vma_move_to_active(struct i915_vma *vma,
> +					   struct i915_request *rq);
>   int __must_check i915_vma_move_to_active(struct i915_vma *vma,
>   					 struct i915_request *rq,
>   					 unsigned int flags);
> @@ -307,13 +317,18 @@ i915_vma_compare(struct i915_vma *vma,
>   	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
>   }
>   
> -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
> -		  u32 flags);
> +struct i915_vma_work *i915_vma_work(void);
> +int i915_vma_bind(struct i915_vma *vma,
> +		  enum i915_cache_level cache_level,
> +		  u32 flags,
> +		  struct i915_vma_work *work);
> +
>   bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
>   bool i915_vma_misplaced(const struct i915_vma *vma,
>   			u64 size, u64 alignment, u64 flags);
>   void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
>   void i915_vma_revoke_mmap(struct i915_vma *vma);
> +int __i915_vma_unbind(struct i915_vma *vma);
>   int __must_check i915_vma_unbind(struct i915_vma *vma);
>   void i915_vma_unlink_ctx(struct i915_vma *vma);
>   void i915_vma_close(struct i915_vma *vma);
> @@ -332,26 +347,8 @@ static inline void i915_vma_unlock(struct i915_vma *vma)
>   	dma_resv_unlock(vma->resv);
>   }
>   
> -int __i915_vma_do_pin(struct i915_vma *vma,
> -		      u64 size, u64 alignment, u64 flags);
> -static inline int __must_check
> -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> -{
> -	BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
> -	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
> -	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
> -
> -	/* Pin early to prevent the shrinker/eviction logic from destroying
> -	 * our vma as we insert and bind.
> -	 */
> -	if (likely(((atomic_inc_return(&vma->flags) ^ flags) & I915_VMA_BIND_MASK) == 0)) {
> -		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> -		GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
> -		return 0;
> -	}
> -
> -	return __i915_vma_do_pin(vma, size, alignment, flags);
> -}
> +int __must_check
> +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
>   
>   static inline int i915_vma_pin_count(const struct i915_vma *vma)
>   {
> @@ -366,17 +363,17 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
>   static inline void __i915_vma_pin(struct i915_vma *vma)
>   {
>   	atomic_inc(&vma->flags);
> -	GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_PIN_OVERFLOW);
> +	GEM_BUG_ON(!i915_vma_is_pinned(vma));
>   }
>   
>   static inline void __i915_vma_unpin(struct i915_vma *vma)
>   {
> +	GEM_BUG_ON(!i915_vma_is_pinned(vma));
>   	atomic_dec(&vma->flags);
>   }
>   
>   static inline void i915_vma_unpin(struct i915_vma *vma)
>   {
> -	GEM_BUG_ON(!i915_vma_is_pinned(vma));
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   	__i915_vma_unpin(vma);
>   }
> @@ -402,8 +399,6 @@ static inline bool i915_node_color_differs(const struct drm_mm_node *node,
>    * the caller must call i915_vma_unpin_iomap to relinquish the pinning
>    * after the iomapping is no longer required.
>    *
> - * Callers must hold the struct_mutex.
> - *
>    * Returns a valid iomapped pointer or ERR_PTR.
>    */
>   void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
> @@ -415,8 +410,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
>    *
>    * Unpins the previously iomapped VMA from i915_vma_pin_iomap().
>    *
> - * Callers must hold the struct_mutex. This function is only valid to be
> - * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap().
> + * This function is only valid to be called on a VMA previously
> + * iomapped by the caller with i915_vma_pin_iomap().
>    */
>   void i915_vma_unpin_iomap(struct i915_vma *vma);
>   
> @@ -444,6 +439,8 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
>   int __must_check i915_vma_pin_fence(struct i915_vma *vma);
>   int __must_check i915_vma_revoke_fence(struct i915_vma *vma);
>   
> +int __i915_vma_pin_fence(struct i915_vma *vma);
> +
>   static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
>   {
>   	GEM_BUG_ON(atomic_read(&vma->fence->pin_count) <= 0);
> @@ -461,7 +458,6 @@ static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
>   static inline void
>   i915_vma_unpin_fence(struct i915_vma *vma)
>   {
> -	/* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */
>   	if (vma->fence)
>   		__i915_vma_unpin_fence(vma);
>   }
> @@ -490,4 +486,10 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
>   void i915_vma_make_shrinkable(struct i915_vma *vma);
>   void i915_vma_make_purgeable(struct i915_vma *vma);
>   
> +static inline int i915_vma_sync(struct i915_vma *vma)
> +{
> +	/* Wait for the asynchronous bindings and pending GPU reads */
> +	return i915_active_wait(&vma->active);
> +}
> +
>   #endif
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
> index 37593831b539..0346c3e5b6b6 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
> @@ -119,10 +119,8 @@ static void pm_resume(struct drm_i915_private *i915)
>   		intel_gt_sanitize(&i915->gt, false);
>   		i915_gem_sanitize(i915);
>   
> -		mutex_lock(&i915->drm.struct_mutex);
>   		i915_gem_restore_gtt_mappings(i915);
>   		i915_gem_restore_fences(i915);
> -		mutex_unlock(&i915->drm.struct_mutex);
>   
>   		i915_gem_resume(i915);
>   	}
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> index 2905fb21d866..75a4695b82bb 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> @@ -106,14 +106,11 @@ static int populate_ggtt(struct drm_i915_private *i915,
>   
>   static void unpin_ggtt(struct drm_i915_private *i915)
>   {
> -	struct i915_ggtt *ggtt = &i915->ggtt;
>   	struct i915_vma *vma;
>   
> -	mutex_lock(&ggtt->vm.mutex);
>   	list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
>   		if (vma->obj->mm.quirked)
>   			i915_vma_unpin(vma);
> -	mutex_unlock(&ggtt->vm.mutex);
>   }
>   
>   static void cleanup_objects(struct drm_i915_private *i915,
> @@ -127,11 +124,7 @@ static void cleanup_objects(struct drm_i915_private *i915,
>   		i915_gem_object_put(obj);
>   	}
>   
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
>   	i915_gem_drain_freed_objects(i915);
> -
> -	mutex_lock(&i915->drm.struct_mutex);
>   }
>   
>   static int igt_evict_something(void *arg)
> @@ -148,10 +141,12 @@ static int igt_evict_something(void *arg)
>   		goto cleanup;
>   
>   	/* Everything is pinned, nothing should happen */
> +	mutex_lock(&ggtt->vm.mutex);
>   	err = i915_gem_evict_something(&ggtt->vm,
>   				       I915_GTT_PAGE_SIZE, 0, 0,
>   				       0, U64_MAX,
>   				       0);
> +	mutex_unlock(&ggtt->vm.mutex);
>   	if (err != -ENOSPC) {
>   		pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n",
>   		       err);
> @@ -161,10 +156,12 @@ static int igt_evict_something(void *arg)
>   	unpin_ggtt(i915);
>   
>   	/* Everything is unpinned, we should be able to evict something */
> +	mutex_lock(&ggtt->vm.mutex);
>   	err = i915_gem_evict_something(&ggtt->vm,
>   				       I915_GTT_PAGE_SIZE, 0, 0,
>   				       0, U64_MAX,
>   				       0);
> +	mutex_unlock(&ggtt->vm.mutex);
>   	if (err) {
>   		pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n",
>   		       err);
> @@ -230,7 +227,9 @@ static int igt_evict_for_vma(void *arg)
>   		goto cleanup;
>   
>   	/* Everything is pinned, nothing should happen */
> +	mutex_lock(&ggtt->vm.mutex);
>   	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
> +	mutex_unlock(&ggtt->vm.mutex);
>   	if (err != -ENOSPC) {
>   		pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n",
>   		       err);
> @@ -240,7 +239,9 @@ static int igt_evict_for_vma(void *arg)
>   	unpin_ggtt(i915);
>   
>   	/* Everything is unpinned, we should be able to evict the node */
> +	mutex_lock(&ggtt->vm.mutex);
>   	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
> +	mutex_unlock(&ggtt->vm.mutex);
>   	if (err) {
>   		pr_err("i915_gem_evict_for_node returned err=%d\n",
>   		       err);
> @@ -319,7 +320,9 @@ static int igt_evict_for_cache_color(void *arg)
>   	i915_vma_unpin(vma);
>   
>   	/* Remove just the second vma */
> +	mutex_lock(&ggtt->vm.mutex);
>   	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
> +	mutex_unlock(&ggtt->vm.mutex);
>   	if (err) {
>   		pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err);
>   		goto cleanup;
> @@ -330,7 +333,9 @@ static int igt_evict_for_cache_color(void *arg)
>   	 */
>   	target.color = I915_CACHE_L3_LLC;
>   
> +	mutex_lock(&ggtt->vm.mutex);
>   	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
> +	mutex_unlock(&ggtt->vm.mutex);
>   	if (!err) {
>   		pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err);
>   		err = -EINVAL;
> @@ -360,7 +365,9 @@ static int igt_evict_vm(void *arg)
>   		goto cleanup;
>   
>   	/* Everything is pinned, nothing should happen */
> +	mutex_lock(&ggtt->vm.mutex);
>   	err = i915_gem_evict_vm(&ggtt->vm);
> +	mutex_unlock(&ggtt->vm.mutex);
>   	if (err) {
>   		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
>   		       err);
> @@ -369,7 +376,9 @@ static int igt_evict_vm(void *arg)
>   
>   	unpin_ggtt(i915);
>   
> +	mutex_lock(&ggtt->vm.mutex);
>   	err = i915_gem_evict_vm(&ggtt->vm);
> +	mutex_unlock(&ggtt->vm.mutex);
>   	if (err) {
>   		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
>   		       err);
> @@ -410,11 +419,11 @@ static int igt_evict_contexts(void *arg)
>   	if (!HAS_FULL_PPGTT(i915))
>   		return 0;
>   
> -	mutex_lock(&i915->drm.struct_mutex);
>   	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
>   
>   	/* Reserve a block so that we know we have enough to fit a few rq */
>   	memset(&hole, 0, sizeof(hole));
> +	mutex_lock(&i915->ggtt.vm.mutex);
>   	err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole,
>   				  PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE,
>   				  0, i915->ggtt.vm.total,
> @@ -427,7 +436,9 @@ static int igt_evict_contexts(void *arg)
>   	do {
>   		struct reserved *r;
>   
> +		mutex_unlock(&i915->ggtt.vm.mutex);
>   		r = kcalloc(1, sizeof(*r), GFP_KERNEL);
> +		mutex_lock(&i915->ggtt.vm.mutex);
>   		if (!r) {
>   			err = -ENOMEM;
>   			goto out_locked;
> @@ -447,7 +458,7 @@ static int igt_evict_contexts(void *arg)
>   		count++;
>   	} while (1);
>   	drm_mm_remove_node(&hole);
> -	mutex_unlock(&i915->drm.struct_mutex);
> +	mutex_unlock(&i915->ggtt.vm.mutex);
>   	pr_info("Filled GGTT with %lu 1MiB nodes\n", count);
>   
>   	/* Overfill the GGTT with context objects and so try to evict one. */
> @@ -510,7 +521,7 @@ static int igt_evict_contexts(void *arg)
>   			break;
>   	}
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> +	mutex_lock(&i915->ggtt.vm.mutex);
>   out_locked:
>   	if (igt_flush_test(i915, I915_WAIT_LOCKED))
>   		err = -EIO;
> @@ -524,8 +535,8 @@ static int igt_evict_contexts(void *arg)
>   	}
>   	if (drm_mm_node_allocated(&hole))
>   		drm_mm_remove_node(&hole);
> +	mutex_unlock(&i915->ggtt.vm.mutex);
>   	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
> -	mutex_unlock(&i915->drm.struct_mutex);
>   
>   	return err;
>   }
> @@ -547,12 +558,9 @@ int i915_gem_evict_mock_selftests(void)
>   	if (!i915)
>   		return -ENOMEM;
>   
> -	mutex_lock(&i915->drm.struct_mutex);
>   	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
>   		err = i915_subtests(tests, i915);
>   
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
>   	drm_dev_put(&i915->drm);
>   	return err;
>   }
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 84d94eabc7dd..373fbd58a17a 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -38,16 +38,7 @@
>   
>   static void cleanup_freed_objects(struct drm_i915_private *i915)
>   {
> -	/*
> -	 * As we may hold onto the struct_mutex for inordinate lengths of
> -	 * time, the NMI khungtaskd detector may fire for the free objects
> -	 * worker.
> -	 */
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
>   	i915_gem_drain_freed_objects(i915);
> -
> -	mutex_lock(&i915->drm.struct_mutex);
>   }
>   
>   static void fake_free_pages(struct drm_i915_gem_object *obj,
> @@ -880,6 +871,15 @@ static int __shrink_hole(struct drm_i915_private *i915,
>   		i915_vma_unpin(vma);
>   		addr += size;
>   
> +		/*
> +		 * Since we are injecting allocation faults at random intervals,
> +		 * wait for this allocation to complete before we change the
> +		 * faultinjection.
> +		 */
> +		err = i915_vma_sync(vma);
> +		if (err)
> +			break;
> +
>   		if (igt_timeout(end_time,
>   				"%s timed out at ofset %llx [%llx - %llx]\n",
>   				__func__, addr, hole_start, hole_end)) {
> @@ -1013,21 +1013,19 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
>   	if (IS_ERR(file))
>   		return PTR_ERR(file);
>   
> -	mutex_lock(&dev_priv->drm.struct_mutex);
>   	ppgtt = i915_ppgtt_create(dev_priv);
>   	if (IS_ERR(ppgtt)) {
>   		err = PTR_ERR(ppgtt);
> -		goto out_unlock;
> +		goto out_free;
>   	}
>   	GEM_BUG_ON(offset_in_page(ppgtt->vm.total));
> -	GEM_BUG_ON(ppgtt->vm.closed);
> +	GEM_BUG_ON(!atomic_read(&ppgtt->vm.open));
>   
>   	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
>   
>   	i915_vm_put(&ppgtt->vm);
> -out_unlock:
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
>   
> +out_free:
>   	mock_file_free(dev_priv, file);
>   	return err;
>   }
> @@ -1090,7 +1088,6 @@ static int exercise_ggtt(struct drm_i915_private *i915,
>   	IGT_TIMEOUT(end_time);
>   	int err = 0;
>   
> -	mutex_lock(&i915->drm.struct_mutex);
>   restart:
>   	list_sort(NULL, &ggtt->vm.mm.hole_stack, sort_holes);
>   	drm_mm_for_each_hole(node, &ggtt->vm.mm, hole_start, hole_end) {
> @@ -1111,7 +1108,6 @@ static int exercise_ggtt(struct drm_i915_private *i915,
>   		last = hole_end;
>   		goto restart;
>   	}
> -	mutex_unlock(&i915->drm.struct_mutex);
>   
>   	return err;
>   }
> @@ -1153,13 +1149,9 @@ static int igt_ggtt_page(void *arg)
>   	unsigned int *order, n;
>   	int err;
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> -
>   	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
> -	if (IS_ERR(obj)) {
> -		err = PTR_ERR(obj);
> -		goto out_unlock;
> -	}
> +	if (IS_ERR(obj))
> +		return PTR_ERR(obj);
>   
>   	err = i915_gem_object_pin_pages(obj);
>   	if (err)
> @@ -1227,8 +1219,6 @@ static int igt_ggtt_page(void *arg)
>   	i915_gem_object_unpin_pages(obj);
>   out_free:
>   	i915_gem_object_put(obj);
> -out_unlock:
> -	mutex_unlock(&i915->drm.struct_mutex);
>   	return err;
>   }
>   
> @@ -1239,6 +1229,9 @@ static void track_vma_bind(struct i915_vma *vma)
>   	atomic_inc(&obj->bind_count); /* track for eviction later */
>   	__i915_gem_object_pin_pages(obj);
>   
> +	GEM_BUG_ON(vma->pages);
> +	atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
> +	__i915_gem_object_pin_pages(obj);
>   	vma->pages = obj->mm.pages;
>   
>   	mutex_lock(&vma->vm->mutex);
> @@ -1335,11 +1328,13 @@ static int igt_gtt_reserve(void *arg)
>   			goto out;
>   		}
>   
> +		mutex_lock(&ggtt->vm.mutex);
>   		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
>   					   obj->base.size,
>   					   total,
>   					   obj->cache_level,
>   					   0);
> +		mutex_unlock(&ggtt->vm.mutex);
>   		if (err) {
>   			pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n",
>   			       total, ggtt->vm.total, err);
> @@ -1385,11 +1380,13 @@ static int igt_gtt_reserve(void *arg)
>   			goto out;
>   		}
>   
> +		mutex_lock(&ggtt->vm.mutex);
>   		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
>   					   obj->base.size,
>   					   total,
>   					   obj->cache_level,
>   					   0);
> +		mutex_unlock(&ggtt->vm.mutex);
>   		if (err) {
>   			pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n",
>   			       total, ggtt->vm.total, err);
> @@ -1429,11 +1426,13 @@ static int igt_gtt_reserve(void *arg)
>   				       2*I915_GTT_PAGE_SIZE,
>   				       I915_GTT_MIN_ALIGNMENT);
>   
> +		mutex_lock(&ggtt->vm.mutex);
>   		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
>   					   obj->base.size,
>   					   offset,
>   					   obj->cache_level,
>   					   0);
> +		mutex_unlock(&ggtt->vm.mutex);
>   		if (err) {
>   			pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n",
>   			       total, ggtt->vm.total, err);
> @@ -1502,11 +1501,13 @@ static int igt_gtt_insert(void *arg)
>   
>   	/* Check a couple of obviously invalid requests */
>   	for (ii = invalid_insert; ii->size; ii++) {
> +		mutex_lock(&ggtt->vm.mutex);
>   		err = i915_gem_gtt_insert(&ggtt->vm, &tmp,
>   					  ii->size, ii->alignment,
>   					  I915_COLOR_UNEVICTABLE,
>   					  ii->start, ii->end,
>   					  0);
> +		mutex_unlock(&ggtt->vm.mutex);
>   		if (err != -ENOSPC) {
>   			pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n",
>   			       ii->size, ii->alignment, ii->start, ii->end,
> @@ -1542,10 +1543,12 @@ static int igt_gtt_insert(void *arg)
>   			goto out;
>   		}
>   
> +		mutex_lock(&ggtt->vm.mutex);
>   		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
>   					  obj->base.size, 0, obj->cache_level,
>   					  0, ggtt->vm.total,
>   					  0);
> +		mutex_unlock(&ggtt->vm.mutex);
>   		if (err == -ENOSPC) {
>   			/* maxed out the GGTT space */
>   			i915_gem_object_put(obj);
> @@ -1600,10 +1603,12 @@ static int igt_gtt_insert(void *arg)
>   			goto out;
>   		}
>   
> +		mutex_lock(&ggtt->vm.mutex);
>   		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
>   					  obj->base.size, 0, obj->cache_level,
>   					  0, ggtt->vm.total,
>   					  0);
> +		mutex_unlock(&ggtt->vm.mutex);
>   		if (err) {
>   			pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n",
>   			       total, ggtt->vm.total, err);
> @@ -1647,10 +1652,12 @@ static int igt_gtt_insert(void *arg)
>   			goto out;
>   		}
>   
> +		mutex_lock(&ggtt->vm.mutex);
>   		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
>   					  obj->base.size, 0, obj->cache_level,
>   					  0, ggtt->vm.total,
>   					  0);
> +		mutex_unlock(&ggtt->vm.mutex);
>   		if (err) {
>   			pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n",
>   			       total, ggtt->vm.total, err);
> @@ -1694,8 +1701,9 @@ int i915_gem_gtt_mock_selftests(void)
>   	}
>   	mock_init_ggtt(i915, ggtt);
>   
> -	mutex_lock(&i915->drm.struct_mutex);
>   	err = i915_subtests(tests, ggtt);
> +
> +	mutex_lock(&i915->drm.struct_mutex);
>   	mock_device_flush(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index b3688543ed7d..6b5b03081880 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -647,8 +647,15 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915)
>   	if (err)
>   		goto err;
>   
> +	/* Force the wait now to avoid including it in the benchmark */
> +	err = i915_vma_sync(vma);
> +	if (err)
> +		goto err_pin;
> +
>   	return vma;
>   
> +err_pin:
> +	i915_vma_unpin(vma);
>   err:
>   	i915_gem_object_put(obj);
>   	return ERR_PTR(err);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
> index 97752deecccb..0e4f66312b39 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_vma.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
> @@ -831,8 +831,9 @@ int i915_vma_mock_selftests(void)
>   	}
>   	mock_init_ggtt(i915, ggtt);
>   
> -	mutex_lock(&i915->drm.struct_mutex);
>   	err = i915_subtests(tests, ggtt);
> +
> +	mutex_lock(&i915->drm.struct_mutex);
>   	mock_device_flush(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   
> @@ -879,8 +880,6 @@ static int igt_vma_remapped_gtt(void *arg)
>   	if (IS_ERR(obj))
>   		return PTR_ERR(obj);
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> -
>   	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
>   
>   	for (t = types; *t; t++) {
> @@ -976,7 +975,6 @@ static int igt_vma_remapped_gtt(void *arg)
>   
>   out:
>   	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
> -	mutex_unlock(&i915->drm.struct_mutex);
>   	i915_gem_object_put(obj);
>   
>   	return err;
> 

The high level concept looks okay to me, and so do the low level details. 
But the patch is so huge that I cannot guarantee I haven't missed 
something in the middle. So for any hypothetical issue I missed in that 
last third, I put my faith in us catching and fixing it before release.
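For anyone else auditing it, the recurring pattern in the selftest
conversions is simply to take vm->mutex around the low-level GTT helpers
that previously relied on struct_mutex. A rough sketch of that shape (the
helper name is mine, not something from the patch):

static int reserve_node_locked(struct i915_address_space *vm,
			       struct drm_mm_node *node,
			       u64 size, u64 offset, unsigned long color)
{
	int err;

	/* vm->mutex now guards the drm_mm, instead of struct_mutex */
	mutex_lock(&vm->mutex);
	err = i915_gem_gtt_reserve(vm, node, size, offset, color, 0);
	mutex_unlock(&vm->mutex);

	return err;
}

The i915_gem_evict_*() and i915_gem_gtt_insert() call sites in the
selftests follow the same lock/call/unlock shape.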

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko

