[Intel-gfx] [PATCH 3/7] drm/i915: Convert fences to use a GGTT lock rather than struct_mutex
Daniel Vetter
daniel at ffwll.ch
Wed Jul 11 09:08:46 UTC 2018
On Wed, Jul 11, 2018 at 08:36:04AM +0100, Chris Wilson wrote:
> Introduce a new mutex to guard all of the vma operations within a vm (as
> opposed to the BKL struct_mutex) and start by using it to guard the
> fence operations for a GGTT VMA.
Commit message is a bit confusing, since you've already introduced this new
mutex in an earlier patch. Please change it to "Switch from dev->struct_mutex
to ggtt.vm->mutex" or similar ...
For the reviewers' benefit it would also be good to explain how this new
vm.mutex nests with the other locks here (dev->struct_mutex and rpm come to
mind, looking at the patch). Probably better here than in the docs, since
it's likely to get outdated there.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 9 ++-
> drivers/gpu/drm/i915/i915_gem.c | 11 +++-
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +-
> drivers/gpu/drm/i915/i915_gem_fence_reg.c | 68 +++++++++++++++++-----
> drivers/gpu/drm/i915/i915_vma.c | 12 ++--
> drivers/gpu/drm/i915/i915_vma.h | 23 +++++++-
> 6 files changed, 96 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 75ffed6a3f31..e2ba298a5d88 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -80,7 +80,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
>
> static char get_global_flag(struct drm_i915_gem_object *obj)
> {
> - return obj->userfault_count ? 'g' : ' ';
> + return READ_ONCE(obj->userfault_count) ? 'g' : ' ';
The userfault_count changes here (and below) look like misplaced hunks?
> }
>
> static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
> @@ -914,11 +914,10 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
>
> static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
> {
> - struct drm_i915_private *i915 = node_to_i915(m->private);
> - const struct i915_ggtt *ggtt = &i915->ggtt;
> + struct i915_ggtt *ggtt = &node_to_i915(m->private)->ggtt;
> int i, ret;
>
> - ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
> + ret = mutex_lock_interruptible(&ggtt->vm.mutex);
> if (ret)
> return ret;
>
> @@ -935,7 +934,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
> seq_putc(m, '\n');
> }
>
> - mutex_unlock(&i915->drm.struct_mutex);
> + mutex_unlock(&ggtt->vm.mutex);
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 356c86071ccc..cbcba613b175 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2193,8 +2193,8 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
> * requirement that operations to the GGTT be made holding the RPM
> * wakeref.
> */
> - lockdep_assert_held(&i915->drm.struct_mutex);
> intel_runtime_pm_get(i915);
> + mutex_lock(&i915->ggtt.vm.mutex);
>
> if (!obj->userfault_count)
> goto out;
> @@ -2211,6 +2211,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
> wmb();
>
> out:
> + mutex_unlock(&i915->ggtt.vm.mutex);
> intel_runtime_pm_put(i915);
> }
>
> @@ -2223,10 +2224,12 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
> /*
> * Only called during RPM suspend. All users of the userfault_list
> * must be holding an RPM wakeref to ensure that this can not
> - * run concurrently with themselves (and use the struct_mutex for
> + * run concurrently with themselves (and use the ggtt->mutex for
> * protection between themselves).
> */
I think the above comment change isn't correct, at least not at this stage:
all users of the userfault_list still use dev->struct_mutex, not vm.mutex. I
guess we could move that over to ggtt.vm.mutex eventually, but this patch
doesn't do that.
>
> + mutex_lock(&i915->ggtt.vm.mutex);
I also don't think you need to take the lock here just yet. For rpm we keep
the fences around (we restore them in rpm_resume after all); we just nuke the
mmap ptes.
> +
> list_for_each_entry_safe(obj, on,
> &i915->mm.userfault_list, userfault_link)
> __i915_gem_object_release_mmap(obj);
> @@ -2255,6 +2258,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
> GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
> reg->dirty = true;
> }
> +
> + mutex_unlock(&i915->ggtt.vm.mutex);
> }
>
> static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
> @@ -4861,7 +4866,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
> mutex_unlock(&i915->drm.struct_mutex);
>
> GEM_BUG_ON(obj->bind_count);
> - GEM_BUG_ON(obj->userfault_count);
> + GEM_BUG_ON(READ_ONCE(obj->userfault_count));
Another misplaced hunk?
> GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
> GEM_BUG_ON(!list_empty(&obj->lut_list));
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3f0c612d42e7..e1d65b165bf1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -426,8 +426,11 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
> {
> GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
>
> - if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
> + if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) {
> + mutex_lock(&vma->vm->mutex);
> __i915_vma_unpin_fence(vma);
> + mutex_unlock(&vma->vm->mutex);
> + }
>
> __i915_vma_unpin(vma);
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index 60fa5a8276cb..9313a8e675c8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -188,6 +188,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
> static void fence_write(struct drm_i915_fence_reg *fence,
> struct i915_vma *vma)
> {
> + lockdep_assert_held(&fence->ggtt->vm.mutex);
> +
> /* Previous access through the fence register is marshalled by
> * the mb() inside the fault handlers (i915_gem_release_mmaps)
> * and explicitly managed for internal users.
> @@ -213,6 +215,8 @@ static int fence_update(struct drm_i915_fence_reg *fence,
> struct i915_ggtt *ggtt = fence->ggtt;
> int ret;
>
> + lockdep_assert_held(&ggtt->vm.mutex);
> +
> if (vma) {
> if (!i915_vma_is_map_and_fenceable(vma))
> return -EINVAL;
> @@ -289,14 +293,39 @@ static int fence_update(struct drm_i915_fence_reg *fence,
> int i915_vma_put_fence(struct i915_vma *vma)
> {
> struct drm_i915_fence_reg *fence = vma->fence;
> + int err;
>
> if (!fence)
> return 0;
>
> - if (fence->pin_count)
> - return -EBUSY;
> + mutex_lock(&vma->vm->mutex);
> + if (!fence->pin_count)
> + err = fence_update(fence, NULL);
> + else
> + err = -EBUSY;
> + mutex_unlock(&vma->vm->mutex);
>
> - return fence_update(fence, NULL);
> + return err;
> +}
> +
> +void i915_vma_revoke_fence(struct i915_vma *vma)
> +{
> + struct drm_i915_fence_reg *fence;
> +
> + GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> + lockdep_assert_held(&vma->vm->mutex);
> +
> + fence = vma->fence;
> + if (!fence)
> + return;
> +
> + GEM_BUG_ON(fence->pin_count);
> +
> + list_move(&fence->link, &i915_vm_to_ggtt(vma->vm)->fence_list);
> + vma->fence = NULL;
> +
> + fence_write(fence, NULL);
> + fence->vma = NULL;
Please use fence_update(fence, NULL) here instead of open-coding it (rough
sketch below the hunk) - or explain why that doesn't work, because I'm not
following why you're doing it this way.
> }
>
> static struct drm_i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
> @@ -337,8 +366,7 @@ static struct drm_i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
> *
> * 0 on success, negative error code on failure.
> */
> -int
> -i915_vma_pin_fence(struct i915_vma *vma)
> +int __i915_vma_pin_fence(struct i915_vma *vma)
I don't (yet) see a caller of this new __ version ...
> {
> struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
> struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
> @@ -349,6 +377,7 @@ i915_vma_pin_fence(struct i915_vma *vma)
> * must keep the device awake whilst using the fence.
> */
> assert_rpm_wakelock_held(ggtt->vm.i915);
> + lockdep_assert_held(&ggtt->vm.mutex);
>
> /* Just update our place in the LRU if our fence is getting reused. */
> if (vma->fence) {
> @@ -399,27 +428,34 @@ i915_reserve_fence(struct drm_i915_private *i915)
> int count;
> int ret;
>
> - lockdep_assert_held(&i915->drm.struct_mutex);
> + mutex_lock(&i915->ggtt.vm.mutex);
>
> /* Keep at least one fence available for the display engine. */
> count = 0;
> list_for_each_entry(fence, &ggtt->fence_list, link)
> count += !fence->pin_count;
> - if (count <= 1)
> - return ERR_PTR(-ENOSPC);
> + if (count <= 1) {
> + fence = ERR_PTR(-ENOSPC);
> + goto out_unlock;
> + }
>
> fence = fence_find(ggtt);
> if (IS_ERR(fence))
> - return fence;
> + goto out_unlock;
>
> if (fence->vma) {
> /* Force-remove fence from VMA */
> ret = fence_update(fence, NULL);
> - if (ret)
> - return ERR_PTR(ret);
> + if (ret) {
> + fence = ERR_PTR(ret);
> + goto out_unlock;
> + }
> }
>
> list_del(&fence->link);
> +
> +out_unlock:
> + mutex_unlock(&i915->ggtt.vm.mutex);
> return fence;
> }
>
> @@ -431,9 +467,9 @@ i915_reserve_fence(struct drm_i915_private *i915)
> */
> void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
> {
> - lockdep_assert_held(&fence->ggtt->vm.i915->drm.struct_mutex);
> -
> + mutex_lock(&fence->ggtt->vm.mutex);
> list_add(&fence->link, &fence->ggtt->fence_list);
> + mutex_unlock(&fence->ggtt->vm.mutex);
> }
>
> /**
> @@ -451,8 +487,7 @@ void i915_gem_revoke_fences(struct drm_i915_private *i915)
> struct i915_ggtt *ggtt = &i915->ggtt;
> int i;
>
> - lockdep_assert_held(&i915->drm.struct_mutex);
> -
> + mutex_lock(&ggtt->vm.mutex);
> for (i = 0; i < ggtt->num_fence_regs; i++) {
> struct drm_i915_fence_reg *fence = &ggtt->fence_regs[i];
>
> @@ -461,6 +496,7 @@ void i915_gem_revoke_fences(struct drm_i915_private *i915)
> if (fence->vma)
> i915_vma_revoke_mmap(fence->vma);
> }
> + mutex_unlock(&ggtt->vm.mutex);
> }
>
> /**
> @@ -476,6 +512,7 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
> struct i915_ggtt *ggtt = &i915->ggtt;
> int i;
>
> + mutex_lock(&ggtt->vm.mutex);
> for (i = 0; i < ggtt->num_fence_regs; i++) {
> struct drm_i915_fence_reg *reg = &ggtt->fence_regs[i];
> struct i915_vma *vma = reg->vma;
> @@ -498,6 +535,7 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
> fence_write(reg, vma);
> reg->vma = vma;
> }
> + mutex_unlock(&ggtt->vm.mutex);
> }
>
> /**
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index ed4e0fb558f7..045b75d79f60 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -860,7 +860,7 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
> struct drm_vma_offset_node *node = &vma->obj->base.vma_node;
> u64 vma_offset;
>
> - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> + lockdep_assert_held(&vma->vm->mutex);
>
> if (!i915_vma_has_userfault(vma))
> return;
> @@ -1082,6 +1082,8 @@ int i915_vma_unbind(struct i915_vma *vma)
> return 0;
>
> if (i915_vma_is_map_and_fenceable(vma)) {
> + mutex_lock(&vma->vm->mutex);
> +
> /*
> * Check that we have flushed all writes through the GGTT
> * before the unbind, other due to non-strict nature of those
> @@ -1091,16 +1093,14 @@ int i915_vma_unbind(struct i915_vma *vma)
> i915_vma_flush_writes(vma);
> GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
>
> - /* release the fence reg _after_ flushing */
> - ret = i915_vma_put_fence(vma);
> - if (ret)
> - return ret;
> -
> /* Force a pagefault for domain tracking on next user access */
> i915_vma_revoke_mmap(vma);
> + i915_vma_revoke_fence(vma);
>
> __i915_vma_iounmap(vma);
> vma->flags &= ~I915_VMA_CAN_FENCE;
> +
> + mutex_unlock(&vma->vm->mutex);
> }
> GEM_BUG_ON(vma->fence);
> GEM_BUG_ON(i915_vma_has_userfault(vma));
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index f06d66377107..422d90c686b5 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -190,6 +190,7 @@ static inline bool i915_vma_set_userfault(struct i915_vma *vma)
>
> static inline void i915_vma_unset_userfault(struct i915_vma *vma)
> {
> + lockdep_assert_held(&vma->vm->mutex);
> return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
> }
>
> @@ -378,11 +379,26 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
> *
> * True if the vma has a fence, false otherwise.
> */
> -int i915_vma_pin_fence(struct i915_vma *vma);
> +int __i915_vma_pin_fence(struct i915_vma *vma);
> +static inline int i915_vma_pin_fence(struct i915_vma *vma)
> +{
> + int err;
> +
> + mutex_lock(&vma->vm->mutex);
> + err = __i915_vma_pin_fence(vma);
> + mutex_unlock(&vma->vm->mutex);
> +
> + return err;
> +}
> +
> int __must_check i915_vma_put_fence(struct i915_vma *vma);
> +void i915_vma_revoke_fence(struct i915_vma *vma);
>
> static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
> {
> + lockdep_assert_held(&vma->vm->mutex);
> + GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> +
> GEM_BUG_ON(vma->fence->pin_count <= 0);
> vma->fence->pin_count--;
> }
> @@ -399,8 +415,11 @@ static inline void
> i915_vma_unpin_fence(struct i915_vma *vma)
> {
> /* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */
> - if (vma->fence)
> + if (vma->fence) {
I think you need to take the lock here outside of the vma->fence check. At
least I understood the fancy new locking rules to mean that vma->fence is
protected by vma->vm.mutex. Since vmas can't move between vms this is safe
(rough sketch below the quoted hunk).
Would be good to update the kerneldoc/comments for that accordingly too.
> + mutex_lock(&vma->vm->mutex);
> __i915_vma_unpin_fence(vma);
> + mutex_unlock(&vma->vm->mutex);
> + }
> }
>
> void i915_vma_parked(struct drm_i915_private *i915);
Ok the locking changes in i915_gem_fence_reg.c look good, but git grep
says there's more:
- describe_obj in i915_debugfs.c currently only relies on dev->struct_mutex,
but really wants vm.mutex on top now too, at least for the ggtt.
- i915_gem_fence_regs_info in i915_debugfs.c is already fixed in this
patch.
- I've ignored i915_gpu_error.c as usual :-)
- intel_display.c is annoying, since the way it tries to figure out
whether it got a fence or not is racy. That one probably wants the new
__i915_vma_pin_fence version that I couldn't find a caller for above ...
- intel_fbc.c looks safe due to the PLANE_HAS_FENCE flags (minus the issue
in intel_display.c)
- I found one more access to vma->fence->id in the selftests, which is
really the same thing we do in i915_gpu_error.c. If you feel like it, an
i915_vma_fence_id() static inline that does the correct READ_ONCE dance
on vma->fence could be useful for the paranoid (rough sketch below).
Absolutely loved the naming collision with dma_fence, and my grep-fu isn't
good enough to filter those out, so I've probably missed some (but tried
rather hard not to).
Besides these small nits and questions this looks all good.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch