[Intel-gfx] [PATCH 3/7] drm/i915: Convert fences to use a GGTT lock rather than struct_mutex
Daniel Vetter
daniel at ffwll.ch
Wed Jul 11 09:08:46 UTC 2018
On Wed, Jul 11, 2018 at 08:36:04AM +0100, Chris Wilson wrote:
> Introduce a new mutex to guard all of the vma operations within a vm (as
> opposed to the BKL struct_mutex) and start by using it to guard the
> fence operations for a GGTT VMA.
Commit message is a bit confusing, since you've already introduced this new
mutex in an earlier patch. Please change it to "Switch from dev->struct_mutex
to ggtt.vm->mutex" or similar ...
For the reviewers' benefit it would also be good to explain how this new
vm.mutex nests with the other locks here (dev->struct_mutex and rpm come to
mind, looking at the patch). Probably better here than in the docs, since
it's likely to get outdated there.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 9 ++-
> drivers/gpu/drm/i915/i915_gem.c | 11 +++-
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +-
> drivers/gpu/drm/i915/i915_gem_fence_reg.c | 68 +++++++++++++++++-----
> drivers/gpu/drm/i915/i915_vma.c | 12 ++--
> drivers/gpu/drm/i915/i915_vma.h | 23 +++++++-
> 6 files changed, 96 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 75ffed6a3f31..e2ba298a5d88 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -80,7 +80,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
>
> static char get_global_flag(struct drm_i915_gem_object *obj)
> {
> - return obj->userfault_count ? 'g' : ' ';
> + return READ_ONCE(obj->userfault_count) ? 'g' : ' ';
The userfault_count changes here (and below) look like misplaced hunks?
> }
>
> static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
> @@ -914,11 +914,10 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
>
> static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
> {
> - struct drm_i915_private *i915 = node_to_i915(m->private);
> - const struct i915_ggtt *ggtt = &i915->ggtt;
> + struct i915_ggtt *ggtt = &node_to_i915(m->private)->ggtt;
> int i, ret;
>
> - ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
> + ret = mutex_lock_interruptible(&ggtt->vm.mutex);
> if (ret)
> return ret;
>
> @@ -935,7 +934,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
> seq_putc(m, '\n');
> }
>
> - mutex_unlock(&i915->drm.struct_mutex);
> + mutex_unlock(&ggtt->vm.mutex);
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 356c86071ccc..cbcba613b175 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2193,8 +2193,8 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
> * requirement that operations to the GGTT be made holding the RPM
> * wakeref.
> */
> - lockdep_assert_held(&i915->drm.struct_mutex);
> intel_runtime_pm_get(i915);
> + mutex_lock(&i915->ggtt.vm.mutex);
>
> if (!obj->userfault_count)
> goto out;
> @@ -2211,6 +2211,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
> wmb();
>
> out:
> + mutex_unlock(&i915->ggtt.vm.mutex);
> intel_runtime_pm_put(i915);
> }
>
> @@ -2223,10 +2224,12 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
> /*
> * Only called during RPM suspend. All users of the userfault_list
> * must be holding an RPM wakeref to ensure that this can not
> - * run concurrently with themselves (and use the struct_mutex for
> + * run concurrently with themselves (and use the ggtt->mutex for
> * protection between themselves).
> */
I think the above comment change isn't correct, at least not at this stage:
all users of the userfault_list still use dev->struct_mutex, not vm.mutex. I
guess we could move that over to ggtt.vm.mutex eventually, but this patch
doesn't do that.
>
> + mutex_lock(&i915->ggtt.vm.mutex);
I also don't think you need to take the lock here just yet. For rpm we keep
the fences around (we restore them in rpm_resume after all); we just nuke the
mmap ptes.
> +
> list_for_each_entry_safe(obj, on,
> &i915->mm.userfault_list, userfault_link)
> __i915_gem_object_release_mmap(obj);
> @@ -2255,6 +2258,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
> GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
> reg->dirty = true;
> }
> +
> + mutex_unlock(&i915->ggtt.vm.mutex);
> }
>
> static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
> @@ -4861,7 +4866,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
> mutex_unlock(&i915->drm.struct_mutex);
>
> GEM_BUG_ON(obj->bind_count);
> - GEM_BUG_ON(obj->userfault_count);
> + GEM_BUG_ON(READ_ONCE(obj->userfault_count));
Another misplaced hunk?
> GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
> GEM_BUG_ON(!list_empty(&obj->lut_list));
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3f0c612d42e7..e1d65b165bf1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -426,8 +426,11 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
> {
> GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
>
> - if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
> + if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) {
> + mutex_lock(&vma->vm->mutex);
> __i915_vma_unpin_fence(vma);
> + mutex_unlock(&vma->vm->mutex);
> + }
>
> __i915_vma_unpin(vma);
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index 60fa5a8276cb..9313a8e675c8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -188,6 +188,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
> static void fence_write(struct drm_i915_fence_reg *fence,
> struct i915_vma *vma)
> {
> + lockdep_assert_held(&fence->ggtt->vm.mutex);
> +
> /* Previous access through the fence register is marshalled by
> * the mb() inside the fault handlers (i915_gem_release_mmaps)
> * and explicitly managed for internal users.
> @@ -213,6 +215,8 @@ static int fence_update(struct drm_i915_fence_reg *fence,
> struct i915_ggtt *ggtt = fence->ggtt;
> int ret;
>
> + lockdep_assert_held(&ggtt->vm.mutex);
> +
> if (vma) {
> if (!i915_vma_is_map_and_fenceable(vma))
> return -EINVAL;
> @@ -289,14 +293,39 @@ static int fence_update(struct drm_i915_fence_reg *fence,
> int i915_vma_put_fence(struct i915_vma *vma)
> {
> struct drm_i915_fence_reg *fence = vma->fence;
> + int err;
>
> if (!fence)
> return 0;
>
> - if (fence->pin_count)
> - return -EBUSY;
> + mutex_lock(&vma->vm->mutex);
> + if (!fence->pin_count)
> + err = fence_update(fence, NULL);
> + else
> + err = -EBUSY;
> + mutex_unlock(&vma->vm->mutex);
>
> - return fence_update(fence, NULL);
> + return err;
> +}
> +
> +void i915_vma_revoke_fence(struct i915_vma *vma)
> +{
> + struct drm_i915_fence_reg *fence;
> +
> + GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> + lockdep_assert_held(&vma->vm->mutex);
> +
> + fence = vma->fence;
> + if (!fence)
> + return;
> +
> + GEM_BUG_ON(fence->pin_count);
> +
> + list_move(&fence->link, &i915_vm_to_ggtt(vma->vm)->fence_list);
> + vma->fence = NULL;
> +
> + fence_write(fence, NULL);
> + fence->vma = NULL;
Please use fence_update(fence, NULL) here instead of open-coding it (rough
sketch below the hunk) - or explain why that doesn't work, because I'm not
following why you're doing it this way.
> }
>
> static struct drm_i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
> @@ -337,8 +366,7 @@ static struct drm_i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
> *
> * 0 on success, negative error code on failure.
> */
> -int
> -i915_vma_pin_fence(struct i915_vma *vma)
> +int __i915_vma_pin_fence(struct i915_vma *vma)
I don't (yet) see a caller of this new __ version ...
> {
> struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
> struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
> @@ -349,6 +377,7 @@ i915_vma_pin_fence(struct i915_vma *vma)
> * must keep the device awake whilst using the fence.
> */
> assert_rpm_wakelock_held(ggtt->vm.i915);
> + lockdep_assert_held(&ggtt->vm.mutex);
>
> /* Just update our place in the LRU if our fence is getting reused. */
> if (vma->fence) {
> @@ -399,27 +428,34 @@ i915_reserve_fence(struct drm_i915_private *i915)
> int count;
> int ret;
>
> - lockdep_assert_held(&i915->drm.struct_mutex);
> + mutex_lock(&i915->ggtt.vm.mutex);
>
> /* Keep at least one fence available for the display engine. */
> count = 0;
> list_for_each_entry(fence, &ggtt->fence_list, link)
> count += !fence->pin_count;
> - if (count <= 1)
> - return ERR_PTR(-ENOSPC);
> + if (count <= 1) {
> + fence = ERR_PTR(-ENOSPC);
> + goto out_unlock;
> + }
>
> fence = fence_find(ggtt);
> if (IS_ERR(fence))
> - return fence;
> + goto out_unlock;
>
> if (fence->vma) {
> /* Force-remove fence from VMA */
> ret = fence_update(fence, NULL);
> - if (ret)
> - return ERR_PTR(ret);
> + if (ret) {
> + fence = ERR_PTR(ret);
> + goto out_unlock;
> + }
> }
>
> list_del(&fence->link);
> +
> +out_unlock:
> + mutex_unlock(&i915->ggtt.vm.mutex);
> return fence;
> }
>
> @@ -431,9 +467,9 @@ i915_reserve_fence(struct drm_i915_private *i915)
> */
> void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
> {
> - lockdep_assert_held(&fence->ggtt->vm.i915->drm.struct_mutex);
> -
> + mutex_lock(&fence->ggtt->vm.mutex);
> list_add(&fence->link, &fence->ggtt->fence_list);
> + mutex_unlock(&fence->ggtt->vm.mutex);
> }
>
> /**
> @@ -451,8 +487,7 @@ void i915_gem_revoke_fences(struct drm_i915_private *i915)
> struct i915_ggtt *ggtt = &i915->ggtt;
> int i;
>
> - lockdep_assert_held(&i915->drm.struct_mutex);
> -
> + mutex_lock(&ggtt->vm.mutex);
> for (i = 0; i < ggtt->num_fence_regs; i++) {
> struct drm_i915_fence_reg *fence = &ggtt->fence_regs[i];
>
> @@ -461,6 +496,7 @@ void i915_gem_revoke_fences(struct drm_i915_private *i915)
> if (fence->vma)
> i915_vma_revoke_mmap(fence->vma);
> }
> + mutex_unlock(&ggtt->vm.mutex);
> }
>
> /**
> @@ -476,6 +512,7 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
> struct i915_ggtt *ggtt = &i915->ggtt;
> int i;
>
> + mutex_lock(&ggtt->vm.mutex);
> for (i = 0; i < ggtt->num_fence_regs; i++) {
> struct drm_i915_fence_reg *reg = &ggtt->fence_regs[i];
> struct i915_vma *vma = reg->vma;
> @@ -498,6 +535,7 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
> fence_write(reg, vma);
> reg->vma = vma;
> }
> + mutex_unlock(&ggtt->vm.mutex);
> }
>
> /**
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index ed4e0fb558f7..045b75d79f60 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -860,7 +860,7 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
> struct drm_vma_offset_node *node = &vma->obj->base.vma_node;
> u64 vma_offset;
>
> - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> + lockdep_assert_held(&vma->vm->mutex);
>
> if (!i915_vma_has_userfault(vma))
> return;
> @@ -1082,6 +1082,8 @@ int i915_vma_unbind(struct i915_vma *vma)
> return 0;
>
> if (i915_vma_is_map_and_fenceable(vma)) {
> + mutex_lock(&vma->vm->mutex);
> +
> /*
> * Check that we have flushed all writes through the GGTT
> * before the unbind, other due to non-strict nature of those
> @@ -1091,16 +1093,14 @@ int i915_vma_unbind(struct i915_vma *vma)
> i915_vma_flush_writes(vma);
> GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
>
> - /* release the fence reg _after_ flushing */
> - ret = i915_vma_put_fence(vma);
> - if (ret)
> - return ret;
> -
> /* Force a pagefault for domain tracking on next user access */
> i915_vma_revoke_mmap(vma);
> + i915_vma_revoke_fence(vma);
>
> __i915_vma_iounmap(vma);
> vma->flags &= ~I915_VMA_CAN_FENCE;
> +
> + mutex_unlock(&vma->vm->mutex);
> }
> GEM_BUG_ON(vma->fence);
> GEM_BUG_ON(i915_vma_has_userfault(vma));
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index f06d66377107..422d90c686b5 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -190,6 +190,7 @@ static inline bool i915_vma_set_userfault(struct i915_vma *vma)
>
> static inline void i915_vma_unset_userfault(struct i915_vma *vma)
> {
> + lockdep_assert_held(&vma->vm->mutex);
> return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
> }
>
> @@ -378,11 +379,26 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
> *
> * True if the vma has a fence, false otherwise.
> */
> -int i915_vma_pin_fence(struct i915_vma *vma);
> +int __i915_vma_pin_fence(struct i915_vma *vma);
> +static inline int i915_vma_pin_fence(struct i915_vma *vma)
> +{
> + int err;
> +
> + mutex_lock(&vma->vm->mutex);
> + err = __i915_vma_pin_fence(vma);
> + mutex_unlock(&vma->vm->mutex);
> +
> + return err;
> +}
> +
> int __must_check i915_vma_put_fence(struct i915_vma *vma);
> +void i915_vma_revoke_fence(struct i915_vma *vma);
>
> static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
> {
> + lockdep_assert_held(&vma->vm->mutex);
> + GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> +
> GEM_BUG_ON(vma->fence->pin_count <= 0);
> vma->fence->pin_count--;
> }
> @@ -399,8 +415,11 @@ static inline void
> i915_vma_unpin_fence(struct i915_vma *vma)
> {
> /* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */
> - if (vma->fence)
> + if (vma->fence) {
I think you need to take the lock here outside of the vma->fence check. At
least I understood the fancy new locking rules to mean that vma->fence is
protected by vma->vm.mutex. Since vmas can't move between vms this is safe
(rough sketch below the quoted hunk).
Would be good to update the kerneldoc/comments for that accordingly too.
> + mutex_lock(&vma->vm->mutex);
> __i915_vma_unpin_fence(vma);
> + mutex_unlock(&vma->vm->mutex);
> + }
> }
>
> void i915_vma_parked(struct drm_i915_private *i915);
Ok the locking changes in i915_gem_fence_reg.c look good, but git grep
says there's more:
- describe_obj in i915_debugfs.c currently only relies on dev->struct_mutex,
but really wants vm.mutex on top now too, at least for the ggtt.
- i915_gem_fence_regs_info in i915_debugfs.c is already fixed in this
patch.
- I've ignored i915_gpu_error.c as usual :-)
- intel_display.c is annoying, since the way it tries to figure out
whether it got a fence or not is racy. That one probably wants the new
__i915_vma_pin_fence version that I couldn't find a caller for above ...
- intel_fbc.c looks safe due to the PLANE_HAS_FENCE flags (minus the issue
in intel_display.c)
- I found one more access to vma->fence->id in the selftests, which is
really the same thing we do in i915_gpu_error.c. If you feel like it, an
i915_vma_fence_id() static inline that does the correct READ_ONCE dance
on vma->fence could be useful for the paranoid (rough sketch below).
Absolutely loved the naming collision with dma_fence, and my grep-fu isn't
good enough to filter those out, so I've probably missed some (but tried
rather hard not to).
Besides these small nits and questions this looks all good.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch