[PATCH] drm/ttm: remove ttm_bo_wait_unreserved

Thomas Hellström (VMware) thomas_os at shipmail.org
Thu Aug 22 09:53:40 UTC 2019


On 8/22/19 10:47 AM, Daniel Vetter wrote:
> On Thu, Aug 22, 2019 at 9:56 AM Koenig, Christian
> <Christian.Koenig at amd.com> wrote:
>> Am 22.08.19 um 08:49 schrieb Daniel Vetter:
>>> With nouveau fixed all ttm-using drivers have the correct nesting of
>>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>>
>>> Assuming I didn't screw up anything with my audit of course.
>>>
>>> v2:
>>> - Dont forget wu_mutex (Christian König)
>>> - Keep the mmap_sem-less wait optimization (Thomas)
>>> - Use _lock_interruptible to be good citizens (Thomas)
>>>
>>> Reviewed-by: Christian König <christian.koenig at amd.com>
>>> Signed-off-by: Daniel Vetter <daniel.vetter at intel.com>
>>> Cc: Christian Koenig <christian.koenig at amd.com>
>>> Cc: Huang Rui <ray.huang at amd.com>
>>> Cc: Gerd Hoffmann <kraxel at redhat.com>
>>> Cc: "VMware Graphics" <linux-graphics-maintainer at vmware.com>
>>> Cc: Thomas Hellstrom <thellstrom at vmware.com>
>>> ---
>>>    drivers/gpu/drm/ttm/ttm_bo.c      | 36 -------------------------------
>>>    drivers/gpu/drm/ttm/ttm_bo_util.c |  1 -
>>>    drivers/gpu/drm/ttm/ttm_bo_vm.c   | 18 +++++-----------
>>>    include/drm/ttm/ttm_bo_api.h      |  4 ----
>>>    4 files changed, 5 insertions(+), 54 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 20ff56f27aa4..d1ce5d315d5b 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -162,7 +162,6 @@ static void ttm_bo_release_list(struct kref *list_kref)
>>>        dma_fence_put(bo->moving);
>>>        if (!ttm_bo_uses_embedded_gem_object(bo))
>>>                dma_resv_fini(&bo->base._resv);
>>> -     mutex_destroy(&bo->wu_mutex);
>>>        bo->destroy(bo);
>>>        ttm_mem_global_free(bdev->glob->mem_glob, acc_size);
>>>    }
>>> @@ -1319,7 +1318,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
>>>        INIT_LIST_HEAD(&bo->ddestroy);
>>>        INIT_LIST_HEAD(&bo->swap);
>>>        INIT_LIST_HEAD(&bo->io_reserve_lru);
>>> -     mutex_init(&bo->wu_mutex);
>>>        bo->bdev = bdev;
>>>        bo->type = type;
>>>        bo->num_pages = num_pages;
>>> @@ -1954,37 +1952,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
>>>                ;
>>>    }
>>>    EXPORT_SYMBOL(ttm_bo_swapout_all);
>>> -
>>> -/**
>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
>>> - * unreserved
>>> - *
>>> - * @bo: Pointer to buffer
>>> - */
>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
>>> -{
>>> -     int ret;
>>> -
>>> -     /*
>>> -      * In the absense of a wait_unlocked API,
>>> -      * Use the bo::wu_mutex to avoid triggering livelocks due to
>>> -      * concurrent use of this function. Note that this use of
>>> -      * bo::wu_mutex can go away if we change locking order to
>>> -      * mmap_sem -> bo::reserve.
>>> -      */
>>> -     ret = mutex_lock_interruptible(&bo->wu_mutex);
>>> -     if (unlikely(ret != 0))
>>> -             return -ERESTARTSYS;
>>> -     if (!dma_resv_is_locked(bo->base.resv))
>>> -             goto out_unlock;
>>> -     ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
>>> -     if (ret == -EINTR)
>>> -             ret = -ERESTARTSYS;
>>> -     if (unlikely(ret != 0))
>>> -             goto out_unlock;
>>> -     dma_resv_unlock(bo->base.resv);
>>> -
>>> -out_unlock:
>>> -     mutex_unlock(&bo->wu_mutex);
>>> -     return ret;
>>> -}
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
>>> index fe81c565e7ef..82ea26a49959 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
>>> @@ -508,7 +508,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>>>        INIT_LIST_HEAD(&fbo->base.lru);
>>>        INIT_LIST_HEAD(&fbo->base.swap);
>>>        INIT_LIST_HEAD(&fbo->base.io_reserve_lru);
>>> -     mutex_init(&fbo->base.wu_mutex);
>>>        fbo->base.moving = NULL;
>>>        drm_vma_node_reset(&fbo->base.base.vma_node);
>>>        atomic_set(&fbo->base.cpu_writers, 0);
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> index 76eedb963693..a61a35e57d1c 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> @@ -125,30 +125,22 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>>>                &bdev->man[bo->mem.mem_type];
>>>        struct vm_area_struct cvma;
>>>
>>> -     /*
>>> -      * Work around locking order reversal in fault / nopfn
>>> -      * between mmap_sem and bo_reserve: Perform a trylock operation
>>> -      * for reserve, and if it fails, retry the fault after waiting
>>> -      * for the buffer to become unreserved.
>>> -      */
>>>        if (unlikely(!dma_resv_trylock(bo->base.resv))) {
>>>                if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>>>                        if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
>> Not an expert on fault handling, but shouldn't this now be one if?
>>
>> E.g. if FAULT_FLAG_RETRY_NOWAIT is set we should return VM_FAULT_NOPAGE
>> instead of VM_FAULT_RETRY.
> Honestly I have no idea at all about this stuff. I just learned about
> the mmap_sem less retry now that Thomas pointed it out, and I have no
> idea how anything else here works. My approach has always been to just
> throw ridiculous amounts of really nasty tests at fault handling
> (including handling our own gtt mmaps to copy*user in relocs or gup
> for userptr and all that), and leave it at that :-)
>
>> But really take that with a grain of salt,
> No idea either. It should be functionally equivalent to what was there
> before, except we now have the full blocking wait for the mutex
> instead of busy-spinning on NO_PAGE (with the wait_unreserved mixed in
> every odd fault I guess?). All over my head I'd say ...

To be honest I don't remember the difference between returning VM_FAULT_RETRY 
with !FAULT_FLAG_RETRY_NOWAIT and just returning VM_FAULT_NOPAGE. It appears 
most users and TTM use the former, while shmem uses the latter.

The detailed FAULT_RETRY semantics are pretty undocumented and require 
diving into the mm system to get the full picture.

BTW it looks to me like vgem and vmks have got VM_FAULT_RETRY wrong, 
since they might return it without ALLOW_RETRY, and if 
FAULT_FLAG_RETRY_NOWAIT is true they should drop the mmap_sem; 
otherwise things will go really bad.

/Thomas


>
> Cheers, Daniel
>
>> Christian.
>>
>>>                                ttm_bo_get(bo);
>>>                                up_read(&vmf->vma->vm_mm->mmap_sem);
>>> -                             (void) ttm_bo_wait_unreserved(bo);
>>> +                             if (!dma_resv_lock_interruptible(bo->base.resv,
>>> +                                                              NULL))
>>> +                                     dma_resv_unlock(bo->base.resv);
>>>                                ttm_bo_put(bo);
>>>                        }
>>>
>>>                        return VM_FAULT_RETRY;
>>>                }
>>>
>>> -             /*
>>> -              * If we'd want to change locking order to
>>> -              * mmap_sem -> bo::reserve, we'd use a blocking reserve here
>>> -              * instead of retrying the fault...
>>> -              */
>>> -             return VM_FAULT_NOPAGE;
>>> +             if (dma_resv_lock_interruptible(bo->base.resv, NULL))
>>> +                     return VM_FAULT_NOPAGE;
>>>        }
>>>
>>>        /*
>>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>>> index 43c4929a2171..21c7d0d28757 100644
>>> --- a/include/drm/ttm/ttm_bo_api.h
>>> +++ b/include/drm/ttm/ttm_bo_api.h
>>> @@ -155,7 +155,6 @@ struct ttm_tt;
>>>     * @offset: The current GPU offset, which can have different meanings
>>>     * depending on the memory type. For SYSTEM type memory, it should be 0.
>>>     * @cur_placement: Hint of current placement.
>>> - * @wu_mutex: Wait unreserved mutex.
>>>     *
>>>     * Base class for TTM buffer object, that deals with data placement and CPU
>>>     * mappings. GPU mappings are really up to the driver, but for simpler GPUs
>>> @@ -229,8 +228,6 @@ struct ttm_buffer_object {
>>>        uint64_t offset; /* GPU address space is independent of CPU word size */
>>>
>>>        struct sg_table *sg;
>>> -
>>> -     struct mutex wu_mutex;
>>>    };
>>>
>>>    /**
>>> @@ -765,7 +762,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
>>>    int ttm_bo_swapout(struct ttm_bo_global *glob,
>>>                        struct ttm_operation_ctx *ctx);
>>>    void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
>>>
>>>    /**
>>>     * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>



More information about the dri-devel mailing list