[PATCH 8/8] drm/radeon: rework the VM code a bit more

Jerome Glisse j.glisse at gmail.com
Wed Sep 12 10:45:08 PDT 2012


On Wed, Sep 12, 2012 at 1:13 PM, Christian König
<deathsimple at vodafone.de> wrote:
> On 12.09.2012 15:54, Jerome Glisse wrote:
>>
>> On Wed, Sep 12, 2012 at 8:08 AM, Christian König
>> <deathsimple at vodafone.de> wrote:
>>>
>>> On 11.09.2012 18:11, Jerome Glisse wrote:
>>>>
>>>> On Tue, Sep 11, 2012 at 10:10 AM, Christian König
>>>> <deathsimple at vodafone.de> wrote:
>>>>>
>>>>> Roughly based on how nouveau is handling it. Instead of
>>>>> adding the bo_va when the address is set add the bo_va
>>>>> when the handle is opened, but set the address to zero
>>>>> until userspace tells us where to place it.
>>>>>
>>>>> This fixes another bunch of problems with glamor.
>>>>
>>>> What exactly is the fix? I don't see why this change is needed. It
>>>> makes things more complicated in my view.
>>>
>>> Applications can open GEM handles multiple times, so what happens is that
>>> (for example) the DDX creates a BO to back a pixmap, hands that over to
>>> GLAMOR and then closes the handle again (a bit later and also not every
>>> time).
>>>
>>> In the end the kernel complains that bo x isn't in vm y, which makes sense
>>> because the DDX has removed the mapping by closing the handle. What we
>>> don't
>>> have in this picture is that the handle was opened two times, once for
>>> creation and once for handing it over to GLAMOR.
>>>
>>> I see three possible solutions to that problem:
>>> 1. Remember how many times a handle was opened in a vm context, that is
>>> what
>>> this patch does.
>>>
>>> 2. Instead of removing the mapping on closing the handle we change
>>> userspace
>>> to remove the mapping separately.
>>>
>>> 3. Change GEM to only call the open/close callbacks when the handle is
>>> opened and closed for the first/last time in a process context, that
>>> would
>>> also eliminate the refcounting nouveau is currently doing.
>>>
>>> Feel free to choose one, I just have implemented number 1 because that
>>> was
>>> the simplest way to go (for me).
>>>
>>> Cheers,
>>> Christian.
>>
>> I am all ok for the refcounting part but I don't think always adding
>> a va to a bo is a good idea. It just adds more code for no good reason.
>
> Always adding a va to a bo is only an issue on cayman; on SI and later we
> will need a va for each bo anyway.
>
> The problem with not adding a va is that I just need something to actually
> count the number of references in.
>
> So there isn't so much of an alternative to adding the va on opening the
> handle.
>
> Cheers,
> Christian.

Ok fair enough

Cheers,
Jerome

>
>>
>> Cheers,
>> Jerome
>>
>>>>> Signed-off-by: Christian König <deathsimple at vodafone.de>
>>>>> ---
>>>>>    drivers/gpu/drm/radeon/radeon.h      |   30 ++++---
>>>>>    drivers/gpu/drm/radeon/radeon_gart.c |  147
>>>>> ++++++++++++++++++++++------------
>>>>>    drivers/gpu/drm/radeon/radeon_gem.c  |   47 +++++++++--
>>>>>    3 files changed, 153 insertions(+), 71 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>>>>> b/drivers/gpu/drm/radeon/radeon.h
>>>>> index 8cca1d2..4d67f0f 100644
>>>>> --- a/drivers/gpu/drm/radeon/radeon.h
>>>>> +++ b/drivers/gpu/drm/radeon/radeon.h
>>>>> @@ -292,17 +292,20 @@ struct radeon_mman {
>>>>>
>>>>>    /* bo virtual address in a specific vm */
>>>>>    struct radeon_bo_va {
>>>>> -       /* bo list is protected by bo being reserved */
>>>>> +       /* protected by bo being reserved */
>>>>>           struct list_head                bo_list;
>>>>> -       /* vm list is protected by vm mutex */
>>>>> -       struct list_head                vm_list;
>>>>> -       /* constant after initialization */
>>>>> -       struct radeon_vm                *vm;
>>>>> -       struct radeon_bo                *bo;
>>>>>           uint64_t                        soffset;
>>>>>           uint64_t                        eoffset;
>>>>>           uint32_t                        flags;
>>>>>           bool                            valid;
>>>>> +       unsigned                        ref_count;
>>>>
>>>> An unsigned refcount is a recipe for failure: if somehow you over-unref
>>>> then your va will stay alive forever and this over-unref will probably
>>>> go unnoticed.
>>>>
>>>>> +
>>>>> +       /* protected by vm mutex */
>>>>> +       struct list_head                vm_list;
>>>>> +
>>>>> +       /* constant after initialization */
>>>>> +       struct radeon_vm                *vm;
>>>>> +       struct radeon_bo                *bo;
>>>>>    };
>>>>>
>>>>>    struct radeon_bo {
>>>>> @@ -1848,14 +1851,15 @@ void radeon_vm_bo_invalidate(struct
>>>>> radeon_device
>>>>> *rdev,
>>>>>                                struct radeon_bo *bo);
>>>>>    struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
>>>>>                                          struct radeon_bo *bo);
>>>>> -int radeon_vm_bo_add(struct radeon_device *rdev,
>>>>> -                    struct radeon_vm *vm,
>>>>> -                    struct radeon_bo *bo,
>>>>> -                    uint64_t offset,
>>>>> -                    uint32_t flags);
>>>>> +struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
>>>>> +                                     struct radeon_vm *vm,
>>>>> +                                     struct radeon_bo *bo);
>>>>> +int radeon_vm_bo_set_addr(struct radeon_device *rdev,
>>>>> +                         struct radeon_bo_va *bo_va,
>>>>> +                         uint64_t offset,
>>>>> +                         uint32_t flags);
>>>>>    int radeon_vm_bo_rmv(struct radeon_device *rdev,
>>>>> -                    struct radeon_vm *vm,
>>>>> -                    struct radeon_bo *bo);
>>>>> +                    struct radeon_bo_va *bo_va);
>>>>>
>>>>>    /* audio */
>>>>>    void r600_audio_update_hdmi(struct work_struct *work);
>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_gart.c
>>>>> b/drivers/gpu/drm/radeon/radeon_gart.c
>>>>> index 2c59491..2f28ff3 100644
>>>>> --- a/drivers/gpu/drm/radeon/radeon_gart.c
>>>>> +++ b/drivers/gpu/drm/radeon/radeon_gart.c
>>>>> @@ -693,51 +693,83 @@ struct radeon_bo_va *radeon_vm_bo_find(struct
>>>>> radeon_vm *vm,
>>>>>     * @rdev: radeon_device pointer
>>>>>     * @vm: requested vm
>>>>>     * @bo: radeon buffer object
>>>>> - * @offset: requested offset of the buffer in the VM address space
>>>>> - * @flags: attributes of pages (read/write/valid/etc.)
>>>>>     *
>>>>>     * Add @bo into the requested vm (cayman+).
>>>>> - * Add @bo to the list of bos associated with the vm and validate
>>>>> - * the offset requested within the vm address space.
>>>>> - * Returns 0 for success, error for failure.
>>>>> + * Add @bo to the list of bos associated with the vm
>>>>> + * Returns newly added bo_va or NULL for failure
>>>>>     *
>>>>>     * Object has to be reserved!
>>>>>     */
>>>>> -int radeon_vm_bo_add(struct radeon_device *rdev,
>>>>> -                    struct radeon_vm *vm,
>>>>> -                    struct radeon_bo *bo,
>>>>> -                    uint64_t offset,
>>>>> -                    uint32_t flags)
>>>>> +struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
>>>>> +                                     struct radeon_vm *vm,
>>>>> +                                     struct radeon_bo *bo)
>>>>>    {
>>>>> -       struct radeon_bo_va *bo_va, *tmp;
>>>>> -       struct list_head *head;
>>>>> -       uint64_t size = radeon_bo_size(bo), last_offset = 0;
>>>>> -       unsigned last_pfn;
>>>>> +       struct radeon_bo_va *bo_va;
>>>>>
>>>>>           bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
>>>>>           if (bo_va == NULL) {
>>>>> -               return -ENOMEM;
>>>>> +               return NULL;
>>>>>           }
>>>>>           bo_va->vm = vm;
>>>>>           bo_va->bo = bo;
>>>>> -       bo_va->soffset = offset;
>>>>> -       bo_va->eoffset = offset + size;
>>>>> -       bo_va->flags = flags;
>>>>> +       bo_va->soffset = 0;
>>>>> +       bo_va->eoffset = 0;
>>>>> +       bo_va->flags = 0;
>>>>>           bo_va->valid = false;
>>>>> +       bo_va->ref_count = 1;
>>>>>           INIT_LIST_HEAD(&bo_va->bo_list);
>>>>>           INIT_LIST_HEAD(&bo_va->vm_list);
>>>>> -       /* make sure object fit at this offset */
>>>>> -       if (bo_va->soffset >= bo_va->eoffset) {
>>>>> -               kfree(bo_va);
>>>>> -               return -EINVAL;
>>>>> -       }
>>>>>
>>>>> -       last_pfn = bo_va->eoffset / RADEON_GPU_PAGE_SIZE;
>>>>> -       if (last_pfn > rdev->vm_manager.max_pfn) {
>>>>> -               kfree(bo_va);
>>>>> -               dev_err(rdev->dev, "va above limit (0x%08X >
>>>>> 0x%08X)\n",
>>>>> -                       last_pfn, rdev->vm_manager.max_pfn);
>>>>> -               return -EINVAL;
>>>>> +       mutex_lock(&vm->mutex);
>>>>> +       list_add(&bo_va->vm_list, &vm->va);
>>>>> +       list_add_tail(&bo_va->bo_list, &bo->va);
>>>>> +       mutex_unlock(&vm->mutex);
>>>>> +
>>>>> +       return bo_va;
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * radeon_vm_bo_set_addr - set bos virtual address inside a vm
>>>>> + *
>>>>> + * @rdev: radeon_device pointer
>>>>> + * @bo_va: bo_va to store the address
>>>>> + * @soffset: requested offset of the buffer in the VM address space
>>>>> + * @flags: attributes of pages (read/write/valid/etc.)
>>>>> + *
>>>>> + * Set offset of @bo_va (cayman+).
>>>>> + * Validate and set the offset requested within the vm address space.
>>>>> + * Returns 0 for success, error for failure.
>>>>> + *
>>>>> + * Object has to be reserved!
>>>>> + */
>>>>> +int radeon_vm_bo_set_addr(struct radeon_device *rdev,
>>>>> +                         struct radeon_bo_va *bo_va,
>>>>> +                         uint64_t soffset,
>>>>> +                         uint32_t flags)
>>>>> +{
>>>>> +       uint64_t size = radeon_bo_size(bo_va->bo);
>>>>> +       uint64_t eoffset, last_offset = 0;
>>>>> +       struct radeon_vm *vm = bo_va->vm;
>>>>> +       struct radeon_bo_va *tmp;
>>>>> +       struct list_head *head;
>>>>> +       unsigned last_pfn;
>>>>> +
>>>>> +       if (soffset) {
>>>>> +               /* make sure object fit at this offset */
>>>>> +               eoffset = soffset + size;
>>>>> +               if (soffset >= eoffset) {
>>>>> +                       return -EINVAL;
>>>>> +               }
>>>>> +
>>>>> +               last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
>>>>> +               if (last_pfn > rdev->vm_manager.max_pfn) {
>>>>> +                       dev_err(rdev->dev, "va above limit (0x%08X >
>>>>> 0x%08X)\n",
>>>>> +                               last_pfn, rdev->vm_manager.max_pfn);
>>>>> +                       return -EINVAL;
>>>>> +               }
>>>>> +
>>>>> +       } else {
>>>>> +               eoffset = last_pfn = 0;
>>>>>           }
>>>>>
>>>>>           mutex_lock(&vm->mutex);
>>>>> @@ -758,24 +790,33 @@ int radeon_vm_bo_add(struct radeon_device *rdev,
>>>>>           head = &vm->va;
>>>>>           last_offset = 0;
>>>>>           list_for_each_entry(tmp, &vm->va, vm_list) {
>>>>> -               if (bo_va->soffset >= last_offset && bo_va->eoffset <=
>>>>> tmp->soffset) {
>>>>> +               if (bo_va == tmp) {
>>>>> +                       /* skip over currently modified bo */
>>>>> +                       continue;
>>>>> +               }
>>>>> +
>>>>> +               if (soffset >= last_offset && eoffset <= tmp->soffset)
>>>>> {
>>>>>                           /* bo can be added before this one */
>>>>>                           break;
>>>>>                   }
>>>>> -               if (bo_va->eoffset > tmp->soffset && bo_va->soffset <
>>>>> tmp->eoffset) {
>>>>> +               if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
>>>>>                           /* bo and tmp overlap, invalid offset */
>>>>>                           dev_err(rdev->dev, "bo %p va 0x%08X conflict
>>>>> with (bo %p 0x%08X 0x%08X)\n",
>>>>> -                               bo, (unsigned)bo_va->soffset, tmp->bo,
>>>>> +                               bo_va->bo, (unsigned)bo_va->soffset,
>>>>> tmp->bo,
>>>>>                                   (unsigned)tmp->soffset,
>>>>> (unsigned)tmp->eoffset);
>>>>> -                       kfree(bo_va);
>>>>>                           mutex_unlock(&vm->mutex);
>>>>>                           return -EINVAL;
>>>>>                   }
>>>>>                   last_offset = tmp->eoffset;
>>>>>                   head = &tmp->vm_list;
>>>>>           }
>>>>> -       list_add(&bo_va->vm_list, head);
>>>>> -       list_add_tail(&bo_va->bo_list, &bo->va);
>>>>> +
>>>>> +       bo_va->soffset = soffset;
>>>>> +       bo_va->eoffset = eoffset;
>>>>> +       bo_va->flags = flags;
>>>>> +       bo_va->valid = false;
>>>>> +       list_move(&bo_va->vm_list, head);
>>>>> +
>>>>>           mutex_unlock(&vm->mutex);
>>>>>           return 0;
>>>>>    }
>>>>> @@ -855,6 +896,12 @@ int radeon_vm_bo_update_pte(struct radeon_device
>>>>> *rdev,
>>>>>                   return -EINVAL;
>>>>>           }
>>>>>
>>>>> +       if (!bo_va->soffset) {
>>>>> +               dev_err(rdev->dev, "bo %p don't has a mapping in vm
>>>>> %p\n",
>>>>> +                       bo, vm);
>>>>> +               return -EINVAL;
>>>>> +       }
>>>>> +
>>>>>           if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
>>>>>                   return 0;
>>>>>
>>>>> @@ -921,33 +968,26 @@ int radeon_vm_bo_update_pte(struct radeon_device
>>>>> *rdev,
>>>>>     * radeon_vm_bo_rmv - remove a bo to a specific vm
>>>>>     *
>>>>>     * @rdev: radeon_device pointer
>>>>> - * @vm: requested vm
>>>>> - * @bo: radeon buffer object
>>>>> + * @bo_va: requested bo_va
>>>>>     *
>>>>> - * Remove @bo from the requested vm (cayman+).
>>>>> - * Remove @bo from the list of bos associated with the vm and
>>>>> - * remove the ptes for @bo in the page table.
>>>>> + * Remove @bo_va->bo from the requested vm (cayman+).
>>>>> + * Remove @bo_va->bo from the list of bos associated with the
>>>>> bo_va->vm
>>>>> and
>>>>> + * remove the ptes for @bo_va in the page table.
>>>>>     * Returns 0 for success.
>>>>>     *
>>>>>     * Object have to be reserved!
>>>>>     */
>>>>>    int radeon_vm_bo_rmv(struct radeon_device *rdev,
>>>>> -                    struct radeon_vm *vm,
>>>>> -                    struct radeon_bo *bo)
>>>>> +                    struct radeon_bo_va *bo_va)
>>>>>    {
>>>>> -       struct radeon_bo_va *bo_va;
>>>>>           int r;
>>>>>
>>>>> -       bo_va = radeon_vm_bo_find(vm, bo);
>>>>> -       if (bo_va == NULL)
>>>>> -               return 0;
>>>>> -
>>>>>           mutex_lock(&rdev->vm_manager.lock);
>>>>> -       mutex_lock(&vm->mutex);
>>>>> -       r = radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
>>>>> +       mutex_lock(&bo_va->vm->mutex);
>>>>> +       r = radeon_vm_bo_update_pte(rdev, bo_va->vm, bo_va->bo, NULL);
>>>>>           mutex_unlock(&rdev->vm_manager.lock);
>>>>>           list_del(&bo_va->vm_list);
>>>>> -       mutex_unlock(&vm->mutex);
>>>>> +       mutex_unlock(&bo_va->vm->mutex);
>>>>>           list_del(&bo_va->bo_list);
>>>>>
>>>>>           kfree(bo_va);
>>>>> @@ -987,6 +1027,7 @@ void radeon_vm_bo_invalidate(struct radeon_device
>>>>> *rdev,
>>>>>     */
>>>>>    int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
>>>>>    {
>>>>> +       struct radeon_bo_va *bo_va;
>>>>>           int r;
>>>>>
>>>>>           vm->id = 0;
>>>>> @@ -1006,8 +1047,10 @@ int radeon_vm_init(struct radeon_device *rdev,
>>>>> struct radeon_vm *vm)
>>>>>           /* map the ib pool buffer at 0 in virtual address space, set
>>>>>            * read only
>>>>>            */
>>>>> -       r = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo,
>>>>> RADEON_VA_IB_OFFSET,
>>>>> -                            RADEON_VM_PAGE_READABLE |
>>>>> RADEON_VM_PAGE_SNOOPED);
>>>>> +       bo_va = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo);
>>>>> +       r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET,
>>>>> +                                 RADEON_VM_PAGE_READABLE |
>>>>> +                                 RADEON_VM_PAGE_SNOOPED);
>>>>>           return r;
>>>>>    }
>>>>>
>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>>>>> b/drivers/gpu/drm/radeon/radeon_gem.c
>>>>> index cfad667..6579bef 100644
>>>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>>>> @@ -124,6 +124,30 @@ void radeon_gem_fini(struct radeon_device *rdev)
>>>>>     */
>>>>>    int radeon_gem_object_open(struct drm_gem_object *obj, struct
>>>>> drm_file
>>>>> *file_priv)
>>>>>    {
>>>>> +       struct radeon_bo *rbo = gem_to_radeon_bo(obj);
>>>>> +       struct radeon_device *rdev = rbo->rdev;
>>>>> +       struct radeon_fpriv *fpriv = file_priv->driver_priv;
>>>>> +       struct radeon_vm *vm = &fpriv->vm;
>>>>> +       struct radeon_bo_va *bo_va;
>>>>> +       int r;
>>>>> +
>>>>> +       if (rdev->family < CHIP_CAYMAN) {
>>>>> +               return 0;
>>>>> +       }
>>>>> +
>>>>> +       r = radeon_bo_reserve(rbo, false);
>>>>> +       if (r) {
>>>>> +               return r;
>>>>> +       }
>>>>> +
>>>>> +       bo_va = radeon_vm_bo_find(vm, rbo);
>>>>> +       if (!bo_va) {
>>>>> +               bo_va = radeon_vm_bo_add(rdev, vm, rbo);
>>>>> +       } else {
>>>>> +               ++bo_va->ref_count;
>>>>> +       }
>>>>> +       radeon_bo_unreserve(rbo);
>>>>> +
>>>>>           return 0;
>>>>>    }
>>>>>
>>>>> @@ -134,6 +158,7 @@ void radeon_gem_object_close(struct drm_gem_object
>>>>> *obj,
>>>>>           struct radeon_device *rdev = rbo->rdev;
>>>>>           struct radeon_fpriv *fpriv = file_priv->driver_priv;
>>>>>           struct radeon_vm *vm = &fpriv->vm;
>>>>> +       struct radeon_bo_va *bo_va;
>>>>>           int r;
>>>>>
>>>>>           if (rdev->family < CHIP_CAYMAN) {
>>>>> @@ -146,7 +171,12 @@ void radeon_gem_object_close(struct drm_gem_object
>>>>> *obj,
>>>>>                           "we fail to reserve bo (%d)\n", r);
>>>>>                   return;
>>>>>           }
>>>>> -       radeon_vm_bo_rmv(rdev, vm, rbo);
>>>>> +       bo_va = radeon_vm_bo_find(vm, rbo);
>>>>> +       if (bo_va) {
>>>>> +               if (--bo_va->ref_count == 0) {
>>>>> +                       radeon_vm_bo_rmv(rdev, bo_va);
>>>>> +               }
>>>>> +       }
>>>>>           radeon_bo_unreserve(rbo);
>>>>>    }
>>>>>
>>>>> @@ -462,19 +492,24 @@ int radeon_gem_va_ioctl(struct drm_device *dev,
>>>>> void *data,
>>>>>                   drm_gem_object_unreference_unlocked(gobj);
>>>>>                   return r;
>>>>>           }
>>>>> +       bo_va = radeon_vm_bo_find(&fpriv->vm, rbo);
>>>>> +       if (!bo_va) {
>>>>> +               args->operation = RADEON_VA_RESULT_ERROR;
>>>>> +               drm_gem_object_unreference_unlocked(gobj);
>>>>> +               return -ENOENT;
>>>>> +       }
>>>>> +
>>>>>           switch (args->operation) {
>>>>>           case RADEON_VA_MAP:
>>>>> -               bo_va = radeon_vm_bo_find(&fpriv->vm, rbo);
>>>>> -               if (bo_va) {
>>>>> +               if (bo_va->soffset) {
>>>>>                           args->operation = RADEON_VA_RESULT_VA_EXIST;
>>>>>                           args->offset = bo_va->soffset;
>>>>>                           goto out;
>>>>>                   }
>>>>> -               r = radeon_vm_bo_add(rdev, &fpriv->vm, rbo,
>>>>> -                                    args->offset, args->flags);
>>>>> +               r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset,
>>>>> args->flags);
>>>>>                   break;
>>>>>           case RADEON_VA_UNMAP:
>>>>> -               r = radeon_vm_bo_rmv(rdev, &fpriv->vm, rbo);
>>>>> +               r = radeon_vm_bo_set_addr(rdev, bo_va, 0, 0);
>>>>>                   break;
>>>>>           default:
>>>>>                   break;
>>>>> --
>>>>> 1.7.9.5
>>>>>
>>>>> _______________________________________________
>>>>> dri-devel mailing list
>>>>> dri-devel at lists.freedesktop.org
>>>>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>
>>>
>


More information about the dri-devel mailing list