[PATCH 4/4] drm/amdgpu: add new unlocked flag for PTE updates

Felix Kuehling felix.kuehling at amd.com
Mon Apr 13 22:11:43 UTC 2020


[+Philip]

Am 2020-04-08 um 3:17 a.m. schrieb Christian König:
> Am 07.04.20 um 21:24 schrieb Felix Kuehling:
>> Am 2020-04-07 um 10:27 a.m. schrieb Christian König:
>>> For HMM support we need the ability to invalidate PTEs from
>>> a MM callback where we can't lock the root PD.
>>>
>>> Add a new flag to better support this instead of assuming
>>> that all invalidation updates are unlocked.
>> Thanks for this patch series. It looks good to me. Alex, please take a
>> look at the new "unlocked" parameter in amdgpu_vm_bo_update_mapping.
>> Does this work for your MMU notifier implementation? I think this should
>> be all that's needed for unmapping in an MMU notifier with SVM, where we
>> won't unmap complete BOs.
>>
>> It may not work with your initial prototype that was BO-based, though.
>> For that one you may need to propagate the "unlocked" parameter all the
>> way up to amdgpu_vm_bo_update.
>
> You can't base the MMU notifier unmap on BOs anyway because you would
> need to lock the BO for that.
>
> Best practice here is probably to call amdgpu_vm_bo_update_mapping()
> directly. Maybe we should rename that function to
> amdgpu_vm_bo_update_range().

That name sounds good to me. While we're at it, I think we should also
rename amdgpu_vm_split_mapping and update its documentation. It no longer
handles splitting into IBs at all. Maybe this one should be renamed to
amdgpu_vm_update_mapping.

Philip will need to make both those functions non-static for our HMM
range-based memory manager.

Regards,
  Felix


>
> Regards,
> Christian.
>
>>
>> The series is
>>
>> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
>>
>>
>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 42
>>> ++++++++++++---------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  9 ++++-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 12 +++---
>>>   3 files changed, 37 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 1ee87b460b96..4ca4f61b34ca 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -1446,7 +1446,7 @@ static int amdgpu_vm_update_ptes(struct
>>> amdgpu_vm_update_params *params,
>>>           uint64_t incr, entry_end, pe_start;
>>>           struct amdgpu_bo *pt;
>>>   -        if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>>> +        if (!params->unlocked) {
>>>               /* make sure that the page tables covering the
>>>                * address range are actually allocated
>>>                */
>>> @@ -1458,8 +1458,12 @@ static int amdgpu_vm_update_ptes(struct
>>> amdgpu_vm_update_params *params,
>>>             shift = amdgpu_vm_level_shift(adev, cursor.level);
>>>           parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
>>> -        if (adev->asic_type < CHIP_VEGA10 &&
>>> -            (flags & AMDGPU_PTE_VALID)) {
>>> +        if (params->unlocked) {
>>> +            /* Unlocked updates are only allowed on the leaves */
>>> +            if (amdgpu_vm_pt_descendant(adev, &cursor))
>>> +                continue;
>>> +        } else if (adev->asic_type < CHIP_VEGA10 &&
>>> +               (flags & AMDGPU_PTE_VALID)) {
>>>               /* No huge page support before GMC v9 */
>>>               if (cursor.level != AMDGPU_VM_PTB) {
>>>                   if (!amdgpu_vm_pt_descendant(adev, &cursor))
>>> @@ -1558,6 +1562,7 @@ static int amdgpu_vm_update_ptes(struct
>>> amdgpu_vm_update_params *params,
>>>    * @adev: amdgpu_device pointer
>>>    * @vm: requested vm
>>>    * @immediate: immediate submission in a page fault
>>> + * @unlocked: unlocked invalidation during MM callback
>>>    * @resv: fences we need to sync to
>>>    * @start: start of mapped range
>>>    * @last: last mapped entry
>>> @@ -1573,7 +1578,7 @@ static int amdgpu_vm_update_ptes(struct
>>> amdgpu_vm_update_params *params,
>>>    */
>>>   static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>                          struct amdgpu_vm *vm, bool immediate,
>>> -                       struct dma_resv *resv,
>>> +                       bool unlocked, struct dma_resv *resv,
>>>                          uint64_t start, uint64_t last,
>>>                          uint64_t flags, uint64_t addr,
>>>                          dma_addr_t *pages_addr,
>>> @@ -1603,11 +1608,12 @@ static int
>>> amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>           goto error_unlock;
>>>       }
>>>   -    if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>>> -        struct amdgpu_bo *root = vm->root.base.bo;
>>> +    if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
>>> +        struct dma_fence *tmp = dma_fence_get_stub();
>>>   -        if (!dma_fence_is_signaled(vm->last_immediate))
>>> -            amdgpu_bo_fence(root, vm->last_immediate, true);
>>> +        amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
>>> +        swap(vm->last_unlocked, tmp);
>>> +        dma_fence_put(tmp);
>>>       }
>>>         r = vm->update_funcs->prepare(&params, resv, sync_mode);
>>> @@ -1721,7 +1727,7 @@ static int amdgpu_vm_bo_split_mapping(struct
>>> amdgpu_device *adev,
>>>           }
>>>             last = min((uint64_t)mapping->last, start + max_entries
>>> - 1);
>>> -        r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
>>> +        r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>>>                           start, last, flags, addr,
>>>                           dma_addr, fence);
>>>           if (r)
>>> @@ -2018,7 +2024,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device
>>> *adev,
>>>               mapping->start < AMDGPU_GMC_HOLE_START)
>>>               init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>>   -        r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
>>> +        r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>>>                           mapping->start, mapping->last,
>>>                           init_pte_value, 0, NULL, &f);
>>>           amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>> @@ -2589,7 +2595,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>>>           return false;
>>>         /* Don't evict VM page tables while they are updated */
>>> -    if (!dma_fence_is_signaled(bo_base->vm->last_immediate)) {
>>> +    if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
>>>           amdgpu_vm_eviction_unlock(bo_base->vm);
>>>           return false;
>>>       }
>>> @@ -2766,7 +2772,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm,
>>> long timeout)
>>>       if (timeout <= 0)
>>>           return timeout;
>>>   -    return dma_fence_wait_timeout(vm->last_immediate, true,
>>> timeout);
>>> +    return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
>>>   }
>>>     /**
>>> @@ -2838,7 +2844,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
>>> struct amdgpu_vm *vm,
>>>       else
>>>           vm->update_funcs = &amdgpu_vm_sdma_funcs;
>>>       vm->last_update = NULL;
>>> -    vm->last_immediate = dma_fence_get_stub();
>>> +    vm->last_unlocked = dma_fence_get_stub();
>>>         mutex_init(&vm->eviction_lock);
>>>       vm->evicting = false;
>>> @@ -2892,7 +2898,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
>>> struct amdgpu_vm *vm,
>>>       vm->root.base.bo = NULL;
>>>     error_free_delayed:
>>> -    dma_fence_put(vm->last_immediate);
>>> +    dma_fence_put(vm->last_unlocked);
>>>       drm_sched_entity_destroy(&vm->delayed);
>>>     error_free_immediate:
>>> @@ -3093,8 +3099,8 @@ void amdgpu_vm_fini(struct amdgpu_device
>>> *adev, struct amdgpu_vm *vm)
>>>           vm->pasid = 0;
>>>       }
>>>   -    dma_fence_wait(vm->last_immediate, false);
>>> -    dma_fence_put(vm->last_immediate);
>>> +    dma_fence_wait(vm->last_unlocked, false);
>>> +    dma_fence_put(vm->last_unlocked);
>>>         list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
>>>           if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
>>> @@ -3347,8 +3353,8 @@ bool amdgpu_vm_handle_fault(struct
>>> amdgpu_device *adev, unsigned int pasid,
>>>           value = 0;
>>>       }
>>>   -    r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr,
>>> addr + 1,
>>> -                    flags, value, NULL, NULL);
>>> +    r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
>>> +                    addr + 1, flags, value, NULL, NULL);
>>>       if (r)
>>>           goto error_unlock;
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 0b64200ec45f..ea771d84bf2b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -210,6 +210,11 @@ struct amdgpu_vm_update_params {
>>>        */
>>>       bool immediate;
>>>   +    /**
>>> +     * @unlocked: true if the root BO is not locked
>>> +     */
>>> +    bool unlocked;
>>> +
>>>       /**
>>>        * @pages_addr:
>>>        *
>>> @@ -277,8 +282,8 @@ struct amdgpu_vm {
>>>       struct drm_sched_entity    immediate;
>>>       struct drm_sched_entity    delayed;
>>>   -    /* Last submission to the scheduler entities */
>>> -    struct dma_fence    *last_immediate;
>>> +    /* Last unlocked submission to the scheduler entities */
>>> +    struct dma_fence    *last_unlocked;
>>>         unsigned int        pasid;
>>>       /* dedicated to vm */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> index c78bcebd9378..8d9c6feba660 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> @@ -92,8 +92,8 @@ static int amdgpu_vm_sdma_commit(struct
>>> amdgpu_vm_update_params *p,
>>>   {
>>>       struct amdgpu_ib *ib = p->job->ibs;
>>>       struct drm_sched_entity *entity;
>>> -    struct dma_fence *f, *tmp;
>>>       struct amdgpu_ring *ring;
>>> +    struct dma_fence *f;
>>>       int r;
>>>         entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
>>> @@ -106,13 +106,13 @@ static int amdgpu_vm_sdma_commit(struct
>>> amdgpu_vm_update_params *p,
>>>       if (r)
>>>           goto error;
>>>   -    if (p->immediate) {
>>> -        tmp = dma_fence_get(f);
>>> -        swap(p->vm->last_immediate, f);
>>> +    if (p->unlocked) {
>>> +        struct dma_fence *tmp = dma_fence_get(f);
>>> +
>>> +        swap(p->vm->last_unlocked, f);
>>>           dma_fence_put(tmp);
>>>       } else {
>>> -        dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv,
>>> -                      f);
>>> +        amdgpu_bo_fence(p->vm->root.base.bo, f, true);
>>>       }
>>>         if (fence && !p->immediate)
>


More information about the amd-gfx mailing list