[PATCH 4/4] drm/amdgpu: add new unlocked flag for PTE updates
Felix Kuehling
felix.kuehling at amd.com
Mon Apr 13 22:11:43 UTC 2020
[+Philip]
Am 2020-04-08 um 3:17 a.m. schrieb Christian König:
> Am 07.04.20 um 21:24 schrieb Felix Kuehling:
>> Am 2020-04-07 um 10:27 a.m. schrieb Christian König:
>>> For HMM support we need the ability to invalidate PTEs from
>>> a MM callback where we can't lock the root PD.
>>>
>>> Add a new flag to better support this instead of assuming
>>> that all invalidation updates are unlocked.
>> Thanks for this patch series. It looks good to me. Alex, please take a
>> look at the new "unlocked" parameter in amdgpu_vm_bo_update_mapping.
>> Does this work for your MMU notifier implementation? I think this should
>> be all that's needed for unmapping in an MMU notifier with SVM, where we
>> won't unmap complete BOs.
>>
>> It may not work with your initial prototype that was BO-based, though.
>> For that one you may need to propagate the "unlocked" parameter all the
>> way up to amdgpu_vm_bo_update.
>
> You can't base the MMU notifier unmap on BOs anyway because you would
> need to lock the BO for that.
>
> Best practice here is probably to call amdgpu_vm_bo_update_mapping()
> directly. Maybe we should rename that function to
> amdgpu_vm_bo_update_range().
That name sounds good to me. While we're at it, I think we should also
rename amdgpu_vm_bo_split_mapping and update its documentation. It no
longer handles splitting into IBs at all. Maybe this one should be
renamed to amdgpu_vm_update_mapping.

Philip will need to make both of those functions non-static for our HMM
range-based memory manager.
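
To make that concrete, here is roughly the shape I expect the notifier
path in the range-based manager to take. This is only a sketch:
amdgpu_svm_range, amdgpu_svm_invalidate and the 1:1 CPU-to-GPU address
mapping are made-up placeholders, and it assumes
amdgpu_vm_bo_update_mapping has been made non-static as discussed above:

#include <linux/dma-fence.h>
#include <linux/mmu_notifier.h>

#include "amdgpu.h"

/* Placeholder range object for this sketch, not a real amdgpu struct */
struct amdgpu_svm_range {
	struct mmu_interval_notifier notifier;
	struct amdgpu_device *adev;
	struct amdgpu_vm *vm;
};

static bool amdgpu_svm_invalidate(struct mmu_interval_notifier *mni,
				  const struct mmu_notifier_range *range,
				  unsigned long cur_seq)
{
	struct amdgpu_svm_range *svm =
		container_of(mni, struct amdgpu_svm_range, notifier);
	/* assumes a 1:1 CPU-to-GPU VA mapping, in GPU page units */
	uint64_t start = range->start >> AMDGPU_GPU_PAGE_SHIFT;
	uint64_t last = (range->end >> AMDGPU_GPU_PAGE_SHIFT) - 1;
	struct dma_fence *fence = NULL;
	int r;

	if (!mmu_notifier_range_blockable(range))
		return false;

	mmu_interval_set_seq(mni, cur_seq);

	/* unlocked=true because we can't take the root PD reservation
	 * here; the update then only touches existing leaf PTBs and
	 * never allocates page tables. flags=0 invalidates the PTEs.
	 */
	r = amdgpu_vm_bo_update_mapping(svm->adev, svm->vm,
					true /* immediate */,
					true /* unlocked */, NULL,
					start, last, 0, 0, NULL, &fence);
	if (!r && fence) {
		/* the pages may be freed as soon as we return, so wait
		 * for the PTE clear to actually finish
		 */
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}
	return true;
}

With the renames above, the call in the middle would simply become
amdgpu_vm_update_range().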
Regards,
Felix
>
> Regards,
> Christian.
>
>>
>> The series is
>>
>> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
>>
>>
>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 42 ++++++++++++---------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 9 ++++-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 12 +++---
>>> 3 files changed, 37 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 1ee87b460b96..4ca4f61b34ca 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -1446,7 +1446,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>> uint64_t incr, entry_end, pe_start;
>>> struct amdgpu_bo *pt;
>>> - if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>>> + if (!params->unlocked) {
>>> /* make sure that the page tables covering the
>>> * address range are actually allocated
>>> */
>>> @@ -1458,8 +1458,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>> shift = amdgpu_vm_level_shift(adev, cursor.level);
>>> parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
>>> - if (adev->asic_type < CHIP_VEGA10 &&
>>> - (flags & AMDGPU_PTE_VALID)) {
>>> + if (params->unlocked) {
>>> + /* Unlocked updates are only allowed on the leaves */
>>> + if (amdgpu_vm_pt_descendant(adev, &cursor))
>>> + continue;
>>> + } else if (adev->asic_type < CHIP_VEGA10 &&
>>> + (flags & AMDGPU_PTE_VALID)) {
>>> /* No huge page support before GMC v9 */
>>> if (cursor.level != AMDGPU_VM_PTB) {
>>> if (!amdgpu_vm_pt_descendant(adev, &cursor))
>>> @@ -1558,6 +1562,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>> * @adev: amdgpu_device pointer
>>> * @vm: requested vm
>>> * @immediate: immediate submission in a page fault
>>> + * @unlocked: unlocked invalidation during MM callback
>>> * @resv: fences we need to sync to
>>> * @start: start of mapped range
>>> * @last: last mapped entry
>>> @@ -1573,7 +1578,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>> */
>>> static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>> struct amdgpu_vm *vm, bool immediate,
>>> - struct dma_resv *resv,
>>> + bool unlocked, struct dma_resv *resv,
>>> uint64_t start, uint64_t last,
>>> uint64_t flags, uint64_t addr,
>>> dma_addr_t *pages_addr,
>>> @@ -1603,11 +1608,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>> goto error_unlock;
>>> }
>>> - if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>>> - struct amdgpu_bo *root = vm->root.base.bo;
>>> + if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
>>> + struct dma_fence *tmp = dma_fence_get_stub();
>>> - if (!dma_fence_is_signaled(vm->last_immediate))
>>> - amdgpu_bo_fence(root, vm->last_immediate, true);
>>> + amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
>>> + swap(vm->last_unlocked, tmp);
>>> + dma_fence_put(tmp);
>>> }
>>> r = vm->update_funcs->prepare(&params, resv, sync_mode);
>>> @@ -1721,7 +1727,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>>> }
>>> last = min((uint64_t)mapping->last, start + max_entries - 1);
>>> - r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
>>> + r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>>> start, last, flags, addr,
>>> dma_addr, fence);
>>> if (r)
>>> @@ -2018,7 +2024,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>> mapping->start < AMDGPU_GMC_HOLE_START)
>>> init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>> - r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
>>> + r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>>> mapping->start, mapping->last,
>>> init_pte_value, 0, NULL, &f);
>>> amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>> @@ -2589,7 +2595,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>>> return false;
>>> /* Don't evict VM page tables while they are updated */
>>> - if (!dma_fence_is_signaled(bo_base->vm->last_immediate)) {
>>> + if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
>>> amdgpu_vm_eviction_unlock(bo_base->vm);
>>> return false;
>>> }
>>> @@ -2766,7 +2772,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
>>> if (timeout <= 0)
>>> return timeout;
>>> - return dma_fence_wait_timeout(vm->last_immediate, true, timeout);
>>> + return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
>>> }
>>> /**
>>> @@ -2838,7 +2844,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>> else
>>> vm->update_funcs = &amdgpu_vm_sdma_funcs;
>>> vm->last_update = NULL;
>>> - vm->last_immediate = dma_fence_get_stub();
>>> + vm->last_unlocked = dma_fence_get_stub();
>>> mutex_init(&vm->eviction_lock);
>>> vm->evicting = false;
>>> @@ -2892,7 +2898,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>> vm->root.base.bo = NULL;
>>> error_free_delayed:
>>> - dma_fence_put(vm->last_immediate);
>>> + dma_fence_put(vm->last_unlocked);
>>> drm_sched_entity_destroy(&vm->delayed);
>>> error_free_immediate:
>>> @@ -3093,8 +3099,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>>> vm->pasid = 0;
>>> }
>>> - dma_fence_wait(vm->last_immediate, false);
>>> - dma_fence_put(vm->last_immediate);
>>> + dma_fence_wait(vm->last_unlocked, false);
>>> + dma_fence_put(vm->last_unlocked);
>>> list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
>>> if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
>>> @@ -3347,8 +3353,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>>> value = 0;
>>> }
>>> - r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
>>> - flags, value, NULL, NULL);
>>> + r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
>>> + addr + 1, flags, value, NULL, NULL);
>>> if (r)
>>> goto error_unlock;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 0b64200ec45f..ea771d84bf2b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -210,6 +210,11 @@ struct amdgpu_vm_update_params {
>>> */
>>> bool immediate;
>>> + /**
>>> + * @unlocked: true if the root BO is not locked
>>> + */
>>> + bool unlocked;
>>> +
>>> /**
>>> * @pages_addr:
>>> *
>>> @@ -277,8 +282,8 @@ struct amdgpu_vm {
>>> struct drm_sched_entity immediate;
>>> struct drm_sched_entity delayed;
>>> - /* Last submission to the scheduler entities */
>>> - struct dma_fence *last_immediate;
>>> + /* Last unlocked submission to the scheduler entities */
>>> + struct dma_fence *last_unlocked;
>>> unsigned int pasid;
>>> /* dedicated to vm */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> index c78bcebd9378..8d9c6feba660 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> @@ -92,8 +92,8 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>> {
>>> struct amdgpu_ib *ib = p->job->ibs;
>>> struct drm_sched_entity *entity;
>>> - struct dma_fence *f, *tmp;
>>> struct amdgpu_ring *ring;
>>> + struct dma_fence *f;
>>> int r;
>>> entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
>>> @@ -106,13 +106,13 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>> if (r)
>>> goto error;
>>> - if (p->immediate) {
>>> - tmp = dma_fence_get(f);
>>> - swap(p->vm->last_immediate, f);
>>> + if (p->unlocked) {
>>> + struct dma_fence *tmp = dma_fence_get(f);
>>> +
>>> + swap(p->vm->last_unlocked, f);
>>> dma_fence_put(tmp);
>>> } else {
>>> - dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv,
>>> - f);
>>> + amdgpu_bo_fence(p->vm->root.base.bo, f, true);
>>> }
>>> if (fence && !p->immediate)
>