[PATCH 4/4] drm/amdgpu: add new unlocked flag for PTE updates
Christian König
ckoenig.leichtzumerken at gmail.com
Wed Apr 8 07:17:21 UTC 2020
Am 07.04.20 um 21:24 schrieb Felix Kuehling:
> Am 2020-04-07 um 10:27 a.m. schrieb Christian König:
>> For HMM support we need the ability to invalidate PTEs from
>> a MM callback where we can't lock the root PD.
>>
>> Add a new flag to better support this instead of assuming
>> that all invalidation updates are unlocked.
> Thanks for this patch series. It looks good to me. Alex, please take a
> look at the new "unlocked" parameter in amdgpu_vm_bo_update_mapping.
> Does this work for your MMU notifier implementation? I think this should
> be all that's needed for unmapping in an MMU notifier with SVM, where we
> won't unmap complete BOs.
>
> It may not work with your initial prototype that was BO-based, though.
> For that one you may need to propagate the "unlocked" parameter all the
> way up to amdgpu_vm_bo_update.
You can't base the MMU notifier unmap on BOs anyway because you would
need to lock the BO for that.
Best practice here is probably to call amdgpu_vm_bo_update_mapping()
directly. Maybe we should rename that function to
amdgpu_vm_bo_update_range().
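For illustration, here is a minimal sketch of what such a direct call could
look like from an invalidation path. The callback name and the choice of
immediate submission are hypothetical; only the amdgpu_vm_bo_update_mapping()
signature is taken from this patch:

    /* Hypothetical MMU notifier invalidation path: clear the PTEs
     * covering [start, last] without taking the root PD lock.
     * unlocked=true restricts amdgpu_vm_update_ptes() to existing
     * leaf page tables, so nothing needs to be allocated or locked.
     */
    static int svm_range_invalidate(struct amdgpu_device *adev,
                                    struct amdgpu_vm *vm,
                                    uint64_t start, uint64_t last)
    {
            struct dma_fence *fence = NULL;
            int r;

            r = amdgpu_vm_bo_update_mapping(adev, vm, true /* immediate */,
                                            true /* unlocked */, NULL,
                                            start, last, 0 /* clear PTEs */,
                                            0, NULL, &fence);
            if (!r && fence)
                    r = dma_fence_wait(fence, false);
            dma_fence_put(fence);
            return r;
    }

In an MM callback, waiting on the returned fence before returning is what
guarantees the GPU no longer accesses the invalidated pages.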
Regards,
Christian.
>
> The series is
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
>
>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 42 ++++++++++++---------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 9 ++++-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 12 +++---
>> 3 files changed, 37 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 1ee87b460b96..4ca4f61b34ca 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1446,7 +1446,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>> uint64_t incr, entry_end, pe_start;
>> struct amdgpu_bo *pt;
>>
>> - if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>> + if (!params->unlocked) {
>> /* make sure that the page tables covering the
>> * address range are actually allocated
>> */
>> @@ -1458,8 +1458,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>
>> shift = amdgpu_vm_level_shift(adev, cursor.level);
>> parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
>> - if (adev->asic_type < CHIP_VEGA10 &&
>> - (flags & AMDGPU_PTE_VALID)) {
>> + if (params->unlocked) {
>> + /* Unlocked updates are only allowed on the leaves */
>> + if (amdgpu_vm_pt_descendant(adev, &cursor))
>> + continue;
>> + } else if (adev->asic_type < CHIP_VEGA10 &&
>> + (flags & AMDGPU_PTE_VALID)) {
>> /* No huge page support before GMC v9 */
>> if (cursor.level != AMDGPU_VM_PTB) {
>> if (!amdgpu_vm_pt_descendant(adev, &cursor))
>> @@ -1558,6 +1562,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>> * @adev: amdgpu_device pointer
>> * @vm: requested vm
>> * @immediate: immediate submission in a page fault
>> + * @unlocked: unlocked invalidation during MM callback
>> * @resv: fences we need to sync to
>> * @start: start of mapped range
>> * @last: last mapped entry
>> @@ -1573,7 +1578,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>> */
>> static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>> struct amdgpu_vm *vm, bool immediate,
>> - struct dma_resv *resv,
>> + bool unlocked, struct dma_resv *resv,
>> uint64_t start, uint64_t last,
>> uint64_t flags, uint64_t addr,
>> dma_addr_t *pages_addr,
>> @@ -1603,11 +1608,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>> goto error_unlock;
>> }
>>
>> - if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>> - struct amdgpu_bo *root = vm->root.base.bo;
>> + if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
>> + struct dma_fence *tmp = dma_fence_get_stub();
>>
>> - if (!dma_fence_is_signaled(vm->last_immediate))
>> - amdgpu_bo_fence(root, vm->last_immediate, true);
>> + amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
>> + swap(vm->last_unlocked, tmp);
>> + dma_fence_put(tmp);
>> }
>>
>> r = vm->update_funcs->prepare(&params, resv, sync_mode);
>> @@ -1721,7 +1727,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>> }
>>
>> last = min((uint64_t)mapping->last, start + max_entries - 1);
>> - r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
>> + r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>> start, last, flags, addr,
>> dma_addr, fence);
>> if (r)
>> @@ -2018,7 +2024,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>> mapping->start < AMDGPU_GMC_HOLE_START)
>> init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>
>> - r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
>> + r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>> mapping->start, mapping->last,
>> init_pte_value, 0, NULL, &f);
>> amdgpu_vm_free_mapping(adev, vm, mapping, f);
>> @@ -2589,7 +2595,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>> return false;
>>
>> /* Don't evict VM page tables while they are updated */
>> - if (!dma_fence_is_signaled(bo_base->vm->last_immediate)) {
>> + if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
>> amdgpu_vm_eviction_unlock(bo_base->vm);
>> return false;
>> }
>> @@ -2766,7 +2772,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
>> if (timeout <= 0)
>> return timeout;
>>
>> - return dma_fence_wait_timeout(vm->last_immediate, true, timeout);
>> + return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
>> }
>>
>> /**
>> @@ -2838,7 +2844,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> else
>> vm->update_funcs = &amdgpu_vm_sdma_funcs;
>> vm->last_update = NULL;
>> - vm->last_immediate = dma_fence_get_stub();
>> + vm->last_unlocked = dma_fence_get_stub();
>>
>> mutex_init(&vm->eviction_lock);
>> vm->evicting = false;
>> @@ -2892,7 +2898,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> vm->root.base.bo = NULL;
>>
>> error_free_delayed:
>> - dma_fence_put(vm->last_immediate);
>> + dma_fence_put(vm->last_unlocked);
>> drm_sched_entity_destroy(&vm->delayed);
>>
>> error_free_immediate:
>> @@ -3093,8 +3099,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>> vm->pasid = 0;
>> }
>>
>> - dma_fence_wait(vm->last_immediate, false);
>> - dma_fence_put(vm->last_immediate);
>> + dma_fence_wait(vm->last_unlocked, false);
>> + dma_fence_put(vm->last_unlocked);
>>
>> list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
>> if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
>> @@ -3347,8 +3353,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>> value = 0;
>> }
>>
>> - r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
>> - flags, value, NULL, NULL);
>> + r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
>> + addr + 1, flags, value, NULL, NULL);
>> if (r)
>> goto error_unlock;
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 0b64200ec45f..ea771d84bf2b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -210,6 +210,11 @@ struct amdgpu_vm_update_params {
>> */
>> bool immediate;
>>
>> + /**
>> + * @unlocked: true if the root BO is not locked
>> + */
>> + bool unlocked;
>> +
>> /**
>> * @pages_addr:
>> *
>> @@ -277,8 +282,8 @@ struct amdgpu_vm {
>> struct drm_sched_entity immediate;
>> struct drm_sched_entity delayed;
>>
>> - /* Last submission to the scheduler entities */
>> - struct dma_fence *last_immediate;
>> + /* Last unlocked submission to the scheduler entities */
>> + struct dma_fence *last_unlocked;
>>
>> unsigned int pasid;
>> /* dedicated to vm */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> index c78bcebd9378..8d9c6feba660 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> @@ -92,8 +92,8 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>> {
>> struct amdgpu_ib *ib = p->job->ibs;
>> struct drm_sched_entity *entity;
>> - struct dma_fence *f, *tmp;
>> struct amdgpu_ring *ring;
>> + struct dma_fence *f;
>> int r;
>>
>> entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
>> @@ -106,13 +106,13 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>> if (r)
>> goto error;
>>
>> - if (p->immediate) {
>> - tmp = dma_fence_get(f);
>> - swap(p->vm->last_immediate, f);
>> + if (p->unlocked) {
>> + struct dma_fence *tmp = dma_fence_get(f);
>> +
>> + swap(p->vm->last_unlocked, f);
>> dma_fence_put(tmp);
>> } else {
>> - dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv,
>> - f);
>> + amdgpu_bo_fence(p->vm->root.base.bo, f, true);
>> }
>>
>> if (fence && !p->immediate)