[PATCH 4/4] drm/amdgpu: add new unlocked flag for PTE updates

Christian König ckoenig.leichtzumerken at gmail.com
Wed Apr 8 07:17:21 UTC 2020


Am 07.04.20 um 21:24 schrieb Felix Kuehling:
> Am 2020-04-07 um 10:27 a.m. schrieb Christian König:
>> For HMM support we need the ability to invalidate PTEs from
>> an MM callback where we can't lock the root PD.
>>
>> Add a new flag to better support this instead of assuming
>> that all invalidation updates are unlocked.
> Thanks for this patch series. It looks good to me. Alex, please take a
> look at the new "unlocked" parameter in amdgpu_vm_bo_update_mapping.
> Does this work for your MMU notifier implementation? I think this should
> be all that's needed for unmapping in an MMU notifier with SVM, where we
> won't unmap complete BOs.
>
> It may not work with your initial prototype that was BO-based, though.
> For that one you may need to propagate the "unlocked" parameter all the
> way up to amdgpu_vm_bo_update.

You can't base the MMU notifier unmap on BOs anyway because you would 
need to lock the BO for that.

Best practice here is probably to call amdgpu_vm_bo_update_mapping() 
directly. Maybe we should rename that function to 
amdgpu_vm_bo_update_range().
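
Something like this, very roughly. To be clear, what follows is just a
sketch of the direction and not tested code: svm_range, its fields and
the notifier wiring are made up for illustration,
amdgpu_vm_bo_update_mapping() is still static so it would need to be
exported for this, and it assumes GPU VA == CPU VA so that the CPU range
converts with a simple page shift. Only the
amdgpu_vm_bo_update_mapping() call itself uses the signature from this
patch:

#include <linux/mmu_notifier.h>
#include "amdgpu.h"

/* Hypothetical per-range tracking structure, not part of this series */
struct svm_range {
	struct mmu_interval_notifier	notifier;
	struct amdgpu_device		*adev;
	struct amdgpu_vm		*vm;
};

static bool svm_range_invalidate(struct mmu_interval_notifier *mni,
				 const struct mmu_notifier_range *range,
				 unsigned long cur_seq)
{
	struct svm_range *prange = container_of(mni, struct svm_range,
						notifier);
	struct dma_fence *fence = NULL;
	int r;

	if (!mmu_notifier_range_blockable(range))
		return false;

	mmu_interval_set_seq(mni, cur_seq);

	/* Zap the PTEs covering the invalidated range. The root PD is
	 * not locked here, so immediate=false, unlocked=true, no resv
	 * to sync to and flags=0 to write invalid entries.
	 */
	r = amdgpu_vm_bo_update_mapping(prange->adev, prange->vm,
					false, true, NULL,
					range->start >> PAGE_SHIFT,
					(range->end >> PAGE_SHIFT) - 1,
					0, 0, NULL, &fence);
	if (!r && fence) {
		/* Blockable context, so wait for the HW before returning */
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}

	return true;
}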

Regards,
Christian.

>
> The series is
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
>
>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 42 ++++++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  9 ++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 12 +++---
>>   3 files changed, 37 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 1ee87b460b96..4ca4f61b34ca 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1446,7 +1446,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>   		uint64_t incr, entry_end, pe_start;
>>   		struct amdgpu_bo *pt;
>>   
>> -		if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>> +		if (!params->unlocked) {
>>   			/* make sure that the page tables covering the
>>   			 * address range are actually allocated
>>   			 */
>> @@ -1458,8 +1458,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>   
>>   		shift = amdgpu_vm_level_shift(adev, cursor.level);
>>   		parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
>> -		if (adev->asic_type < CHIP_VEGA10 &&
>> -		    (flags & AMDGPU_PTE_VALID)) {
>> +		if (params->unlocked) {
>> +			/* Unlocked updates are only allowed on the leaves */
>> +			if (amdgpu_vm_pt_descendant(adev, &cursor))
>> +				continue;
>> +		} else if (adev->asic_type < CHIP_VEGA10 &&
>> +			   (flags & AMDGPU_PTE_VALID)) {
>>   			/* No huge page support before GMC v9 */
>>   			if (cursor.level != AMDGPU_VM_PTB) {
>>   				if (!amdgpu_vm_pt_descendant(adev, &cursor))
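
A note on the two hunks above, since they are the core of the feature:
unlocked updates skip the page table allocation step and are forced all
the way down to already existing leaf PTBs, because without the root PD
reservation we must not change the page table structure itself (no
allocations and no huge page merging or splitting). Annotated:

	if (params->unlocked) {
		/* No structure changes allowed: descend to the existing
		 * PTB and only rewrite the leaf entries there.
		 */
		if (amdgpu_vm_pt_descendant(adev, &cursor))
			continue;
	}
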
>> @@ -1558,6 +1562,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>    * @adev: amdgpu_device pointer
>>    * @vm: requested vm
>>    * @immediate: immediate submission in a page fault
>> + * @unlocked: unlocked invalidation during MM callback
>>    * @resv: fences we need to sync to
>>    * @start: start of mapped range
>>    * @last: last mapped entry
>> @@ -1573,7 +1578,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>    */
>>   static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   				       struct amdgpu_vm *vm, bool immediate,
>> -				       struct dma_resv *resv,
>> +				       bool unlocked, struct dma_resv *resv,
>>   				       uint64_t start, uint64_t last,
>>   				       uint64_t flags, uint64_t addr,
>>   				       dma_addr_t *pages_addr,
>> @@ -1603,11 +1608,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   		goto error_unlock;
>>   	}
>>   
>> -	if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>> -		struct amdgpu_bo *root = vm->root.base.bo;
>> +	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
>> +		struct dma_fence *tmp = dma_fence_get_stub();
>>   
>> -		if (!dma_fence_is_signaled(vm->last_immediate))
>> -			amdgpu_bo_fence(root, vm->last_immediate, true);
>> +		amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
>> +		swap(vm->last_unlocked, tmp);
>> +		dma_fence_put(tmp);
>>   	}
>>   
>>   	r = vm->update_funcs->prepare(&params, resv, sync_mode);
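
And one on the hunk directly above, since the stub dance is a bit
subtle: before a locked update is prepared, a still pending unlocked
fence is attached to the root PD reservation, so everybody syncing to
the root PD waits for the unlocked submission as well; the fence is then
replaced by an already signaled stub so that it gets attached only once.
Annotated:

	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
		struct dma_fence *tmp = dma_fence_get_stub();

		/* Publish the pending fence in the root PD reservation;
		 * amdgpu_bo_fence() grabs its own reference.
		 */
		amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
		/* Swap in the signaled stub and drop our reference to
		 * the now published fence.
		 */
		swap(vm->last_unlocked, tmp);
		dma_fence_put(tmp);
	}
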
>> @@ -1721,7 +1727,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>>   		}
>>   
>>   		last = min((uint64_t)mapping->last, start + max_entries - 1);
>> -		r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
>> +		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>>   						start, last, flags, addr,
>>   						dma_addr, fence);
>>   		if (r)
>> @@ -2018,7 +2024,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>   		    mapping->start < AMDGPU_GMC_HOLE_START)
>>   			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>   
>> -		r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
>> +		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>>   						mapping->start, mapping->last,
>>   						init_pte_value, 0, NULL, &f);
>>   		amdgpu_vm_free_mapping(adev, vm, mapping, f);
>> @@ -2589,7 +2595,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>>   		return false;
>>   
>>   	/* Don't evict VM page tables while they are updated */
>> -	if (!dma_fence_is_signaled(bo_base->vm->last_immediate)) {
>> +	if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
>>   		amdgpu_vm_eviction_unlock(bo_base->vm);
>>   		return false;
>>   	}
>> @@ -2766,7 +2772,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
>>   	if (timeout <= 0)
>>   		return timeout;
>>   
>> -	return dma_fence_wait_timeout(vm->last_immediate, true, timeout);
>> +	return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
>>   }
>>   
>>   /**
>> @@ -2838,7 +2844,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   	else
>>   		vm->update_funcs = &amdgpu_vm_sdma_funcs;
>>   	vm->last_update = NULL;
>> -	vm->last_immediate = dma_fence_get_stub();
>> +	vm->last_unlocked = dma_fence_get_stub();
>>   
>>   	mutex_init(&vm->eviction_lock);
>>   	vm->evicting = false;
>> @@ -2892,7 +2898,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   	vm->root.base.bo = NULL;
>>   
>>   error_free_delayed:
>> -	dma_fence_put(vm->last_immediate);
>> +	dma_fence_put(vm->last_unlocked);
>>   	drm_sched_entity_destroy(&vm->delayed);
>>   
>>   error_free_immediate:
>> @@ -3093,8 +3099,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>>   		vm->pasid = 0;
>>   	}
>>   
>> -	dma_fence_wait(vm->last_immediate, false);
>> -	dma_fence_put(vm->last_immediate);
>> +	dma_fence_wait(vm->last_unlocked, false);
>> +	dma_fence_put(vm->last_unlocked);
>>   
>>   	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
>>   		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
>> @@ -3347,8 +3353,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>>   		value = 0;
>>   	}
>>   
>> -	r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
>> -					flags, value, NULL, NULL);
>> +	r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
>> +					addr + 1, flags, value, NULL, NULL);
>>   	if (r)
>>   		goto error_unlock;
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 0b64200ec45f..ea771d84bf2b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -210,6 +210,11 @@ struct amdgpu_vm_update_params {
>>   	 */
>>   	bool immediate;
>>   
>> +	/**
>> +	 * @unlocked: true if the root BO is not locked
>> +	 */
>> +	bool unlocked;
>> +
>>   	/**
>>   	 * @pages_addr:
>>   	 *
>> @@ -277,8 +282,8 @@ struct amdgpu_vm {
>>   	struct drm_sched_entity	immediate;
>>   	struct drm_sched_entity	delayed;
>>   
>> -	/* Last submission to the scheduler entities */
>> -	struct dma_fence	*last_immediate;
>> +	/* Last unlocked submission to the scheduler entities */
>> +	struct dma_fence	*last_unlocked;
>>   
>>   	unsigned int		pasid;
>>   	/* dedicated to vm */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> index c78bcebd9378..8d9c6feba660 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> @@ -92,8 +92,8 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>   {
>>   	struct amdgpu_ib *ib = p->job->ibs;
>>   	struct drm_sched_entity *entity;
>> -	struct dma_fence *f, *tmp;
>>   	struct amdgpu_ring *ring;
>> +	struct dma_fence *f;
>>   	int r;
>>   
>>   	entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
>> @@ -106,13 +106,13 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>   	if (r)
>>   		goto error;
>>   
>> -	if (p->immediate) {
>> -		tmp = dma_fence_get(f);
>> -		swap(p->vm->last_immediate, tmp);
>> +	if (p->unlocked) {
>> +		struct dma_fence *tmp = dma_fence_get(f);
>> +
>> +		swap(p->vm->last_unlocked, tmp);
>>   		dma_fence_put(tmp);
>>   	} else {
>> -		dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv,
>> -					  f);
>> +		amdgpu_bo_fence(p->vm->root.base.bo, f, true);
>>   	}
>>   
>>   	if (fence && !p->immediate)
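
One last note on the final hunk, because the reference counting there is
easy to misread:

	if (p->unlocked) {
		/* Take an extra reference for vm->last_unlocked ... */
		struct dma_fence *tmp = dma_fence_get(f);

		/* ... publish the new fence and drop the old one. The
		 * local f stays untouched for the fence handling at the
		 * end of the function.
		 */
		swap(p->vm->last_unlocked, tmp);
		dma_fence_put(tmp);
	} else {
		/* Root PD is locked, so the fence can go directly into
		 * its reservation object as before.
		 */
		amdgpu_bo_fence(p->vm->root.base.bo, f, true);
	}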


