[PATCH 4/4] drm/amdgpu: add new unlocked flag for PTE updates

Tue Apr 7 19:24:11 UTC 2020

Am 2020-04-07 um 10:27 a.m. schrieb Christian König:
> For HMM support we need the ability to invalidate PTEs from
> a MM callback where we can't lock the root PD.
>
> Add a new flag to better support this instead of assuming
> that all invalidation updates are unlocked.

Thanks for this patch series. It looks good to me. Alex, please take a
look at the new "unlocked" parameter in amdgpu_vm_bo_update_mapping.
Does this work for your MMU notifier implementation? I think this should
be all that's needed for unmapping in an MMU notifier with SVM, where we
won't unmap complete BOs.

It may not work with your initial prototype that was BO-based, though.
For that one you may need to propagate the "unlocked" parameter all the
way up to amdgpu_vm_bo_update.

The series is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>

>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 42 ++++++++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  9 ++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 12 +++---
>  3 files changed, 37 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 1ee87b460b96..4ca4f61b34ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1446,7 +1446,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>  		uint64_t incr, entry_end, pe_start;
>  		struct amdgpu_bo *pt;
>  
> -		if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
> +		if (!params->unlocked) {
>  			/* make sure that the page tables covering the
>  			 * address range are actually allocated
>  			 */
> @@ -1458,8 +1458,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>  
>  		shift = amdgpu_vm_level_shift(adev, cursor.level);
>  		parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
> -		if (adev->asic_type < CHIP_VEGA10 &&
> -		    (flags & AMDGPU_PTE_VALID)) {
> +		if (params->unlocked) {
> +			/* Unlocked updates are only allowed on the leaves */
> +			if (amdgpu_vm_pt_descendant(adev, &cursor))
> +				continue;
> +		} else if (adev->asic_type < CHIP_VEGA10 &&
> +			   (flags & AMDGPU_PTE_VALID)) {
>  			/* No huge page support before GMC v9 */
>  			if (cursor.level != AMDGPU_VM_PTB) {
>  				if (!amdgpu_vm_pt_descendant(adev, &cursor))
> @@ -1558,6 +1562,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>   * @adev: amdgpu_device pointer
>   * @vm: requested vm
>   * @immediate: immediate submission in a page fault
> + * @unlocked: unlocked invalidation during MM callback
>   * @resv: fences we need to sync to
>   * @start: start of mapped range
>   * @last: last mapped entry
> @@ -1573,7 +1578,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>   */
>  static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  				       struct amdgpu_vm *vm, bool immediate,
> -				       struct dma_resv *resv,
> +				       bool unlocked, struct dma_resv *resv,
>  				       uint64_t start, uint64_t last,
>  				       uint64_t flags, uint64_t addr,
>  				       dma_addr_t *pages_addr,
> @@ -1603,11 +1608,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  		goto error_unlock;
>  	}
>  
> -	if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
> -		struct amdgpu_bo *root = vm->root.base.bo;
> +	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
> +		struct dma_fence *tmp = dma_fence_get_stub();
>  
> -		if (!dma_fence_is_signaled(vm->last_immediate))
> -			amdgpu_bo_fence(root, vm->last_immediate, true);
> +		amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
> +		swap(vm->last_unlocked, tmp);
> +		dma_fence_put(tmp);
>  	}
>  
>  	r = vm->update_funcs->prepare(&params, resv, sync_mode);
> @@ -1721,7 +1727,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>  		}
>  
>  		last = min((uint64_t)mapping->last, start + max_entries - 1);
> -		r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
> +		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>  						start, last, flags, addr,
>  						dma_addr, fence);
>  		if (r)
> @@ -2018,7 +2024,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>  		    mapping->start < AMDGPU_GMC_HOLE_START)
>  			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>  
> -		r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
> +		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
>  						mapping->start, mapping->last,
>  						init_pte_value, 0, NULL, &f);
>  		amdgpu_vm_free_mapping(adev, vm, mapping, f);
> @@ -2589,7 +2595,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>  		return false;
>  
>  	/* Don't evict VM page tables while they are updated */
> -	if (!dma_fence_is_signaled(bo_base->vm->last_immediate)) {
> +	if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
>  		amdgpu_vm_eviction_unlock(bo_base->vm);
>  		return false;
>  	}
> @@ -2766,7 +2772,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
>  	if (timeout <= 0)
>  		return timeout;
>  
> -	return dma_fence_wait_timeout(vm->last_immediate, true, timeout);
> +	return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
>  }
>  
>  /**
> @@ -2838,7 +2844,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  	else
>  		vm->update_funcs = &amdgpu_vm_sdma_funcs;
>  	vm->last_update = NULL;
> -	vm->last_immediate = dma_fence_get_stub();
> +	vm->last_unlocked = dma_fence_get_stub();
>  
>  	mutex_init(&vm->eviction_lock);
>  	vm->evicting = false;
> @@ -2892,7 +2898,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  	vm->root.base.bo = NULL;
>  
>  error_free_delayed:
> -	dma_fence_put(vm->last_immediate);
> +	dma_fence_put(vm->last_unlocked);
>  	drm_sched_entity_destroy(&vm->delayed);
>  
>  error_free_immediate:
> @@ -3093,8 +3099,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>  		vm->pasid = 0;
>  	}
>  
> -	dma_fence_wait(vm->last_immediate, false);
> -	dma_fence_put(vm->last_immediate);
> +	dma_fence_wait(vm->last_unlocked, false);
> +	dma_fence_put(vm->last_unlocked);
>  
>  	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
>  		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
> @@ -3347,8 +3353,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>  		value = 0;
>  	}
>  
> -	r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
> -					flags, value, NULL, NULL);
> +	r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
> +					addr + 1, flags, value, NULL, NULL);
>  	if (r)
>  		goto error_unlock;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 0b64200ec45f..ea771d84bf2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -210,6 +210,11 @@ struct amdgpu_vm_update_params {
>  	 */
>  	bool immediate;
>  
> +	/**
> +	 * @unlocked: true if the root BO is not locked
> +	 */
> +	bool unlocked;
> +
>  	/**
>  	 * @pages_addr:
>  	 *
> @@ -277,8 +282,8 @@ struct amdgpu_vm {
>  	struct drm_sched_entity	immediate;
>  	struct drm_sched_entity	delayed;
>  
> -	/* Last submission to the scheduler entities */
> -	struct dma_fence	*last_immediate;
> +	/* Last unlocked submission to the scheduler entities */
> +	struct dma_fence	*last_unlocked;
>  
>  	unsigned int		pasid;
>  	/* dedicated to vm */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> index c78bcebd9378..8d9c6feba660 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> @@ -92,8 +92,8 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>  {
>  	struct amdgpu_ib *ib = p->job->ibs;
>  	struct drm_sched_entity *entity;
> -	struct dma_fence *f, *tmp;
>  	struct amdgpu_ring *ring;
> +	struct dma_fence *f;
>  	int r;
>  
>  	entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
> @@ -106,13 +106,13 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>  	if (r)
>  		goto error;
>  
> -	if (p->immediate) {
> -		tmp = dma_fence_get(f);
> -		swap(p->vm->last_immediate, f);
> +	if (p->unlocked) {
> +		struct dma_fence *tmp = dma_fence_get(f);
> +
> +		swap(p->vm->last_unlocked, f);
>  		dma_fence_put(tmp);
>  	} else {
> -		dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv,
> -					  f);
> +		amdgpu_bo_fence(p->vm->root.base.bo, f, true);
>  	}
>  
>  	if (fence && !p->immediate)