[PATCH 3/5] drm/amdkfd: restore pages race with vma remove

Felix Kuehling felix.kuehling at amd.com
Wed Nov 10 03:52:30 UTC 2021


On 2021-11-09 6:04 p.m., Philip Yang wrote:
> Before restore pages takes mmap read or write lock, vma maybe removed.
> Check if vma exists before creating unregistered range or verifying
> range access permission, and return 0 if vma is removed to avoid restore
> pages return failure to report GPU vm fault to application.

Your patch basically means that we never return a VM fault to an application 
that accesses invalid addresses. The application will just spin on that 
address forever, which breaks the debugger. I think you need to 
refine that.

While draining faults (svms->drain_pagefaults is set), I think we can 
return success if no VMA is found. During that time we are expecting 
"straggler" faults. But once the draining is done, any fault interrupts 
we get should be from bad application accesses and should result in a VM 
fault for the application.

Also, much of your patch seems to be refactoring to avoid a duplicate 
VMA lookup. I'd split that into a separate patch for clarity.

Regards,
   Felix

>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 64 ++++++++++++++++------------
>   1 file changed, 37 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 64f642935600..8f77d5746b2c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -2336,20 +2336,13 @@ svm_range_best_restore_location(struct svm_range *prange,
>   }
>   
>   static int
> -svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
> -			       unsigned long *start, unsigned long *last,
> -			       bool *is_heap_stack)
> +svm_range_get_range_boundaries(struct kfd_process *p, struct vm_area_struct *vma,
> +			       int64_t addr, unsigned long *start,
> +			       unsigned long *last, bool *is_heap_stack)
>   {
> -	struct vm_area_struct *vma;
>   	struct interval_tree_node *node;
>   	unsigned long start_limit, end_limit;
>   
> -	vma = find_vma(p->mm, addr << PAGE_SHIFT);
> -	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
> -		pr_debug("VMA does not exist in address [0x%llx]\n", addr);
> -		return -EFAULT;
> -	}
> -
>   	*is_heap_stack = (vma->vm_start <= vma->vm_mm->brk &&
>   			  vma->vm_end >= vma->vm_mm->start_brk) ||
>   			 (vma->vm_start <= vma->vm_mm->start_stack &&
> @@ -2444,9 +2437,10 @@ svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t last,
>   
>   static struct
>   svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
> -						struct kfd_process *p,
> -						struct mm_struct *mm,
> -						int64_t addr)
> +					       struct kfd_process *p,
> +					       struct mm_struct *mm,
> +					       struct vm_area_struct *vma,
> +					       int64_t addr)
>   {
>   	struct svm_range *prange = NULL;
>   	unsigned long start, last;
> @@ -2456,7 +2450,7 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
>   	uint64_t bo_l = 0;
>   	int r;
>   
> -	if (svm_range_get_range_boundaries(p, addr, &start, &last,
> +	if (svm_range_get_range_boundaries(p, vma, addr, &start, &last,
>   					   &is_heap_stack))
>   		return NULL;
>   
> @@ -2558,21 +2552,22 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
>   		WRITE_ONCE(pdd->faults, pdd->faults + 1);
>   }
>   
> -static bool
> -svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault)
> +static int
> +svm_fault_allowed(struct mm_struct *mm, struct vm_area_struct *vma,
> +		  uint64_t addr, bool write_fault)
>   {
>   	unsigned long requested = VM_READ;
> -	struct vm_area_struct *vma;
>   
>   	if (write_fault)
>   		requested |= VM_WRITE;
>   
> -	vma = find_vma(mm, addr << PAGE_SHIFT);
> -	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
> -		pr_debug("address 0x%llx VMA is removed\n", addr);
> -		return true;
> +	if (!vma) {
> +		vma = find_vma(mm, addr << PAGE_SHIFT);
> +		if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
> +			pr_debug("address 0x%llx VMA is removed\n", addr);
> +			return -EFAULT;
> +		}
>   	}
> -
>   	pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
>   		vma->vm_flags);
>   	return (vma->vm_flags & requested) == requested;
> @@ -2590,6 +2585,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>   	int32_t best_loc;
>   	int32_t gpuidx = MAX_GPU_INSTANCE;
>   	bool write_locked = false;
> +	struct vm_area_struct *vma = NULL;
>   	int r = 0;
>   
>   	if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
> @@ -2636,7 +2632,15 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>   			write_locked = true;
>   			goto retry_write_locked;
>   		}
> -		prange = svm_range_create_unregistered_range(adev, p, mm, addr);
> +
> +		vma = find_vma(p->mm, addr << PAGE_SHIFT);
> +		if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
> +			pr_debug("VMA not found address [0x%llx]\n", addr);
> +			mmap_write_downgrade(mm);
> +			r = 0;
> +			goto out_unlock_svms;
> +		}
> +		prange = svm_range_create_unregistered_range(adev, p, mm, vma, addr);
>   		if (!prange) {
>   			pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
>   				 svms, addr);
> @@ -2663,10 +2667,16 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>   		goto out_unlock_range;
>   	}
>   
> -	if (!svm_fault_allowed(mm, addr, write_fault)) {
> -		pr_debug("fault addr 0x%llx no %s permission\n", addr,
> -			write_fault ? "write" : "read");
> -		r = -EPERM;
> +	r = svm_fault_allowed(mm, vma, addr, write_fault);
> +	if (r <= 0) {
> +		if (!r) {
> +			pr_debug("fault addr 0x%llx no %s permission\n", addr,
> +				write_fault ? "write" : "read");
> +			r = -EPERM;
> +		} else  {
> +			pr_debug("fault addr 0x%llx is unmapping\n", addr);
> +			r = 0;
> +		}
>   		goto out_unlock_range;
>   	}
>   


More information about the amd-gfx mailing list