[PATCH v2 12/15] drm/amdgpu: Call find_vma under mmap_sem

Tue Oct 29 16:28:43 UTC 2019

On 2019-10-28 4:10 p.m., Jason Gunthorpe wrote:
> From: Jason Gunthorpe <jgg at mellanox.com>
>
> find_vma() must be called under the mmap_sem, reorganize this code to
> do the vma check after entering the lock.
>
> Further, fix the unlocked use of struct task_struct's mm, instead use
> the mm from hmm_mirror which has an active mm_grab. Also the mm_grab
> must be converted to a mm_get before acquiring mmap_sem or calling
> find_vma().
>
> Fixes: 66c45500bfdc ("drm/amdgpu: use new HMM APIs and helpers")
> Fixes: 0919195f2b0d ("drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads")
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: David (ChunMing) Zhou <David1.Zhou at amd.com>
> Cc: amd-gfx at lists.freedesktop.org
> Signed-off-by: Jason Gunthorpe <jgg at mellanox.com>

One question inline to confirm my understanding. Otherwise this patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 37 ++++++++++++++-----------
>   1 file changed, 21 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index dff41d0a85fe96..c0e41f1f0c2365 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -35,6 +35,7 @@
>   #include <linux/hmm.h>
>   #include <linux/pagemap.h>
>   #include <linux/sched/task.h>
> +#include <linux/sched/mm.h>
>   #include <linux/seq_file.h>
>   #include <linux/slab.h>
>   #include <linux/swap.h>
> @@ -788,7 +789,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
>   	struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL;
>   	struct ttm_tt *ttm = bo->tbo.ttm;
>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	struct mm_struct *mm = gtt->usertask->mm;
> +	struct mm_struct *mm;
>   	unsigned long start = gtt->userptr;
>   	struct vm_area_struct *vma;
>   	struct hmm_range *range;
> @@ -796,25 +797,14 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
>   	uint64_t *pfns;
>   	int r = 0;
>   
> -	if (!mm) /* Happens during process shutdown */
> -		return -ESRCH;
> -
>   	if (unlikely(!mirror)) {
>   		DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
> -		r = -EFAULT;
> -		goto out;
> +		return -EFAULT;
>   	}
>   
> -	vma = find_vma(mm, start);
> -	if (unlikely(!vma || start < vma->vm_start)) {
> -		r = -EFAULT;
> -		goto out;
> -	}
> -	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> -		vma->vm_file)) {
> -		r = -EPERM;
> -		goto out;
> -	}
> +	mm = mirror->hmm->mmu_notifier.mm;
> +	if (!mmget_not_zero(mm)) /* Happens during process shutdown */

This works because mirror->hmm->mmu_notifier holds an mmgrab reference 
to the mm? So the MM will not just go away, but if the mmget refcount is 
0, it means the mm is marked for destruction and shouldn't be used any more.

> +		return -ESRCH;
>   
>   	range = kzalloc(sizeof(*range), GFP_KERNEL);
>   	if (unlikely(!range)) {
> @@ -847,6 +837,17 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
>   	hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
>   
>   	down_read(&mm->mmap_sem);
> +	vma = find_vma(mm, start);
> +	if (unlikely(!vma || start < vma->vm_start)) {
> +		r = -EFAULT;
> +		goto out_unlock;
> +	}
> +	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> +		vma->vm_file)) {
> +		r = -EPERM;
> +		goto out_unlock;
> +	}
> +
>   	r = hmm_range_fault(range, 0);
>   	up_read(&mm->mmap_sem);
>   
> @@ -865,15 +866,19 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
>   	}
>   
>   	gtt->range = range;
> +	mmput(mm);
>   
>   	return 0;
>   
> +out_unlock:
> +	up_read(&mm->mmap_sem);
>   out_free_pfns:
>   	hmm_range_unregister(range);
>   	kvfree(pfns);
>   out_free_ranges:
>   	kfree(range);
>   out:
> +	mmput(mm);
>   	return r;
>   }
>