[PATCH v2 12/15] drm/amdgpu: Call find_vma under mmap_sem
Christian König
ckoenig.leichtzumerken at gmail.com
Tue Oct 29 13:07:37 UTC 2019
Am 29.10.19 um 17:28 schrieb Kuehling, Felix:
> On 2019-10-28 4:10 p.m., Jason Gunthorpe wrote:
>> From: Jason Gunthorpe <jgg at mellanox.com>
>>
>> find_vma() must be called under the mmap_sem; reorganize this code to
>> do the vma check after entering the lock.
>>
>> Further, fix the unlocked use of struct task_struct's mm, instead use
>> the mm from hmm_mirror which has an active mm_grab. Also the mm_grab
>> must be converted to a mm_get before acquiring mmap_sem or calling
>> find_vma().
>>
>> Fixes: 66c45500bfdc ("drm/amdgpu: use new HMM APIs and helpers")
>> Fixes: 0919195f2b0d ("drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads")
>> Cc: Alex Deucher <alexander.deucher at amd.com>
>> Cc: Christian König <christian.koenig at amd.com>
>> Cc: David (ChunMing) Zhou <David1.Zhou at amd.com>
>> Cc: amd-gfx at lists.freedesktop.org
>> Signed-off-by: Jason Gunthorpe <jgg at mellanox.com>
> One question inline to confirm my understanding. Otherwise this patch is
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
>
>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 37 ++++++++++++++-----------
>> 1 file changed, 21 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index dff41d0a85fe96..c0e41f1f0c2365 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -35,6 +35,7 @@
>> #include <linux/hmm.h>
>> #include <linux/pagemap.h>
>> #include <linux/sched/task.h>
>> +#include <linux/sched/mm.h>
>> #include <linux/seq_file.h>
>> #include <linux/slab.h>
>> #include <linux/swap.h>
>> @@ -788,7 +789,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
>> struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL;
>> struct ttm_tt *ttm = bo->tbo.ttm;
>> struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> - struct mm_struct *mm = gtt->usertask->mm;
>> + struct mm_struct *mm;
>> unsigned long start = gtt->userptr;
>> struct vm_area_struct *vma;
>> struct hmm_range *range;
>> @@ -796,25 +797,14 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
>> uint64_t *pfns;
>> int r = 0;
>>
>> - if (!mm) /* Happens during process shutdown */
>> - return -ESRCH;
>> -
>> if (unlikely(!mirror)) {
>> DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
>> - r = -EFAULT;
>> - goto out;
>> + return -EFAULT;
>> }
>>
>> - vma = find_vma(mm, start);
>> - if (unlikely(!vma || start < vma->vm_start)) {
>> - r = -EFAULT;
>> - goto out;
>> - }
>> - if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
>> - vma->vm_file)) {
>> - r = -EPERM;
>> - goto out;
>> - }
>> + mm = mirror->hmm->mmu_notifier.mm;
>> + if (!mmget_not_zero(mm)) /* Happens during process shutdown */
> This works because mirror->hmm->mmu_notifier holds an mmgrab reference
> to the mm? So the MM will not just go away, but if the mmget refcount is
> 0, it means the mm is marked for destruction and shouldn't be used any more.
Yes, exactly. That is a rather common pattern: one reference count for
the functionality and one for the structure.
When the functionality is gone, the structure might still be alive for
some reason. TTM and a couple of other structures use the same approach.
Christian.
>
>
>> + return -ESRCH;
>>
>> range = kzalloc(sizeof(*range), GFP_KERNEL);
>> if (unlikely(!range)) {
>> @@ -847,6 +837,17 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
>> hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
>>
>> down_read(&mm->mmap_sem);
>> + vma = find_vma(mm, start);
>> + if (unlikely(!vma || start < vma->vm_start)) {
>> + r = -EFAULT;
>> + goto out_unlock;
>> + }
>> + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
>> + vma->vm_file)) {
>> + r = -EPERM;
>> + goto out_unlock;
>> + }
>> +
>> r = hmm_range_fault(range, 0);
>> up_read(&mm->mmap_sem);
>>
>> @@ -865,15 +866,19 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
>> }
>>
>> gtt->range = range;
>> + mmput(mm);
>>
>> return 0;
>>
>> +out_unlock:
>> + up_read(&mm->mmap_sem);
>> out_free_pfns:
>> hmm_range_unregister(range);
>> kvfree(pfns);
>> out_free_ranges:
>> kfree(range);
>> out:
>> + mmput(mm);
>> return r;
>> }
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the dri-devel mailing list