[PATCH v2 1/3] drm/amdkfd: process exit and retry fault race

Felix Kuehling felix.kuehling at amd.com
Tue Nov 23 02:29:35 UTC 2021


Am 2021-11-19 um 5:29 p.m. schrieb Philip Yang:
> kfd_process_wq_release drains retry faults to ensure that no retry fault
> comes in after the kfd process is removed from the hash table; otherwise
> the svm page fault handler fails to recover the fault and dumps a GPU vm
> fault log.
>
> Refactor the deferred list work to call get_task_mm and take the mmap
> write lock once for all ranges, and to avoid the mm going away while
> inserting the mmu notifier.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
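
For anyone skimming the diff below, the shape the worker moves to is this
(a trimmed sketch of the pattern using only calls from the patch; error
paths and the drain/retry loop are elided):

    /* One mm reference and one mmap write lock now span the whole
     * deferred list, instead of lock/unlock per range.
     */
    mm = get_task_mm(p->lead_thread);   /* pins the mm; NULL if already gone */
    if (!mm)
            return;
    mmap_write_lock(mm);
    /* ... drain retry faults if requested, handle all deferred ranges ... */
    mmap_write_unlock(mm);
    mmput(mm);                          /* drop the reference taken above */

Holding the mm reference for the whole run is what makes it safe to insert
the mmu notifier from the worker.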

The series is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 61 ++++++++++++++++------------
>  1 file changed, 35 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 9e566ec54cf5..5fa540828ed0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -1979,43 +1979,42 @@ static void svm_range_deferred_list_work(struct work_struct *work)
>  	struct svm_range_list *svms;
>  	struct svm_range *prange;
>  	struct mm_struct *mm;
> +	struct kfd_process *p;
>  
>  	svms = container_of(work, struct svm_range_list, deferred_list_work);
>  	pr_debug("enter svms 0x%p\n", svms);
>  
> +	p = container_of(svms, struct kfd_process, svms);
> +	/* Avoid the mm going away while inserting the mmu notifier */
> +	mm = get_task_mm(p->lead_thread);
> +	if (!mm) {
> +		pr_debug("svms 0x%p process mm gone\n", svms);
> +		return;
> +	}
> +retry:
> +	mmap_write_lock(mm);
> +
> +	/* Checking for the need to drain retry faults must be inside
> +	 * mmap write lock to serialize with munmap notifiers.
> +	 */
> +	if (unlikely(READ_ONCE(svms->drain_pagefaults))) {
> +		WRITE_ONCE(svms->drain_pagefaults, false);
> +		mmap_write_unlock(mm);
> +		svm_range_drain_retry_fault(svms);
> +		goto retry;
> +	}
> +
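
A note for future readers on why testing the flag under the mmap write lock
is sufficient: as I read the existing code (svm_range_unmap_from_cpu, not
part of this hunk), the setter runs from the mmu notifier while the
unmapping task already holds the mmap write lock, roughly:

    /* munmap side, sketched from my reading of kfd_svm.c; not this patch */
    WRITE_ONCE(svms->drain_pagefaults, true);
    schedule_deferred_list_work(svms);

So once the worker observes the flag clear while holding the same lock,
every retry fault queued by an earlier munmap has already been drained.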
>  	spin_lock(&svms->deferred_list_lock);
>  	while (!list_empty(&svms->deferred_range_list)) {
>  		prange = list_first_entry(&svms->deferred_range_list,
>  					  struct svm_range, deferred_list);
> +		list_del_init(&prange->deferred_list);
>  		spin_unlock(&svms->deferred_list_lock);
> +
>  		pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
>  			 prange->start, prange->last, prange->work_item.op);
>  
> -		mm = prange->work_item.mm;
> -retry:
> -		mmap_write_lock(mm);
>  		mutex_lock(&svms->lock);
> -
> -		/* Checking for the need to drain retry faults must be in
> -		 * mmap write lock to serialize with munmap notifiers.
> -		 *
> -		 * Remove from deferred_list must be inside mmap write lock,
> -		 * otherwise, svm_range_list_lock_and_flush_work may hold mmap
> -		 * write lock, and continue because deferred_list is empty, then
> -		 * deferred_list handle is blocked by mmap write lock.
> -		 */
> -		spin_lock(&svms->deferred_list_lock);
> -		if (unlikely(svms->drain_pagefaults)) {
> -			svms->drain_pagefaults = false;
> -			spin_unlock(&svms->deferred_list_lock);
> -			mutex_unlock(&svms->lock);
> -			mmap_write_unlock(mm);
> -			svm_range_drain_retry_fault(svms);
> -			goto retry;
> -		}
> -		list_del_init(&prange->deferred_list);
> -		spin_unlock(&svms->deferred_list_lock);
> -
>  		mutex_lock(&prange->migrate_mutex);
>  		while (!list_empty(&prange->child_list)) {
>  			struct svm_range *pchild;
> @@ -2031,12 +2030,13 @@ static void svm_range_deferred_list_work(struct work_struct *work)
>  
>  		svm_range_handle_list_op(svms, prange);
>  		mutex_unlock(&svms->lock);
> -		mmap_write_unlock(mm);
>  
>  		spin_lock(&svms->deferred_list_lock);
>  	}
>  	spin_unlock(&svms->deferred_list_lock);
>  
> +	mmap_write_unlock(mm);
> +	mmput(mm);
>  	pr_debug("exit svms 0x%p\n", svms);
>  }
>  
> @@ -2600,10 +2600,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>  
>  	pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
>  
> +	/* p->lead_thread is available as kfd_process_wq_release flushes the work
> +	 * before releasing the task ref.
> +	 */
>  	mm = get_task_mm(p->lead_thread);
>  	if (!mm) {
>  		pr_debug("svms 0x%p failed to get mm\n", svms);
> -		r = -ESRCH;
>  		goto out;
>  	}
>  
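
One subtlety in the hunk above worth calling out: with the early
r = -ESRCH assignment gone, and assuming r is still initialized to 0 at
the top of the function, a retry fault from an exiting process is now
reported as handled rather than failed:

    mm = get_task_mm(p->lead_thread);
    if (!mm) {
            pr_debug("svms 0x%p failed to get mm\n", svms);
            goto out;   /* r == 0: process is exiting, treat the fault as
                         * recovered instead of dumping a GPU vm fault */
    }

which is what keeps the spurious vm fault dump out of the log on teardown.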
> @@ -2730,6 +2732,13 @@ void svm_range_list_fini(struct kfd_process *p)
>  	/* Ensure list work is finished before process is destroyed */
>  	flush_work(&p->svms.deferred_list_work);
>  
> +	/*
> +	 * Ensure no retry fault comes in afterwards, as the page fault handler
> +	 * will not find the kfd process and take the mm lock to recover the fault.
> +	 */
> +	svm_range_drain_retry_fault(&p->svms);
> +
> +
>  	list_for_each_entry_safe(prange, next, &p->svms.list, list) {
>  		svm_range_unlink(prange);
>  		svm_range_remove_notifier(prange);
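
The resulting teardown ordering is the part doing the real work here;
spelled out with the calls from the patch (the comments are my summary):

    flush_work(&p->svms.deferred_list_work); /* 1: deferred work finished,
                                              *    nothing left to requeue */
    svm_range_drain_retry_fault(&p->svms);   /* 2: consume retry faults
                                              *    already queued, so none
                                              *    arrives after the process
                                              *    leaves the hash table */
    /* 3: only now unlink the pranges and remove their notifiers */

Doing the drain after the flush ensures the deferred work cannot slip
another batch of faults in behind it.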

