[PATCH 3/3] drm/xe: Use hmm_range_fault to populate user pages
Matthew Brost
matthew.brost at intel.com
Wed Apr 3 03:09:00 UTC 2024
On Thu, Mar 21, 2024 at 11:55:20PM -0400, Oak Zeng wrote:
> This is an effort to unify hmmptr (aka system allocator)
> and userptr code. hmm_range_fault is now used to populate
> a virtual address range for both hmmptr and userptr,
> rather than hmmptr using hmm_range_fault while userptr
> uses get_user_pages_fast.
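For readers less familiar with the API, the canonical hmm_range_fault() flow
this series switches to looks roughly like the sketch below, following
Documentation/mm/hmm.rst. It is illustrative only -- populate_range() and its
parameters are hypothetical, not the xe_hmm.c helper added earlier in the
series -- and it assumes the caller has already guaranteed the mm stays alive
(e.g. via mmget_not_zero()):

#include <linux/hmm.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>

/* Fault in npages of user VA starting at @start and fill @pfns. */
static int populate_range(struct mm_struct *mm,
			  struct mmu_interval_notifier *notifier,
			  unsigned long start, unsigned long npages,
			  unsigned long *pfns, bool write)
{
	struct hmm_range range = {
		.notifier	= notifier,
		.start		= start,
		.end		= start + (npages << PAGE_SHIFT),
		.hmm_pfns	= pfns,
		.default_flags	= HMM_PFN_REQ_FAULT |
				  (write ? HMM_PFN_REQ_WRITE : 0),
	};
	int ret;

again:
	/* Snapshot the notifier sequence before taking the mmap lock. */
	range.notifier_seq = mmu_interval_read_begin(notifier);

	mmap_read_lock(mm);
	ret = hmm_range_fault(&range);
	mmap_read_unlock(mm);
	if (ret == -EBUSY)
		goto again;	/* an invalidation raced us, refault */
	if (ret)
		return ret;

	/*
	 * Unlike get_user_pages_fast(), nothing is pinned here: the pfns
	 * are only safe to commit to GPU page tables while
	 * mmu_interval_read_retry(notifier, range.notifier_seq) reports
	 * no concurrent invalidation.
	 */
	return 0;
}
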
>
> This also aligns with the AMD GPU driver's behavior. In
> the long term, we plan to move some common helpers in this
> area into the DRM layer so they can be reused by different
> vendors.
>
> Signed-off-by: Oak Zeng <oak.zeng at intel.com>
> ---
> drivers/gpu/drm/xe/xe_vm.c | 122 ++++---------------------------------
> 1 file changed, 12 insertions(+), 110 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index e3692b7e1711..c0a3bff1bb0e 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -38,6 +38,7 @@
> #include "xe_sync.h"
> #include "xe_trace.h"
> #include "xe_wa.h"
> +#include "xe_hmm.h"
>
> static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
> {
> @@ -65,113 +66,21 @@ int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
>
> int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
> {
> - struct xe_userptr *userptr = &uvma->userptr;
> struct xe_vma *vma = &uvma->vma;
> struct xe_vm *vm = xe_vma_vm(vma);
> struct xe_device *xe = vm->xe;
> - const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
> - struct page **pages;
> - bool in_kthread = !current->mm;
> - unsigned long notifier_seq;
> - int pinned, ret, i;
> - bool read_only = xe_vma_read_only(vma);
> + struct xe_userptr *userptr;
> + int ret;
>
> lockdep_assert_held(&vm->lock);
> xe_assert(xe, xe_vma_is_userptr(vma));
> -retry:
> - if (vma->gpuva.flags & XE_VMA_DESTROYED)
> - return 0;
> -
> - notifier_seq = mmu_interval_read_begin(&userptr->notifier);
> - if (notifier_seq == userptr->notifier_seq)
> - return 0;
> -
> - pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
> - if (!pages)
> - return -ENOMEM;
> -
> - if (userptr->sg) {
> - dma_unmap_sgtable(xe->drm.dev,
> - userptr->sg,
> - read_only ? DMA_TO_DEVICE :
> - DMA_BIDIRECTIONAL, 0);
> - sg_free_table(userptr->sg);
> - userptr->sg = NULL;
> - }
>
> - pinned = ret = 0;
> - if (in_kthread) {
> - if (!mmget_not_zero(userptr->notifier.mm)) {
> - ret = -EFAULT;
> - goto mm_closed;
> - }
> - kthread_use_mm(userptr->notifier.mm);
> - }
> -
> - while (pinned < num_pages) {
> - ret = get_user_pages_fast(xe_vma_userptr(vma) +
> - pinned * PAGE_SIZE,
> - num_pages - pinned,
> - read_only ? 0 : FOLL_WRITE,
> - &pages[pinned]);
> - if (ret < 0)
> - break;
> + userptr = &uvma->userptr;
> + mmap_read_lock(userptr->notifier.mm);
See my comments about locking and function arguments in previous patches.
Otherwise LGTM.
Matt
> + ret = xe_userptr_populate_range(uvma);
> + mmap_read_unlock(userptr->notifier.mm);
>
> - pinned += ret;
> - ret = 0;
> - }
> -
> - if (in_kthread) {
> - kthread_unuse_mm(userptr->notifier.mm);
> - mmput(userptr->notifier.mm);
> - }
> -mm_closed:
> - if (ret)
> - goto out;
> -
> - ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages,
> - pinned, 0,
> - (u64)pinned << PAGE_SHIFT,
> - xe_sg_segment_size(xe->drm.dev),
> - GFP_KERNEL);
> - if (ret) {
> - userptr->sg = NULL;
> - goto out;
> - }
> - userptr->sg = &userptr->sgt;
> -
> - ret = dma_map_sgtable(xe->drm.dev, userptr->sg,
> - read_only ? DMA_TO_DEVICE :
> - DMA_BIDIRECTIONAL,
> - DMA_ATTR_SKIP_CPU_SYNC |
> - DMA_ATTR_NO_KERNEL_MAPPING);
> - if (ret) {
> - sg_free_table(userptr->sg);
> - userptr->sg = NULL;
> - goto out;
> - }
> -
> - for (i = 0; i < pinned; ++i) {
> - if (!read_only) {
> - lock_page(pages[i]);
> - set_page_dirty(pages[i]);
> - unlock_page(pages[i]);
> - }
> -
> - mark_page_accessed(pages[i]);
> - }
> -
> -out:
> - release_pages(pages, pinned);
> - kvfree(pages);
> -
> - if (!(ret < 0)) {
> - userptr->notifier_seq = notifier_seq;
> - if (xe_vma_userptr_check_repin(uvma) == -EAGAIN)
> - goto retry;
> - }
> -
> - return ret < 0 ? ret : 0;
> + return ret;
> }
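As an aside for anyone mapping the old flow onto the new one: with
hmm_range_fault() the sg-table construction that the removed code above
performed on a GUP page array would instead be driven by the hmm pfn array.
A rough sketch under that assumption (map_hmm_pfns() and its parameters are
made up; hmm_pfn_to_page() is the real accessor; the xe_userptr fields are
the same ones used above):

static int map_hmm_pfns(struct xe_device *xe, struct xe_userptr *userptr,
			unsigned long *pfns, unsigned long npages,
			bool read_only)
{
	struct page **pages;
	unsigned long i;
	int ret;

	pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/* hmm_range_fault() hands back encoded pfns, not struct pages. */
	for (i = 0; i < npages; i++)
		pages[i] = hmm_pfn_to_page(pfns[i]);

	ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages,
						npages, 0,
						(u64)npages << PAGE_SHIFT,
						xe_sg_segment_size(xe->drm.dev),
						GFP_KERNEL);
	if (ret)
		goto out_free;

	ret = dma_map_sgtable(xe->drm.dev, &userptr->sgt,
			      read_only ? DMA_TO_DEVICE :
			      DMA_BIDIRECTIONAL,
			      DMA_ATTR_SKIP_CPU_SYNC |
			      DMA_ATTR_NO_KERNEL_MAPPING);
	if (ret) {
		sg_free_table(&userptr->sgt);
		goto out_free;
	}
	userptr->sg = &userptr->sgt;

	/*
	 * No release_pages()/set_page_dirty() bookkeeping as in the pinned
	 * path above; these pages were never pinned.
	 */
out_free:
	kvfree(pages);
	return ret;
}

Whether the real helper goes through a struct page array at all is of course
up to the earlier patches in the series; this just shows where the removed
logic would land.
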
>
> static bool preempt_fences_waiting(struct xe_vm *vm)
> @@ -921,8 +830,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
> static void xe_vma_destroy_late(struct xe_vma *vma)
> {
> struct xe_vm *vm = xe_vma_vm(vma);
> - struct xe_device *xe = vm->xe;
> - bool read_only = xe_vma_read_only(vma);
>
> if (vma->ufence) {
> xe_sync_ufence_put(vma->ufence);
> @@ -930,16 +837,11 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
> }
>
> if (xe_vma_is_userptr(vma)) {
> - struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
> + struct xe_userptr_vma *uvma = to_userptr_vma(vma);
> + struct xe_userptr *userptr = &uvma->userptr;
>
> - if (userptr->sg) {
> - dma_unmap_sgtable(xe->drm.dev,
> - userptr->sg,
> - read_only ? DMA_TO_DEVICE :
> - DMA_BIDIRECTIONAL, 0);
> - sg_free_table(userptr->sg);
> - userptr->sg = NULL;
> - }
> + if (userptr->sg)
> + xe_userptr_free_sg(uvma);
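For readers who don't have the earlier patch handy, xe_userptr_free_sg()
presumably just folds in the unmap/free that the removed lines above did; a
sketch under that assumption (the _sketch suffix is mine):

static void xe_userptr_free_sg_sketch(struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;
	struct xe_vm *vm = xe_vma_vm(&uvma->vma);
	bool read_only = xe_vma_read_only(&uvma->vma);

	dma_unmap_sgtable(vm->xe->drm.dev, userptr->sg,
			  read_only ? DMA_TO_DEVICE :
			  DMA_BIDIRECTIONAL, 0);
	sg_free_table(userptr->sg);
	userptr->sg = NULL;
}
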
>
> /*
> * Since userptr pages are not pinned, we can't remove
> --
> 2.26.3
>