[PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v3

Kuehling, Felix Felix.Kuehling at amd.com
Tue Dec 11 00:12:20 UTC 2018


This is a nice improvement from the last version. I still see some
potential problems. See inline ...

I'm skipping over the CS and GEM parts. I hope Christian can review
those parts.

On 2018-12-06 4:02 p.m., Yang, Philip wrote:
> Use the HMM helper function hmm_vma_fault() to get the physical pages
> backing a userptr, and start tracking CPU page table updates on those
> pages. Then use hmm_vma_range_done() to check whether those pages were
> updated before amdgpu_cs_submit for gfx, or before user queues are
> resumed for kfd.
>
> If the userptr pages were updated, amdgpu_cs_ioctl restarts from
> scratch for gfx, and the restore worker is rescheduled to retry for
> kfd.
>
> HMM simplifies the concurrent CPU page table update check, so the
> guptasklock, mmu_invalidations and last_set_pages fields are removed
> from the amdgpu_ttm_tt struct.
>
> HMM does not pin pages (increase the page refcount), so related
> operations like release_pages(), put_page() and mark_page_dirty() are
> removed.
>
> Change-Id: Id2d3130378b44a774e0d77156102a20a203b5036
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   1 -
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  81 ++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c   |   2 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 188 +++++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  14 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  34 +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h        |   7 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 160 ++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |   1 -
>  11 files changed, 206 insertions(+), 289 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index bcf587b4ba98..b492dd9e541a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -62,7 +62,6 @@ struct kgd_mem {
>  
>  	atomic_t invalid;
>  	struct amdkfd_process_info *process_info;
> -	struct page **user_pages;
>  
>  	struct amdgpu_sync sync;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index b29ef088fa14..75833b40507b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -580,28 +580,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>  		goto out;
>  	}
>  
> -	/* If no restore worker is running concurrently, user_pages
> -	 * should not be allocated
> -	 */
> -	WARN(mem->user_pages, "Leaking user_pages array");
> -
> -	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
> -					   sizeof(struct page *),
> -					   GFP_KERNEL | __GFP_ZERO);
> -	if (!mem->user_pages) {
> -		pr_err("%s: Failed to allocate pages array\n", __func__);
> -		ret = -ENOMEM;
> -		goto unregister_out;
> -	}
> -
> -	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
> +	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
>  	if (ret) {
>  		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
> -		goto free_out;
> +		goto unregister_out;
>  	}
>  
> -	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
> -
>  	ret = amdgpu_bo_reserve(bo, true);
>  	if (ret) {
>  		pr_err("%s: Failed to reserve BO\n", __func__);
> @@ -614,11 +598,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>  	amdgpu_bo_unreserve(bo);
>  
>  release_out:
> -	if (ret)
> -		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
> -free_out:
> -	kvfree(mem->user_pages);
> -	mem->user_pages = NULL;
> +	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>  unregister_out:
>  	if (ret)
>  		amdgpu_mn_unregister(bo);
> @@ -677,7 +657,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
>  	ctx->kfd_bo.priority = 0;
>  	ctx->kfd_bo.tv.bo = &bo->tbo;
>  	ctx->kfd_bo.tv.num_shared = 1;
> -	ctx->kfd_bo.user_pages = NULL;
>  	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>  
>  	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
> @@ -741,7 +720,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
>  	ctx->kfd_bo.priority = 0;
>  	ctx->kfd_bo.tv.bo = &bo->tbo;
>  	ctx->kfd_bo.tv.num_shared = 1;
> -	ctx->kfd_bo.user_pages = NULL;
>  	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>  
>  	i = 0;
> @@ -1329,15 +1307,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>  	list_del(&bo_list_entry->head);
>  	mutex_unlock(&process_info->lock);
>  
> -	/* Free user pages if necessary */
> -	if (mem->user_pages) {
> -		pr_debug("%s: Freeing user_pages array\n", __func__);
> -		if (mem->user_pages[0])
> -			release_pages(mem->user_pages,
> -					mem->bo->tbo.ttm->num_pages);
> -		kvfree(mem->user_pages);
> -	}
> -
>  	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
>  	if (unlikely(ret))
>  		return ret;
> @@ -1751,25 +1720,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>  
>  		bo = mem->bo;
>  
> -		if (!mem->user_pages) {
> -			mem->user_pages =
> -				kvmalloc_array(bo->tbo.ttm->num_pages,
> -						 sizeof(struct page *),
> -						 GFP_KERNEL | __GFP_ZERO);
> -			if (!mem->user_pages) {
> -				pr_err("%s: Failed to allocate pages array\n",
> -				       __func__);
> -				return -ENOMEM;
> -			}
> -		} else if (mem->user_pages[0]) {
> -			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
> -		}
> -
>  		/* Get updated user pages */
>  		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
> -						   mem->user_pages);
> +						   bo->tbo.ttm->pages);
>  		if (ret) {
> -			mem->user_pages[0] = NULL;
> +			bo->tbo.ttm->pages[0] = NULL;
>  			pr_info("%s: Failed to get user pages: %d\n",
>  				__func__, ret);
>  			/* Pretend it succeeded. It will fail later
> @@ -1778,12 +1733,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>  			 * stalled user mode queues.
>  			 */
>  		}
> -
> -		/* Mark the BO as valid unless it was invalidated
> -		 * again concurrently
> -		 */
> -		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
> -			return -EAGAIN;

I think you're removing this to make sure that
amdgpu_ttm_tt_get_user_pages and amdgpu_ttm_tt_get_user_pages_done
calls are paired up exactly. But there are other error conditions in
validate_invalid_user_pages that would skip the
amdgpu_ttm_tt_get_user_pages_done call. Those error-handling code paths
in validate_invalid_user_pages will need to keep the calls paired, too.
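
The invariant that needs to hold is roughly the following (an untested
sketch; operation_that_may_fail is a hypothetical stand-in, not a real
function in the patch):

	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
	if (ret)
		return ret;	/* nothing to undo, no range was set up */

	ret = operation_that_may_fail(bo);	/* hypothetical stand-in */

	/* must run on success *and* on every error path after this point */
	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
	return ret;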


>  	}
>  
>  	return 0;
> @@ -1863,10 +1812,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>  
>  		bo = mem->bo;
>  
> -		/* Copy pages array and validate the BO if we got user pages */
> -		if (mem->user_pages[0]) {
> -			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
> -						     mem->user_pages);
> +		/* Validate the BO if we got user pages */
> +		if (bo->tbo.ttm->pages[0]) {
>  			amdgpu_bo_placement_from_domain(bo, mem->domain);
>  			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>  			if (ret) {
> @@ -1875,16 +1822,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>  			}
>  		}
>  
> -		/* Validate succeeded, now the BO owns the pages, free
> -		 * our copy of the pointer array. Put this BO back on
> -		 * the userptr_valid_list. If we need to revalidate
> -		 * it, we need to start from scratch.
> -		 */
> -		kvfree(mem->user_pages);
> -		mem->user_pages = NULL;
>  		list_move_tail(&mem->validate_list.head,
>  			       &process_info->userptr_valid_list);
>  
> +		/* Stop HMM track the userptr update. We dont check the return
> +		 * value for concurrent CPU page table update because we will
> +		 * reschedule the restore worker if process_info->evicted_bos
> +		 * is updated.
> +		 */
> +		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> +
>  		/* Update mapping. If the BO was not validated
>  		 * (because we couldn't get user pages), this will
>  		 * clear the page table entries, which will result in
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> index 5c79da8e1150..3842183970cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> @@ -200,8 +200,6 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>  
>  		if (!bo->parent)
>  			list_add_tail(&e->tv.head, &bucket[priority]);
> -
> -		e->user_pages = NULL;
>  	}
>  
>  	/* Connect the sorted buckets in the output list. */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index 7c5f5d1601e6..4beab2de09b3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -35,8 +35,7 @@ struct amdgpu_bo_list_entry {
>  	struct ttm_validate_buffer	tv;
>  	struct amdgpu_bo_va		*bo_va;
>  	uint32_t			priority;
> -	struct page			**user_pages;
> -	int				user_invalidated;
> +	bool				user_invalidated;
>  };
>  
>  struct amdgpu_bo_list {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
[snip]
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
[snip]
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index 5d518d2bb9be..c8002ee1d415 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>  			true, false, MAX_SCHEDULE_TIMEOUT);
>  		if (r <= 0)
>  			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
> -
> -		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
>  	}
>  }
>  
> @@ -353,15 +351,16 @@ static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>  /**
>   * amdgpu_mn_get - create HMM mirror context
>   *
> + * @mm: the mm struct
>   * @adev: amdgpu device pointer
>   * @type: type of MMU notifier context
>   *
> - * Creates a HMM mirror context for current->mm.
> + * Creates a HMM mirror context for mm.
>   */
> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,

I think this change isn't needed any more. All callers of this function
specify mm as current->mm anyway.


> +				struct amdgpu_device *adev,
>  				enum amdgpu_mn_type type)
>  {
> -	struct mm_struct *mm = current->mm;
>  	struct amdgpu_mn *amn;
>  	unsigned long key = AMDGPU_MN_KEY(mm, type);
>  	int r;
> @@ -431,7 +430,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>  	struct list_head bos;
>  	struct interval_tree_node *it;
>  
> -	amn = amdgpu_mn_get(adev, type);
> +	amn = amdgpu_mn_get(current->mm, adev, type);
>  	if (IS_ERR(amn))
>  		return PTR_ERR(amn);
>  
> @@ -513,3 +512,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>  	mutex_unlock(&adev->mn_lock);
>  }
>  
> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */
> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
> +		(1 << 0), /* HMM_PFN_VALID */
> +		(1 << 1), /* HMM_PFN_WRITE */
> +		0 /* HMM_PFN_DEVICE_PRIVATE */
> +};
> +
> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
> +		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
> +		0, /* HMM_PFN_NONE */
> +		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
> +};
> +
> +void amdgpu_hmm_init_range(struct hmm_range *range)
> +{
> +	if (range) {
> +		range->flags = hmm_range_flags;
> +		range->values = hmm_range_values;
> +		range->pfn_shift = PAGE_SHIFT;
> +		range->pfns = NULL;
> +		INIT_LIST_HEAD(&range->list);
> +	}
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> index 0a51fd00021c..4c5e86847a3b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> @@ -25,9 +25,10 @@
>  #define __AMDGPU_MN_H__
>  
>  /*
> - * MMU Notifier
> + * HMM mirror
>   */
>  struct amdgpu_mn;
> +struct hmm_range;
>  
>  enum amdgpu_mn_type {
>  	AMDGPU_MN_TYPE_GFX,
> @@ -37,10 +38,12 @@ enum amdgpu_mn_type {
>  #if defined(CONFIG_HMM_MIRROR)
>  void amdgpu_mn_lock(struct amdgpu_mn *mn);
>  void amdgpu_mn_unlock(struct amdgpu_mn *mn);
> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
> +				struct amdgpu_device *adev,
>  				enum amdgpu_mn_type type);
>  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
>  void amdgpu_mn_unregister(struct amdgpu_bo *bo);
> +void amdgpu_hmm_init_range(struct hmm_range *range);
>  #else
>  static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
>  static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c91ec3101d00..33057d6114c7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -43,6 +43,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/debugfs.h>
>  #include <linux/iommu.h>
> +#include <linux/hmm.h>
>  #include "amdgpu.h"
>  #include "amdgpu_object.h"
>  #include "amdgpu_trace.h"
> @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
>  /*
>   * TTM backend functions.
>   */
> -struct amdgpu_ttm_gup_task_list {
> -	struct list_head	list;
> -	struct task_struct	*task;
> -};
> -
>  struct amdgpu_ttm_tt {
>  	struct ttm_dma_tt	ttm;
>  	u64			offset;
> @@ -718,107 +714,105 @@ struct amdgpu_ttm_tt {
>  	uint32_t		userflags;
>  	spinlock_t              guptasklock;
>  	struct list_head        guptasks;
> -	atomic_t		mmu_invalidations;
> -	uint32_t		last_set_pages;
> +	struct hmm_range	range;
>  };
>  
>  /**
> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
> - * pointer to memory
> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
> + * memory and start HMM tracking CPU page table update
>   *
> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
> - * This provides a wrapper around the get_user_pages() call to provide
> - * device accessible pages that back user memory.
> + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
> + * once afterwards to stop HMM tracking
>   */
>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>  {
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>  	struct mm_struct *mm = gtt->usertask->mm;
> -	unsigned int flags = 0;
> -	unsigned pinned = 0;
> -	int r;
> +	unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> +	struct hmm_range *range = &gtt->range;
> +	int r, i;
>  
>  	if (!mm) /* Happens during process shutdown */
>  		return -ESRCH;
>  
> -	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
> -		flags |= FOLL_WRITE;
> +	amdgpu_hmm_init_range(range);
>  
>  	down_read(&mm->mmap_sem);
>  
> +	range->vma = find_vma(mm, gtt->userptr);

There is still an assumption here that our userptr BOs cover only a
single VMA. This is a new limitation with HMM that the previous
get_user_pages-based implementation did not have. We're probably OK with
this for now. But we may run into regressions if there are any ROCm
applications that register user memory spanning multiple VMAs. If that
happens, we may need to generalize this so that each userptr BO can have
multiple HMM ranges.


>  	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
>  		/*
>  		 * check that we only use anonymous memory to prevent problems
>  		 * with writeback
>  		 */
> -		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> -		struct vm_area_struct *vma;
> +		struct vm_area_struct *vma = range->vma;
>  
> -		vma = find_vma(mm, gtt->userptr);
> -		if (!vma || vma->vm_file || vma->vm_end < end) {
> -			up_read(&mm->mmap_sem);
> -			return -EPERM;
> -		}
> +		if (!vma || vma->vm_file || vma->vm_end < end)
> +			range->vma = NULL;
>  	}
>  
> -	/* loop enough times using contiguous pages of memory */
> -	do {
> -		unsigned num_pages = ttm->num_pages - pinned;
> -		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
> -		struct page **p = pages + pinned;
> -		struct amdgpu_ttm_gup_task_list guptask;
> -
> -		guptask.task = current;
> -		spin_lock(&gtt->guptasklock);
> -		list_add(&guptask.list, &gtt->guptasks);
> -		spin_unlock(&gtt->guptasklock);
> +	if (!range->vma) {
> +		r = -EPERM;

You're treating permission errors and errors finding a VMA the same
way. And I think you don't detect failures to find a VMA properly in the
first place. find_vma may return a VMA that doesn't include the address
provided. It's documented as "Look up the first VMA which satisfies
addr < vm_end, NULL if none." You should also check that the VMA covers
the entire address range of the userptr BO. There is a helper function
in linux/mm.h that does that: range_in_vma(range->vma, gtt->userptr,
gtt->userptr + ttm->num_pages * PAGE_SIZE). You should check this
condition in all cases.

There should be two distinct types of errors. If find_vma fails to find
a matching VMA, that should be -EFAULT. If the gtt->userflags don't
match the VMA properties, that should be -EPERM.
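
Untested, but roughly this, with end as already computed earlier in the
function:

	range->vma = find_vma(mm, gtt->userptr);
	if (!range->vma ||
	    !range_in_vma(range->vma, gtt->userptr, end)) {
		r = -EFAULT;	/* no single VMA covers the whole range */
		goto out;
	}
	if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
	    range->vma->vm_file) {
		r = -EPERM;	/* VMA doesn't match the userflags */
		goto out;
	}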


> +		goto out;
> +	}
>  
> -		if (mm == current->mm)
> -			r = get_user_pages(userptr, num_pages, flags, p, NULL);
> -		else
> -			r = get_user_pages_remote(gtt->usertask,
> -					mm, userptr, num_pages,
> -					flags, p, NULL, NULL);
> +	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
> +				     GFP_KERNEL);
> +	if (range->pfns == NULL) {
> +		r = -ENOMEM;
> +		goto out;
> +	}
> +	range->start = gtt->userptr;
> +	range->end = end;
>  
> -		spin_lock(&gtt->guptasklock);
> -		list_del(&guptask.list);
> -		spin_unlock(&gtt->guptasklock);
> +	for (i = 0; i < ttm->num_pages; i++) {
> +		range->pfns[i] = range->flags[HMM_PFN_VALID];
> +		range->pfns[i] |= amdgpu_ttm_tt_is_readonly(ttm) ?
> +					0 : range->flags[HMM_PFN_WRITE];
> +	}

You could optimize this slightly. You don't need to call
amdgpu_ttm_tt_is_readonly in each loop iteration. The flags are the same
for all pfns. So do the conditional thing on range->pfns[0], and then
just copy that to all the remaining pfns.
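
E.g. (untested):

	range->pfns[0] = range->flags[HMM_PFN_VALID];
	if (!amdgpu_ttm_tt_is_readonly(ttm))
		range->pfns[0] |= range->flags[HMM_PFN_WRITE];
	for (i = 1; i < ttm->num_pages; i++)
		range->pfns[i] = range->pfns[0];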


>  
> -		if (r < 0)
> -			goto release_pages;
> +	/* This may trigger page table update */
> +	r = hmm_vma_fault(range, true);
> +	if (r)
> +		goto out_free_pfns;
>  
> -		pinned += r;
> +	up_read(&mm->mmap_sem);
>  
> -	} while (pinned < ttm->num_pages);
> +	for (i = 0; i < ttm->num_pages; i++)
> +		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
>  
> -	up_read(&mm->mmap_sem);
>  	return 0;
>  
> -release_pages:
> -	release_pages(pages, pinned);
> +out_free_pfns:
> +	kvfree(range->pfns);
> +	range->pfns = NULL;
> +out:
>  	up_read(&mm->mmap_sem);
>  	return r;
>  }
>  
>  /**
> - * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
> + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
> + * Check if the pages backing this ttm range have been invalidated
>   *
> - * Called by amdgpu_cs_list_validate(). This creates the page list
> - * that backs user memory and will ultimately be mapped into the device
> - * address space.
> + * Returns: true if pages are invalidated since the last time they've been set

I still don't like that you're inverting the return value of
hmm_vma_range_done. This will get confusing in the future. Could this
function instead return "true if pages are still valid"?


>   */
> -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
>  {
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	unsigned i;
> +	bool r;
>  
> -	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
> -	for (i = 0; i < ttm->num_pages; ++i) {
> -		if (ttm->pages[i])
> -			put_page(ttm->pages[i]);
> +	if (gtt == NULL || !gtt->userptr)
> +		return false;

Use "!gtt" instead of "gtt == NULL". Also, returning "false" here means
the pages are still valid. That seems like the wrong answer for this
kind of failure. "false" would make sense here if you followed my
suggestion above to return true, if the pages are still valid.
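
I.e. something like this, with the semantics I'm suggesting (an
untested sketch, not a drop-in replacement):

	/* Returns true if the pages are still valid, i.e. there was no
	 * CPU page table update since amdgpu_ttm_tt_get_user_pages.
	 * This matches the sense of hmm_vma_range_done() directly.
	 */
	bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
	{
		struct amdgpu_ttm_tt *gtt = (void *)ttm;
		bool valid;

		if (!gtt || !gtt->userptr)
			return false;

		valid = hmm_vma_range_done(&gtt->range);

		kvfree(gtt->range.pfns);	/* kvfree(NULL) is a no-op */
		gtt->range.pfns = NULL;

		return valid;
	}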


>  
> -		ttm->pages[i] = pages ? pages[i] : NULL;
> +	r = hmm_vma_range_done(&gtt->range);
> +
> +	if (gtt->range.pfns) {
> +		kvfree(gtt->range.pfns);
> +		gtt->range.pfns = NULL;
>  	}
> +
> +	return !r;

I still don't like that you're inverting the return value of
hmm_vma_range_done. This will get confusing in the future.

Regards,
  Felix


>  }
>  
>  /**
> @@ -903,9 +897,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
>  	/* unmap the pages mapped to the device */
>  	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
>  
> -	/* mark the pages as dirty */
> -	amdgpu_ttm_tt_mark_user_pages(ttm);
> -
>  	sg_free_table(ttm->sg);
>  }
>  
> @@ -1207,7 +1198,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
>  	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
>  
>  	if (gtt && gtt->userptr) {
> -		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
> +		memset(ttm->pages, 0, ttm->num_pages * sizeof(struct page *));
>  		kfree(ttm->sg);
>  		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
>  		return;
> @@ -1258,8 +1249,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>  
>  	spin_lock_init(&gtt->guptasklock);
>  	INIT_LIST_HEAD(&gtt->guptasks);
> -	atomic_set(&gtt->mmu_invalidations, 0);
> -	gtt->last_set_pages = 0;
>  
>  	return 0;
>  }
> @@ -1289,7 +1278,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>  				  unsigned long end)
>  {
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	struct amdgpu_ttm_gup_task_list *entry;
>  	unsigned long size;
>  
>  	if (gtt == NULL || !gtt->userptr)
> @@ -1302,48 +1290,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>  	if (gtt->userptr > end || gtt->userptr + size <= start)
>  		return false;
>  
> -	/* Search the lists of tasks that hold this mapping and see
> -	 * if current is one of them.  If it is return false.
> -	 */
> -	spin_lock(&gtt->guptasklock);
> -	list_for_each_entry(entry, &gtt->guptasks, list) {
> -		if (entry->task == current) {
> -			spin_unlock(&gtt->guptasklock);
> -			return false;
> -		}
> -	}
> -	spin_unlock(&gtt->guptasklock);
> -
> -	atomic_inc(&gtt->mmu_invalidations);
> -
>  	return true;
>  }
>  
>  /**
> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
> + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
>   */
> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
> -				       int *last_invalidated)
> -{
> -	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	int prev_invalidated = *last_invalidated;
> -
> -	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
> -	return prev_invalidated != *last_invalidated;
> -}
> -
> -/**
> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
> - * been invalidated since the last time they've been set?
> - */
> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
>  {
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>  
>  	if (gtt == NULL || !gtt->userptr)
>  		return false;
>  
> -	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
> +	return true;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index b5b2d101f7db..c5a1e78a09b1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -102,7 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>  int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>  
>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
> -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
>  void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
>  int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>  				     uint32_t flags);
> @@ -112,7 +112,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>  				  unsigned long end);
>  bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>  				       int *last_invalidated);
> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
>  bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
>  uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
>  uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f059ec314f2b..ab8fa84b27a6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -619,7 +619,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>  	entry->tv.bo = &vm->root.base.bo->tbo;
>  	/* One for the VM updates, one for TTM and one for the CS job */
>  	entry->tv.num_shared = 3;
> -	entry->user_pages = NULL;
>  	list_add(&entry->tv.head, validated);
>  }
>  

