[PATCH 1/1] drm/amdkfd: Remove prefault before migrating to VRAM

Felix Kuehling felix.kuehling at amd.com
Tue Sep 6 21:58:22 UTC 2022


On 2022-09-06 15:34, Philip Yang wrote:
> If svm range no back system memory pages, migrate the range to GPU VRAM
> don't need prefault on system pages to migrate pages. Instead we just
> alloc VRAM pages and setup migrate->dst with the corresponding device
> page and skip the page migration. The device page will be inserted to
> PTE. Then CPU page fault will migrate the page back to system memory.

There are some English grammar issues that make this explanation hard to 
understand. Let me try to fix that:

    Prefaulting potentially allocates system memory pages before a
    migration. This adds unnecessary overhead. Instead we can skip
    unallocated pages in the migration and just point migrate->dst to a
    0-initialized VRAM page directly. Then the VRAM page will be
    inserted into the PTE. A subsequent CPU page fault will migrate the
    page back to system memory.
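
For reference, a condensed, annotated sketch of the copy loop as this
patch changes it (taken from the hunk below; error handling, the j/cursor
bookkeeping and the actual SDMA copy setup are omitted):

    for (i = j = 0; i < npages; i++) {
            struct page *spage;

            /* Always reserve a VRAM destination page for this index,
             * whether or not a system page currently backs it.
             */
            dst[i] = cursor.start + (j << PAGE_SHIFT);
            migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
            svm_migrate_get_vram_page(prange, migrate->dst[i]);
            migrate->dst[i] = migrate_pfn(migrate->dst[i]);

            spage = migrate_pfn_to_page(migrate->src[i]);
            if (spage && !is_zone_device_page(spage)) {
                    /* A system page exists: DMA-map it so its contents
                     * can be copied into the VRAM page.
                     */
                    src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
                                          DMA_TO_DEVICE);
            }
            /* If no system page backs this index, nothing is copied;
             * the 0-initialized VRAM page is what ends up in the PTE.
             */
    }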


As we discussed offline, there is still one potential issue with CPU 
page faults in a child process with COW mappings that we need to address 
before submitting this patch.

Thanks,
   Felix


>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 12 +++++-------
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 22 ----------------------
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.h     |  2 --
>   3 files changed, 5 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index 373e5bfd4e91..d3ebbde21241 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -322,12 +322,13 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>   	for (i = j = 0; i < npages; i++) {
>   		struct page *spage;
>   
> +		dst[i] = cursor.start + (j << PAGE_SHIFT);
> +		migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
> +		svm_migrate_get_vram_page(prange, migrate->dst[i]);
> +		migrate->dst[i] = migrate_pfn(migrate->dst[i]);
> +
>   		spage = migrate_pfn_to_page(migrate->src[i]);
>   		if (spage && !is_zone_device_page(spage)) {
> -			dst[i] = cursor.start + (j << PAGE_SHIFT);
> -			migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
> -			svm_migrate_get_vram_page(prange, migrate->dst[i]);
> -			migrate->dst[i] = migrate_pfn(migrate->dst[i]);
>   			src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
>   					      DMA_TO_DEVICE);
>   			r = dma_mapping_error(dev, src[i]);
> @@ -522,9 +523,6 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
>   	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
>   		 prange->start, prange->last, best_loc);
>   
> -	/* FIXME: workaround for page locking bug with invalid pages */
> -	svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev));
> -
>   	start = prange->start << PAGE_SHIFT;
>   	end = (prange->last + 1) << PAGE_SHIFT;
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 11074cc8c333..cf5b4005534c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -3181,28 +3181,6 @@ svm_range_best_prefetch_location(struct svm_range *prange)
>   	return best_loc;
>   }
>   
> -/* FIXME: This is a workaround for page locking bug when some pages are
> - * invalid during migration to VRAM
> - */
> -void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
> -			void *owner)
> -{
> -	struct hmm_range *hmm_range;
> -	int r;
> -
> -	if (prange->validated_once)
> -		return;
> -
> -	r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
> -				       prange->start << PAGE_SHIFT,
> -				       prange->npages, &hmm_range,
> -				       false, true, owner);
> -	if (!r) {
> -		amdgpu_hmm_range_get_pages_done(hmm_range);
> -		prange->validated_once = true;
> -	}
> -}
> -
>   /* svm_range_trigger_migration - start page migration if prefetch loc changed
>    * @mm: current process mm_struct
>    * @prange: svm range structure
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index cfac13ad06ef..012c53729516 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -181,8 +181,6 @@ void schedule_deferred_list_work(struct svm_range_list *svms);
>   void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
>   			 unsigned long offset, unsigned long npages);
>   void svm_range_free_dma_mappings(struct svm_range *prange);
> -void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
> -			void *owner);
>   int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
>   		       uint64_t *svm_priv_data_size);
>   int kfd_criu_checkpoint_svm(struct kfd_process *p,

