[PATCH v3 2/2] drm/amdkfd: return migration pages from copy function
Felix Kuehling
felix.kuehling at amd.com
Fri Jul 18 22:09:26 UTC 2025
On 2025-07-14 08:46, James Zhu wrote:
> dst MIGRATE_PFN_VALID bit and src MIGRATE_PFN_MIGRATE bit
> should always be set when migration success. cpage includes
> src MIGRATE_PFN_MIGRATE bit set and MIGRATE_PFN_VALID bit
> unset pages for both ram and vram when memory is only allocated
> without access before migration, those pages should be count as
> migrate_unsuccessful_pages.
I think the patch is correct but I'm not sure I agree with the
explanation. Pages that were never accessed (and are not populated in
system memory) should be counted as successfully migrated. It seems they
were counted as unsuccessful by the old code.
Some more suggestions inline.
>
> -v2 use dst to check MIGRATE_PFN_VALID bit(suggested-by philip)
> -v3 add warning when vram pages is less than migration pages
> return migration pages directly from copy function
>
> Signed-off-by: James Zhu <James.Zhu at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 44 +++++++++---------------
> 1 file changed, 16 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index f0b690d4bb46..aad1346bde79 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -260,29 +260,15 @@ static void svm_migrate_put_sys_page(unsigned long addr)
> put_page(page);
> }
>
> -static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
> -{
> - unsigned long upages = 0;
> - unsigned long i;
> -
> - for (i = 0; i < migrate->npages; i++) {
> - if (migrate->src[i] & MIGRATE_PFN_VALID &&
> - !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
> - upages++;
> - }
> - return upages;
> -}
> -
> static int
> svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
> struct migrate_vma *migrate, struct dma_fence **mfence,
> - dma_addr_t *scratch, uint64_t ttm_res_offset)
> + dma_addr_t *scratch, uint64_t ttm_res_offset, unsigned long *mpages)
You could return mpages as the return value. That would match the
convention of svm_migrate_vma_to_vram.
> {
> uint64_t npages = migrate->npages;
> struct amdgpu_device *adev = node->adev;
> struct device *dev = adev->dev;
> struct amdgpu_res_cursor cursor;
> - uint64_t mpages = 0;
> dma_addr_t *src;
> uint64_t *dst;
> uint64_t i, j;
> @@ -296,7 +282,8 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
>
> amdgpu_res_first(prange->ttm_res, ttm_res_offset,
> npages << PAGE_SHIFT, &cursor);
> - for (i = j = 0; (i < npages) && (mpages < migrate->cpages); i++) {
> + *mpages = 0;
> + for (i = j = 0; (i < npages) && (*mpages < migrate->cpages); i++) {
> struct page *spage;
>
> if (migrate->src[i] & MIGRATE_PFN_MIGRATE) {
> @@ -304,7 +291,7 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
> migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
> svm_migrate_get_vram_page(prange, migrate->dst[i]);
> migrate->dst[i] = migrate_pfn(migrate->dst[i]);
> - mpages++;
> + (*mpages)++;
> }
> spage = migrate_pfn_to_page(migrate->src[i]);
> if (spage && !is_zone_device_page(spage)) {
> @@ -356,12 +343,12 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
> out_free_vram_pages:
> if (r) {
> pr_debug("failed %d to copy memory to vram\n", r);
> - for (i = 0; i < npages && mpages; i++) {
> + for (i = 0; i < npages && *mpages; i++) {
> if (!dst[i])
> continue;
> svm_migrate_put_vram_page(adev, dst[i]);
> migrate->dst[i] = 0;
> - mpages--;
> + (*mpages)--;
> }
> }
>
> @@ -441,13 +428,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
> else
> pr_debug("0x%lx pages collected\n", cpages);
>
> - r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
> + r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset, &mpages);
> migrate_vma_pages(&migrate);
>
> svm_migrate_copy_done(adev, mfence);
> migrate_vma_finalize(&migrate);
>
> - mpages = cpages - svm_migrate_unsuccessful_pages(&migrate);
> pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
> mpages, cpages, migrate.npages);
Maybe change "successful/cpages/npages" to
"migrated/collected/requested". I think that would be a better
explanation of what these numbers actually mean.
>
> @@ -580,7 +566,7 @@ static void svm_migrate_page_free(struct page *page)
> static int
> svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
> struct migrate_vma *migrate, struct dma_fence **mfence,
> - dma_addr_t *scratch, uint64_t npages)
> + dma_addr_t *scratch, uint64_t npages, unsigned long *mpages)
You could return mpages as the return value. That would match the
convention of svm_migrate_vma_to_ram.
> {
> struct device *dev = adev->dev;
> uint64_t *src;
> @@ -598,6 +584,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
> src = (uint64_t *)(scratch + npages);
> dst = scratch;
>
> + *mpages = 0;
> for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
> struct page *spage;
>
> @@ -646,6 +633,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
> dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
>
> migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
> + (*mpages)++;
> j++;
> }
>
> @@ -688,7 +676,6 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
> {
> struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
> uint64_t npages = (end - start) >> PAGE_SHIFT;
> - unsigned long upages = npages;
> unsigned long cpages = 0;
> unsigned long mpages = 0;
> struct amdgpu_device *adev = node->adev;
> @@ -745,12 +732,11 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
> pr_debug("0x%lx pages collected\n", cpages);
>
> r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
> - scratch, npages);
> + scratch, npages, &mpages);
> migrate_vma_pages(&migrate);
>
> - upages = svm_migrate_unsuccessful_pages(&migrate);
> - pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
> - upages, cpages, migrate.npages);
> + pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
> + mpages, cpages, migrate.npages);
Maybe change "successful/cpages/npages" to
"migrated/collected/requested". I think that would be a better
explanation of what these numbers actually mean.
Regards,
Felix
>
> svm_migrate_copy_done(adev, mfence);
> migrate_vma_finalize(&migrate);
> @@ -764,7 +750,6 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
> node->id, 0, trigger, r);
> out:
> if (!r && cpages) {
> - mpages = cpages - upages;
> pdd = svm_range_get_pdd_by_node(prange, node);
> if (pdd)
> WRITE_ONCE(pdd->page_out, pdd->page_out + mpages);
> @@ -847,6 +832,9 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
> }
>
> if (r >= 0) {
> + WARN_ONCE(prange->vram_pages < mpages,
> + "Recorded vram pages(0x%llx) should not be less than migration pages(0x%lx).",
> + prange->vram_pages, mpages);
> prange->vram_pages -= mpages;
>
> /* prange does not have vram page set its actual_loc to system
More information about the amd-gfx
mailing list