[PATCH 2/4] drm/amdkfd: handle partial migration cpages 0

Felix Kuehling felix.kuehling at amd.com
Wed Oct 6 21:36:51 UTC 2021


Am 2021-10-06 um 10:32 a.m. schrieb Philip Yang:
> migrate_vma_setup may return with cpages set to 0, meaning that no pages
> can be migrated. Treat this as an error case: skip the rest of the
> migration steps and don't change prange->actual_loc, to avoid the warning
> message "VRAM BO missing during validation".
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 48 ++++++++++++++----------
>  1 file changed, 29 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index 069422337cf7..9b68e3e8f2a1 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -409,20 +409,25 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>  			r, prange->svms, prange->start, prange->last);
>  		goto out_free;
>  	}
> -	if (migrate.cpages != npages) {
> -		pr_debug("Partial migration. 0x%lx/0x%llx pages can be migrated\n",
> -			 migrate.cpages,
> -			 npages);
> -	}
>  
> -	if (migrate.cpages) {
> -		r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
> -					     scratch);
> -		migrate_vma_pages(&migrate);
> -		svm_migrate_copy_done(adev, mfence);
> -		migrate_vma_finalize(&migrate);
> +	if (migrate.cpages != npages)
> +		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
> +			 migrate.cpages, npages);
> +	else
> +		pr_debug("0x%lx pages migrated\n", migrate.cpages);
> +
> +	if (!migrate.cpages) {
> +		pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
> +			 prange->start, prange->last);
> +		r = -ENOMEM;

I think just returning an error here is incorrect. This error gets
handled in svm_migrate_ram_to_vram and prevents the following VMAs from
migrating as well (if the range spans multiple VMAs).

Maybe return the number of pages migrated, if successful. Then the
caller can add up all the successful migrations and update
prange->actual_loc only if the total is > 0.

Regards,
  Felix


> +		goto out_free;
>  	}
>  
> +	r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
> +	migrate_vma_pages(&migrate);
> +	svm_migrate_copy_done(adev, mfence);
> +	migrate_vma_finalize(&migrate);
> +
>  	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
>  	svm_range_free_dma_mappings(prange);
>  
> @@ -636,19 +641,24 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>  		goto out_free;
>  	}
>  
> -	pr_debug("cpages %ld\n", migrate.cpages);
> +	if (migrate.cpages != npages)
> +		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
> +			 migrate.cpages, npages);
> +	else
> +		pr_debug("0x%lx pages migrated\n", migrate.cpages);
>  
> -	if (migrate.cpages) {
> -		r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
> -					    scratch, npages);
> -		migrate_vma_pages(&migrate);
> -		svm_migrate_copy_done(adev, mfence);
> -		migrate_vma_finalize(&migrate);
> -	} else {
> +	if (!migrate.cpages) {
>  		pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
>  			 prange->start, prange->last);
> +		r = -ENOMEM;
> +		goto out_free;
>  	}
>  
> +	r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
> +				    scratch, npages);
> +	migrate_vma_pages(&migrate);
> +	svm_migrate_copy_done(adev, mfence);
> +	migrate_vma_finalize(&migrate);
>  	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
>  
>  out_free:


More information about the amd-gfx mailing list