[PATCH 10/10] drm/amdkfd: protect svm_bo ref in case prange has forked

Felix Kuehling felix.kuehling at amd.com
Mon Jun 21 21:46:15 UTC 2021


On 2021-06-21 12:04 p.m., Alex Sierra wrote:
> Keep track of all the pages inside of pranges referenced to
> the same svm_bo.

This description is a bit confusing because you're not tracking page 
references but BO references.


>   This is done, by using the ref count inside this object.
> This makes sure the object has freed after the last prange is not longer
> at any GPU. Including references shared between a parent and child during
> a fork.

References to the BO are not really shared between parent and child 
processes. They share page references. What we're doing here is, we're 
removing assumptions about the lifetime of the svm_bo by tying it to the 
lifetime of any pages referencing it.

I'd suggest this description:

drm/amdkfd: Maintain svm_bo reference in page->zone_device_data

Each zone-device page holds a reference to the SVM BO that manages its 
backing storage. This is necessary to correctly hold on to the BO in 
case zone_device pages are shared with a child-process.


>
> Signed-off-by: Alex Sierra <alex.sierra at amd.com>
> Change-Id: Ibfe5efbfed28c2d7681fe091264a5d0d5f3657b2

Please remove the Change-Id.


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 10 ++++++++--
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 10 +---------
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.h     | 10 +++++++++-
>   3 files changed, 18 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index acb9f64577a0..c8ca3252cbc2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -245,7 +245,7 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
>   	struct page *page;
>   
>   	page = pfn_to_page(pfn);
> -	page->zone_device_data = prange;
> +	page->zone_device_data = prange->svm_bo;
>   	get_page(page);
>   	lock_page(page);
>   }
> @@ -336,6 +336,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>   			svm_migrate_get_vram_page(prange, migrate->dst[i]);
>   			migrate->dst[i] = migrate_pfn(migrate->dst[i]);
>   			migrate->dst[i] |= MIGRATE_PFN_LOCKED;
> +			svm_range_bo_ref(prange->svm_bo);

It would be cleaner to move this into svm_migrate_get_vram_page, where 
you assign the prange->svm_bo reference to page->zonde_device_data.

Regards,
   Felix


>   		}
>   		if (migrate->dst[i] & MIGRATE_PFN_VALID) {
>   			spage = migrate_pfn_to_page(migrate->src[i]);
> @@ -540,7 +541,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
>   
>   static void svm_migrate_page_free(struct page *page)
>   {
> -	/* Keep this function to avoid warning */
> +	struct svm_range_bo *svm_bo = page->zone_device_data;
> +
> +	if (svm_bo) {
> +		pr_debug("svm_bo ref left: %d\n", kref_read(&svm_bo->kref));
> +		svm_range_bo_unref(svm_bo);
> +	}
>   }
>   
>   static int
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index ebc1ae7e5193..4b5fc2375641 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -309,14 +309,6 @@ static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
>   	return true;
>   }
>   
> -static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
> -{
> -	if (svm_bo)
> -		kref_get(&svm_bo->kref);
> -
> -	return svm_bo;
> -}
> -
>   static void svm_range_bo_release(struct kref *kref)
>   {
>   	struct svm_range_bo *svm_bo;
> @@ -355,7 +347,7 @@ static void svm_range_bo_release(struct kref *kref)
>   	kfree(svm_bo);
>   }
>   
> -static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
> +void svm_range_bo_unref(struct svm_range_bo *svm_bo)
>   {
>   	if (!svm_bo)
>   		return;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index 27fbe1936493..21f693767a0d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -150,6 +150,14 @@ static inline void svm_range_unlock(struct svm_range *prange)
>   	mutex_unlock(&prange->lock);
>   }
>   
> +static inline struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
> +{
> +	if (svm_bo)
> +		kref_get(&svm_bo->kref);
> +
> +	return svm_bo;
> +}
> +
>   int svm_range_list_init(struct kfd_process *p);
>   void svm_range_list_fini(struct kfd_process *p);
>   int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
> @@ -178,7 +186,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
>   void svm_range_free_dma_mappings(struct svm_range *prange);
>   void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
>   			void *owner);
> -
> +void svm_range_bo_unref(struct svm_range_bo *svm_bo);
>   #else
>   
>   struct kfd_process;


More information about the amd-gfx mailing list