[PATCH v4] drm/amdkfd: Set correct svm range actual loc after splitting

Chen, Xiaogang xiaogang.chen at amd.com
Tue Jan 16 00:15:15 UTC 2024


With a nitpick below, this patch is:

Reviewed-by: Xiaogang Chen <Xiaogang.Chen at amd.com>

On 1/15/2024 4:02 PM, Philip Yang wrote:
> While partially migrating an svm range to system memory, clear the dma_addr
> VRAM domain flag; otherwise a future split will get incorrect vram_pages
> and actual_loc.
>
> After range splitting, set the new range and old range actual_loc:
> the new range actual_loc is 0 if new->vram_pages is 0;
> the old range actual_loc is 0 if old->vram_pages - new->vram_pages == 0.
>
> The new range takes an svm_bo ref only if its vram_pages is not 0.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  8 +++++
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 42 ++++++++++++++++++------
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.h     |  1 +
>   3 files changed, 41 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index bdc01ca9609a..79baa195ccac 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -564,6 +564,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>   			dma_addr_t *scratch, uint64_t npages)
>   {
>   	struct device *dev = adev->dev;
> +	dma_addr_t *dma_addr;
>   	uint64_t *src;
>   	dma_addr_t *dst;
>   	struct page *dpage;
> @@ -575,6 +576,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>   		 prange->last);
>   
>   	addr = migrate->start;
> +	dma_addr = svm_get_dma_addr_for_page_count(prange, addr);
>   
>   	src = (uint64_t *)(scratch + npages);
>   	dst = scratch;
> @@ -623,6 +625,12 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>   			goto out_oom;
>   		}
>   
> +		/* Clear VRAM flag when page is migrated to ram, to count vram
> +		 * pages correctly when splitting the range.
> +		 */
> +		if (dma_addr && (dma_addr[i] & SVM_RANGE_VRAM_DOMAIN))
> +			dma_addr[i] = 0;
>   		pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n",
>   				     dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index f84547eccd28..78b4968e4c95 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -362,7 +362,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
>   	INIT_LIST_HEAD(&prange->child_list);
>   	atomic_set(&prange->invalid, 0);
>   	prange->validate_timestamp = 0;
> -	prange->vram_pages = 0;
>   	mutex_init(&prange->migrate_mutex);
>   	mutex_init(&prange->lock);
>   
> @@ -965,6 +964,24 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size,
>   	return 0;
>   }
>   
> +dma_addr_t *
> +svm_get_dma_addr_for_page_count(struct svm_range *prange, u64 addr)

I think this function would be better named svm_get_dma_addr_for_page_addr.
Here we do not count pages; we get the correct place to save the dma
address for a specific GPU VM address.
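
For illustration, here is a minimal standalone sketch (not the kernel code) of
the address-to-index math the helper does; the names mirror the patch and the
numbers are made up:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages for this example */

int main(void)
{
	/* prange->start is a page index; addr is a byte address inside
	 * the range. The slot in the per-GPU dma_addr array is the page
	 * offset of addr from the start of the range.
	 */
	uint64_t range_start = 0x1000;	/* first page index of the range */
	uint64_t addr = 0x1005000;	/* byte address inside the range */
	uint64_t idx = (addr >> PAGE_SHIFT) - range_start;

	/* idx == 5, i.e. the helper would return &dma_addr[gpuidx][5] */
	printf("dma_addr slot index: %llu\n", (unsigned long long)idx);
	return 0;
}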

Regards

Xiaogang

> +{
> +	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
> +	dma_addr_t *dma_addr;
> +	s32 gpuidx;
> +
> +	gpuidx = kfd_process_gpuidx_from_gpuid(p, prange->actual_loc);
> +	if (gpuidx < 0) {
> +		pr_debug("no GPU id 0x%x found\n", prange->actual_loc);
> +		return NULL;
> +	}
> +
> +	dma_addr = prange->dma_addr[gpuidx];
> +	dma_addr += (addr >> PAGE_SHIFT) - prange->start;
> +	return dma_addr;
> +}
> +
>   static int
>   svm_range_split_pages(struct svm_range *new, struct svm_range *old,
>   		      uint64_t start, uint64_t last)
> @@ -980,9 +997,14 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old,
>   		if (r)
>   			return r;
>   	}
> -	if (old->actual_loc)
> +	if (old->actual_loc && new->vram_pages) {
>   		old->vram_pages -= new->vram_pages;
> -
> +		new->actual_loc = old->actual_loc;
> +		if (!old->vram_pages)
> +			old->actual_loc = 0;
> +	}
> +	pr_debug("new->vram_pages 0x%llx loc 0x%x old->vram_pages 0x%llx loc 0x%x\n",
> +		 new->vram_pages, new->actual_loc, old->vram_pages, old->actual_loc);
>   	return 0;
>   }
>   
> @@ -1002,13 +1024,14 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
>   		new->offset = old->offset + npages;
>   	}
>   
> -	new->svm_bo = svm_range_bo_ref(old->svm_bo);
> -	new->ttm_res = old->ttm_res;
> -
> -	spin_lock(&new->svm_bo->list_lock);
> -	list_add(&new->svm_bo_list, &new->svm_bo->range_list);
> -	spin_unlock(&new->svm_bo->list_lock);
> +	if (new->vram_pages) {
> +		new->svm_bo = svm_range_bo_ref(old->svm_bo);
> +		new->ttm_res = old->ttm_res;
>   
> +		spin_lock(&new->svm_bo->list_lock);
> +		list_add(&new->svm_bo_list, &new->svm_bo->range_list);
> +		spin_unlock(&new->svm_bo->list_lock);
> +	}
>   	return 0;
>   }
>   
> @@ -1058,7 +1081,6 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
>   	new->flags = old->flags;
>   	new->preferred_loc = old->preferred_loc;
>   	new->prefetch_loc = old->prefetch_loc;
> -	new->actual_loc = old->actual_loc;
>   	new->granularity = old->granularity;
>   	new->mapped_to_gpu = old->mapped_to_gpu;
>   	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index 026863a0abcd..c6df930366ce 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -185,6 +185,7 @@ void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
>   void svm_range_dma_unmap(struct svm_range *prange);
>   int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
>   		       uint64_t *svm_priv_data_size);
> +dma_addr_t *svm_get_dma_addr_for_page_count(struct svm_range *prange, u64 addr);
>   int kfd_criu_checkpoint_svm(struct kfd_process *p,
>   			    uint8_t __user *user_priv_data,
>   			    uint64_t *priv_offset);
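
As an aside, here is a minimal standalone sketch (not the kernel code) of the
split accounting the commit message describes: the old range keeps whatever
VRAM pages the new range does not take, and actual_loc is cleared once a range
has no VRAM pages left. The struct and values below are made up for
illustration.

#include <stdint.h>
#include <stdio.h>

struct range {
	uint64_t vram_pages;
	uint32_t actual_loc;	/* 0 means system memory */
};

/* Mirrors the vram_pages/actual_loc bookkeeping added to
 * svm_range_split_pages() in the patch above.
 */
static void split_accounting(struct range *new, struct range *old)
{
	if (old->actual_loc && new->vram_pages) {
		old->vram_pages -= new->vram_pages;
		new->actual_loc = old->actual_loc;
		if (!old->vram_pages)
			old->actual_loc = 0;	/* old is now fully in system memory */
	}
}

int main(void)
{
	/* The old range has 16 VRAM pages on GPU id 1; the split-off part
	 * takes all 16, so the old range ends up with actual_loc 0.
	 */
	struct range old = { .vram_pages = 16, .actual_loc = 1 };
	struct range new = { .vram_pages = 16, .actual_loc = 0 };

	split_accounting(&new, &old);
	printf("new: vram_pages=%llu loc=%u; old: vram_pages=%llu loc=%u\n",
	       (unsigned long long)new.vram_pages, new.actual_loc,
	       (unsigned long long)old.vram_pages, old.actual_loc);
	return 0;
}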
