[PATCH v4] drm/amdkfd: Set correct svm range actual loc after splitting
Chen, Xiaogang
xiaogang.chen at amd.com
Tue Jan 16 00:15:15 UTC 2024
With a nitpick below, this patch is:
Reviewed-by: Xiaogang Chen <Xiaogang.Chen at amd.com>
On 1/15/2024 4:02 PM, Philip Yang wrote:
> While an svm range is partially migrating to system memory, clear the
> dma_addr VRAM domain flag; otherwise a future split will compute incorrect
> vram_pages and actual_loc.
>
> After range splitting, set the new range and old range actual_loc:
> the new range actual_loc is 0 if new->vram_pages is 0;
> the old range actual_loc is 0 if old->vram_pages - new->vram_pages == 0.
>
> The new range takes an svm_bo reference only if its vram_pages is not 0.
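Just to restate my understanding of the intended bookkeeping after a split,
as a sketch only (using the field names from this patch, not the exact
upstream code; I assume new->vram_pages was accumulated while splitting the
per-GPU dma_addr arrays, i.e. it counts pages still flagged
SVM_RANGE_VRAM_DOMAIN):

    /* "new" is the split-off part of "old" */
    if (old->actual_loc && new->vram_pages) {
            old->vram_pages -= new->vram_pages;
            new->actual_loc = old->actual_loc;  /* new range still owns VRAM pages */
            if (!old->vram_pages)
                    old->actual_loc = 0;        /* old range is system memory only now */
    }
    /* and new takes an svm_bo reference only when new->vram_pages != 0 */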
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 8 +++++
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 42 ++++++++++++++++++------
> drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 1 +
> 3 files changed, 41 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index bdc01ca9609a..79baa195ccac 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -564,6 +564,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
> dma_addr_t *scratch, uint64_t npages)
> {
> struct device *dev = adev->dev;
> + dma_addr_t *dma_addr;
> uint64_t *src;
> dma_addr_t *dst;
> struct page *dpage;
> @@ -575,6 +576,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
> prange->last);
>
> addr = migrate->start;
> + dma_addr = svm_get_dma_addr_for_page_count(prange, addr);
>
> src = (uint64_t *)(scratch + npages);
> dst = scratch;
> @@ -623,6 +625,12 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
> goto out_oom;
> }
>
> + /* Clear VRAM flag when page is migrated to ram, to count vram
> + * pages correctly when splitting the range.
> + */
> + if (dma_addr && (dma_addr[i] & SVM_RANGE_VRAM_DOMAIN))
> + dma_addr[i] = 0;
> pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n",
> dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index f84547eccd28..78b4968e4c95 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -362,7 +362,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
> INIT_LIST_HEAD(&prange->child_list);
> atomic_set(&prange->invalid, 0);
> prange->validate_timestamp = 0;
> - prange->vram_pages = 0;
> mutex_init(&prange->migrate_mutex);
> mutex_init(&prange->lock);
>
> @@ -965,6 +964,24 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size,
> return 0;
> }
>
> +dma_addr_t *
> +svm_get_dma_addr_for_page_count(struct svm_range *prange, u64 addr)
I think this function would be better named svm_get_dma_addr_for_page_addr.
Here we do not count pages; we get the correct place to store the dma
address for a specific GPU VM address.
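In other words, the helper is just index arithmetic: it picks the per-GPU
dma_addr array and offsets it by the page index of addr within the range,
roughly (illustration only, same arithmetic as in the patch):

    dma_addr_t *slot = prange->dma_addr[gpuidx] +
                       ((addr >> PAGE_SHIFT) - prange->start);

so a name based on the page address would describe what it does better.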
Regards
Xiaogang
> +{
> + struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
> + dma_addr_t *dma_addr;
> + s32 gpuidx;
> +
> + gpuidx = kfd_process_gpuidx_from_gpuid(p, prange->actual_loc);
> + if (gpuidx < 0) {
> + pr_debug("no GPU id 0x%x found\n", prange->actual_loc);
> + return NULL;
> + }
> +
> + dma_addr = prange->dma_addr[gpuidx];
> + dma_addr += (addr >> PAGE_SHIFT) - prange->start;
> + return dma_addr;
> +}
> +
> static int
> svm_range_split_pages(struct svm_range *new, struct svm_range *old,
> uint64_t start, uint64_t last)
> @@ -980,9 +997,14 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old,
> if (r)
> return r;
> }
> - if (old->actual_loc)
> + if (old->actual_loc && new->vram_pages) {
> old->vram_pages -= new->vram_pages;
> -
> + new->actual_loc = old->actual_loc;
> + if (!old->vram_pages)
> + old->actual_loc = 0;
> + }
> + pr_debug("new->vram_pages 0x%llx loc 0x%x old->vram_pages 0x%llx loc 0x%x\n",
> + new->vram_pages, new->actual_loc, old->vram_pages, old->actual_loc);
> return 0;
> }
>
> @@ -1002,13 +1024,14 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
> new->offset = old->offset + npages;
> }
>
> - new->svm_bo = svm_range_bo_ref(old->svm_bo);
> - new->ttm_res = old->ttm_res;
> -
> - spin_lock(&new->svm_bo->list_lock);
> - list_add(&new->svm_bo_list, &new->svm_bo->range_list);
> - spin_unlock(&new->svm_bo->list_lock);
> + if (new->vram_pages) {
> + new->svm_bo = svm_range_bo_ref(old->svm_bo);
> + new->ttm_res = old->ttm_res;
>
> + spin_lock(&new->svm_bo->list_lock);
> + list_add(&new->svm_bo_list, &new->svm_bo->range_list);
> + spin_unlock(&new->svm_bo->list_lock);
> + }
> return 0;
> }
>
> @@ -1058,7 +1081,6 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
> new->flags = old->flags;
> new->preferred_loc = old->preferred_loc;
> new->prefetch_loc = old->prefetch_loc;
> - new->actual_loc = old->actual_loc;
> new->granularity = old->granularity;
> new->mapped_to_gpu = old->mapped_to_gpu;
> bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index 026863a0abcd..c6df930366ce 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -185,6 +185,7 @@ void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
> void svm_range_dma_unmap(struct svm_range *prange);
> int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
> uint64_t *svm_priv_data_size);
> +dma_addr_t *svm_get_dma_addr_for_page_count(struct svm_range *prange, u64 addr);
> int kfd_criu_checkpoint_svm(struct kfd_process *p,
> uint8_t __user *user_priv_data,
> uint64_t *priv_offset);