[PATCH v2] amd/amdkfd: Set correct svm range actual loc after splitting
Chen, Xiaogang
xiaogang.chen at amd.com
Tue Jan 9 22:29:01 UTC 2024
On 1/9/2024 2:05 PM, Philip Yang wrote:
> After svm range partial migrating to system memory, unmap to cleanup the
> corresponding dma_addr vram domain flag, otherwise the future split will
> get incorrect vram_pages and actual loc.
>
> After range spliting, set new range and old range actual_loc:
> new range actual_loc is 0 if new->vram_pages is 0.
> old range actual_loc is 0 if old->vram_pages - new->vram_pages == 0.
>
> new range takes svm_bo ref only if vram_pages not equal to 0.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 ++
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 35 +++++++++++++++---------
> drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 3 +-
> 3 files changed, 27 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index f856901055d3..e85bcda29db6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -839,6 +839,9 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
> prange->actual_loc = 0;
> svm_range_vram_node_free(prange);
> }
> +
> + svm_range_dma_unmap(prange, start_mgr - prange->start,
> + last_mgr - start_mgr + 1);
When we come here we know that some pages got migrated to system RAM, but in
theory we do not know whether all of them did. svm_range_dma_unmap does
dma_unmap for all pages in the range from offset (start_mgr - prange->start)
with count (last_mgr - start_mgr + 1).
If some pages were not migrated for some reason (though that is rare), we
still need to keep their dma_addr; I think only HMM can tell us that.
> }
>
> return r < 0 ? r : 0;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index cc24f30f88fb..2202bdcde057 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -254,6 +254,10 @@ void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
> return;
>
> for (i = offset; i < offset + npages; i++) {
> + if (dma_addr[i] & SVM_RANGE_VRAM_DOMAIN) {
> + dma_addr[i] = 0;
> + continue;
> + }
Same concern as above: here dma_addr[i] is set to 0 unconditionally, without
knowing whether the page is indeed in system RAM.
> if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i]))
> continue;
> pr_debug_ratelimited("unmap 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
> @@ -262,7 +266,8 @@ void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
> }
> }
>
> -void svm_range_dma_unmap(struct svm_range *prange)
> +void svm_range_dma_unmap(struct svm_range *prange, unsigned long offset,
> + unsigned long npages)
> {
> struct kfd_process_device *pdd;
> dma_addr_t *dma_addr;
> @@ -284,7 +289,7 @@ void svm_range_dma_unmap(struct svm_range *prange)
> }
> dev = &pdd->dev->adev->pdev->dev;
>
> - svm_range_dma_unmap_dev(dev, dma_addr, 0, prange->npages);
> + svm_range_dma_unmap_dev(dev, dma_addr, offset, npages);
> }
> }
>
> @@ -299,7 +304,7 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
>
> svm_range_vram_node_free(prange);
> if (do_unmap)
> - svm_range_dma_unmap(prange);
> + svm_range_dma_unmap(prange, 0, prange->npages);
>
> if (do_unmap && !p->xnack_enabled) {
> pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
> @@ -362,7 +367,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
> INIT_LIST_HEAD(&prange->child_list);
> atomic_set(&prange->invalid, 0);
>
> - prange->vram_pages = 0;
Do you still want this as in the last patch? I thought you also wanted to
remove
prange->validate_timestamp = 0;
and
atomic_set(&prange->invalid, 0);
since those are not necessary to set after kzalloc.
Regards
Xiaogang
> mutex_init(&prange->migrate_mutex);
> mutex_init(&prange->lock);
>
> @@ -980,9 +984,14 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old,
> if (r)
> return r;
> }
> - if (old->actual_loc)
> + if (old->actual_loc && new->vram_pages) {
> old->vram_pages -= new->vram_pages;
> -
> + new->actual_loc = old->actual_loc;
> + if (!old->vram_pages)
> + old->actual_loc = 0;
> + }
> + pr_debug("new->vram_pages 0x%llx loc 0x%x old->vram_pages 0x%llx loc 0x%x\n",
> + new->vram_pages, new->actual_loc, old->vram_pages, old->actual_loc);
> return 0;
> }
>
> @@ -1002,13 +1011,14 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
> new->offset = old->offset + npages;
> }
>
> - new->svm_bo = svm_range_bo_ref(old->svm_bo);
> - new->ttm_res = old->ttm_res;
> -
> - spin_lock(&new->svm_bo->list_lock);
> - list_add(&new->svm_bo_list, &new->svm_bo->range_list);
> - spin_unlock(&new->svm_bo->list_lock);
> + if (new->vram_pages) {
> + new->svm_bo = svm_range_bo_ref(old->svm_bo);
> + new->ttm_res = old->ttm_res;
>
> + spin_lock(&new->svm_bo->list_lock);
> + list_add(&new->svm_bo_list, &new->svm_bo->range_list);
> + spin_unlock(&new->svm_bo->list_lock);
> + }
> return 0;
> }
>
> @@ -1058,7 +1068,6 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
> new->flags = old->flags;
> new->preferred_loc = old->preferred_loc;
> new->prefetch_loc = old->prefetch_loc;
> - new->actual_loc = old->actual_loc;
> new->granularity = old->granularity;
> new->mapped_to_gpu = old->mapped_to_gpu;
> bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index 026863a0abcd..778f108911cd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -182,7 +182,8 @@ void svm_range_add_list_work(struct svm_range_list *svms,
> void schedule_deferred_list_work(struct svm_range_list *svms);
> void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
> unsigned long offset, unsigned long npages);
> -void svm_range_dma_unmap(struct svm_range *prange);
> +void svm_range_dma_unmap(struct svm_range *prange, unsigned long offset,
> + unsigned long npages);
> int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
> uint64_t *svm_priv_data_size);
> int kfd_criu_checkpoint_svm(struct kfd_process *p,
More information about the amd-gfx
mailing list