<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body>
<div class="moz-cite-prefix">On 2024-01-10 11:30, Felix Kuehling
wrote:<br>
</div>
<blockquote type="cite" cite="mid:21fb682d-517e-4d23-b7fe-b1eeff683ab2@amd.com">
<br>
On 2024-01-09 15:05, Philip Yang wrote:
<br>
<blockquote type="cite">After svm range partial migrating to
system memory, unmap to cleanup the
<br>
corresponding dma_addr vram domain flag, otherwise the future
split will
<br>
get incorrect vram_pages and actual loc.
<br>
<br>
After range spliting, set new range and old range actual_loc:
<br>
new range actual_loc is 0 if new->vram_pages is 0.
<br>
old range actual_loc is 0 if old->vram_pages -
new->vram_pages == 0.
<br>
<br>
new range takes svm_bo ref only if vram_pages not equal to 0.
<br>
<br>
Signed-off-by: Philip Yang <a class="moz-txt-link-rfc2396E" href="mailto:Philip.Yang@amd.com"><Philip.Yang@amd.com></a>
<br>
---
<br>
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 ++
<br>
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 35
+++++++++++++++---------
<br>
drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 3 +-
<br>
3 files changed, 27 insertions(+), 14 deletions(-)
<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
<br>
index f856901055d3..e85bcda29db6 100644
<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
<br>
@@ -839,6 +839,9 @@ int svm_migrate_vram_to_ram(struct svm_range
*prange, struct mm_struct *mm,
<br>
prange->actual_loc = 0;
<br>
svm_range_vram_node_free(prange);
<br>
}
<br>
+
<br>
+ svm_range_dma_unmap(prange, start_mgr -
prange->start,
<br>
+ last_mgr - start_mgr + 1);
<br>
</blockquote>
<br>
If this is just for clearing the VRAM flags, then we should
probably create another helper function for that. DMA unmapping
system memory pages that didn't even move is not necessary here.
<br>
<br>
Also, as Xiaogang pointed out, the migration may have missed some
pages due to page locking race conditions. If you want this to
give you accurate VRAM page counts, you should only clear the VRAM
flags for pages that were actually migrated.
<br>
</blockquote>
<p>OK, I understand the concern now. If migration to system memory
fails while recovering a CPU page fault, the app will crash anyway,
but a prefetch may fail to migrate some pages to system memory. I
will send a new patch that clears the prange->dma_addr[gpuidx]
VRAM flags while migrating the range to RAM.</p>
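<p>For reference, a minimal sketch of the kind of helper Felix is
suggesting. The function name and the plain loop over all gpuidx
slots are assumptions on my part (the real code may iterate only
the GPUs that have valid DMA arrays), and it clears the flag for
every page in the given window, so the caller would still have to
restrict that window to the pages that actually migrated:</p>
<pre>
/*
 * Hypothetical helper, not the final patch: clear the VRAM domain
 * flag in prange->dma_addr[] for npages pages starting at offset,
 * without DMA-unmapping system memory pages that did not move.
 */
static void
svm_range_clear_vram_flags(struct svm_range *prange,
                           unsigned long offset, unsigned long npages)
{
        unsigned long i;
        uint32_t gpuidx;

        for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
                dma_addr_t *dma_addr = prange->dma_addr[gpuidx];

                if (!dma_addr)
                        continue;

                for (i = offset; i < offset + npages; i++)
                        if (dma_addr[i] & SVM_RANGE_VRAM_DOMAIN)
                                dma_addr[i] = 0;
        }
}
</pre>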
<p>Regards,</p>
<p>Philip<br>
</p>
<blockquote type="cite" cite="mid:21fb682d-517e-4d23-b7fe-b1eeff683ab2@amd.com">
<br>
Regards,
<br>
Felix
<br>
<br>
<br>
<blockquote type="cite"> }
<br>
return r < 0 ? r : 0;
<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
<br>
index cc24f30f88fb..2202bdcde057 100644
<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
<br>
@@ -254,6 +254,10 @@ void svm_range_dma_unmap_dev(struct device
*dev, dma_addr_t *dma_addr,
<br>
return;
<br>
for (i = offset; i < offset + npages; i++) {
<br>
+ if (dma_addr[i] & SVM_RANGE_VRAM_DOMAIN) {
<br>
+ dma_addr[i] = 0;
<br>
+ continue;
<br>
+ }
<br>
if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i]))
<br>
continue;
<br>
pr_debug_ratelimited("unmap 0x%llx\n", dma_addr[i]
>> PAGE_SHIFT);
<br>
@@ -262,7 +266,8 @@ void svm_range_dma_unmap_dev(struct device
*dev, dma_addr_t *dma_addr,
<br>
}
<br>
}
<br>
-void svm_range_dma_unmap(struct svm_range *prange)
<br>
+void svm_range_dma_unmap(struct svm_range *prange, unsigned
long offset,
<br>
+ unsigned long npages)
<br>
{
<br>
struct kfd_process_device *pdd;
<br>
dma_addr_t *dma_addr;
<br>
@@ -284,7 +289,7 @@ void svm_range_dma_unmap(struct svm_range
*prange)
<br>
}
<br>
dev = &pdd->dev->adev->pdev->dev;
<br>
- svm_range_dma_unmap_dev(dev, dma_addr, 0,
prange->npages);
<br>
+ svm_range_dma_unmap_dev(dev, dma_addr, offset, npages);
<br>
}
<br>
}
<br>
@@ -299,7 +304,7 @@ static void svm_range_free(struct
svm_range *prange, bool do_unmap)
<br>
svm_range_vram_node_free(prange);
<br>
if (do_unmap)
<br>
- svm_range_dma_unmap(prange);
<br>
+ svm_range_dma_unmap(prange, 0, prange->npages);
<br>
if (do_unmap && !p->xnack_enabled) {
<br>
pr_debug("unreserve prange 0x%p size: 0x%llx\n",
prange, size);
<br>
@@ -362,7 +367,6 @@ svm_range *svm_range_new(struct
svm_range_list *svms, uint64_t start,
<br>
INIT_LIST_HEAD(&prange->child_list);
<br>
atomic_set(&prange->invalid, 0);
<br>
prange->validate_timestamp = 0;
<br>
- prange->vram_pages = 0;
<br>
mutex_init(&prange->migrate_mutex);
<br>
mutex_init(&prange->lock);
<br>
@@ -980,9 +984,14 @@ svm_range_split_pages(struct svm_range
*new, struct svm_range *old,
<br>
if (r)
<br>
return r;
<br>
}
<br>
- if (old->actual_loc)
<br>
+ if (old->actual_loc && new->vram_pages) {
<br>
old->vram_pages -= new->vram_pages;
<br>
-
<br>
+ new->actual_loc = old->actual_loc;
<br>
+ if (!old->vram_pages)
<br>
+ old->actual_loc = 0;
<br>
+ }
<br>
+ pr_debug("new->vram_pages 0x%llx loc 0x%x
old->vram_pages 0x%llx loc 0x%x\n",
<br>
+ new->vram_pages, new->actual_loc,
old->vram_pages, old->actual_loc);
<br>
return 0;
<br>
}
<br>
@@ -1002,13 +1011,14 @@ svm_range_split_nodes(struct svm_range
*new, struct svm_range *old,
<br>
new->offset = old->offset + npages;
<br>
}
<br>
- new->svm_bo = svm_range_bo_ref(old->svm_bo);
<br>
- new->ttm_res = old->ttm_res;
<br>
-
<br>
- spin_lock(&new->svm_bo->list_lock);
<br>
- list_add(&new->svm_bo_list,
&new->svm_bo->range_list);
<br>
- spin_unlock(&new->svm_bo->list_lock);
<br>
+ if (new->vram_pages) {
<br>
+ new->svm_bo = svm_range_bo_ref(old->svm_bo);
<br>
+ new->ttm_res = old->ttm_res;
<br>
+ spin_lock(&new->svm_bo->list_lock);
<br>
+ list_add(&new->svm_bo_list,
&new->svm_bo->range_list);
<br>
+ spin_unlock(&new->svm_bo->list_lock);
<br>
+ }
<br>
return 0;
<br>
}
<br>
@@ -1058,7 +1068,6 @@ svm_range_split_adjust(struct svm_range
*new, struct svm_range *old,
<br>
new->flags = old->flags;
<br>
new->preferred_loc = old->preferred_loc;
<br>
new->prefetch_loc = old->prefetch_loc;
<br>
- new->actual_loc = old->actual_loc;
<br>
new->granularity = old->granularity;
<br>
new->mapped_to_gpu = old->mapped_to_gpu;
<br>
bitmap_copy(new->bitmap_access, old->bitmap_access,
MAX_GPU_INSTANCE);
<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
<br>
index 026863a0abcd..778f108911cd 100644
<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
<br>
@@ -182,7 +182,8 @@ void svm_range_add_list_work(struct
svm_range_list *svms,
<br>
void schedule_deferred_list_work(struct svm_range_list *svms);
<br>
void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t
*dma_addr,
<br>
unsigned long offset, unsigned long npages);
<br>
-void svm_range_dma_unmap(struct svm_range *prange);
<br>
+void svm_range_dma_unmap(struct svm_range *prange, unsigned
long offset,
<br>
+ unsigned long npages);
<br>
int svm_range_get_info(struct kfd_process *p, uint32_t
*num_svm_ranges,
<br>
uint64_t *svm_priv_data_size);
<br>
int kfd_criu_checkpoint_svm(struct kfd_process *p,
<br>
</blockquote>
</blockquote>
</body>
</html>