On 2022-04-21 16:17, Felix Kuehling wrote:
> On 2022-04-19 at 20:47, Philip Yang wrote:
<blockquote type="cite">Change SVM range mapping flags or access
attributes don't trigger
<br>
migration, if range is already mapped on GPUs we should update
GPU
<br>
mapping, and pass flush_tlb flag to amdgpu vm.
<br>
<br>
Signed-off-by: Philip Yang <a class="moz-txt-link-rfc2396E" href="mailto:Philip.Yang@amd.com"><Philip.Yang@amd.com></a>
<br>
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 41 ++++++++++++++++++----------
>>  1 file changed, 27 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 6be1695f3a09..0a9bdadf875e 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -686,7 +686,8 @@ svm_range_check_attr(struct kfd_process *p,
>>
>>  static void
>>  svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>> -		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
>> +		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
>> +		      bool *update_mapping)
>>  {
>>  	uint32_t i;
>>  	int gpuidx;
>> @@ -702,6 +703,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>>  		case KFD_IOCTL_SVM_ATTR_ACCESS:
>>  		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
>>  		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
>> +			*update_mapping = true;
>>  			gpuidx = kfd_process_gpuidx_from_gpuid(p,
>>  							       attrs[i].value);
>>  			if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
>> @@ -716,9 +718,11 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>>  			}
>>  			break;
>>  		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
>> +			*update_mapping = true;
>>  			prange->flags |= attrs[i].value;
>>  			break;
>>  		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
>> +			*update_mapping = true;
>>  			prange->flags &= ~attrs[i].value;
>>  			break;
>>  		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
>> @@ -1253,7 +1257,7 @@ static int
>>  svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>>  		     unsigned long offset, unsigned long npages, bool readonly,
>>  		     dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
>> -		     struct dma_fence **fence)
>> +		     struct dma_fence **fence, bool flush_tlb)
>>  {
>>  	struct amdgpu_device *adev = pdd->dev->adev;
>>  	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
>> @@ -1291,7 +1295,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>>  					 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>  					 pte_flags);
>>
>> -		r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
>> +		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
>>  					   last_start, prange->start + i,
>>  					   pte_flags,
>>  					   last_start - prange->start,
>> @@ -1325,7 +1329,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>>  static int
>>  svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>>  		      unsigned long npages, bool readonly,
>> -		      unsigned long *bitmap, bool wait)
>> +		      unsigned long *bitmap, bool wait, bool flush_tlb)
>>  {
>>  	struct kfd_process_device *pdd;
>>  	struct amdgpu_device *bo_adev;
>> @@ -1360,7 +1364,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>>  		r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
>>  					 prange->dma_addr[gpuidx],
>> -					 bo_adev, wait ? &fence : NULL);
>> +					 bo_adev, wait ? &fence : NULL,
>> +					 flush_tlb);
>>  		if (r)
>>  			break;
>> @@ -1481,8 +1486,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
>>   * 5. Release page table (and SVM BO) reservation
>>   */
>>  static int svm_range_validate_and_map(struct mm_struct *mm,
>> -				      struct svm_range *prange,
>> -				      int32_t gpuidx, bool intr, bool wait)
>> +				      struct svm_range *prange, int32_t gpuidx,
>> +				      bool intr, bool wait, bool flush_tlb)
>>  {
>>  	struct svm_validate_context ctx;
>>  	unsigned long start, end, addr;
>> @@ -1521,8 +1526,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>>  			  prange->bitmap_aip, MAX_GPU_INSTANCE);
>>  	}
>>
>> -	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
>> -		return 0;
>> +	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
>> +		if (!prange->mapped_to_gpu)
>> +			return 0;
>> +
>> +		bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
>> +	}
>>
>>  	if (prange->actual_loc && !prange->ttm_res) {
>>  		/* This should never happen. actual_loc gets set by
>> @@ -1594,7 +1603,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>>  		}
>>
>>  		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
>> -					  ctx.bitmap, wait);
>> +					  ctx.bitmap, wait, flush_tlb);
>>
>> unlock_out:
>>  		svm_range_unlock(prange);
>> @@ -1690,7 +1699,7 @@ static void svm_range_restore_work(struct work_struct *work)
>>  		mutex_lock(&prange->migrate_mutex);
>>
>>  		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
>> -					       false, true);
>> +					       false, true, false);
>>  		if (r)
>>  			pr_debug("failed %d to map 0x%lx to gpus\n", r,
>>  				 prange->start);
>> @@ -2828,7 +2837,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>>  		}
>>  	}
>>
>> -	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
>> +	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
>>  	if (r)
>>  		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
>>  			 r, svms, prange->start, prange->last);
>> @@ -3241,6 +3250,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>>  	struct svm_range_list *svms;
>>  	struct svm_range *prange;
>>  	struct svm_range *next;
>> +	bool update_mapping = false;
>> +	bool flush_tlb;
>>  	int r = 0;
>>
>>  	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
>> @@ -3279,7 +3290,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>>  		svm_range_add_notifier_locked(mm, prange);
>>  	}
>>  	list_for_each_entry(prange, &update_list, update_list) {
>> -		svm_range_apply_attrs(p, prange, nattr, attrs);
>> +		svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
>>  		/* TODO: unmap ranges from GPU that lost access */
>>  	}
>>  	list_for_each_entry_safe(prange, next, &remove_list, update_list) {
>> @@ -3312,8 +3323,10 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>>  			continue;
>>  		}
>>
>> +		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
>> +
>
> Can we skip the validate_and_map if update_mapping is false?

Yes. If only the range preferred_loc or the migration granularity changes,
there is no need to update the mapping, so we can skip validate_and_map. I
will send out a v2 patch, roughly along the lines of the sketch below.
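
A minimal sketch of the v2 change I have in mind (untested, and the exact
condition may differ in the final patch), reusing the update_mapping and
migrated flags already computed in svm_range_set_attr:

	/* Skip validate_and_map when no mapping-related attribute changed
	 * and no migration happened. flush_tlb keeps the condition from
	 * this patch: only flush when updating an existing GPU mapping
	 * without a migration.
	 */
	if (update_mapping || migrated) {
		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
					       true, true, flush_tlb);
		if (r)
			pr_debug("failed %d to map svm range\n", r);
	}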

Thanks,
Philip
<blockquote type="cite" cite="mid:8c272923-ddfa-9151-4efe-cf8127ea995f@amd.com">Other than
that, the series looks good to me.
<br>
<br>
Regards,
<br>
Felix
<br>
<br>
<br>
<blockquote type="cite"> r =
svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
<br>
- true, true);
<br>
+ true, true, flush_tlb);
<br>
if (r)
<br>
pr_debug("failed %d to map svm range\n", r);
<br>
</blockquote>
</blockquote>