[PATCH] amd/amdkfd: Refine locking in restore pages worker
Philip Yang
Philip.Yang at amd.com
Mon Jan 8 22:37:48 UTC 2024
To be able to handle multiple GPU page faults concurrently:
1. Remove the mmap write lock; take and release the mmap read lock only
while checking the vma, migrating, and calling hmm_range_fault.
2. Insert the new range's mmu interval notifier without taking the mmap lock.
3. Take and release the svms lock only while checking for and inserting a
new range into svms.
Signed-off-by: Philip Yang <Philip.Yang at amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 70 ++++++++++++++--------------
1 file changed, 36 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3b78e48832e9..cc24f30f88fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -117,6 +117,18 @@ svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange)
&svm_range_mn_ops);
}
+static void
+svm_range_add_notifier(struct mm_struct *mm, struct svm_range *prange)
+{
+ pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+ prange, prange->start, prange->last);
+
+ mmu_interval_notifier_insert(&prange->notifier, mm,
+ prange->start << PAGE_SHIFT,
+ prange->npages << PAGE_SHIFT,
+ &svm_range_mn_ops);
+}
+
/**
* svm_range_add_to_svms - add svm range to svms
* @prange: svm range structure to be added
@@ -2801,9 +2813,6 @@ svm_range *svm_range_create_unregistered_range(struct kfd_node *node,
if (is_heap_stack)
prange->preferred_loc = KFD_IOCTL_SVM_LOCATION_SYSMEM;
- svm_range_add_to_svms(prange);
- svm_range_add_notifier_locked(mm, prange);
-
return prange;
}
@@ -2899,9 +2908,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
struct kfd_node *node;
int32_t best_loc;
int32_t gpuidx = MAX_GPU_INSTANCE;
- bool write_locked = false;
struct vm_area_struct *vma;
bool migration = false;
+ bool new_prange = false;
int r = 0;
if (!KFD_IS_SVM_API_SUPPORTED(adev)) {
@@ -2947,42 +2956,30 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
r = -EFAULT;
goto out;
}
+
mmap_read_lock(mm);
-retry_write_locked:
mutex_lock(&svms->lock);
prange = svm_range_from_addr(svms, addr, NULL);
if (!prange) {
pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
svms, addr);
- if (!write_locked) {
- /* Need the write lock to create new range with MMU notifier.
- * Also flush pending deferred work to make sure the interval
- * tree is up to date before we add a new range
- */
- mutex_unlock(&svms->lock);
- mmap_read_unlock(mm);
- mmap_write_lock(mm);
- write_locked = true;
- goto retry_write_locked;
- }
prange = svm_range_create_unregistered_range(node, p, mm, addr);
if (!prange) {
pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
svms, addr);
- mmap_write_downgrade(mm);
r = -EFAULT;
- goto out_unlock_svms;
+ mutex_unlock(&svms->lock);
+ goto out_unlock_mm;
}
+ svm_range_add_to_svms(prange);
+ new_prange = true;
}
- if (write_locked)
- mmap_write_downgrade(mm);
-
- mutex_lock(&prange->migrate_mutex);
+ mutex_unlock(&svms->lock);
if (svm_range_skip_recover(prange)) {
amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid);
r = 0;
- goto out_unlock_range;
+ goto out_unlock_mm;
}
/* skip duplicate vm fault on different pages of same range */
@@ -2991,7 +2988,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
svms, prange->start, prange->last);
r = 0;
- goto out_unlock_range;
+ goto out_unlock_mm;
}
/* __do_munmap removed VMA, return success as we are handling stale
@@ -3001,14 +2998,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
if (!vma) {
pr_debug("address 0x%llx VMA is removed\n", addr);
r = 0;
- goto out_unlock_range;
+ goto out_unlock_mm;
}
if (!svm_fault_allowed(vma, write_fault)) {
pr_debug("fault addr 0x%llx no %s permission\n", addr,
write_fault ? "write" : "read");
r = -EPERM;
- goto out_unlock_range;
+ goto out_unlock_mm;
}
best_loc = svm_range_best_restore_location(prange, node, &gpuidx);
@@ -3016,9 +3013,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
svms, prange->start, prange->last);
r = -EACCES;
- goto out_unlock_range;
+ goto out_unlock_mm;
}
+ mutex_lock(&prange->migrate_mutex);
+
pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n",
svms, prange->start, prange->last, best_loc,
prange->actual_loc);
@@ -3055,9 +3054,17 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
if (r) {
pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
r, svms, start, last);
- goto out_unlock_range;
+ mutex_unlock(&prange->migrate_mutex);
+ goto out_unlock_mm;
}
}
+ mutex_unlock(&prange->migrate_mutex);
+ mmap_read_unlock(mm);
+
+ if (new_prange)
+ svm_range_add_notifier(mm, prange);
+
+ mmap_read_lock(mm);
r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
false, false);
@@ -3068,14 +3075,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
migration);
-out_unlock_range:
- mutex_unlock(&prange->migrate_mutex);
-out_unlock_svms:
- mutex_unlock(&svms->lock);
+out_unlock_mm:
mmap_read_unlock(mm);
-
svm_range_count_fault(node, p, gpuidx);
-
mmput(mm);
out:
kfd_unref_process(p);
--
2.35.1
More information about the amd-gfx
mailing list