[PATCH v2 1/1] drm/amdkfd: Track unified memory when switching xnack mode

Philip Yang Philip.Yang at amd.com
Tue Sep 20 22:07:38 UTC 2022


Unified memory usage with xnack off is tracked to avoid oversubscribe
system memory. When switching xnack mode from off to on, subsequent
free ranges allocated with xnack off will not unreserve memory when
xnack is on, cause memory accounting unbalanced.

When switching xnack mode from on to off, need reserve already allocated
svm range memory because subsequent free ranges will unreserve memory
with xnack off.

Signed-off-by: Philip Yang <Philip.Yang at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 23 ++++++++++++++++++-----
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 20 ++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h     |  2 ++
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 56f7307c21d2..116dec5bb318 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1594,16 +1594,29 @@ static int kfd_ioctl_set_xnack_mode(struct file *filep,
 	if (args->xnack_enabled >= 0) {
 		if (!list_empty(&p->pqm.queues)) {
 			pr_debug("Process has user queues running\n");
-			mutex_unlock(&p->mutex);
-			return -EBUSY;
+			r = -EBUSY;
+			goto out_unlock;
 		}
-		if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
+
+		if (p->xnack_enabled == args->xnack_enabled)
+			goto out_unlock;
+
+		if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) {
 			r = -EPERM;
-		else
-			p->xnack_enabled = args->xnack_enabled;
+			goto out_unlock;
+		}
+
+		p->xnack_enabled = args->xnack_enabled;
+
+		/* Switching to XNACK on/off, unreserve/reserve memory of all
+		 * svm ranges.
+		 */
+		svm_range_list_unreserve_mem(p, p->xnack_enabled);
 	} else {
 		args->xnack_enabled = p->xnack_enabled;
 	}
+
+out_unlock:
 	mutex_unlock(&p->mutex);
 
 	return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index cf5b4005534c..010a9408a127 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2956,6 +2956,26 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	return r;
 }
 
+void svm_range_list_unreserve_mem(struct kfd_process *p, bool unreserve)
+{
+	struct svm_range *prange;
+	uint64_t size;
+
+	mutex_lock(&p->svms.lock);
+	list_for_each_entry(prange, &p->svms.list, list) {
+		size = (prange->last - prange->start + 1) << PAGE_SHIFT;
+		pr_debug("svms 0x%p %s range size 0x%llx\n", &p->svms,
+			 unreserve ? "unreserve" : "reserve", size);
+		if (unreserve)
+			amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+						KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+		else
+			amdgpu_amdkfd_reserve_mem_limit(NULL, size,
+						KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+	}
+	mutex_unlock(&p->svms.lock);
+}
+
 void svm_range_list_fini(struct kfd_process *p)
 {
 	struct svm_range *prange;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 012c53729516..05a2135cd56e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -203,10 +203,12 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
 void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
 
 void svm_range_set_max_pages(struct amdgpu_device *adev);
+void svm_range_list_unreserve_mem(struct kfd_process *p, bool unreserve);
 
 #else
 
 struct kfd_process;
+void svm_range_list_unreserve_mem(struct kfd_process *p, bool unreserve) { }
 
 static inline int svm_range_list_init(struct kfd_process *p)
 {
-- 
2.35.1



More information about the amd-gfx mailing list