[PATCH v3] drm/amdkfd: Move the process suspend and resume out of full access

Emily Deng Emily.Deng at amd.com
Wed Jun 4 07:48:17 UTC 2025


For the suspend and resume process, exclusive access is not required.
Therefore, it can be moved out of the full access section to reduce the
duration of exclusive access.

v3:
Move suspend processes before hardware fini.
Remove twice call for bare metal.

Signed-off-by: Emily Deng <Emily.Deng at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 +++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  9 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 40 +++++++++++++++++-----
 4 files changed, 67 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index d8ac4b1051a8..0a8e7835d0fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -264,6 +264,22 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
 	return r;
 }
 
+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool run_pm)
+{
+	if (adev->kfd.dev)
+		kgd2kfd_suspend_process(adev->kfd.dev, run_pm);
+}
+
+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool run_pm)
+{
+	int r = 0;
+
+	if (adev->kfd.dev)
+		r = kgd2kfd_resume_process(adev->kfd.dev, run_pm);
+
+	return r;
+}
+
 int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
 			    struct amdgpu_reset_context *reset_context)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index b6ca41859b53..841ae8b75ab1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -156,6 +156,8 @@ void amdgpu_amdkfd_fini(void);
 
 void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
 int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool run_pm);
+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool run_pm);
 void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 			const void *ih_ring_entry);
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
@@ -413,6 +415,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 void kgd2kfd_device_exit(struct kfd_dev *kfd);
 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm);
+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm);
 int kgd2kfd_pre_reset(struct kfd_dev *kfd,
 		      struct amdgpu_reset_context *reset_context);
 int kgd2kfd_post_reset(struct kfd_dev *kfd);
@@ -463,6 +467,15 @@ static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 	return 0;
 }
 
+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm)
+{
+}
+
+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm)
+{
+	return 0;
+}
+
 static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
 				    struct amdgpu_reset_context *reset_context)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5289400879ec..08ff9917c62f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5061,6 +5061,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
 	adev->in_suspend = true;
 
 	if (amdgpu_sriov_vf(adev)) {
+		if (!adev->in_s0ix)
+			amdgpu_amdkfd_suspend_process(adev, adev->in_runpm);
 		amdgpu_virt_fini_data_exchange(adev);
 		r = amdgpu_virt_request_full_gpu(adev, false);
 		if (r)
@@ -5080,7 +5082,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
 	amdgpu_device_ip_suspend_phase1(adev);
 
 	if (!adev->in_s0ix) {
-		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
+		amdgpu_amdkfd_suspend(adev, amdgpu_sriov_vf(adev) || adev->in_runpm);
 		amdgpu_userq_suspend(adev);
 	}
 
@@ -5178,7 +5180,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
 	}
 
 	if (!adev->in_s0ix) {
-		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
+		r = amdgpu_amdkfd_resume(adev, amdgpu_sriov_vf(adev) || adev->in_runpm);
 		if (r)
 			goto exit;
 
@@ -5197,6 +5199,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
 	if (amdgpu_sriov_vf(adev)) {
 		amdgpu_virt_init_data_exchange(adev);
 		amdgpu_virt_release_full_gpu(adev, true);
+
+		if (!adev->in_s0ix && !r)
+			r = amdgpu_amdkfd_resume_process(adev, adev->in_runpm);
 	}
 
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index bf0854bd5555..22c6ef7c42b6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -1027,15 +1027,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 	if (!kfd->init_complete)
 		return;
 
-	/* for runtime suspend, skip locking kfd */
-	if (!run_pm) {
-		mutex_lock(&kfd_processes_mutex);
-		/* For first KFD device suspend all the KFD processes */
-		if (++kfd_locked == 1)
-			kfd_suspend_all_processes();
-		mutex_unlock(&kfd_processes_mutex);
-	}
-
+	kgd2kfd_suspend_process(kfd, run_pm);
 	for (i = 0; i < kfd->num_nodes; i++) {
 		node = kfd->nodes[i];
 		node->dqm->ops.stop(node->dqm);
@@ -1055,6 +1047,36 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 			return ret;
 	}
 
+	ret = kgd2kfd_resume_process(kfd, run_pm);
+
+	return ret;
+}
+
+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm)
+{
+	struct kfd_node *node;
+	int i;
+
+	if (!kfd->init_complete)
+		return;
+
+	/* for runtime suspend, skip locking kfd */
+	if (!run_pm) {
+		mutex_lock(&kfd_processes_mutex);
+		/* For first KFD device suspend all the KFD processes */
+		if (++kfd_locked == 1)
+			kfd_suspend_all_processes();
+		mutex_unlock(&kfd_processes_mutex);
+	}
+}
+
+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm)
+{
+	int ret, i;
+
+	if (!kfd->init_complete)
+		return 0;
+
 	/* for runtime resume, skip unlocking kfd */
 	if (!run_pm) {
 		mutex_lock(&kfd_processes_mutex);
-- 
2.34.1



More information about the amd-gfx mailing list