[PATCH v2] drm/amdkfd: Move the process suspend and resume out of full access

Deng, Emily Emily.Deng at amd.com
Tue Jun 3 09:10:49 UTC 2025


[AMD Official Use Only - AMD Internal Distribution Only]

Hi Christian and Horace,
     Could you please help review this patch?

Emily Deng
Best Wishes



>-----Original Message-----
>From: Zhang, Owen(SRDC) <Owen.Zhang2 at amd.com>
>Sent: Friday, May 30, 2025 5:50 PM
>To: Deng, Emily <Emily.Deng at amd.com>; amd-gfx at lists.freedesktop.org
>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume out of
>full access
>
>[AMD Official Use Only - AMD Internal Distribution Only]
>
>Hi Team, @amd-gfx at lists.freedesktop.org
>
>Could you please review and provide your input? Thanks for your support!
>
>
>Rgds/Owen
>
>-----Original Message-----
>From: Deng, Emily <Emily.Deng at amd.com>
>Sent: Friday, May 30, 2025 9:39 AM
>To: amd-gfx at lists.freedesktop.org
>Cc: Zhang, Owen(SRDC) <Owen.Zhang2 at amd.com>
>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume out of
>full access
>
>[AMD Official Use Only - AMD Internal Distribution Only]
>
>Ping......
>
>Emily Deng
>Best Wishes
>
>
>
>>-----Original Message-----
>>From: Deng, Emily <Emily.Deng at amd.com>
>>Sent: Wednesday, May 28, 2025 2:20 PM
>>To: Deng, Emily <Emily.Deng at amd.com>; amd-gfx at lists.freedesktop.org
>>Cc: Zhang, Owen(SRDC) <Owen.Zhang2 at amd.com>
>>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume
>>out of full access
>>
>>[AMD Official Use Only - AMD Internal Distribution Only]
>>
>>>-----Original Message-----
>>>From: Emily Deng <Emily.Deng at amd.com>
>>>Sent: Tuesday, May 27, 2025 6:50 PM
>>>To: amd-gfx at lists.freedesktop.org
>>>Cc: Deng, Emily <Emily.Deng at amd.com>
>>>Subject: [PATCH v2] drm/amdkfd: Move the process suspend and resume
>>>out of full access
>>>
>>>Suspending and resuming the KFD processes does not require exclusive
>>>(full) GPU access. Therefore, move these steps out of the full access
>>>section to reduce the duration of exclusive access.
>>>
>>>Signed-off-by: Emily Deng <Emily.Deng at amd.com>
>>>---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 +++++++++
>>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++
>>>drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++++--
>>> drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 40 +++++++++++++++++-----
>>> 4 files changed, 70 insertions(+), 11 deletions(-)
>>>
>>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>index 4cec3a873995..ba07e9c6619d 100644
>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>@@ -264,6 +264,22 @@ int amdgpu_amdkfd_resume(struct amdgpu_device
>>>*adev, bool run_pm)
>>>       return r;
>>> }
>>>
>>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool
>>>+run_pm) {
>>>+      if (adev->kfd.dev)
>>>+              kgd2kfd_suspend_process(adev->kfd.dev, run_pm); }
>>>+
>>>+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool
>>>+run_pm) {
>>>+      int r = 0;
>>>+
>>>+      if (adev->kfd.dev)
>>>+              r = kgd2kfd_resume_process(adev->kfd.dev, run_pm);
>>>+
>>>+      return r;
>>>+}
>>>+
>>> int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
>>>                           struct amdgpu_reset_context *reset_context)
>>>{ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>index b6ca41859b53..841ae8b75ab1 100644
>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>@@ -156,6 +156,8 @@ void amdgpu_amdkfd_fini(void);
>>>
>>> void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
>>>int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
>>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool
>>>+run_pm); int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev,
>>>+bool run_pm);
>>> void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
>>>                       const void *ih_ring_entry); void
>>> amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); @@ -413,6
>>>+415,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  void
>>>kgd2kfd_device_exit(struct kfd_dev *kfd);  void kgd2kfd_suspend(struct
>>>kfd_dev *kfd, bool run_pm);  int kgd2kfd_resume(struct kfd_dev *kfd,
>>>bool run_pm);
>>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm); int
>>>+kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm);
>>> int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>>>                     struct amdgpu_reset_context *reset_context);  int
>>>kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -463,6 +467,15 @@ static
>>>inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>>>       return 0;
>>> }
>>>
>>>+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool
>>>+run_pm) { }
>>>+
>>>+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd, bool
>>>+run_pm) {
>>>+      return 0;
>>>+}
>>>+
>>> static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>>>                                   struct amdgpu_reset_context
>>>*reset_context)  { diff --git
>>>a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>index 625c416c7d45..6e29f8bd54bb 100644
>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>@@ -5080,7 +5080,7 @@ int amdgpu_device_suspend(struct drm_device
>>>*dev, bool
>>>notify_clients)
>>>       amdgpu_device_ip_suspend_phase1(adev);
>>>
>>>       if (!adev->in_s0ix) {
>>>-              amdgpu_amdkfd_suspend(adev, adev->in_runpm);
>>>+              amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_runtime(adev)
>>>+ ||
>>>+adev->in_runpm);
>>>               amdgpu_userq_suspend(adev);
>>>       }
>>>
>>>@@ -5097,6 +5097,9 @@ int amdgpu_device_suspend(struct drm_device
>>>*dev, bool
>>>notify_clients)
>>>       if (amdgpu_sriov_vf(adev))
>>>               amdgpu_virt_release_full_gpu(adev, false);
>>>
>>>+      if (!adev->in_s0ix)
>>>+              amdgpu_amdkfd_suspend_process(adev, adev->in_runpm);
>>>+
>>>       r = amdgpu_dpm_notify_rlc_state(adev, false);
>>>       if (r)
>>>               return r;
>>>@@ -5178,7 +5181,7 @@ int amdgpu_device_resume(struct drm_device *dev,
>>>bool
>>>notify_clients)
>>>       }
>>>
>>>       if (!adev->in_s0ix) {
>>>-              r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
>>>+              r = amdgpu_amdkfd_resume(adev,
>>>+ !amdgpu_sriov_runtime(adev) ||
>>>+adev->in_runpm);
>>>               if (r)
>>>                       goto exit;
>>>
>>>@@ -5199,6 +5202,11 @@ int amdgpu_device_resume(struct drm_device
>>>*dev, bool
>>>notify_clients)
>>>               amdgpu_virt_release_full_gpu(adev, true);
>>>       }
>>>
>>>+      if (!adev->in_s0ix) {
>>>+              r = amdgpu_amdkfd_resume_process(adev, adev->in_runpm);
>>>+              if (r)
>>>+                      goto exit;
>>>+      }
>>>       if (r)
>>>               return r;
>>>
>>>diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>index bf0854bd5555..22c6ef7c42b6 100644
>>>--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>@@ -1027,15 +1027,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool
>>run_pm)
>>>       if (!kfd->init_complete)
>>>               return;
>>>
>>>-      /* for runtime suspend, skip locking kfd */
>>>-      if (!run_pm) {
>>>-              mutex_lock(&kfd_processes_mutex);
>>>-              /* For first KFD device suspend all the KFD processes */
>>>-              if (++kfd_locked == 1)
>>>-                      kfd_suspend_all_processes();
>>>-              mutex_unlock(&kfd_processes_mutex);
>>>-      }
>>>-
>>>+      kgd2kfd_suspend_process(kfd, run_pm);
>>>       for (i = 0; i < kfd->num_nodes; i++) {
>>>               node = kfd->nodes[i];
>>>               node->dqm->ops.stop(node->dqm); @@ -1055,6 +1047,36 @@
>>>int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>>>                       return ret;
>>>       }
>>>
>>>+      ret = kgd2kfd_resume_process(kfd, run_pm);
>>>+
>>>+      return ret;
>>>+}
>>>+
>>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm) {
>>>+      struct kfd_node *node;
>>>+      int i;
>>>+
>>>+      if (!kfd->init_complete)
>>>+              return;
>>>+
>>>+      /* for runtime suspend, skip locking kfd */
>>>+      if (!run_pm) {
>>>+              mutex_lock(&kfd_processes_mutex);
>>>+              /* For first KFD device suspend all the KFD processes */
>>>+              if (++kfd_locked == 1)
>>>+                      kfd_suspend_all_processes();
>>>+              mutex_unlock(&kfd_processes_mutex);
>>>+      }
>>>+}
>>>+
>>>+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm) {
>>>+      int ret, i;
>>>+
>>>+      if (!kfd->init_complete)
>>>+              return 0;
>>>+
>>>       /* for runtime resume, skip unlocking kfd */
>>>       if (!run_pm) {
>>>               mutex_lock(&kfd_processes_mutex);
>>>--
>>>2.34.1
>>
>
>



More information about the amd-gfx mailing list