[PATCH v2] drm/amdkfd: Move the process suspend and resume out of full access
Deng, Emily
Emily.Deng at amd.com
Tue Jun 3 09:10:49 UTC 2025
[AMD Official Use Only - AMD Internal Distribution Only]
Hi Christian and Horace,
Could you please help review this patch?
Emily Deng
Best Wishes
>-----Original Message-----
>From: Zhang, Owen(SRDC) <Owen.Zhang2 at amd.com>
>Sent: Friday, May 30, 2025 5:50 PM
>To: Deng, Emily <Emily.Deng at amd.com>; amd-gfx at lists.freedesktop.org
>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume out of
>full access
>
>[AMD Official Use Only - AMD Internal Distribution Only]
>
>Hi Team, @amd-gfx at lists.freedesktop.org
>
>Could you please review and provide your input? Thanks for your support!
>
>
>Rgds/Owen
>
>-----Original Message-----
>From: Deng, Emily <Emily.Deng at amd.com>
>Sent: Friday, May 30, 2025 9:39 AM
>To: amd-gfx at lists.freedesktop.org
>Cc: Zhang, Owen(SRDC) <Owen.Zhang2 at amd.com>
>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume out of
>full access
>
>[AMD Official Use Only - AMD Internal Distribution Only]
>
>Ping......
>
>Emily Deng
>Best Wishes
>
>
>
>>-----Original Message-----
>>From: Deng, Emily <Emily.Deng at amd.com>
>>Sent: Wednesday, May 28, 2025 2:20 PM
>>To: Deng, Emily <Emily.Deng at amd.com>; amd-gfx at lists.freedesktop.org
>>Cc: Zhang, Owen(SRDC) <Owen.Zhang2 at amd.com>
>>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume
>>out of full access
>>
>>[AMD Official Use Only - AMD Internal Distribution Only]
>>
>>>-----Original Message-----
>>>From: Emily Deng <Emily.Deng at amd.com>
>>>Sent: Tuesday, May 27, 2025 6:50 PM
>>>To: amd-gfx at lists.freedesktop.org
>>>Cc: Deng, Emily <Emily.Deng at amd.com>
>>>Subject: [PATCH v2] drm/amdkfd: Move the process suspend and resume
>>>out of full access
>>>
>>>Suspending and resuming the KFD processes does not require exclusive
>>>(full) GPU access. Therefore, move that step out of the full access
>>>section to reduce the duration of exclusive access.
>>>
>>>Signed-off-by: Emily Deng <Emily.Deng at amd.com>
>>>---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 +++++++++
>>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++
>>>drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++++--
>>> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 40 +++++++++++++++++-----
>>> 4 files changed, 70 insertions(+), 11 deletions(-)
>>>
>>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>index 4cec3a873995..ba07e9c6619d 100644
>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>@@ -264,6 +264,22 @@ int amdgpu_amdkfd_resume(struct amdgpu_device
>>>*adev, bool run_pm)
>>> return r;
>>> }
>>>
>>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool
>>>+run_pm) {
>>>+ if (adev->kfd.dev)
>>>+ kgd2kfd_suspend_process(adev->kfd.dev, run_pm); }
>>>+
>>>+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool
>>>+run_pm) {
>>>+ int r = 0;
>>>+
>>>+ if (adev->kfd.dev)
>>>+ r = kgd2kfd_resume_process(adev->kfd.dev, run_pm);
>>>+
>>>+ return r;
>>>+}
>>>+
>>> int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
>>> struct amdgpu_reset_context *reset_context)
>>>{ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>index b6ca41859b53..841ae8b75ab1 100644
>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>@@ -156,6 +156,8 @@ void amdgpu_amdkfd_fini(void);
>>>
>>> void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
>>>int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
>>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool
>>>+run_pm); int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev,
>>>+bool run_pm);
>>> void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
>>> const void *ih_ring_entry); void
>>> amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); @@ -413,6
>>>+415,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, void
>>>kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct
>>>kfd_dev *kfd, bool run_pm); int kgd2kfd_resume(struct kfd_dev *kfd,
>>>bool run_pm);
>>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm); int
>>>+kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm);
>>> int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>>> struct amdgpu_reset_context *reset_context); int
>>>kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -463,6 +467,15 @@ static
>>>inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>>> return 0;
>>> }
>>>
>>>+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool
>>>+run_pm) { }
>>>+
>>>+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd, bool
>>>+run_pm) {
>>>+ return 0;
>>>+}
>>>+
>>> static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>>> struct amdgpu_reset_context
>>>*reset_context) { diff --git
>>>a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>index 625c416c7d45..6e29f8bd54bb 100644
>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>@@ -5080,7 +5080,7 @@ int amdgpu_device_suspend(struct drm_device
>>>*dev, bool
>>>notify_clients)
>>> amdgpu_device_ip_suspend_phase1(adev);
>>>
>>> if (!adev->in_s0ix) {
>>>- amdgpu_amdkfd_suspend(adev, adev->in_runpm);
>>>+ amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_runtime(adev)
>>>+ ||
>>>+adev->in_runpm);
>>> amdgpu_userq_suspend(adev);
>>> }
>>>
>>>@@ -5097,6 +5097,9 @@ int amdgpu_device_suspend(struct drm_device
>>>*dev, bool
>>>notify_clients)
>>> if (amdgpu_sriov_vf(adev))
>>> amdgpu_virt_release_full_gpu(adev, false);
>>>
>>>+ if (!adev->in_s0ix)
>>>+ amdgpu_amdkfd_suspend_process(adev, adev->in_runpm);
>>>+
>>> r = amdgpu_dpm_notify_rlc_state(adev, false);
>>> if (r)
>>> return r;
>>>@@ -5178,7 +5181,7 @@ int amdgpu_device_resume(struct drm_device *dev,
>>>bool
>>>notify_clients)
>>> }
>>>
>>> if (!adev->in_s0ix) {
>>>- r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
>>>+ r = amdgpu_amdkfd_resume(adev,
>>>+ !amdgpu_sriov_runtime(adev) ||
>>>+adev->in_runpm);
>>> if (r)
>>> goto exit;
>>>
>>>@@ -5199,6 +5202,11 @@ int amdgpu_device_resume(struct drm_device
>>>*dev, bool
>>>notify_clients)
>>> amdgpu_virt_release_full_gpu(adev, true);
>>> }
>>>
>>>+ if (!adev->in_s0ix) {
>>>+ r = amdgpu_amdkfd_resume_process(adev, adev->in_runpm);
>>>+ if (r)
>>>+ goto exit;
>>>+ }
>>> if (r)
>>> return r;
>>>
>>>diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>index bf0854bd5555..22c6ef7c42b6 100644
>>>--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>@@ -1027,15 +1027,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool
>>run_pm)
>>> if (!kfd->init_complete)
>>> return;
>>>
>>>- /* for runtime suspend, skip locking kfd */
>>>- if (!run_pm) {
>>>- mutex_lock(&kfd_processes_mutex);
>>>- /* For first KFD device suspend all the KFD processes */
>>>- if (++kfd_locked == 1)
>>>- kfd_suspend_all_processes();
>>>- mutex_unlock(&kfd_processes_mutex);
>>>- }
>>>-
>>>+ kgd2kfd_suspend_process(kfd, run_pm);
>>> for (i = 0; i < kfd->num_nodes; i++) {
>>> node = kfd->nodes[i];
>>> node->dqm->ops.stop(node->dqm); @@ -1055,6 +1047,36 @@
>>>int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>>> return ret;
>>> }
>>>
>>>+ ret = kgd2kfd_resume_process(kfd, run_pm);
>>>+
>>>+ return ret;
>>>+}
>>>+
>>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm) {
>>>+ struct kfd_node *node;
>>>+ int i;
>>>+
>>>+ if (!kfd->init_complete)
>>>+ return;
>>>+
>>>+ /* for runtime suspend, skip locking kfd */
>>>+ if (!run_pm) {
>>>+ mutex_lock(&kfd_processes_mutex);
>>>+ /* For first KFD device suspend all the KFD processes */
>>>+ if (++kfd_locked == 1)
>>>+ kfd_suspend_all_processes();
>>>+ mutex_unlock(&kfd_processes_mutex);
>>>+ }
>>>+}
>>>+
>>>+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm) {
>>>+ int ret, i;
>>>+
>>>+ if (!kfd->init_complete)
>>>+ return 0;
>>>+
>>> /* for runtime resume, skip unlocking kfd */
>>> if (!run_pm) {
>>> mutex_lock(&kfd_processes_mutex);
>>>--
>>>2.34.1
>>
>
>
More information about the amd-gfx
mailing list