[PATCH v2] drm/amdkfd: Move the process suspend and resume out of full access
Deng, Emily
Emily.Deng at amd.com
Wed Jun 4 03:19:34 UTC 2025
[AMD Official Use Only - AMD Internal Distribution Only]
Ping......
Emily Deng
Best Wishes
>-----Original Message-----
>From: Deng, Emily
>Sent: Tuesday, June 3, 2025 5:11 PM
>To: Koenig, Christian <Christian.Koenig at amd.com>; Chen, Horace
><Horace.Chen at amd.com>
>Cc: amd-gfx at lists.freedesktop.org; Zhang, Owen(SRDC)
><Owen.Zhang2 at amd.com>
>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume out of
>full access
>
>Hi Christian and Horace,
> Could you please help review this patch?
>
>Emily Deng
>Best Wishes
>
>
>
>>-----Original Message-----
>>From: Zhang, Owen(SRDC) <Owen.Zhang2 at amd.com>
>>Sent: Friday, May 30, 2025 5:50 PM
>>To: Deng, Emily <Emily.Deng at amd.com>; amd-gfx at lists.freedesktop.org
>>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume
>>out of full access
>>
>>[AMD Official Use Only - AMD Internal Distribution Only]
>>
>>Hi Team, @amd-gfx at lists.freedesktop.org
>>
>>Could you please review and provide your input? Thanks for your support!
>>
>>
>>Rgds/Owen
>>
>>-----Original Message-----
>>From: Deng, Emily <Emily.Deng at amd.com>
>>Sent: Friday, May 30, 2025 9:39 AM
>>To: amd-gfx at lists.freedesktop.org
>>Cc: Zhang, Owen(SRDC) <Owen.Zhang2 at amd.com>
>>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume
>>out of full access
>>
>>[AMD Official Use Only - AMD Internal Distribution Only]
>>
>>Ping......
>>
>>Emily Deng
>>Best Wishes
>>
>>
>>
>>>-----Original Message-----
>>>From: Deng, Emily <Emily.Deng at amd.com>
>>>Sent: Wednesday, May 28, 2025 2:20 PM
>>>To: Deng, Emily <Emily.Deng at amd.com>; amd-gfx at lists.freedesktop.org
>>>Cc: Zhang, Owen(SRDC) <Owen.Zhang2 at amd.com>
>>>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and
>>>resume out of full access
>>>
>>>[AMD Official Use Only - AMD Internal Distribution Only]
>>>
>>>>-----Original Message-----
>>>>From: Emily Deng <Emily.Deng at amd.com>
>>>>Sent: Tuesday, May 27, 2025 6:50 PM
>>>>To: amd-gfx at lists.freedesktop.org
>>>>Cc: Deng, Emily <Emily.Deng at amd.com>
>>>>Subject: [PATCH v2] drm/amdkfd: Move the process suspend and resume
>>>>out of full access
>>>>
>>>>Suspending and resuming the KFD processes does not require exclusive
>>>>access. Therefore, these steps can be moved out of the full access
>>>>section to reduce the duration of exclusive access.
>>>>
>>>>Signed-off-by: Emily Deng <Emily.Deng at amd.com>
>>>>---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 +++++++++
>>>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++
>>>>drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++++--
>>>> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 40 +++++++++++++++++-----
>>>> 4 files changed, 70 insertions(+), 11 deletions(-)
>>>>
>>>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>index 4cec3a873995..ba07e9c6619d 100644
>>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>@@ -264,6 +264,22 @@ int amdgpu_amdkfd_resume(struct amdgpu_device
>>>>*adev, bool run_pm)
>>>> return r;
>>>> }
>>>>
>>>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool
>>>>+run_pm) {
>>>>+ if (adev->kfd.dev)
>>>>+ kgd2kfd_suspend_process(adev->kfd.dev, run_pm); }
>>>>+
>>>>+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool
>>>>+run_pm) {
>>>>+ int r = 0;
>>>>+
>>>>+ if (adev->kfd.dev)
>>>>+ r = kgd2kfd_resume_process(adev->kfd.dev, run_pm);
>>>>+
>>>>+ return r;
>>>>+}
>>>>+
>>>> int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
>>>> struct amdgpu_reset_context
>>>>*reset_context) { diff --git
>>>>a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>index b6ca41859b53..841ae8b75ab1 100644
>>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>@@ -156,6 +156,8 @@ void amdgpu_amdkfd_fini(void);
>>>>
>>>> void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
>>>>int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
>>>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool
>>>>+run_pm); int amdgpu_amdkfd_resume_process(struct amdgpu_device
>>>>+*adev, bool run_pm);
>>>> void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
>>>> const void *ih_ring_entry); void
>>>> amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); @@ -413,6
>>>>+415,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, void
>>>>kgd2kfd_device_exit(struct kfd_dev *kfd); void
>>>>kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); int
>>>>kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
>>>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm); int
>>>>+kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm);
>>>> int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>>>> struct amdgpu_reset_context *reset_context);
>>>>int kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -463,6 +467,15 @@
>>>>static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>>>> return 0;
>>>> }
>>>>
>>>>+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool
>>>>+run_pm) { }
>>>>+
>>>>+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd, bool
>>>>+run_pm) {
>>>>+ return 0;
>>>>+}
>>>>+
>>>> static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>>>> struct amdgpu_reset_context
>>>>*reset_context) { diff --git
>>>>a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>index 625c416c7d45..6e29f8bd54bb 100644
>>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>@@ -5080,7 +5080,7 @@ int amdgpu_device_suspend(struct drm_device
>>>>*dev, bool
>>>>notify_clients)
>>>> amdgpu_device_ip_suspend_phase1(adev);
>>>>
>>>> if (!adev->in_s0ix) {
>>>>- amdgpu_amdkfd_suspend(adev, adev->in_runpm);
>>>>+ amdgpu_amdkfd_suspend(adev,
>>>>+ !amdgpu_sriov_runtime(adev)
>>>>+ ||
>>>>+adev->in_runpm);
>>>> amdgpu_userq_suspend(adev);
>>>> }
>>>>
>>>>@@ -5097,6 +5097,9 @@ int amdgpu_device_suspend(struct drm_device
>>>>*dev, bool
>>>>notify_clients)
>>>> if (amdgpu_sriov_vf(adev))
>>>> amdgpu_virt_release_full_gpu(adev, false);
>>>>
>>>>+ if (!adev->in_s0ix)
>>>>+ amdgpu_amdkfd_suspend_process(adev, adev->in_runpm);
>>>>+
>>>> r = amdgpu_dpm_notify_rlc_state(adev, false);
>>>> if (r)
>>>> return r;
>>>>@@ -5178,7 +5181,7 @@ int amdgpu_device_resume(struct drm_device
>>>>*dev, bool
>>>>notify_clients)
>>>> }
>>>>
>>>> if (!adev->in_s0ix) {
>>>>- r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
>>>>+ r = amdgpu_amdkfd_resume(adev,
>>>>+ !amdgpu_sriov_runtime(adev) ||
>>>>+adev->in_runpm);
>>>> if (r)
>>>> goto exit;
>>>>
>>>>@@ -5199,6 +5202,11 @@ int amdgpu_device_resume(struct drm_device
>>>>*dev, bool
>>>>notify_clients)
>>>> amdgpu_virt_release_full_gpu(adev, true);
>>>> }
>>>>
>>>>+ if (!adev->in_s0ix) {
>>>>+ r = amdgpu_amdkfd_resume_process(adev, adev->in_runpm);
>>>>+ if (r)
>>>>+ goto exit;
>>>>+ }
>>>> if (r)
>>>> return r;
>>>>
>>>>diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>index bf0854bd5555..22c6ef7c42b6 100644
>>>>--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>@@ -1027,15 +1027,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool
>>>run_pm)
>>>> if (!kfd->init_complete)
>>>> return;
>>>>
>>>>- /* for runtime suspend, skip locking kfd */
>>>>- if (!run_pm) {
>>>>- mutex_lock(&kfd_processes_mutex);
>>>>- /* For first KFD device suspend all the KFD processes */
>>>>- if (++kfd_locked == 1)
>>>>- kfd_suspend_all_processes();
>>>>- mutex_unlock(&kfd_processes_mutex);
>>>>- }
>>>>-
>>>>+ kgd2kfd_suspend_process(kfd, run_pm);
>>>> for (i = 0; i < kfd->num_nodes; i++) {
>>>> node = kfd->nodes[i];
>>>> node->dqm->ops.stop(node->dqm); @@ -1055,6 +1047,36 @@
>>>>int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>>>> return ret;
>>>> }
>>>>
>>>>+ ret = kgd2kfd_resume_process(kfd, run_pm);
>>>>+
>>>>+ return ret;
>>>>+}
>>>>+
>>>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm) {
>>>>+ struct kfd_node *node;
>>>>+ int i;
>>>>+
>>>>+ if (!kfd->init_complete)
>>>>+ return;
>>>>+
>>>>+ /* for runtime suspend, skip locking kfd */
>>>>+ if (!run_pm) {
>>>>+ mutex_lock(&kfd_processes_mutex);
>>>>+ /* For first KFD device suspend all the KFD processes */
>>>>+ if (++kfd_locked == 1)
>>>>+ kfd_suspend_all_processes();
>>>>+ mutex_unlock(&kfd_processes_mutex);
>>>>+ }
>>>>+}
>>>>+
>>>>+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm) {
>>>>+ int ret, i;
>>>>+
>>>>+ if (!kfd->init_complete)
>>>>+ return 0;
>>>>+
>>>> /* for runtime resume, skip unlocking kfd */
>>>> if (!run_pm) {
>>>> mutex_lock(&kfd_processes_mutex);
>>>>--
>>>>2.34.1
>>>
>>
>>
More information about the amd-gfx
mailing list