[PATCH v2] drm/amdkfd: Move the process suspend and resume out of full access
Zhang, Owen(SRDC)
Owen.Zhang2 at amd.com
Fri May 30 09:50:10 UTC 2025
Hi team, @amd-gfx at lists.freedesktop.org,
Can you please review and share your input? Thanks for your support!
Rgds/Owen
-----Original Message-----
From: Deng, Emily <Emily.Deng at amd.com>
Sent: Friday, May 30, 2025 9:39 AM
To: amd-gfx at lists.freedesktop.org
Cc: Zhang, Owen(SRDC) <Owen.Zhang2 at amd.com>
Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume out of full access
Ping......
Emily Deng
Best Wishes
>-----Original Message-----
>From: Deng, Emily <Emily.Deng at amd.com>
>Sent: Wednesday, May 28, 2025 2:20 PM
>To: Deng, Emily <Emily.Deng at amd.com>; amd-gfx at lists.freedesktop.org
>Cc: Zhang, Owen(SRDC) <Owen.Zhang2 at amd.com>
>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume
>out of full access
>
>
>
>>-----Original Message-----
>>From: Emily Deng <Emily.Deng at amd.com>
>>Sent: Tuesday, May 27, 2025 6:50 PM
>>To: amd-gfx at lists.freedesktop.org
>>Cc: Deng, Emily <Emily.Deng at amd.com>
>>Subject: [PATCH v2] drm/amdkfd: Move the process suspend and resume
>>out of full access
>>
>>The KFD process suspend and resume steps do not require exclusive GPU
>>access, so they can be moved out of the full-access section to shorten
>>the time exclusive (full) access is held.
>>
>>Signed-off-by: Emily Deng <Emily.Deng at amd.com>
>>---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 +++++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++++--
>> drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 40 +++++++++++++++++-----
>> 4 files changed, 70 insertions(+), 11 deletions(-)
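
For context, the ordering that results from the hunks below in
amdgpu_device_suspend()/amdgpu_device_resume() is roughly the following.
This is a simplified sketch, not the literal driver code: error handling
and the in_s0ix checks are omitted, and suspend_flow_sketch() is only an
illustrative name.

	/* Sketch: queue (DQM) suspend stays inside the full-access window,
	 * the per-process eviction moves after full access is released.
	 */
	static void suspend_flow_sketch(struct amdgpu_device *adev)
	{
		/* still under exclusive (full) GPU access */
		amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_runtime(adev) || adev->in_runpm);

		/* release exclusive access as early as possible */
		if (amdgpu_sriov_vf(adev))
			amdgpu_virt_release_full_gpu(adev, false);

		/* process suspend no longer needs exclusive access */
		amdgpu_amdkfd_suspend_process(adev, adev->in_runpm);
	}

The resume path mirrors this: amdgpu_amdkfd_resume() runs inside the
full-access window and amdgpu_amdkfd_resume_process() runs after
amdgpu_virt_release_full_gpu(adev, true).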
>>
>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>index 4cec3a873995..ba07e9c6619d 100644
>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>@@ -264,6 +264,22 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
>> return r;
>> }
>>
>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool run_pm)
>>+{
>>+ if (adev->kfd.dev)
>>+ kgd2kfd_suspend_process(adev->kfd.dev, run_pm);
>>+}
>>+
>>+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool run_pm)
>>+{
>>+ int r = 0;
>>+
>>+ if (adev->kfd.dev)
>>+ r = kgd2kfd_resume_process(adev->kfd.dev, run_pm);
>>+
>>+ return r;
>>+}
>>+
>> int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
>> struct amdgpu_reset_context *reset_context)
>>{
>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>index b6ca41859b53..841ae8b75ab1 100644
>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>@@ -156,6 +156,8 @@ void amdgpu_amdkfd_fini(void);
>>
>> void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
>> int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool run_pm);
>>+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool run_pm);
>> void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
>>   const void *ih_ring_entry);
>> void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
>>@@ -413,6 +415,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>> void kgd2kfd_device_exit(struct kfd_dev *kfd);
>> void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
>> int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm);
>>+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm);
>> int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>>   struct amdgpu_reset_context *reset_context);
>> int kgd2kfd_post_reset(struct kfd_dev *kfd);
>>@@ -463,6 +467,15 @@ static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>> return 0;
>> }
>>
>>+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm)
>>+{
>>+}
>>+
>>+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm)
>>+{
>>+ return 0;
>>+}
>>+
>> static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>>   struct amdgpu_reset_context *reset_context)
>> {
>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>index 625c416c7d45..6e29f8bd54bb 100644
>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>@@ -5080,7 +5080,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
>> amdgpu_device_ip_suspend_phase1(adev);
>>
>> if (!adev->in_s0ix) {
>>- amdgpu_amdkfd_suspend(adev, adev->in_runpm);
>>+ amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_runtime(adev) || adev->in_runpm);
>> amdgpu_userq_suspend(adev);
>> }
>>
>>@@ -5097,6 +5097,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
>> if (amdgpu_sriov_vf(adev))
>> amdgpu_virt_release_full_gpu(adev, false);
>>
>>+ if (!adev->in_s0ix)
>>+ amdgpu_amdkfd_suspend_process(adev, adev->in_runpm);
>>+
>> r = amdgpu_dpm_notify_rlc_state(adev, false);
>> if (r)
>> return r;
>>@@ -5178,7 +5181,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
>> }
>>
>> if (!adev->in_s0ix) {
>>- r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
>>+ r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_runtime(adev) || adev->in_runpm);
>> if (r)
>> goto exit;
>>
>>@@ -5199,6 +5202,11 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
>> amdgpu_virt_release_full_gpu(adev, true);
>> }
>>
>>+ if (!adev->in_s0ix) {
>>+ r = amdgpu_amdkfd_resume_process(adev, adev->in_runpm);
>>+ if (r)
>>+ goto exit;
>>+ }
>> if (r)
>> return r;
>>
>>diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>index bf0854bd5555..22c6ef7c42b6 100644
>>--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>@@ -1027,15 +1027,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
>> if (!kfd->init_complete)
>> return;
>>
>>- /* for runtime suspend, skip locking kfd */
>>- if (!run_pm) {
>>- mutex_lock(&kfd_processes_mutex);
>>- /* For first KFD device suspend all the KFD processes */
>>- if (++kfd_locked == 1)
>>- kfd_suspend_all_processes();
>>- mutex_unlock(&kfd_processes_mutex);
>>- }
>>-
>>+ kgd2kfd_suspend_process(kfd, run_pm);
>> for (i = 0; i < kfd->num_nodes; i++) {
>> node = kfd->nodes[i];
>> node->dqm->ops.stop(node->dqm);
>>@@ -1055,6 +1047,36 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>> return ret;
>> }
>>
>>+ ret = kgd2kfd_resume_process(kfd, run_pm);
>>+
>>+ return ret;
>>+}
>>+
>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm)
>>+{
>>+ struct kfd_node *node;
>>+ int i;
>>+
>>+ if (!kfd->init_complete)
>>+ return;
>>+
>>+ /* for runtime suspend, skip locking kfd */
>>+ if (!run_pm) {
>>+ mutex_lock(&kfd_processes_mutex);
>>+ /* For first KFD device suspend all the KFD processes */
>>+ if (++kfd_locked == 1)
>>+ kfd_suspend_all_processes();
>>+ mutex_unlock(&kfd_processes_mutex);
>>+ }
>>+}
>>+
>>+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm)
>>+{
>>+ int ret, i;
>>+
>>+ if (!kfd->init_complete)
>>+ return 0;
>>+
>> /* for runtime resume, skip unlocking kfd */
>> if (!run_pm) {
>> mutex_lock(&kfd_processes_mutex);
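
(The quoted hunk is truncated here. kgd2kfd_suspend_process() above keeps
the existing kfd_locked refcount, so kgd2kfd_resume_process() presumably
carries over the matching unlock logic from today's kgd2kfd_resume(); a
sketch of the expected body, assuming that logic is simply moved here
unchanged:

	if (!run_pm) {
		mutex_lock(&kfd_processes_mutex);
		/* resume all KFD processes once the last device resumes */
		if (--kfd_locked == 0)
			ret = kfd_resume_all_processes();
		mutex_unlock(&kfd_processes_mutex);
	}

i.e. the first device to suspend evicts every KFD process and the last
device to resume restores them.)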
>>--
>>2.34.1
>