[PATCHv2 1/3] drm/amdgpu: Implement MES Suspend and Resume APIs for GFX11

Kasiviswanathan, Harish Harish.Kasiviswanathan at amd.com
Thu Aug 15 20:32:15 UTC 2024


[AMD Official Use Only - AMD Internal Distribution Only]

-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Mukul Joshi
Sent: Wednesday, August 14, 2024 7:28 PM
To: amd-gfx at lists.freedesktop.org
Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Deucher, Alexander <Alexander.Deucher at amd.com>; Joshi, Mukul <Mukul.Joshi at amd.com>
Subject: [PATCHv2 1/3] drm/amdgpu: Implement MES Suspend and Resume APIs for GFX11

Add implementation for MES Suspend and Resume APIs to unmap/map
all queues for GFX11. Support for GFX12 will be added when the
corresponding firmware support is in place.

Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
---
v1->v2:
- Add MES FW version check.
- Update amdgpu_mes_suspend/amdgpu_mes_resume handling.

 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 71 +++++++++++++------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  2 +
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  | 32 ++++++++++-
 3 files changed, 69 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index c598c3edff7e..e551943da77a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -501,60 +501,50 @@ int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)

 int amdgpu_mes_suspend(struct amdgpu_device *adev)
 {
-       struct idr *idp;
-       struct amdgpu_mes_process *process;
-       struct amdgpu_mes_gang *gang;
        struct mes_suspend_gang_input input;
-       int r, pasid;
+       int r;
+
+       if (!amdgpu_mes_suspend_resume_all_supported(adev))
+               return 0;
+
+       memset(&input, 0x0, sizeof(struct mes_suspend_gang_input));
+       input.suspend_all_gangs = 1;

        /*
         * Avoid taking any other locks under MES lock to avoid circular
         * lock dependencies.
         */
        amdgpu_mes_lock(&adev->mes);
-
-       idp = &adev->mes.pasid_idr;
-
-       idr_for_each_entry(idp, process, pasid) {
-               list_for_each_entry(gang, &process->gang_list, list) {
-                       r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
-                       if (r)
-                               DRM_ERROR("failed to suspend pasid %d gangid %d",
-                                        pasid, gang->gang_id);
-               }
-       }
-
+       r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
        amdgpu_mes_unlock(&adev->mes);
-       return 0;
+       if (r)
+               DRM_ERROR("failed to suspend all gangs");
+
+       return r;
 }

 int amdgpu_mes_resume(struct amdgpu_device *adev)
 {
-       struct idr *idp;
-       struct amdgpu_mes_process *process;
-       struct amdgpu_mes_gang *gang;
        struct mes_resume_gang_input input;
-       int r, pasid;
+       int r;
+
+       if (!amdgpu_mes_suspend_resume_all_supported(adev))
+               return 0;
+
+       memset(&input, 0x0, sizeof(struct mes_resume_gang_input));
+       input.resume_all_gangs = 1;

        /*
         * Avoid taking any other locks under MES lock to avoid circular
         * lock dependencies.
         */
        amdgpu_mes_lock(&adev->mes);
-
-       idp = &adev->mes.pasid_idr;
-
-       idr_for_each_entry(idp, process, pasid) {
-               list_for_each_entry(gang, &process->gang_list, list) {
-                       r = adev->mes.funcs->resume_gang(&adev->mes, &input);
-                       if (r)
-                               DRM_ERROR("failed to resume pasid %d gangid %d",
-                                        pasid, gang->gang_id);
-               }
-       }
-
+       r = adev->mes.funcs->resume_gang(&adev->mes, &input);
        amdgpu_mes_unlock(&adev->mes);
-       return 0;
+       if (r)
+               DRM_ERROR("failed to resume all gangs");
+
+       return r;
 }

 static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
@@ -1608,6 +1598,19 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
        return r;
 }

+bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
+{
+       uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+       bool is_supported = false;
+
+       if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
+           amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
+           mes_rev >= 0x63)
+               is_supported = true;
+
+       return is_supported;
+}
+
 #if defined(CONFIG_DEBUG_FS)

 static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 548e724e3a75..b2db62e50454 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -494,4 +494,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
        memalloc_noreclaim_restore(mes->saved_flags);
        mutex_unlock(&mes->mutex_hidden);
 }
+
+bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev);
 #endif /* __AMDGPU_MES_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index c0340ee3dec0..a5c582674db9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -421,13 +421,41 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
 static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes,
                                  struct mes_suspend_gang_input *input)
 {
-       return 0;
+       union MESAPI__SUSPEND mes_suspend_gang_pkt;
+
+       memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt));
+
+       mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
+       mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND;
+       mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+       mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs;

Only suspend_all_gangs and resume_all_gangs are set by the caller. None of the variables below are set by caller. Is that ok?

+       mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr;
+       mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr;
+       mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value;
+
+       return mes_v11_0_submit_pkt_and_poll_completion(mes,
+                       &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt),
+                       offsetof(union MESAPI__SUSPEND, api_status));
 }

 static int mes_v11_0_resume_gang(struct amdgpu_mes *mes,
                                 struct mes_resume_gang_input *input)
 {
-       return 0;
+       union MESAPI__RESUME mes_resume_gang_pkt;
+
+       memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt));
+
+       mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
+       mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME;
+       mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+       mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs;
+       mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr;
+
+       return mes_v11_0_submit_pkt_and_poll_completion(mes,
+                       &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt),
+                       offsetof(union MESAPI__RESUME, api_status));
 }

 static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
--
2.35.1



More information about the amd-gfx mailing list