[PATCH] drm/amdkfd: add/remove kfd queues on start/stop KFD scheduling

Liu, Shaoyun Shaoyun.Liu at amd.com
Tue Oct 15 18:09:35 UTC 2024


[AMD Official Use Only - AMD Internal Distribution Only]

Ping

-----Original Message-----
From: Liu, Shaoyun <Shaoyun.Liu at amd.com>
Sent: Friday, October 4, 2024 12:08 PM
To: amd-gfx at lists.freedesktop.org
Cc: Liu, Shaoyun <Shaoyun.Liu at amd.com>
Subject: [PATCH] drm/amdkfd: add/remove kfd queues on start/stop KFD scheduling

When KFD scheduling is started, add back the kfd queues that were originally removed when scheduling was stopped.

Signed-off-by: shaoyunl <shaoyun.liu at amd.com>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 36 +++++++++++++++++--
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 648f40091aa3..82a2404e8a7e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -292,7 +292,7 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
        return r;
 }

-static int remove_all_queues_mes(struct device_queue_manager *dqm)
+static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm)
 {
        struct device_process_node *cur;
        struct device *dev = dqm->dev->adev->dev;
@@ -319,6 +319,33 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm)
        return retval;
 }

+static int add_all_kfd_queues_mes(struct device_queue_manager *dqm) {
+       struct device_process_node *cur;
+       struct device *dev = dqm->dev->adev->dev;
+       struct qcm_process_device *qpd;
+       struct queue *q;
+       int retval = 0;
+
+       list_for_each_entry(cur, &dqm->queues, list) {
+               qpd = cur->qpd;
+               list_for_each_entry(q, &qpd->queues_list, list) {
+                       if (q->properties.is_active) {
+                               retval = add_queue_mes(dqm, q, qpd);
+                               if (retval) {
+                                       dev_err(dev, "%s: Failed to add queue %d for dev %d",
+                                               __func__,
+                                               q->properties.queue_id,
+                                               dqm->dev->id);
+                                       return retval;
+                               }
+                       }
+               }
+       }
+
+       return retval;
+}
+
 static int suspend_all_queues_mes(struct device_queue_manager *dqm)  {
        struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
@@ -1742,7 +1769,7 @@ static int halt_cpsch(struct device_queue_manager *dqm)
                                                 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
                                USE_DEFAULT_GRACE_PERIOD, false);
                else
-                       ret = remove_all_queues_mes(dqm);
+                       ret = remove_all_kfd_queues_mes(dqm);
        }
        dqm->sched_halt = true;
        dqm_unlock(dqm);
@@ -1768,6 +1795,9 @@ static int unhalt_cpsch(struct device_queue_manager *dqm)
                ret = execute_queues_cpsch(dqm,
                                           KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
                        0, USE_DEFAULT_GRACE_PERIOD);
+       else
+               ret = add_all_kfd_queues_mes(dqm);
+
        dqm_unlock(dqm);

        return ret;
@@ -1867,7 +1897,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
        if (!dqm->dev->kfd->shared_resources.enable_mes)
                unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
        else
-               remove_all_queues_mes(dqm);
+               remove_all_kfd_queues_mes(dqm);

        dqm->sched_running = false;

--
2.34.1



More information about the amd-gfx mailing list