[PATCH 5/7] drm/amdgpu: Place NPS mode request on unload

Xu, Feifei Feifei.Xu at amd.com
Thu Sep 26 09:07:06 UTC 2024


[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Feifei Xu <Feifei.Xu at amd.com>

-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Lijo Lazar
Sent: Tuesday, September 24, 2024 1:57 PM
To: amd-gfx at lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang at amd.com>; Deucher, Alexander <Alexander.Deucher at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>; Bhardwaj, Rajneesh <Rajneesh.Bhardwaj at amd.com>; Errabolu, Ramesh <Ramesh.Errabolu at amd.com>
Subject: [PATCH 5/7] drm/amdgpu: Place NPS mode request on unload

If a user has requested an NPS mode switch, place the request through PSP during unload of the driver. For devices that are part of a hive, all requests are placed together. If one of them fails, revert to the current NPS mode.

Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 47 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 38 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h |  4 ++
 5 files changed, 92 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 95331294509c..d16cdcdb2114 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2428,6 +2428,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
        struct amdgpu_device *adev = drm_to_adev(dev);

        amdgpu_xcp_dev_unplug(adev);
+       amdgpu_gmc_prepare_nps_mode_change(adev);
        drm_dev_unplug(dev);

        if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 24a1f931d9ed..21f1e65c9dc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -1345,3 +1345,50 @@ int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,

        return psp_memory_partition(&adev->psp, nps_mode);
 }
+
+static inline bool amdgpu_gmc_need_nps_switch_req(struct amdgpu_device *adev,
+                                                 int req_nps_mode,
+                                                 int cur_nps_mode)
+{
+       /* Needed only for a supported mode that differs from the current one */
+       return req_nps_mode != cur_nps_mode &&
+              (adev->gmc.supported_nps_modes & BIT(req_nps_mode));
+}
+
+/* Place a pending NPS mode switch request through PSP at driver unload.
+ * Devices in an XGMI hive use the hive's requested mode and request it for
+ * the whole hive; others use the per-device requested mode.
+ */
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev)
+{
+       int req_nps_mode, cur_nps_mode, r;
+       struct amdgpu_hive_info *hive;
+
+       if (amdgpu_sriov_vf(adev) || !adev->gmc.supported_nps_modes ||
+           !adev->gmc.gmc_funcs->request_mem_partition_mode)
+               return;
+
+       cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+       hive = amdgpu_get_xgmi_hive(adev);
+       if (hive) {
+               req_nps_mode = atomic_read(&hive->requested_nps_mode);
+               if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode,
+                                                   cur_nps_mode)) {
+                       amdgpu_put_xgmi_hive(hive);
+                       return;
+               }
+               r = amdgpu_xgmi_request_nps_change(adev, hive, req_nps_mode);
+               amdgpu_put_xgmi_hive(hive);
+               goto out;
+       }
+
+       req_nps_mode = adev->gmc.requested_nps_mode;
+       if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode, cur_nps_mode))
+               return;
+
+       /* even if this fails, we should let driver unload w/o blocking */
+       r = adev->gmc.gmc_funcs->request_mem_partition_mode(adev, req_nps_mode);
+out:
+       if (r)
+               dev_err(adev->dev, "NPS mode change request failed\n");
+       else
+               dev_info(adev->dev,
+                        "NPS mode change request done, reload driver to complete the change\n");
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 1a50639a003a..b13d6adb5efd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -467,4 +467,6 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,

 int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
                                        int nps_mode);
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 5d721ccb9dfd..db2c1b11b813 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -1564,3 +1564,41 @@ int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev)

        return 0;
 }
+
+/* Place an NPS mode change request for every device in @hive. Expected to be
+ * called only during driver unload; the request is placed once for the whole
+ * hive. If one device fails, the mode queried from it is requested back on
+ * the devices already done and the failure is returned; on success the hive's
+ * requested mode is set to UNKNOWN so other devices don't request anymore.
+ * Returns 0 on success, else the nonzero result of the failing request.
+ */
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+                                  struct amdgpu_hive_info *hive,
+                                  int req_nps_mode)
+{
+       struct amdgpu_device *tmp_adev;
+       int cur_nps_mode, r = 0;
+
+       mutex_lock(&hive->hive_lock);
+       list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+               r = adev->gmc.gmc_funcs->request_mem_partition_mode(
+                       tmp_adev, req_nps_mode);
+               if (r)
+                       goto err;
+       }
+       /* Set to UNKNOWN so that other devices don't request anymore */
+       atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);
+       goto unlock;
+err:
+       /* Request back current mode if one of the requests failed */
+       cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(tmp_adev);
+       list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list,
+                                            gmc.xgmi.head)
+               adev->gmc.gmc_funcs->request_mem_partition_mode(tmp_adev,
+                                                               cur_nps_mode);
+unlock:
+       /* Both paths exit here so the hive lock is always released */
+       mutex_unlock(&hive->hive_lock);
+
+       return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 67abadb4f298..41d5f97fc77a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -79,4 +79,8 @@ static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
 int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev);
 int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev);

+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+                                  struct amdgpu_hive_info *hive,
+                                  int req_nps_mode);
+
 #endif
--
2.25.1



More information about the amd-gfx mailing list