[PATCH v2 1/2] drm/amdgpu: implement smu send rma reason for smu v13.0.6

Wang, Yang(Kevin) KevinYang.Wang at amd.com
Wed Feb 7 14:01:24 UTC 2024


[AMD Official Use Only - General]

Thanks, I will correct it before submitting.

Best Regards,
Kevin

-----Original Message-----
From: Zhang, Hawking <Hawking.Zhang at amd.com>
Sent: Wednesday, February 7, 2024 10:00 PM
To: Wang, Yang(Kevin) <KevinYang.Wang at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Zhou1, Tao <Tao.Zhou1 at amd.com>; Lazar, Lijo <Lijo.Lazar at amd.com>
Subject: RE: [PATCH v2 1/2] drm/amdgpu: implement smu send rma reason for smu v13.0.6

[AMD Official Use Only - General]

With a nitpick below, the series is

Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>

+       MSG_MAP(BadPageThreshold,                    PPSMC_MSG_RmaDueToBadPageThreshold,        0),

It might be better to name it RmaDueToBadPageThreshold/SMU_MSG_RmaDueToBadPageThreshold.

Regards,
Hawking

-----Original Message-----
From: Wang, Yang(Kevin) <KevinYang.Wang at amd.com>
Sent: Wednesday, February 7, 2024 21:54
To: amd-gfx at lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang at amd.com>; Zhou1, Tao <Tao.Zhou1 at amd.com>; Lazar, Lijo <Lijo.Lazar at amd.com>; Wang, Yang(Kevin) <KevinYang.Wang at amd.com>
Subject: [PATCH v2 1/2] drm/amdgpu: implement smu send rma reason for smu v13.0.6

implement smu send rma reason function for smu v13.0.6

Signed-off-by: Yang Wang <kevinyang.wang at amd.com>
Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>
---
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 15 ++++++++++++++
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h       |  1 +
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     | 10 ++++++++++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  6 ++++++
 .../pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h  |  3 ++-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 ++-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 20 +++++++++++++++++++
 7 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 6627ee07d52d..f84bfed50681 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -693,6 +693,21 @@ int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t si
        return ret;
 }

+int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev)
+{
+       struct smu_context *smu = adev->powerplay.pp_handle;
+       int ret;
+
+       if (!is_support_sw_smu(adev))
+               return -EOPNOTSUPP;
+
+       mutex_lock(&adev->pm.mutex);
+       ret = smu_send_rma_reason(smu);
+       mutex_unlock(&adev->pm.mutex);
+
+       return ret;
+}
+
 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
                                  enum pp_clock_type type,
                                  uint32_t *min,
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 3047ffe7f244..621200e0823f 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -450,6 +450,7 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio
 int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable);
 int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size);
 int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size);
+int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev);
 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
                                       enum pp_clock_type type,
                                       uint32_t *min,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 0ad947df777a..138dcb8724b6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -3669,3 +3669,13 @@ int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size)

        return ret;
 }
+
+int smu_send_rma_reason(struct smu_context *smu)
+{
+       int ret = 0;
+
+       if (smu->ppt_funcs && smu->ppt_funcs->send_rma_reason)
+               ret = smu->ppt_funcs->send_rma_reason(smu);
+
+       return ret;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 66e84defd0b6..a870bdd49a4e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1341,6 +1341,11 @@ struct pptable_funcs {
         */
        int (*send_hbm_bad_pages_num)(struct smu_context *smu, uint32_t size);

+       /**
+        * @send_rma_reason: message rma reason event to SMU.
+        */
+       int (*send_rma_reason)(struct smu_context *smu);
+
        /**
         * @get_ecc_table:  message SMU to get ECC INFO table.
         */
@@ -1588,5 +1593,6 @@ int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size);
 void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
 int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
 int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
+int smu_send_rma_reason(struct smu_context *smu);
 #endif
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
index 509e3cd483fb..86758051cb93 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
@@ -91,7 +91,8 @@
 #define PPSMC_MSG_QueryValidMcaCeCount              0x3A
 #define PPSMC_MSG_McaBankCeDumpDW                   0x3B
 #define PPSMC_MSG_SelectPLPDMode                    0x40
-#define PPSMC_Message_Count                         0x41
+#define PPSMC_MSG_RmaDueToBadPageThreshold          0x43
+#define PPSMC_Message_Count                         0x44

 //PPSMC Reset Types for driver msg argument
 #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET        0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 953a767613b1..efd97408b667 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -261,7 +261,8 @@
        __SMU_DUMMY_MAP(SetSoftMaxVpe), \
        __SMU_DUMMY_MAP(SetSoftMinVpe), \
        __SMU_DUMMY_MAP(GetMetricsVersion), \
-       __SMU_DUMMY_MAP(EnableUCLKShadow),
+       __SMU_DUMMY_MAP(EnableUCLKShadow), \
+       __SMU_DUMMY_MAP(BadPageThreshold),

 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 40ce3f780847..f2311138e8fa 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -172,6 +172,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
        MSG_MAP(McaBankDumpDW,                       PPSMC_MSG_McaBankDumpDW,                   0),
        MSG_MAP(McaBankCeDumpDW,                     PPSMC_MSG_McaBankCeDumpDW,                 0),
        MSG_MAP(SelectPLPDMode,                      PPSMC_MSG_SelectPLPDMode,                  0),
+       MSG_MAP(BadPageThreshold,                    PPSMC_MSG_RmaDueToBadPageThreshold,        0),
 };

 // clang-format on
@@ -2381,6 +2382,24 @@ static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu,
        return ret;
 }

+static int smu_v13_0_6_send_rma_reason(struct smu_context *smu)
+{
+       struct amdgpu_device *adev = smu->adev;
+       int ret;
+
+       /* NOTE: the message is only valid on dGPU with pmfw 85.90.0 and above */
+       if ((adev->flags & AMD_IS_APU) || smu->smc_fw_version < 0x00555a00)
+               return 0;
+
+       ret = smu_cmn_send_smc_msg(smu, SMU_MSG_BadPageThreshold, NULL);
+       if (ret)
+               dev_err(smu->adev->dev,
+                       "[%s] failed to send BadPageThreshold event to SMU\n",
+                       __func__);
+
+       return ret;
+}
+
 static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
 {
        struct smu_context *smu = adev->powerplay.pp_handle;
@@ -3095,6 +3114,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
        .i2c_init = smu_v13_0_6_i2c_control_init,
        .i2c_fini = smu_v13_0_6_i2c_control_fini,
        .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
+       .send_rma_reason = smu_v13_0_6_send_rma_reason,
 };

 void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
--
2.34.1




More information about the amd-gfx mailing list