[PATCH 2/2] drm/amd/pm: enable thermal alert on smu 14.0.2/3

Gao, Likun Likun.Gao at amd.com
Thu May 16 07:20:06 UTC 2024


[AMD Official Use Only - AMD Internal Distribution Only]

The series was
Reviewed-by: Likun Gao <Likun.Gao at amd.com>.

Regards,
Likun

-----Original Message-----
From: Kenneth Feng <kenneth.feng at amd.com>
Sent: Thursday, May 16, 2024 9:11 AM
To: amd-gfx at lists.freedesktop.org
Cc: Gao, Likun <Likun.Gao at amd.com>; Feng, Kenneth <Kenneth.Feng at amd.com>
Subject: [PATCH 2/2] drm/amd/pm: enable thermal alert on smu 14.0.2/3

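Enable the thermal alert interrupt on SMU 14.0.2/3: program the THM
interrupt control registers when the irq state changes, fill in the
thermal temperature range from the pptable, and hook up the
enable/disable thermal alert callbacks.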

Signed-off-by: Kenneth Feng <kenneth.feng at amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h  |  8 +++
 .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c    | 68 ++++++++++++++++++-
 .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c  | 35 +++++++++-
 3 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
index 3d34b3869df6..9b97a4e95c0f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
@@ -48,6 +48,10 @@

 #define SMU14_TOOL_SIZE                        0x19000

+#define CTF_OFFSET_EDGE                        5
+#define CTF_OFFSET_HOTSPOT             5
+#define CTF_OFFSET_MEM                 5
+
 extern const int decoded_link_speed[5];
 extern const int decoded_link_width[7];

@@ -236,5 +240,9 @@ int smu_v14_0_od_edit_dpm_table(struct smu_context *smu,

 void smu_v14_0_set_smu_mailbox_registers(struct smu_context *smu);

+int smu_v14_0_enable_thermal_alert(struct smu_context *smu);
+
+int smu_v14_0_disable_thermal_alert(struct smu_context *smu);
+
 #endif
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
index fbfe9cae0e05..8cce17d1f230 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
@@ -38,6 +38,8 @@
 #include "amdgpu_ras.h"
 #include "smu_cmn.h"

+#include "asic_reg/thm/thm_14_0_2_offset.h"
+#include "asic_reg/thm/thm_14_0_2_sh_mask.h"
 #include "asic_reg/mp/mp_14_0_2_offset.h"
 #include "asic_reg/mp/mp_14_0_2_sh_mask.h"

@@ -853,12 +855,19 @@ static int smu_v14_0_set_irq_state(struct amdgpu_device *adev,
                                   unsigned tyep,
                                   enum amdgpu_interrupt_state state)
 {
+       struct smu_context *smu = adev->powerplay.pp_handle;
+       uint32_t low, high;
        uint32_t val = 0;

        switch (state) {
        case AMDGPU_IRQ_STATE_DISABLE:
                /* For THM irqs */
-               // TODO
+               val = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL);
+               val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTH_MASK, 1);
+               val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTL_MASK, 1);
+               WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, val);
+
+               WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_ENA, 0);

                /* For MP1 SW irqs */
                if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
@@ -875,7 +884,24 @@ static int smu_v14_0_set_irq_state(struct amdgpu_device *adev,
                break;
        case AMDGPU_IRQ_STATE_ENABLE:
                /* For THM irqs */
-               // TODO
+               low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP,
+                         smu->thermal_range.min / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES);
+               high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP,
+                          smu->thermal_range.software_shutdown_temp);
+               val = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL);
+               val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5);
+               val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1);
+               val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTH_MASK, 0);
+               val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTL_MASK, 0);
+               val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high & 0xff));
+               val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low & 0xff));
+               val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
+               WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, val);
+
+               val = (1 << THM_THERMAL_INT_ENA__THERM_INTH_CLR__SHIFT);
+               val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT);
+               val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT);
+               WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_ENA, val);

                /* For MP1 SW irqs */
                if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
@@ -1849,3 +1875,41 @@ int smu_v14_0_od_edit_dpm_table(struct smu_context *smu,
        return ret;
 }

+static int smu_v14_0_allow_ih_interrupt(struct smu_context *smu)
+{
+       return smu_cmn_send_smc_msg(smu,
+                                   SMU_MSG_AllowIHHostInterrupt,
+                                   NULL);
+}
+
+static int smu_v14_0_process_pending_interrupt(struct smu_context *smu)
+{
+       int ret = 0;
+
+       if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT))
+               ret = smu_v14_0_allow_ih_interrupt(smu);
+
+       return ret;
+}
+
+int smu_v14_0_enable_thermal_alert(struct smu_context *smu)
+{
+       int ret = 0;
+
+       if (!smu->irq_source.num_types)
+               return 0;
+
+       ret = amdgpu_irq_get(smu->adev, &smu->irq_source, 0);
+       if (ret)
+               return ret;
+
+       return smu_v14_0_process_pending_interrupt(smu);
+}
+
+int smu_v14_0_disable_thermal_alert(struct smu_context *smu)
+{
+       if (!smu->irq_source.num_types)
+               return 0;
+
+       return amdgpu_irq_put(smu->adev, &smu->irq_source, 0);
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
index 1fbceb85d319..2b874e1ba4ea 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -1276,10 +1276,41 @@ static int smu_v14_0_2_update_pcie_parameters(struct smu_context *smu,
        return 0;
 }

+static const struct smu_temperature_range smu14_thermal_policy[] = {
+       {-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
+       { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
+};
+
 static int smu_v14_0_2_get_thermal_temperature_range(struct smu_context *smu,
                                                     struct smu_temperature_range *range)
 {
-       // TODO
+       struct smu_table_context *table_context = &smu->smu_table;
+       struct smu_14_0_2_powerplay_table *powerplay_table =
+               table_context->power_play_table;
+       PPTable_t *pptable = smu->smu_table.driver_pptable;
+
+       if (amdgpu_sriov_vf(smu->adev))
+               return 0;
+
+       if (!range)
+               return -EINVAL;
+
+       memcpy(range, &smu14_thermal_policy[0], sizeof(struct smu_temperature_range));
+
+       range->max = pptable->CustomSkuTable.TemperatureLimit[TEMP_EDGE] *
+               SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+       range->edge_emergency_max = (pptable->CustomSkuTable.TemperatureLimit[TEMP_EDGE] + CTF_OFFSET_EDGE) *
+               SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+       range->hotspot_crit_max = pptable->CustomSkuTable.TemperatureLimit[TEMP_HOTSPOT] *
+               SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+       range->hotspot_emergency_max = (pptable->CustomSkuTable.TemperatureLimit[TEMP_HOTSPOT] + CTF_OFFSET_HOTSPOT) *
+               SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+       range->mem_crit_max = pptable->CustomSkuTable.TemperatureLimit[TEMP_MEM] *
+               SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+       range->mem_emergency_max = (pptable->CustomSkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)*
+               SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+       range->software_shutdown_temp = powerplay_table->software_shutdown_temp;
+       range->software_shutdown_temp_offset = pptable->CustomSkuTable.FanAbnormalTempLimitOffset;

        return 0;
 }
@@ -1866,6 +1897,8 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = {
        .update_pcie_parameters = smu_v14_0_2_update_pcie_parameters,
        .get_thermal_temperature_range = smu_v14_0_2_get_thermal_temperature_range,
        .register_irq_handler = smu_v14_0_register_irq_handler,
+       .enable_thermal_alert = smu_v14_0_enable_thermal_alert,
+       .disable_thermal_alert = smu_v14_0_disable_thermal_alert,
        .notify_memory_pool_location = smu_v14_0_notify_memory_pool_location,
        .set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range,
        .init_pptable_microcode = smu_v14_0_init_pptable_microcode,
--
2.34.1



More information about the amd-gfx mailing list