[PATCH 3/6] drm/amd/powerplay: support temperature emergency max values

Alex Deucher alexdeucher at gmail.com
Fri Apr 19 15:10:54 UTC 2019


On Thu, Apr 18, 2019 at 5:03 AM Evan Quan <evan.quan at amd.com> wrote:
>
> These new interfaces (temp1_emergency, temp2_emergency,
> temp3_emergency) are supported on SOC15 dGPUs only.
>
> Change-Id: I2552df63f9c8c50294b3940bb2a402217673c2bc
> Signed-off-by: Evan Quan <evan.quan at amd.com>

Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h       |  6 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 40 ++++++++++++++++++-
>  .../drm/amd/powerplay/hwmgr/hardwaremanager.c |  6 +++
>  .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c    |  6 +++
>  .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c    |  6 +++
>  .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    |  6 +++
>  .../gpu/drm/amd/powerplay/inc/power_state.h   |  3 ++
>  .../gpu/drm/amd/powerplay/inc/pp_thermal.h    | 12 ++++--
>  8 files changed, 80 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> index e1492438ae7b..32e2def42f30 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> @@ -75,14 +75,20 @@ struct amdgpu_dpm_thermal {
>         int                min_temp;
>         /* high temperature threshold */
>         int                max_temp;
> +       /* hotspot max emergency(shutdown) temp */
> +       int                max_hotspot_emergency_temp;
>         /* edge low temperature threshold */
>         int                min_edge_temp;
>         /* edge high temperature critical threshold */
>         int                max_edge_crit_temp;
> +       /* edge max emergency(shutdown) temp */
> +       int                max_edge_emergency_temp;
>         /* memory low temperature threshold */
>         int                min_mem_temp;
>         /* memory high temperature critical threshold */
>         int                max_mem_crit_temp;
> +       /* memory max emergency(shutdown) temp */
> +       int                max_mem_emergency_temp;
>         /* was last interrupt low to high or high to low */
>         bool               high_to_low;
>         /* interrupt source */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index 3f6b5b5bb0c6..be33144e2dca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -1511,6 +1511,32 @@ static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
>         return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
>  }
>
> +static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
> +                                            struct device_attribute *attr,
> +                                            char *buf)
> +{
> +       struct amdgpu_device *adev = dev_get_drvdata(dev);
> +       int channel = to_sensor_dev_attr(attr)->index;
> +       int temp;
> +
> +       if (channel >= PP_TEMP_MAX)
> +               return -EINVAL;
> +
> +       switch (channel) {
> +       case PP_TEMP_JUNCTION:
> +               temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
> +               break;
> +       case PP_TEMP_EDGE:
> +               temp = adev->pm.dpm.thermal.max_edge_emergency_temp;
> +               break;
> +       case PP_TEMP_MEM:
> +               temp = adev->pm.dpm.thermal.max_mem_emergency_temp;
> +               break;
> +       }
> +
> +       return snprintf(buf, PAGE_SIZE, "%d\n", temp);
> +}
> +
>  static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>                                             struct device_attribute *attr,
>                                             char *buf)
> @@ -2091,6 +2117,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
>   * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
>   *   - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
>   *
> + * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius
> + *   - these are supported on SOC15 dGPUs only
> + *
>   * hwmon interfaces for GPU voltage:
>   *
>   * - in0_input: the voltage on the GPU in millivolts
> @@ -2140,10 +2169,13 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
>  static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
> +static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
>  static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 1);
> +static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
>  static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
> +static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
>  static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
>  static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
>  static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
> @@ -2180,6 +2212,9 @@ static struct attribute *hwmon_attributes[] = {
>         &sensor_dev_attr_temp1_label.dev_attr.attr,
>         &sensor_dev_attr_temp2_label.dev_attr.attr,
>         &sensor_dev_attr_temp3_label.dev_attr.attr,
> +       &sensor_dev_attr_temp1_emergency.dev_attr.attr,
> +       &sensor_dev_attr_temp2_emergency.dev_attr.attr,
> +       &sensor_dev_attr_temp3_emergency.dev_attr.attr,
>         &sensor_dev_attr_pwm1.dev_attr.attr,
>         &sensor_dev_attr_pwm1_enable.dev_attr.attr,
>         &sensor_dev_attr_pwm1_min.dev_attr.attr,
> @@ -2310,7 +2345,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
>              attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
>              attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
>              attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
> -            attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr))
> +            attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr))
>                 return 0;
>
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
> index c8397b20f71a..12027b194a4a 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
> @@ -228,9 +228,12 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
>         struct PP_TemperatureRange range = {
>                 TEMP_RANGE_MIN,
>                 TEMP_RANGE_MAX,
> +               TEMP_RANGE_MAX,
>                 TEMP_RANGE_MIN,
>                 TEMP_RANGE_MAX,
> +               TEMP_RANGE_MAX,
>                 TEMP_RANGE_MIN,
> +               TEMP_RANGE_MAX,
>                 TEMP_RANGE_MAX};
>         struct amdgpu_device *adev = hwmgr->adev;
>
> @@ -245,10 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
>
>         adev->pm.dpm.thermal.min_temp = range.min;
>         adev->pm.dpm.thermal.max_temp = range.max;
> +       adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max;
>         adev->pm.dpm.thermal.min_edge_temp = range.edge_min;
>         adev->pm.dpm.thermal.max_edge_crit_temp = range.edge_crit_max;
> +       adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max;
>         adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
>         adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
> +       adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;
>
>         return ret;
>  }
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index 65aa7e70d7d9..1d78a5ee9523 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -4861,10 +4861,16 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
>
>         thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->edge_crit_max = pp_table->TedgeLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->mem_crit_max = pp_table->ThbmLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> index c0994851e7c7..a283046347c9 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> @@ -2536,10 +2536,16 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
>
>         thermal_data->max = pptable_information->us_software_shutdown_temp *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->edge_crit_max = pp_table->TedgeLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->mem_crit_max = pp_table->ThbmLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> index 5d9aa0f22c86..0c0714862eb8 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> @@ -3973,10 +3973,16 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
>
>         thermal_data->max = pptable_information->us_software_shutdown_temp *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->edge_crit_max = pp_table->TedgeLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->mem_crit_max = pp_table->ThbmLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
> index c102415ddc98..48d6c4b9fa61 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
> @@ -124,10 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock {
>  struct PP_TemperatureRange {
>         int min;
>         int max;
> +       int hotspot_emergency_max;
>         int edge_min;
>         int edge_crit_max;
> +       int edge_emergency_max;
>         int mem_min;
>         int mem_crit_max;
> +       int mem_emergency_max;
>  };
>
>  struct PP_StateValidationBlock {
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
> index 75a0a2f8bea2..3e30768f9e1c 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
> @@ -27,14 +27,18 @@
>
>  static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] =
>  {
> -       {-273150,  99000, -273150, 99000, -273150, 99000},
> -       { 120000, 120000, 120000, 120000, 120000, 120000},
> +       {-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
> +       { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
>  };
>
>  static const struct PP_TemperatureRange SMU7ThermalPolicy[] =
>  {
> -       {-273150,  99000, -273150, 99000, -273150, 99000},
> -       { 120000, 120000, 120000, 120000, 120000, 120000},
> +       {-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
> +       { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
>  };
>
> +#define CTF_OFFSET_EDGE                        5
> +#define CTF_OFFSET_HOTSPOT             5
> +#define CTF_OFFSET_HBM                 5
> +
>  #endif
> --
> 2.21.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list