[PATCH 1/5] drm/amd/powerplay: support hotspot/memory critical limit values
Evan Quan
evan.quan at amd.com
Tue Apr 23 08:36:57 UTC 2019
These new interfaces(temp2_crit, temp2_crit_hyst, temp3_crit,
temp3_crit_hyst) are supported on SOC15 dGPUs only.
Change-Id: Ia87e3f6ad816b51d6680eb74c8f755d6c2b0a6ae
Signed-off-by: Evan Quan <evan.quan at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 8 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 55 ++++++++++++++++++-
.../drm/amd/powerplay/hwmgr/hardwaremanager.c | 12 +++-
.../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 10 +++-
.../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 11 +++-
.../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 11 +++-
.../gpu/drm/amd/powerplay/inc/power_state.h | 4 ++
.../gpu/drm/amd/powerplay/inc/pp_thermal.h | 8 +--
8 files changed, 103 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index dca35407879d..8df54443ec78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -75,6 +75,14 @@ struct amdgpu_dpm_thermal {
int min_temp;
/* high temperature threshold */
int max_temp;
+ /* hotspot low temperature threshold */
+ int min_hotspot_temp;
+ /* hotspot high temperature critical threshold */
+ int max_hotspot_crit_temp;
+ /* memory low temperature threshold */
+ int min_mem_temp;
+ /* memory high temperature critical threshold */
+ int max_mem_crit_temp;
/* was last interrupt low to high or high to low */
bool high_to_low;
/* interrupt source */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 552127b74f78..f427e4a64453 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1457,6 +1457,38 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
+static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int hyst = to_sensor_dev_attr(attr)->index;
+ int temp;
+
+ if (hyst)
+ temp = adev->pm.dpm.thermal.min_hotspot_temp;
+ else
+ temp = adev->pm.dpm.thermal.max_hotspot_crit_temp;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
+static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int hyst = to_sensor_dev_attr(attr)->index;
+ int temp;
+
+ if (hyst)
+ temp = adev->pm.dpm.thermal.min_mem_temp;
+ else
+ temp = adev->pm.dpm.thermal.max_mem_crit_temp;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -2028,9 +2060,11 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
* - temp1_input: the on die GPU temperature in millidegrees Celsius
*
- * - temp1_crit: temperature critical max value in millidegrees Celsius
+ * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
+ * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
*
- * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
*
* hwmon interfaces for GPU voltage:
*
@@ -2081,6 +2115,10 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
@@ -2107,6 +2145,10 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_temp1_input.dev_attr.attr,
&sensor_dev_attr_temp1_crit.dev_attr.attr,
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp2_crit.dev_attr.attr,
+ &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp3_crit.dev_attr.attr,
+ &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
&sensor_dev_attr_pwm1.dev_attr.attr,
&sensor_dev_attr_pwm1_enable.dev_attr.attr,
&sensor_dev_attr_pwm1_min.dev_attr.attr,
@@ -2229,6 +2271,15 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
return 0;
+ /* only SOC15 dGPUs support hotspot and mem temperatures */
+ if (((adev->flags & AMD_IS_APU) ||
+ adev->asic_type < CHIP_VEGA10) &&
+ (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr))
+ return 0;
+
return effective_mode;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 70f7f47a2fcf..af6ab04130ef 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -225,7 +225,13 @@ int phm_register_irq_handlers(struct pp_hwmgr *hwmgr)
int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
{
int ret = 0;
- struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX};
+ struct PP_TemperatureRange range = {
+ TEMP_RANGE_MIN,
+ TEMP_RANGE_MAX,
+ TEMP_RANGE_MIN,
+ TEMP_RANGE_MAX,
+ TEMP_RANGE_MIN,
+ TEMP_RANGE_MAX};
struct amdgpu_device *adev = hwmgr->adev;
if (hwmgr->hwmgr_func->get_thermal_temperature_range)
@@ -239,6 +245,10 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
adev->pm.dpm.thermal.min_temp = range.min;
adev->pm.dpm.thermal.max_temp = range.max;
+ adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min;
+ adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max;
+ adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
+ adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
return ret;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 384c37875cd0..4e1df44f094b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -4852,12 +4852,16 @@ static int vega10_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
struct PP_TemperatureRange *thermal_data)
{
- struct phm_ppt_v2_information *table_info =
- (struct phm_ppt_v2_information *)hwmgr->pptable;
+ struct vega10_hwmgr *data = hwmgr->backend;
+ PPTable_t *pp_table = &(data->smc_state_table.pp_table);
memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
- thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp *
+ thermal_data->max = pp_table->TedgeLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->mem_crit_max = pp_table->ThbmLimit *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 707cd4b0357f..4f63570ea257 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -2526,12 +2526,17 @@ static int vega12_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
struct PP_TemperatureRange *thermal_data)
{
- struct phm_ppt_v3_information *pptable_information =
- (struct phm_ppt_v3_information *)hwmgr->pptable;
+ struct vega12_hwmgr *data =
+ (struct vega12_hwmgr *)(hwmgr->backend);
+ PPTable_t *pp_table = &(data->smc_state_table.pp_table);
memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
- thermal_data->max = pptable_information->us_software_shutdown_temp *
+ thermal_data->max = pp_table->TedgeLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->mem_crit_max = pp_table->ThbmLimit *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 39a547084e90..129ffa781b89 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -3963,12 +3963,17 @@ static int vega20_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
struct PP_TemperatureRange *thermal_data)
{
- struct phm_ppt_v3_information *pptable_information =
- (struct phm_ppt_v3_information *)hwmgr->pptable;
+ struct vega20_hwmgr *data =
+ (struct vega20_hwmgr *)(hwmgr->backend);
+ PPTable_t *pp_table = &(data->smc_state_table.pp_table);
memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
- thermal_data->max = pptable_information->us_software_shutdown_temp *
+ thermal_data->max = pp_table->TedgeLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->mem_crit_max = pp_table->ThbmLimit *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
index a99b5cbb113e..a8988e7d58c6 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
@@ -124,6 +124,10 @@ struct PP_StateSoftwareAlgorithmBlock {
struct PP_TemperatureRange {
int min;
int max;
+ int hotspot_min;
+ int hotspot_crit_max;
+ int mem_min;
+ int mem_crit_max;
};
struct PP_StateValidationBlock {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
index 201d2b6329ab..75a0a2f8bea2 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
@@ -27,14 +27,14 @@
static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] =
{
- {-273150, 99000},
- { 120000, 120000},
+ {-273150, 99000, -273150, 99000, -273150, 99000},
+ { 120000, 120000, 120000, 120000, 120000, 120000},
};
static const struct PP_TemperatureRange SMU7ThermalPolicy[] =
{
- {-273150, 99000},
- { 120000, 120000},
+ {-273150, 99000, -273150, 99000, -273150, 99000},
+ { 120000, 120000, 120000, 120000, 120000, 120000},
};
#endif
--
2.21.0
More information about the amd-gfx
mailing list