[PATCH 2/5] drm/amd/powerplay: support temperature emergency max values

Evan Quan evan.quan at amd.com
Tue Apr 23 08:36:58 UTC 2019


These new interfaces(temp1_emergency, temp2_emergency,
temp3_emergency) are supported on SOC15 dGPUs only.

Change-Id: I2552df63f9c8c50294b3940bb2a402217673c2bc
Signed-off-by: Evan Quan <evan.quan at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h       |  6 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 40 ++++++++++++++++++-
 .../drm/amd/powerplay/hwmgr/hardwaremanager.c |  6 +++
 .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c    |  6 +++
 .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c    |  6 +++
 .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    |  6 +++
 .../gpu/drm/amd/powerplay/inc/power_state.h   |  3 ++
 .../gpu/drm/amd/powerplay/inc/pp_thermal.h    | 12 ++++--
 8 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 8df54443ec78..521dbd0d9af8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -75,14 +75,20 @@ struct amdgpu_dpm_thermal {
 	int                min_temp;
 	/* high temperature threshold */
 	int                max_temp;
+	/* edge max emergency(shutdown) temp */
+	int                max_edge_emergency_temp;
 	/* hotspot low temperature threshold */
 	int                min_hotspot_temp;
 	/* hotspot high temperature critical threshold */
 	int                max_hotspot_crit_temp;
+	/* hotspot max emergency(shutdown) temp */
+	int                max_hotspot_emergency_temp;
 	/* memory low temperature threshold */
 	int                min_mem_temp;
 	/* memory high temperature critical threshold */
 	int                max_mem_crit_temp;
+	/* memory max emergency(shutdown) temp */
+	int                max_mem_emergency_temp;
 	/* was last interrupt low to high or high to low */
 	bool               high_to_low;
 	/* interrupt source */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index f427e4a64453..77de010a4ceb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1489,6 +1489,32 @@ static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
 }
 
+static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int channel = to_sensor_dev_attr(attr)->index;
+	int temp;
+
+	if (channel >= PP_TEMP_MAX)
+		return -EINVAL;
+
+	switch (channel) {
+	case PP_TEMP_JUNCTION:
+		temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
+		break;
+	case PP_TEMP_EDGE:
+		temp = adev->pm.dpm.thermal.max_edge_emergency_temp;
+		break;
+	case PP_TEMP_MEM:
+		temp = adev->pm.dpm.thermal.max_mem_emergency_temp;
+		break;
+	}
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
 static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
 					    struct device_attribute *attr,
 					    char *buf)
@@ -2066,6 +2092,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
  * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
  *   - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
  *
+ * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius
+ *   - these are supported on SOC15 dGPUs only
+ *
  * hwmon interfaces for GPU voltage:
  *
  * - in0_input: the voltage on the GPU in millivolts
@@ -2115,10 +2144,13 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
 static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
 static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
 static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
 static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
 static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
@@ -2149,6 +2181,9 @@ static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
 	&sensor_dev_attr_temp3_crit.dev_attr.attr,
 	&sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
+	&sensor_dev_attr_temp1_emergency.dev_attr.attr,
+	&sensor_dev_attr_temp2_emergency.dev_attr.attr,
+	&sensor_dev_attr_temp3_emergency.dev_attr.attr,
 	&sensor_dev_attr_pwm1.dev_attr.attr,
 	&sensor_dev_attr_pwm1_enable.dev_attr.attr,
 	&sensor_dev_attr_pwm1_min.dev_attr.attr,
@@ -2277,7 +2312,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	    (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
 	     attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
 	     attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
-	     attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr))
+	     attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr))
 		return 0;
 
 	return effective_mode;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index af6ab04130ef..cc57fb953e62 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -228,9 +228,12 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
 	struct PP_TemperatureRange range = {
 		TEMP_RANGE_MIN,
 		TEMP_RANGE_MAX,
+		TEMP_RANGE_MAX,
 		TEMP_RANGE_MIN,
 		TEMP_RANGE_MAX,
+		TEMP_RANGE_MAX,
 		TEMP_RANGE_MIN,
+		TEMP_RANGE_MAX,
 		TEMP_RANGE_MAX};
 	struct amdgpu_device *adev = hwmgr->adev;
 
@@ -245,10 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
 
 	adev->pm.dpm.thermal.min_temp = range.min;
 	adev->pm.dpm.thermal.max_temp = range.max;
+	adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max;
 	adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min;
 	adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max;
+	adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max;
 	adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
 	adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
+	adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 4e1df44f094b..1422bc4e45d1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -4859,10 +4859,16 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 
 	thermal_data->max = pp_table->TedgeLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->mem_crit_max = pp_table->ThbmLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 4f63570ea257..60c9f9502e65 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -2534,10 +2534,16 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 
 	thermal_data->max = pp_table->TedgeLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->mem_crit_max = pp_table->ThbmLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 129ffa781b89..6fcdefcfa930 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -3971,10 +3971,16 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 
 	thermal_data->max = pp_table->TedgeLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->mem_crit_max = pp_table->ThbmLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
index a8988e7d58c6..a5f2227a3971 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
@@ -124,10 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock {
 struct PP_TemperatureRange {
 	int min;
 	int max;
+	int edge_emergency_max;
 	int hotspot_min;
 	int hotspot_crit_max;
+	int hotspot_emergency_max;
 	int mem_min;
 	int mem_crit_max;
+	int mem_emergency_max;
 };
 
 struct PP_StateValidationBlock {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
index 75a0a2f8bea2..3e30768f9e1c 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
@@ -27,14 +27,18 @@
 
 static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] =
 {
-	{-273150,  99000, -273150, 99000, -273150, 99000},
-	{ 120000, 120000, 120000, 120000, 120000, 120000},
+	{-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
+	{ 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
 };
 
 static const struct PP_TemperatureRange SMU7ThermalPolicy[] =
 {
-	{-273150,  99000, -273150, 99000, -273150, 99000},
-	{ 120000, 120000, 120000, 120000, 120000, 120000},
+	{-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
+	{ 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
 };
 
+#define CTF_OFFSET_EDGE			5
+#define CTF_OFFSET_HOTSPOT		5
+#define CTF_OFFSET_HBM			5
+
 #endif
-- 
2.21.0



More information about the amd-gfx mailing list