[PATCH 2/4] drm/amd/powerplay: implement ASIC specific thermal throttling logging

Evan Quan evan.quan at amd.com
Thu May 28 03:04:42 UTC 2020


Add a per-ASIC callback for logging thermal throttling events. It is implemented for Arcturus only for now.

Change-Id: I4ad4fbb4f932d748849574e0f4dde01bb7db487a
Signed-off-by: Evan Quan <evan.quan at amd.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
---
 drivers/gpu/drm/amd/powerplay/arcturus_ppt.c  | 44 +++++++++++++++++++
 .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h    |  1 +
 drivers/gpu/drm/amd/powerplay/smu_internal.h  |  2 +
 3 files changed, 47 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
index 67980f50d8ec..5c1b2d7abcaa 100644
--- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
@@ -2329,6 +2329,49 @@ static int arcturus_allow_xgmi_power_down(struct smu_context *smu, bool en)
 					   NULL);
 }
 
+static const struct throttling_logging_label {	/* maps one throttler status bit to a log label */
+	uint32_t feature_mask;	/* single-bit mask tested against metrics.ThrottlerStatus */
+	const char *label;	/* text appended to the thermal throttling warning */
+} logging_label[] = {
+	{(1U << THROTTLER_TEMP_HOTSPOT_BIT), "GPU"},
+	{(1U << THROTTLER_TEMP_MEM_BIT), "HBM"},
+	{(1U << THROTTLER_TEMP_VR_GFX_BIT), "VR of GFX rail"},
+	{(1U << THROTTLER_TEMP_VR_MEM_BIT), "VR of HBM rail"},
+	{(1U << THROTTLER_TEMP_VR_SOC_BIT), "VR of SOC rail"},
+	{(1U << THROTTLER_VRHOT0_BIT), "VR0 HOT"},
+	{(1U << THROTTLER_VRHOT1_BIT), "VR1 HOT"},
+};
+/* Warn, naming each thermal throttler whose bit is set in the SMU metrics. */
+static void arcturus_log_thermal_throttling_event(struct smu_context *smu)
+{
+	int throttler_idx, throttling_events = 0, buf_idx = 0;
+	struct amdgpu_device *adev = smu->adev;
+	SmuMetrics_t metrics;
+	char log_buf[256];
+
+	/* On failure 'metrics' is not known to be filled in; do not decode it. */
+	if (arcturus_get_metrics_table(smu, &metrics))
+		return;
+
+	memset(log_buf, 0, sizeof(log_buf));
+	for (throttler_idx = 0; throttler_idx < ARRAY_SIZE(logging_label); throttler_idx++) {
+		if (!(metrics.ThrottlerStatus & logging_label[throttler_idx].feature_mask))
+			continue;
+		/* Labels after the first are joined with " and ". */
+		buf_idx += snprintf(log_buf + buf_idx, sizeof(log_buf) - buf_idx, "%s%s",
+				    throttling_events++ ? " and " : "",
+				    logging_label[throttler_idx].label);
+		if (buf_idx >= sizeof(log_buf)) {	/* output was truncated */
+			dev_err(adev->dev, "buffer overflow!\n");
+			log_buf[sizeof(log_buf) - 1] = '\0';
+			break;
+		}
+	}
+
+	dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n",
+			log_buf);
+}
+
 static const struct pptable_funcs arcturus_ppt_funcs = {
 	/* translate smu index into arcturus specific index */
 	.get_smu_msg_index = arcturus_get_smu_msg_index,
@@ -2423,6 +2466,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
 	.get_pptable_power_limit = arcturus_get_pptable_power_limit,
 	.set_df_cstate = arcturus_set_df_cstate,
 	.allow_xgmi_power_down = arcturus_allow_xgmi_power_down,
+	.log_thermal_throttling_event = arcturus_log_thermal_throttling_event,
 };
 
 void arcturus_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index bac7e13dc801..1223d298c03f 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -575,6 +575,7 @@ struct pptable_funcs {
 	uint32_t (*get_pptable_power_limit)(struct smu_context *smu);
 	int (*disable_umc_cdr_12gbps_workaround)(struct smu_context *smu);
 	int (*set_power_source)(struct smu_context *smu, enum smu_power_src_type power_src);
+	void (*log_thermal_throttling_event)(struct smu_context *smu);
 };
 
 int smu_load_microcode(struct smu_context *smu);
diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h
index c97444841abc..6b627d6e80b1 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_internal.h
+++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h
@@ -219,4 +219,6 @@ static inline int smu_send_smc_msg(struct smu_context *smu, enum smu_message_typ
 #define smu_i2c_eeprom_fini(smu, control) \
 		((smu)->ppt_funcs->i2c_eeprom_fini ? (smu)->ppt_funcs->i2c_eeprom_fini((control)) : 0)
 
+#define smu_log_thermal_throttling(smu) /* no-op when the ASIC provides no callback */ \
+		((smu)->ppt_funcs->log_thermal_throttling_event ? (smu)->ppt_funcs->log_thermal_throttling_event((smu)) : (void)0)
 #endif
-- 
2.26.2



More information about the amd-gfx mailing list