[PATCH 1/4] drm/amd/powerplay: update how to use metrics table on Arcturus

Evan Quan evan.quan at amd.com
Fri Jun 5 06:07:37 UTC 2020


Retrieve only the metrics data of interest instead of the whole
metrics table. This way, the memory copy can be dropped.

Change-Id: Ice2b2ba4647301119130be0ba65bb587f19d38ae
Signed-off-by: Evan Quan <evan.quan at amd.com>
---
 drivers/gpu/drm/amd/powerplay/arcturus_ppt.c  | 213 ++++++++++++------
 .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h    |  26 +++
 2 files changed, 174 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
index 05abfdedcf37..6f859a370c5f 100644
--- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
@@ -906,17 +906,23 @@ static int arcturus_get_thermal_temperature_range(struct smu_context *smu,
 	return 0;
 }
 
-static int arcturus_get_metrics_table(struct smu_context *smu,
-				      SmuMetrics_t *metrics_table)
+static int arcturus_get_smu_metrics_data(struct smu_context *smu,
+					 MetricsMember_t member,
+					 uint32_t *value)
 {
 	struct smu_table_context *smu_table= &smu->smu_table;
+	SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table;
 	int ret = 0;
 
 	mutex_lock(&smu->metrics_lock);
+
 	if (!smu_table->metrics_time ||
-	     time_after(jiffies, smu_table->metrics_time + HZ / 1000)) {
-		ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
-				(void *)smu_table->metrics_table, false);
+	     time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(1))) {
+		ret = smu_update_table(smu,
+				       SMU_TABLE_SMU_METRICS,
+				       0,
+				       smu_table->metrics_table,
+				       false);
 		if (ret) {
 			pr_info("Failed to export SMU metrics table!\n");
 			mutex_unlock(&smu->metrics_lock);
@@ -925,7 +931,87 @@ static int arcturus_get_metrics_table(struct smu_context *smu,
 		smu_table->metrics_time = jiffies;
 	}
 
-	memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
+	switch (member) {
+	case METRICS_CURR_GFXCLK:
+		*value = metrics->CurrClock[PPCLK_GFXCLK];
+		break;
+	case METRICS_CURR_SOCCLK:
+		*value = metrics->CurrClock[PPCLK_SOCCLK];
+		break;
+	case METRICS_CURR_UCLK:
+		*value = metrics->CurrClock[PPCLK_UCLK];
+		break;
+	case METRICS_CURR_VCLK:
+		*value = metrics->CurrClock[PPCLK_VCLK];
+		break;
+	case METRICS_CURR_DCLK:
+		*value = metrics->CurrClock[PPCLK_DCLK];
+		break;
+	case METRICS_CURR_FCLK:
+		*value = metrics->CurrClock[PPCLK_FCLK];
+		break;
+	case METRICS_AVERAGE_GFXCLK:
+		*value = metrics->AverageGfxclkFrequency;
+		break;
+	case METRICS_AVERAGE_SOCCLK:
+		*value = metrics->AverageSocclkFrequency;
+		break;
+	case METRICS_AVERAGE_UCLK:
+		*value = metrics->AverageUclkFrequency;
+		break;
+	case METRICS_AVERAGE_VCLK:
+		*value = metrics->AverageVclkFrequency;
+		break;
+	case METRICS_AVERAGE_DCLK:
+		*value = metrics->AverageDclkFrequency;
+		break;
+	case METRICS_AVERAGE_GFXACTIVITY:
+		*value = metrics->AverageGfxActivity;
+		break;
+	case METRICS_AVERAGE_MEMACTIVITY:
+		*value = metrics->AverageUclkActivity;
+		break;
+	case METRICS_AVERAGE_VCNACTIVITY:
+		*value = metrics->VcnActivityPercentage;
+		break;
+	case METRICS_AVERAGE_SOCKETPOWER:
+		*value = metrics->AverageSocketPower << 8;
+		break;
+	case METRICS_TEMPERATURE_EDGE:
+		*value = metrics->TemperatureEdge *
+			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		break;
+	case METRICS_TEMPERATURE_HOTSPOT:
+		*value = metrics->TemperatureHotspot *
+			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		break;
+	case METRICS_TEMPERATURE_MEM:
+		*value = metrics->TemperatureHBM *
+			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		break;
+	case METRICS_TEMPERATURE_VRGFX:
+		*value = metrics->TemperatureVrGfx *
+			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		break;
+	case METRICS_TEMPERATURE_VRSOC:
+		*value = metrics->TemperatureVrSoc *
+			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		break;
+	case METRICS_TEMPERATURE_VRMEM:
+		*value = metrics->TemperatureVrMem *
+			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		break;
+	case METRICS_THROTTLER_STATUS:
+		*value = metrics->ThrottlerStatus;
+		break;
+	case METRICS_CURR_FANSPEED:
+		*value = metrics->CurrFanSpeed;
+		break;
+	default:
+		*value = UINT_MAX;
+		break;
+	}
+
 	mutex_unlock(&smu->metrics_lock);
 
 	return ret;
@@ -935,81 +1021,71 @@ static int arcturus_get_current_activity_percent(struct smu_context *smu,
 						 enum amd_pp_sensors sensor,
 						 uint32_t *value)
 {
-	SmuMetrics_t metrics;
 	int ret = 0;
 
 	if (!value)
 		return -EINVAL;
 
-	ret = arcturus_get_metrics_table(smu, &metrics);
-	if (ret)
-		return ret;
-
 	switch (sensor) {
 	case AMDGPU_PP_SENSOR_GPU_LOAD:
-		*value = metrics.AverageGfxActivity;
+		ret = arcturus_get_smu_metrics_data(smu,
+						    METRICS_AVERAGE_GFXACTIVITY,
+						    value);
 		break;
 	case AMDGPU_PP_SENSOR_MEM_LOAD:
-		*value = metrics.AverageUclkActivity;
+		ret = arcturus_get_smu_metrics_data(smu,
+						    METRICS_AVERAGE_MEMACTIVITY,
+						    value);
 		break;
 	default:
 		pr_err("Invalid sensor for retrieving clock activity\n");
 		return -EINVAL;
 	}
 
-	return 0;
+	return ret;
 }
 
 static int arcturus_get_gpu_power(struct smu_context *smu, uint32_t *value)
 {
-	SmuMetrics_t metrics;
-	int ret = 0;
-
 	if (!value)
 		return -EINVAL;
 
-	ret = arcturus_get_metrics_table(smu, &metrics);
-	if (ret)
-		return ret;
-
-	*value = metrics.AverageSocketPower << 8;
-
-	return 0;
+	return arcturus_get_smu_metrics_data(smu,
+					     METRICS_AVERAGE_SOCKETPOWER,
+					     value);
 }
 
 static int arcturus_thermal_get_temperature(struct smu_context *smu,
 					    enum amd_pp_sensors sensor,
 					    uint32_t *value)
 {
-	SmuMetrics_t metrics;
 	int ret = 0;
 
 	if (!value)
 		return -EINVAL;
 
-	ret = arcturus_get_metrics_table(smu, &metrics);
-	if (ret)
-		return ret;
-
 	switch (sensor) {
 	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
-		*value = metrics.TemperatureHotspot *
-			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		ret = arcturus_get_smu_metrics_data(smu,
+						    METRICS_TEMPERATURE_HOTSPOT,
+						    value);
 		break;
 	case AMDGPU_PP_SENSOR_EDGE_TEMP:
-		*value = metrics.TemperatureEdge *
-			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		ret = arcturus_get_smu_metrics_data(smu,
+						    METRICS_TEMPERATURE_EDGE,
+						    value);
 		break;
 	case AMDGPU_PP_SENSOR_MEM_TEMP:
-		*value = metrics.TemperatureHBM *
-			SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		ret = arcturus_get_smu_metrics_data(smu,
+						    METRICS_TEMPERATURE_MEM,
+						    value);
 		break;
 	default:
 		pr_err("Invalid sensor for retrieving temp\n");
 		return -EINVAL;
 	}
 
-	return 0;
+	return ret;
 }
 
 static int arcturus_read_sensor(struct smu_context *smu,
@@ -1061,19 +1137,12 @@ static int arcturus_read_sensor(struct smu_context *smu,
 static int arcturus_get_fan_speed_rpm(struct smu_context *smu,
 				      uint32_t *speed)
 {
-	SmuMetrics_t metrics;
-	int ret = 0;
-
 	if (!speed)
 		return -EINVAL;
 
-	ret = arcturus_get_metrics_table(smu, &metrics);
-	if (ret)
-		return ret;
-
-	*speed = metrics.CurrFanSpeed;
-
-	return ret;
+	return arcturus_get_smu_metrics_data(smu,
+					     METRICS_CURR_FANSPEED,
+					     speed);
 }
 
 static int arcturus_get_fan_speed_percent(struct smu_context *smu,
@@ -1100,8 +1169,8 @@ static int arcturus_get_current_clk_freq_by_table(struct smu_context *smu,
 				       enum smu_clk_type clk_type,
 				       uint32_t *value)
 {
-	static SmuMetrics_t metrics;
-	int ret = 0, clk_id = 0;
+	MetricsMember_t member_type;
+	int clk_id = 0;
 
 	if (!value)
 		return -EINVAL;
@@ -1110,41 +1179,53 @@ static int arcturus_get_current_clk_freq_by_table(struct smu_context *smu,
 	if (clk_id < 0)
 		return -EINVAL;
 
-	ret = arcturus_get_metrics_table(smu, &metrics);
-	if (ret)
-		return ret;
-
 	switch (clk_id) {
 	case PPCLK_GFXCLK:
 		/*
 		 * CurrClock[clk_id] can provide accurate
 		 *   output only when the dpm feature is enabled.
 		 * We can use Average_* for dpm disabled case.
-		 *   But this is available for gfxclk/uclk/socclk.
+		 *   But this is available for gfxclk/uclk/socclk/vclk/dclk.
 		 */
 		if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT))
-			*value = metrics.CurrClock[PPCLK_GFXCLK];
+			member_type = METRICS_CURR_GFXCLK;
 		else
-			*value = metrics.AverageGfxclkFrequency;
+			member_type = METRICS_AVERAGE_GFXCLK;
 		break;
 	case PPCLK_UCLK:
 		if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT))
-			*value = metrics.CurrClock[PPCLK_UCLK];
+			member_type = METRICS_CURR_UCLK;
 		else
-			*value = metrics.AverageUclkFrequency;
+			member_type = METRICS_AVERAGE_UCLK;
 		break;
 	case PPCLK_SOCCLK:
 		if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT))
-			*value = metrics.CurrClock[PPCLK_SOCCLK];
+			member_type = METRICS_CURR_SOCCLK;
 		else
-			*value = metrics.AverageSocclkFrequency;
+			member_type = METRICS_AVERAGE_SOCCLK;
 		break;
-	default:
-		*value = metrics.CurrClock[clk_id];
+	case PPCLK_VCLK:
+		if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT))
+			member_type = METRICS_CURR_VCLK;
+		else
+			member_type = METRICS_AVERAGE_VCLK;
+		break;
+	case PPCLK_DCLK:
+		if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT))
+			member_type = METRICS_CURR_DCLK;
+		else
+			member_type = METRICS_AVERAGE_DCLK;
 		break;
+	case PPCLK_FCLK:
+		member_type = METRICS_CURR_FCLK;
+		break;
+	default:
+		return -EINVAL;
 	}
 
-	return ret;
+	return arcturus_get_smu_metrics_data(smu,
+					     member_type,
+					     value);
 }
 
 static uint32_t arcturus_find_lowest_dpm_level(struct arcturus_single_dpm_table *table)
@@ -2401,15 +2482,17 @@ static void arcturus_log_thermal_throttling_event(struct smu_context *smu)
 {
 	int throttler_idx, throtting_events = 0, buf_idx = 0;
 	struct amdgpu_device *adev = smu->adev;
-	SmuMetrics_t metrics;
+	uint32_t throttler_status;
 	char log_buf[256];
 
-	arcturus_get_metrics_table(smu, &metrics);
+	arcturus_get_smu_metrics_data(smu,
+				      METRICS_THROTTLER_STATUS,
+				      &throttler_status);
 
 	memset(log_buf, 0, sizeof(log_buf));
 	for (throttler_idx = 0; throttler_idx < ARRAY_SIZE(logging_label);
 	     throttler_idx++) {
-		if (metrics.ThrottlerStatus & logging_label[throttler_idx].feature_mask) {
+		if (throttler_status & logging_label[throttler_idx].feature_mask) {
 			throtting_events++;
 			buf_idx += snprintf(log_buf + buf_idx,
 					    sizeof(log_buf) - buf_idx,
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index 10234babfbbd..1bb07318efa6 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -574,6 +574,32 @@ struct pptable_funcs {
 	void (*log_thermal_throttling_event)(struct smu_context *smu);
 };
 
+typedef enum {
+	METRICS_CURR_GFXCLK,
+	METRICS_CURR_SOCCLK,
+	METRICS_CURR_UCLK,
+	METRICS_CURR_VCLK,
+	METRICS_CURR_DCLK,
+	METRICS_CURR_FCLK,
+	METRICS_AVERAGE_GFXCLK,
+	METRICS_AVERAGE_SOCCLK,
+	METRICS_AVERAGE_UCLK,
+	METRICS_AVERAGE_VCLK,
+	METRICS_AVERAGE_DCLK,
+	METRICS_AVERAGE_GFXACTIVITY,
+	METRICS_AVERAGE_MEMACTIVITY,
+	METRICS_AVERAGE_VCNACTIVITY,
+	METRICS_AVERAGE_SOCKETPOWER,
+	METRICS_TEMPERATURE_EDGE,
+	METRICS_TEMPERATURE_HOTSPOT,
+	METRICS_TEMPERATURE_MEM,
+	METRICS_TEMPERATURE_VRGFX,
+	METRICS_TEMPERATURE_VRSOC,
+	METRICS_TEMPERATURE_VRMEM,
+	METRICS_THROTTLER_STATUS,
+	METRICS_CURR_FANSPEED,
+} MetricsMember_t;
+
 int smu_load_microcode(struct smu_context *smu);
 
 int smu_check_fw_status(struct smu_context *smu);
-- 
2.27.0



More information about the amd-gfx mailing list