[PATCH 159/159] drm/amd/pm: add new data in metrics table
Alex Deucher
alexander.deucher at amd.com
Wed Feb 24 22:18:59 UTC 2021
From: Kenneth Feng <kenneth.feng at amd.com>
Export new data in the metrics table: the gfx and memory utilization
counters, and the temperature of each HBM instance.
v2:
change the metrics table version to v1.1
v3:
fix the coding style
Signed-off-by: Kenneth Feng <kenneth.feng at amd.com>
Reviewed-by: Kevin Wang <kevin1.wang at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
.../gpu/drm/amd/include/kgd_pp_interface.h | 11 ++++
drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 2 +
.../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 55 +++++++++++--------
.../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 12 ++++
4 files changed, 56 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index c6b5c789abf0..ce01f012963b 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -227,6 +227,8 @@ enum pp_df_cstate {
#define XGMI_MODE_PSTATE_D3 0
#define XGMI_MODE_PSTATE_D0 1
+#define NUM_HBM_INSTANCES 4
+
struct seq_file;
enum amd_pp_clock_type;
struct amd_pp_simple_clock_info;
@@ -389,6 +391,15 @@ struct gpu_metrics_v1_0 {
uint8_t pcie_link_speed; // in 0.1 GT/s
};
+struct gpu_metrics_v1_1 { // v1.0 metrics plus gfx/mem activity counters and per-HBM temperatures
+ struct gpu_metrics_v1_0 v1_0; // v1.0 fields (including common_header), embedded as the first member
+
+ uint32_t gfx_activity_acc; // aldebaran fills this from SmuMetrics_t.GfxBusyAcc
+ uint32_t mem_activity_acc; // aldebaran fills this from SmuMetrics_t.DramBusyAcc
+
+ uint16_t temperature_hbm[NUM_HBM_INSTANCES]; // one entry per HBM instance; aldebaran copies TemperatureAllHBM[i]
+};
+
struct gpu_metrics_v2_0 {
struct metrics_table_header common_header;
diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
index ef9dad9a51ff..b9f87285875f 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
@@ -267,6 +267,8 @@ int smu_v13_0_get_current_pcie_link_speed(struct smu_context *smu);
void smu_v13_0_init_gpu_metrics_v1_0(struct gpu_metrics_v1_0 *gpu_metrics);
+void smu_v13_0_init_gpu_metrics_v1_1(struct gpu_metrics_v1_1 *gpu_metrics);
+
int smu_v13_0_gfx_ulv_control(struct smu_context *smu,
bool enablement);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index c463af1cafa0..1727de2b31bd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1296,10 +1296,11 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
void **table)
{
struct smu_table_context *smu_table = &smu->smu_table;
- struct gpu_metrics_v1_0 *gpu_metrics =
- (struct gpu_metrics_v1_0 *)smu_table->gpu_metrics_table;
+ struct gpu_metrics_v1_1 *gpu_metrics =
+ (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
SmuMetrics_t metrics;
int ret = 0;
+ int i;
ret = smu_cmn_get_metrics_table(smu,
&metrics,
@@ -1307,40 +1308,46 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
if (ret)
return ret;
- smu_v13_0_init_gpu_metrics_v1_0(gpu_metrics);
+ smu_v13_0_init_gpu_metrics_v1_1(gpu_metrics);
- gpu_metrics->temperature_edge = metrics.TemperatureEdge;
- gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
- gpu_metrics->temperature_mem = metrics.TemperatureHBM;
- gpu_metrics->temperature_vrgfx = metrics.TemperatureVrGfx;
- gpu_metrics->temperature_vrsoc = metrics.TemperatureVrSoc;
- gpu_metrics->temperature_vrmem = metrics.TemperatureVrMem;
+ gpu_metrics->v1_0.temperature_edge = metrics.TemperatureEdge;
+ gpu_metrics->v1_0.temperature_hotspot = metrics.TemperatureHotspot;
+ gpu_metrics->v1_0.temperature_mem = metrics.TemperatureHBM;
+ gpu_metrics->v1_0.temperature_vrgfx = metrics.TemperatureVrGfx;
+ gpu_metrics->v1_0.temperature_vrsoc = metrics.TemperatureVrSoc;
+ gpu_metrics->v1_0.temperature_vrmem = metrics.TemperatureVrMem;
- gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity;
- gpu_metrics->average_umc_activity = metrics.AverageUclkActivity;
+ gpu_metrics->v1_0.average_gfx_activity = metrics.AverageGfxActivity;
+ gpu_metrics->v1_0.average_umc_activity = metrics.AverageUclkActivity;
- gpu_metrics->average_socket_power = metrics.AverageSocketPower;
+ gpu_metrics->v1_0.average_socket_power = metrics.AverageSocketPower;
- gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequency;
- gpu_metrics->average_socclk_frequency = metrics.AverageSocclkFrequency;
- gpu_metrics->average_uclk_frequency = metrics.AverageUclkFrequency;
+ gpu_metrics->v1_0.average_gfxclk_frequency = metrics.AverageGfxclkFrequency;
+ gpu_metrics->v1_0.average_socclk_frequency = metrics.AverageSocclkFrequency;
+ gpu_metrics->v1_0.average_uclk_frequency = metrics.AverageUclkFrequency;
- gpu_metrics->current_gfxclk = metrics.CurrClock[PPCLK_GFXCLK];
- gpu_metrics->current_socclk = metrics.CurrClock[PPCLK_SOCCLK];
- gpu_metrics->current_uclk = metrics.CurrClock[PPCLK_UCLK];
- gpu_metrics->current_vclk0 = metrics.CurrClock[PPCLK_VCLK];
- gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
+ gpu_metrics->v1_0.current_gfxclk = metrics.CurrClock[PPCLK_GFXCLK];
+ gpu_metrics->v1_0.current_socclk = metrics.CurrClock[PPCLK_SOCCLK];
+ gpu_metrics->v1_0.current_uclk = metrics.CurrClock[PPCLK_UCLK];
+ gpu_metrics->v1_0.current_vclk0 = metrics.CurrClock[PPCLK_VCLK];
+ gpu_metrics->v1_0.current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
- gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+ gpu_metrics->v1_0.throttle_status = metrics.ThrottlerStatus;
- gpu_metrics->pcie_link_width =
+ gpu_metrics->v1_0.pcie_link_width =
smu_v13_0_get_current_pcie_link_width(smu);
- gpu_metrics->pcie_link_speed =
+ gpu_metrics->v1_0.pcie_link_speed =
aldebaran_get_current_pcie_link_speed(smu);
+ gpu_metrics->gfx_activity_acc = metrics.GfxBusyAcc;
+ gpu_metrics->mem_activity_acc = metrics.DramBusyAcc;
+
+ for (i = 0; i < NUM_HBM_INSTANCES; i++)
+ gpu_metrics->temperature_hbm[i] = metrics.TemperatureAllHBM[i];
+
*table = (void *)gpu_metrics;
- return sizeof(struct gpu_metrics_v1_0);
+ return sizeof(*gpu_metrics);
}
static bool aldebaran_is_mode1_reset_supported(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index ce160f233323..cde299c110b4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -1820,3 +1820,15 @@ void smu_v13_0_init_gpu_metrics_v1_0(struct gpu_metrics_v1_0 *gpu_metrics)
gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
}
+
+void smu_v13_0_init_gpu_metrics_v1_1(struct gpu_metrics_v1_1 *gpu_metrics)
+{ /* Reset the whole v1.1 table, then fill in its header and system_clock_counter. */
+ memset(gpu_metrics, 0xFF, sizeof(struct gpu_metrics_v1_1)); /* every byte 0xFF: fields not assigned afterwards stay all-ones */
+
+ gpu_metrics->v1_0.common_header.structure_size =
+ sizeof(struct gpu_metrics_v1_1); /* size of the full v1.1 struct, not just the embedded v1.0 part */
+ gpu_metrics->v1_0.common_header.format_revision = 1; /* format_revision.content_revision = 1.1, i.e. metrics table v1.1 */
+ gpu_metrics->v1_0.common_header.content_revision = 1;
+
+ gpu_metrics->v1_0.system_clock_counter = ktime_get_boottime_ns(); /* value of ktime_get_boottime_ns() at init time */
+}
--
2.29.2
More information about the amd-gfx
mailing list