[PATCH] drm/amd/pm: Fill metrics data for SMUv13.0.6
Chen, Guchun
Guchun.Chen at amd.com
Fri Jun 2 05:56:03 UTC 2023
[AMD Official Use Only - General]
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Lijo
> Lazar
> Sent: Friday, June 2, 2023 12:00 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Zhang, Hawking
> <Hawking.Zhang at amd.com>
> Subject: [PATCH] drm/amd/pm: Fill metrics data for SMUv13.0.6
>
> Populate metrics data table for SMU v13.0.6. Add PCIe link speed/width
> information also.
>
> Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
> ---
> .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 108 +++++++++++-------
> 1 file changed, 67 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index 75255e0baf91..4ff5a66d446a 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -80,7 +80,10 @@
> /* possible frequency drift (1Mhz) */
> #define EPSILON 1
>
> -#define smnPCIE_ESM_CTRL 0x111003D0
> +#define smnPCIE_ESM_CTRL 0x193D0
> +#define smnPCIE_LC_LINK_WIDTH_CNTL 0x1ab40288
> +#define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK 0x00000070L
> +#define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT 0x4
I see that the same macro definitions are already present in smu_v13_0.c and smu_v11_0.c. Would it be better to move them into a common place, so that they can be shared and also scale to later ASICs?
Regards,
Guchun
> static const struct cmn2asic_msg_mapping
> smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
> MSG_MAP(TestMessage,
> PPSMC_MSG_TestMessage, 0),
> @@ -197,6 +200,7 @@ struct PPTable_t {
> };
>
> #define SMUQ10_TO_UINT(x) ((x) >> 10)
> +#define SMUQ16_TO_UINT(x) ((x) >> 16)
>
> struct smu_v13_0_6_dpm_map {
> enum smu_clk_type clk_type;
> @@ -1935,6 +1939,16 @@ static void
> smu_v13_0_6_log_thermal_throttling_event(struct smu_context *smu)
>
> smu_v13_0_6_throttler_map));
> }
>
> +static int
> +smu_v13_0_6_get_current_pcie_link_width_level(struct smu_context *smu)
> +{
> + struct amdgpu_device *adev = smu->adev;
> +
> + return (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) &
> + PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) >>
> + PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT;
> +}
> +
> static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu)
> {
> struct amdgpu_device *adev = smu->adev;
> @@ -1953,8 +1967,12 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
> struct smu_table_context *smu_table = &smu->smu_table;
> struct gpu_metrics_v1_3 *gpu_metrics =
> (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
> + struct amdgpu_device *adev = smu->adev;
> + int ret = 0, inst0, xcc0;
> MetricsTable_t *metrics;
> - int i, ret = 0;
> +
> + inst0 = adev->sdma.instance[0].aid_id;
> + xcc0 = GET_INST(GC, 0);
>
> metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
> ret = smu_v13_0_6_get_metrics_table(smu, metrics, true);
> @@ -1963,51 +1981,59 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
>
> smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
>
> - /* TODO: Decide on how to fill in zero value fields */
> - gpu_metrics->temperature_edge = 0;
> - gpu_metrics->temperature_hotspot = 0;
> - gpu_metrics->temperature_mem = 0;
> - gpu_metrics->temperature_vrgfx = 0;
> - gpu_metrics->temperature_vrsoc = 0;
> - gpu_metrics->temperature_vrmem = 0;
> -
> - gpu_metrics->average_gfx_activity = 0;
> - gpu_metrics->average_umc_activity = 0;
> - gpu_metrics->average_mm_activity = 0;
> -
> - gpu_metrics->average_socket_power = 0;
> - gpu_metrics->energy_accumulator = 0;
> -
> - gpu_metrics->average_gfxclk_frequency = 0;
> - gpu_metrics->average_socclk_frequency = 0;
> - gpu_metrics->average_uclk_frequency = 0;
> - gpu_metrics->average_vclk0_frequency = 0;
> - gpu_metrics->average_dclk0_frequency = 0;
> -
> - gpu_metrics->current_gfxclk = 0;
> - gpu_metrics->current_socclk = 0;
> - gpu_metrics->current_uclk = 0;
> - gpu_metrics->current_vclk0 = 0;
> - gpu_metrics->current_dclk0 = 0;
> -
> + gpu_metrics->temperature_hotspot =
> + SMUQ10_TO_UINT(metrics->MaxSocketTemperature);
> + /* Individual HBM stack temperature is not reported */
> + gpu_metrics->temperature_mem =
> + SMUQ10_TO_UINT(metrics->MaxHbmTemperature);
> + /* Reports max temperature of all voltage rails */
> + gpu_metrics->temperature_vrsoc =
> + SMUQ10_TO_UINT(metrics->MaxVrTemperature);
> +
> + gpu_metrics->average_gfx_activity =
> + SMUQ10_TO_UINT(metrics->SocketGfxBusy);
> + gpu_metrics->average_umc_activity =
> + SMUQ10_TO_UINT(metrics->DramBandwidthUtilization);
> +
> + gpu_metrics->average_socket_power =
> + SMUQ10_TO_UINT(metrics->SocketPower);
> + gpu_metrics->energy_accumulator =
> + SMUQ16_TO_UINT(metrics->SocketEnergyAcc);
> +
> + gpu_metrics->current_gfxclk =
> + SMUQ10_TO_UINT(metrics->GfxclkFrequency[xcc0]);
> + gpu_metrics->current_socclk =
> + SMUQ10_TO_UINT(metrics->SocclkFrequency[inst0]);
> + gpu_metrics->current_uclk = SMUQ10_TO_UINT(metrics->UclkFrequency);
> + gpu_metrics->current_vclk0 =
> + SMUQ10_TO_UINT(metrics->VclkFrequency[inst0]);
> + gpu_metrics->current_dclk0 =
> + SMUQ10_TO_UINT(metrics->DclkFrequency[inst0]);
> +
> + gpu_metrics->average_gfxclk_frequency = gpu_metrics->current_gfxclk;
> + gpu_metrics->average_socclk_frequency = gpu_metrics->current_socclk;
> + gpu_metrics->average_uclk_frequency = gpu_metrics->current_uclk;
> + gpu_metrics->average_vclk0_frequency = gpu_metrics->current_vclk0;
> + gpu_metrics->average_dclk0_frequency = gpu_metrics->current_dclk0;
> +
> + /* Throttle status is not reported through metrics now */
> gpu_metrics->throttle_status = 0;
> - gpu_metrics->indep_throttle_status = smu_cmn_get_indep_throttler_status(
> - gpu_metrics->throttle_status, smu_v13_0_6_throttler_map);
> -
> - gpu_metrics->current_fan_speed = 0;
>
> - gpu_metrics->pcie_link_width = 0;
> - gpu_metrics->pcie_link_speed = smu_v13_0_6_get_current_pcie_link_speed(smu);
> + if (!(adev->flags & AMD_IS_APU)) {
> + gpu_metrics->pcie_link_width =
> + smu_v13_0_6_get_current_pcie_link_width_level(smu);
> + gpu_metrics->pcie_link_speed =
> + smu_v13_0_6_get_current_pcie_link_speed(smu);
> + }
>
> gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
>
> - gpu_metrics->gfx_activity_acc = 0;
> - gpu_metrics->mem_activity_acc = 0;
> -
> - for (i = 0; i < NUM_HBM_INSTANCES; i++)
> - gpu_metrics->temperature_hbm[i] = 0;
> + gpu_metrics->gfx_activity_acc =
> + SMUQ10_TO_UINT(metrics->SocketGfxBusyAcc);
> + gpu_metrics->mem_activity_acc =
> + SMUQ10_TO_UINT(metrics->DramBandwidthUtilizationAcc);
>
> - gpu_metrics->firmware_timestamp = 0;
> + gpu_metrics->firmware_timestamp = metrics->Timestamp;
>
> *table = (void *)gpu_metrics;
> kfree(metrics);
> --
> 2.25.1
More information about the amd-gfx
mailing list