[PATCH v2 3/3] drm/amd/pm: Use gpu_metrics_v1_4 for SMUv13.0.6

Kamal, Asad Asad.Kamal at amd.com
Mon Oct 9 14:32:22 UTC 2023


[AMD Official Use Only - General]

-----Original Message-----
From: Lazar, Lijo <Lijo.Lazar at amd.com>
Sent: Monday, October 9, 2023 7:59 PM
To: Kamal, Asad <Asad.Kamal at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Ma, Le <Le.Ma at amd.com>; Zhang, Morris <Shiwu.Zhang at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>
Subject: Re: [PATCH v2 3/3] drm/amd/pm: Use gpu_metrics_v1_4 for SMUv13.0.6



On 10/9/2023 5:28 PM, Lazar, Lijo wrote:
>
>
> On 10/6/2023 8:11 PM, Asad Kamal wrote:
>> Use gpu_metrics_v1_4 for SMUv13.0.6 to fill gpu metric info
>>
>> Signed-off-by: Asad Kamal <asad.kamal at amd.com>
>
> Series is:
>      Reviewed-by: Lijo Lazar <lijo.lazar at amd.com>
>

On second thought, since there is no FW release yet with FW metrics table v9 support, I suggest dropping patch 1 and the pcie_bandwidth_inst value assignment. Will keep the field as a placeholder till there is a FW update.
[Kamal, Asad] Will send a v3 with the changes.

Thanks & Regards
Asad

Thanks,
Lijo

> Thanks,
> Lijo
>
>> ---
>>   .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 67
>> ++++++++++++-------
>>   1 file changed, 43 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
>> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
>> index ce971a93d28b..3a07f1c95e45 100644
>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
>> @@ -279,7 +279,7 @@ static int smu_v13_0_6_tables_init(struct
>> smu_context *smu)
>>           return -ENOMEM;
>>       smu_table->metrics_time = 0;
>> -    smu_table->gpu_metrics_table_size = sizeof(struct
>> gpu_metrics_v1_3);
>> +    smu_table->gpu_metrics_table_size = sizeof(struct
>> +gpu_metrics_v1_4);
>>       smu_table->gpu_metrics_table =
>>           kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
>>       if (!smu_table->gpu_metrics_table) { @@ -1969,22 +1969,19 @@
>> static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context
>> *smu)
>>   static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu,
>> void **table)
>>   {
>>       struct smu_table_context *smu_table = &smu->smu_table;
>> -    struct gpu_metrics_v1_3 *gpu_metrics =
>> -        (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
>> +    struct gpu_metrics_v1_4 *gpu_metrics =
>> +        (struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table;
>>       struct amdgpu_device *adev = smu->adev;
>> -    int ret = 0, inst0, xcc0;
>> +    int ret = 0, xcc_id, inst, i;
>>       MetricsTable_t *metrics;
>>       u16 link_width_level;
>> -    inst0 = adev->sdma.instance[0].aid_id;
>> -    xcc0 = GET_INST(GC, 0);
>> -
>>       metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
>>       ret = smu_v13_0_6_get_metrics_table(smu, metrics, true);
>>       if (ret)
>>           return ret;
>> -    smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
>> +    smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4);
>>       gpu_metrics->temperature_hotspot =
>>           SMUQ10_ROUND(metrics->MaxSocketTemperature);
>> @@ -2000,30 +1997,38 @@ static ssize_t
>> smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
>>       gpu_metrics->average_umc_activity =
>>           SMUQ10_ROUND(metrics->DramBandwidthUtilization);
>> -    gpu_metrics->average_socket_power =
>> +    gpu_metrics->curr_socket_power =
>>           SMUQ10_ROUND(metrics->SocketPower);
>>       /* Energy counter reported in 15.259uJ (2^-16) units */
>>       gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
>> -    gpu_metrics->current_gfxclk =
>> -        SMUQ10_ROUND(metrics->GfxclkFrequency[xcc0]);
>> -    gpu_metrics->current_socclk =
>> -        SMUQ10_ROUND(metrics->SocclkFrequency[inst0]);
>> -    gpu_metrics->current_uclk =
>> SMUQ10_ROUND(metrics->UclkFrequency);
>> -    gpu_metrics->current_vclk0 =
>> -        SMUQ10_ROUND(metrics->VclkFrequency[inst0]);
>> -    gpu_metrics->current_dclk0 =
>> -        SMUQ10_ROUND(metrics->DclkFrequency[inst0]);
>> +    for (i = 0; i < MAX_GFX_CLKS; i++) {
>> +        xcc_id = GET_INST(GC, i);
>> +        if (xcc_id >= 0)
>> +            gpu_metrics->current_gfxclk[i] =
>> +                SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]);
>> +
>> +        if (i < MAX_CLKS) {
>> +            gpu_metrics->current_socclk[i] =
>> +                SMUQ10_ROUND(metrics->SocclkFrequency[i]);
>> +            inst = GET_INST(VCN, i);
>> +            if (inst >= 0) {
>> +                gpu_metrics->current_vclk0[i] =
>> +                    SMUQ10_ROUND(metrics->VclkFrequency[inst]);
>> +                gpu_metrics->current_dclk0[i] =
>> +                    SMUQ10_ROUND(metrics->DclkFrequency[inst]);
>> +            }
>> +        }
>> +    }
>> -    gpu_metrics->average_gfxclk_frequency =
>> gpu_metrics->current_gfxclk;
>> -    gpu_metrics->average_socclk_frequency =
>> gpu_metrics->current_socclk;
>> -    gpu_metrics->average_uclk_frequency = gpu_metrics->current_uclk;
>> -    gpu_metrics->average_vclk0_frequency =
>> gpu_metrics->current_vclk0;
>> -    gpu_metrics->average_dclk0_frequency =
>> gpu_metrics->current_dclk0;
>> +    gpu_metrics->current_uclk =
>> +SMUQ10_ROUND(metrics->UclkFrequency);
>>       /* Throttle status is not reported through metrics now */
>>       gpu_metrics->throttle_status = 0;
>> +    /* Clock Lock Status. Each bit corresponds to each GFXCLK
>> instance */
>> +    gpu_metrics->gfxclk_lock_status = metrics->GfxLockXCDMak >>
>> GET_INST(GC, 0);
>> +
>>       if (!(adev->flags & AMD_IS_APU)) {
>>           link_width_level =
>> smu_v13_0_6_get_current_pcie_link_width_level(smu);
>>           if (link_width_level > MAX_LINK_WIDTH) @@ -2033,6 +2038,10
>> @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context
>> *smu, void **table
>>               DECODE_LANE_WIDTH(link_width_level);
>>           gpu_metrics->pcie_link_speed =
>>               smu_v13_0_6_get_current_pcie_link_speed(smu);
>> +        gpu_metrics->pcie_bandwidth_acc =
>> +                SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]);
>> +        gpu_metrics->pcie_bandwidth_inst =
>> +                SMUQ10_ROUND(metrics->PcieBandwidth[0]);
>>       }
>>       gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); @@
>> -2042,12 +2051,22 @@ static ssize_t
>> smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
>>       gpu_metrics->mem_activity_acc =
>>           SMUQ10_ROUND(metrics->DramBandwidthUtilizationAcc);
>> +    for (i = 0; i < NUM_XGMI_LINKS; i++) {
>> +        gpu_metrics->xgmi_read_data_acc[i] =
>> +            SMUQ10_ROUND(metrics->XgmiReadDataSizeAcc[i]);
>> +        gpu_metrics->xgmi_write_data_acc[i] =
>> +            SMUQ10_ROUND(metrics->XgmiWriteDataSizeAcc[i]);
>> +    }
>> +
>> +    gpu_metrics->xgmi_link_width = SMUQ10_ROUND(metrics->XgmiWidth);
>> +    gpu_metrics->xgmi_link_speed =
>> +SMUQ10_ROUND(metrics->XgmiBitrate);
>> +
>>       gpu_metrics->firmware_timestamp = metrics->Timestamp;
>>       *table = (void *)gpu_metrics;
>>       kfree(metrics);
>> -    return sizeof(struct gpu_metrics_v1_3);
>> +    return sizeof(*gpu_metrics);
>>   }
>>   static int smu_v13_0_6_mode2_reset(struct smu_context *smu)


More information about the amd-gfx mailing list