[PATCH] drm/amd/swsmu: update smu v14_0_0 driver if version and metrics table

Deucher, Alexander Alexander.Deucher at amd.com
Mon Dec 4 15:55:36 UTC 2023


[Public]

> -----Original Message-----
> From: Ma, Li <Li.Ma at amd.com>
> Sent: Monday, December 4, 2023 3:52 AM
> To: Deucher, Alexander <Alexander.Deucher at amd.com>; amd-
> gfx at lists.freedesktop.org
> Cc: Koenig, Christian <Christian.Koenig at amd.com>; Zhang, Yifan
> <Yifan1.Zhang at amd.com>; Yu, Lang <Lang.Yu at amd.com>; Wang,
> Yang(Kevin) <KevinYang.Wang at amd.com>
> Subject: RE: [PATCH] drm/amd/swsmu: update smu v14_0_0 driver if version
> and metrics table
>
> [Public]
>
> Hi Alex,
>
> Sorry for the late reply. Only smu14 uses this gpu_metrics_v3_0 struct, and
> the patch has already been upstreamed. As far as I know, umr uses
> gpu_metrics_v3_0, and I will submit a patch to umr.
> Does this struct need to be backward compatible currently? If yes, I will
> revert this patch and add a new gpu_metrics_v3_1.

Ok.  If we don't yet have a released kernel with v3_0 support we should be fine.  I'll just include the updates in 6.7.

Alex

>
> Best Regards,
> Li
>
> -----Original Message-----
> From: Deucher, Alexander <Alexander.Deucher at amd.com>
> Sent: Tuesday, November 28, 2023 4:47 AM
> To: Ma, Li <Li.Ma at amd.com>; amd-gfx at lists.freedesktop.org
> Cc: Koenig, Christian <Christian.Koenig at amd.com>; Zhang, Yifan
> <Yifan1.Zhang at amd.com>; Yu, Lang <Lang.Yu at amd.com>
> Subject: RE: [PATCH] drm/amd/swsmu: update smu v14_0_0 driver if version
> and metrics table
>
> [Public]
>
> > -----Original Message-----
> > From: Ma, Li <Li.Ma at amd.com>
> > Sent: Thursday, November 23, 2023 5:07 AM
> > To: amd-gfx at lists.freedesktop.org
> > Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Koenig, Christian
> > <Christian.Koenig at amd.com>; Zhang, Yifan <Yifan1.Zhang at amd.com>; Yu,
> > Lang <Lang.Yu at amd.com>; Ma, Li <Li.Ma at amd.com>
> > Subject: [PATCH] drm/amd/swsmu: update smu v14_0_0 driver if version
> > and metrics table
> >
> > Increment the driver interface (IF) version and add new members to the
> > metrics table.
> >
> > Signed-off-by: Li Ma <li.ma at amd.com>
> > ---
> >  .../gpu/drm/amd/include/kgd_pp_interface.h    | 17 ++++
> >  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 10 +++
> >  .../inc/pmfw_if/smu14_driver_if_v14_0_0.h     | 77 +++++++++++--------
> >  .../drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c  | 46 ++++++++++-
> >  4 files changed, 115 insertions(+), 35 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> > b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> > index 8ebba87f4289..eaea1c65e526 100644
> > --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> > +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> > @@ -1086,6 +1086,10 @@ struct gpu_metrics_v3_0 {
> >       uint16_t                        average_dram_reads;
> >       /* time filtered DRAM write bandwidth [MB/sec] */
> >       uint16_t                        average_dram_writes;
> > +     /* time filtered IPU read bandwidth [MB/sec] */
> > +     uint16_t                        average_ipu_reads;
> > +     /* time filtered IPU write bandwidth [MB/sec] */
> > +     uint16_t                        average_ipu_writes;
> >
> >       /* Driver attached timestamp (in ns) */
> >       uint64_t                        system_clock_counter;
> > @@ -1105,6 +1109,8 @@ struct gpu_metrics_v3_0 {
> >       uint32_t                        average_all_core_power;
> >       /* calculated core power [mW] */
> >       uint16_t                        average_core_power[16];
> > +     /* time filtered total system power [mW] */
> > +     uint16_t                        average_sys_power;
> >       /* maximum IRM defined STAPM power limit [mW] */
> >       uint16_t                        stapm_power_limit;
> >       /* time filtered STAPM power limit [mW] */
> > @@ -1117,6 +1123,8 @@ struct gpu_metrics_v3_0 {
> >       uint16_t                        average_ipuclk_frequency;
> >       uint16_t                        average_fclk_frequency;
> >       uint16_t                        average_vclk_frequency;
> > +     uint16_t                        average_uclk_frequency;
> > +     uint16_t                        average_mpipu_frequency;
> >
> >       /* Current clocks */
> >       /* target core frequency [MHz] */
> > @@ -1126,6 +1134,15 @@ struct gpu_metrics_v3_0 {
> >       /* GFXCLK frequency limit enforced on GFX [MHz] */
> >       uint16_t                        current_gfx_maxfreq;
> >
> > +     /* Throttle Residency (ASIC dependent) */
> > +     uint32_t throttle_residency_prochot;
> > +     uint32_t throttle_residency_spl;
> > +     uint32_t throttle_residency_fppt;
> > +     uint32_t throttle_residency_sppt;
> > +     uint32_t throttle_residency_thm_core;
> > +     uint32_t throttle_residency_thm_gfx;
> > +     uint32_t throttle_residency_thm_soc;
> > +
> >       /* Metrics table alpha filter time constant [us] */
> >       uint32_t                        time_filter_alphavalue;
> >  };
>
> Is anything else besides smu14 using v3 of this struct?  If so, we can't change
> the layout, otherwise it will break existing tools.  In that case, bump the
> minor version and append the new items to the end.
>
> Alex
>
>
> > diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > index c125253df20b..c2265e027ca8 100644
> > --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > @@ -1418,6 +1418,16 @@ typedef enum {
> >       METRICS_PCIE_WIDTH,
> >       METRICS_CURR_FANPWM,
> >       METRICS_CURR_SOCKETPOWER,
> > +     METRICS_AVERAGE_VPECLK,
> > +     METRICS_AVERAGE_IPUCLK,
> > +     METRICS_AVERAGE_MPIPUCLK,
> > +     METRICS_THROTTLER_RESIDENCY_PROCHOT,
> > +     METRICS_THROTTLER_RESIDENCY_SPL,
> > +     METRICS_THROTTLER_RESIDENCY_FPPT,
> > +     METRICS_THROTTLER_RESIDENCY_SPPT,
> > +     METRICS_THROTTLER_RESIDENCY_THM_CORE,
> > +     METRICS_THROTTLER_RESIDENCY_THM_GFX,
> > +     METRICS_THROTTLER_RESIDENCY_THM_SOC,
> >  } MetricsMember_t;
> >
> >  enum smu_cmn2asic_mapping_type {
> > diff --git
> >
> a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0
> > .h
> >
> b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0
> > .h
> > index 22f88842a7fd..8f42771e1f0a 100644
> > ---
> >
> a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0
> > .h
> > +++
> >
> b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0
> > .h
> > @@ -27,7 +27,7 @@
> >  // *** IMPORTANT ***
> >  // SMU TEAM: Always increment the interface version if
> >  // any structure is changed in this file
> > -#define PMFW_DRIVER_IF_VERSION 6
> > +#define PMFW_DRIVER_IF_VERSION 7
> >
> >  typedef struct {
> >    int32_t value;
> > @@ -150,37 +150,50 @@ typedef struct {
> >  } DpmClocks_t;
> >
> >  typedef struct {
> > -  uint16_t CoreFrequency[16];        //Target core frequency [MHz]
> > -  uint16_t CorePower[16];            //CAC calculated core power [mW]
> > -  uint16_t CoreTemperature[16];      //TSEN measured core temperature
> > [centi-C]
> > -  uint16_t GfxTemperature;           //TSEN measured GFX temperature [centi-
> C]
> > -  uint16_t SocTemperature;           //TSEN measured SOC temperature [centi-
> C]
> > -  uint16_t StapmOpnLimit;            //Maximum IRM defined STAPM power
> limit
> > [mW]
> > -  uint16_t StapmCurrentLimit;        //Time filtered STAPM power limit [mW]
> > -  uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced
> > on classic cores [MHz]
> > -  uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit
> > enforced on GFX [MHz]
> > -  uint16_t SkinTemp;                 //Maximum skin temperature reported by
> APU
> > and HS2 chassis sensors [centi-C]
> > -  uint16_t GfxclkFrequency;          //Time filtered target GFXCLK frequency
> > [MHz]
> > -  uint16_t FclkFrequency;            //Time filtered target FCLK frequency [MHz]
> > -  uint16_t GfxActivity;              //Time filtered GFX busy % [0-100]
> > -  uint16_t SocclkFrequency;          //Time filtered target SOCCLK frequency
> > [MHz]
> > -  uint16_t VclkFrequency;            //Time filtered target VCLK frequency [MHz]
> > -  uint16_t VcnActivity;              //Time filtered VCN busy % [0-100]
> > -  uint16_t VpeclkFrequency;          //Time filtered target VPECLK frequency
> > [MHz]
> > -  uint16_t IpuclkFrequency;          //Time filtered target IPUCLK frequency
> > [MHz]
> > -  uint16_t IpuBusy[8];               //Time filtered IPU per-column busy % [0-
> 100]
> > -  uint16_t DRAMReads;                //Time filtered DRAM read bandwidth
> > [MB/sec]
> > -  uint16_t DRAMWrites;               //Time filtered DRAM write bandwidth
> > [MB/sec]
> > -  uint16_t CoreC0Residency[16];      //Time filtered per-core C0 residency %
> > [0-100]
> > -  uint16_t IpuPower;                 //Time filtered IPU power [mW]
> > -  uint32_t ApuPower;                 //Time filtered APU power [mW]
> > -  uint32_t GfxPower;                 //Time filtered GFX power [mW]
> > -  uint32_t dGpuPower;                //Time filtered dGPU power [mW]
> > -  uint32_t SocketPower;              //Time filtered power used for PPT/STAPM
> > [APU+dGPU] [mW]
> > -  uint32_t AllCorePower;             //Time filtered sum of core power across all
> > cores in the socket [mW]
> > -  uint32_t FilterAlphaValue;         //Metrics table alpha filter time constant
> [us]
> > -  uint32_t MetricsCounter;           //Counter that is incremented on every
> > metrics table update [PM_TIMER cycles]
> > -  uint32_t spare[16];
> > +  uint16_t CoreFrequency[16];          //Target core frequency [MHz]
> > +  uint16_t CorePower[16];              //CAC calculated core power [mW]
> > +  uint16_t CoreTemperature[16];        //TSEN measured core temperature
> > [centi-C]
> > +  uint16_t GfxTemperature;             //TSEN measured GFX temperature
> [centi-
> > C]
> > +  uint16_t SocTemperature;             //TSEN measured SOC temperature
> [centi-
> > C]
> > +  uint16_t StapmOpnLimit;              //Maximum IRM defined STAPM power
> > limit [mW]
> > +  uint16_t StapmCurrentLimit;          //Time filtered STAPM power limit [mW]
> > +  uint16_t InfrastructureCpuMaxFreq;   //CCLK frequency limit enforced on
> > classic cores [MHz]
> > +  uint16_t InfrastructureGfxMaxFreq;   //GFXCLK frequency limit enforced
> on
> > GFX [MHz]
> > +  uint16_t SkinTemp;                   //Maximum skin temperature reported by
> APU
> > and HS2 chassis sensors [centi-C]
> > +  uint16_t GfxclkFrequency;            //Time filtered target GFXCLK frequency
> > [MHz]
> > +  uint16_t FclkFrequency;              //Time filtered target FCLK frequency
> [MHz]
> > +  uint16_t GfxActivity;                //Time filtered GFX busy % [0-100]
> > +  uint16_t SocclkFrequency;            //Time filtered target SOCCLK frequency
> > [MHz]
> > +  uint16_t VclkFrequency;              //Time filtered target VCLK frequency
> [MHz]
> > +  uint16_t VcnActivity;                //Time filtered VCN busy % [0-100]
> > +  uint16_t VpeclkFrequency;            //Time filtered target VPECLK frequency
> > [MHz]
> > +  uint16_t IpuclkFrequency;            //Time filtered target IPUCLK frequency
> > [MHz]
> > +  uint16_t IpuBusy[8];                 //Time filtered IPU per-column busy % [0-
> > 100]
> > +  uint16_t DRAMReads;                  //Time filtered DRAM read bandwidth
> > [MB/sec]
> > +  uint16_t DRAMWrites;                 //Time filtered DRAM write bandwidth
> > [MB/sec]
> > +  uint16_t CoreC0Residency[16];        //Time filtered per-core C0 residency %
> > [0-100]
> > +  uint16_t IpuPower;                   //Time filtered IPU power [mW]
> > +  uint32_t ApuPower;                   //Time filtered APU power [mW]
> > +  uint32_t GfxPower;                   //Time filtered GFX power [mW]
> > +  uint32_t dGpuPower;                  //Time filtered dGPU power [mW]
> > +  uint32_t SocketPower;                //Time filtered power used for PPT/STAPM
> > [APU+dGPU] [mW]
> > +  uint32_t AllCorePower;               //Time filtered sum of core power across all
> > cores in the socket [mW]
> > +  uint32_t FilterAlphaValue;           //Metrics table alpha filter time constant
> > [us]
> > +  uint32_t MetricsCounter;             //Counter that is incremented on every
> > metrics table update [PM_TIMER cycles]
> > +  uint16_t MemclkFrequency;            //Time filtered target MEMCLK
> frequency
> > [MHz]
> > +  uint16_t MpipuclkFrequency;          //Time filtered target MPIPUCLK
> > frequency [MHz]
> > +  uint16_t IpuReads;                   //Time filtered IPU read bandwidth [MB/sec]
> > +  uint16_t IpuWrites;                  //Time filtered IPU write bandwidth [MB/sec]
> > +  uint32_t ThrottleResidency_PROCHOT;  //Counter that is incremented on every metrics table update when PROCHOT was engaged [PM_TIMER cycles]
> > +  uint32_t ThrottleResidency_SPL;      //Counter that is incremented on every metrics table update when SPL was engaged [PM_TIMER cycles]
> > +  uint32_t ThrottleResidency_FPPT;     //Counter that is incremented on every metrics table update when fast PPT was engaged [PM_TIMER cycles]
> > +  uint32_t ThrottleResidency_SPPT;     //Counter that is incremented on every metrics table update when slow PPT was engaged [PM_TIMER cycles]
> > +  uint32_t ThrottleResidency_THM_CORE; //Counter that is incremented on every metrics table update when CORE thermal throttling was engaged [PM_TIMER cycles]
> > +  uint32_t ThrottleResidency_THM_GFX;  //Counter that is incremented on every metrics table update when GFX thermal throttling was engaged [PM_TIMER cycles]
> > +  uint32_t ThrottleResidency_THM_SOC;  //Counter that is incremented on every metrics table update when SOC thermal throttling was engaged [PM_TIMER cycles]
> > +  uint16_t Psys;                       //Time filtered Psys power [mW]
> > +  uint16_t spare1;
> > +  uint32_t spare[6];
> >  } SmuMetrics_t;
> >
> >  //ISP tile definitions
> > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
> > b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
> > index 03b38c3a9968..94ccdbfd7090 100644
> > --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
> > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
> > @@ -246,11 +246,20 @@ static int
> > smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu,
> >               *value = 0;
> >               break;
> >       case METRICS_AVERAGE_UCLK:
> > -             *value = 0;
> > +             *value = metrics->MemclkFrequency;
> >               break;
> >       case METRICS_AVERAGE_FCLK:
> >               *value = metrics->FclkFrequency;
> >               break;
> > +     case METRICS_AVERAGE_VPECLK:
> > +             *value = metrics->VpeclkFrequency;
> > +             break;
> > +     case METRICS_AVERAGE_IPUCLK:
> > +             *value = metrics->IpuclkFrequency;
> > +             break;
> > +     case METRICS_AVERAGE_MPIPUCLK:
> > +             *value = metrics->MpipuclkFrequency;
> > +             break;
> >       case METRICS_AVERAGE_GFXACTIVITY:
> >               *value = metrics->GfxActivity / 100;
> >               break;
> > @@ -270,8 +279,26 @@ static int
> > smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu,
> >               *value = metrics->SocTemperature / 100 *
> >               SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
> >               break;
> > -     case METRICS_THROTTLER_STATUS:
> > -             *value = 0;
> > +     case METRICS_THROTTLER_RESIDENCY_PROCHOT:
> > +             *value = metrics->ThrottleResidency_PROCHOT;
> > +             break;
> > +     case METRICS_THROTTLER_RESIDENCY_SPL:
> > +             *value = metrics->ThrottleResidency_SPL;
> > +             break;
> > +     case METRICS_THROTTLER_RESIDENCY_FPPT:
> > +             *value = metrics->ThrottleResidency_FPPT;
> > +             break;
> > +     case METRICS_THROTTLER_RESIDENCY_SPPT:
> > +             *value = metrics->ThrottleResidency_SPPT;
> > +             break;
> > +     case METRICS_THROTTLER_RESIDENCY_THM_CORE:
> > +             *value = metrics->ThrottleResidency_THM_CORE;
> > +             break;
> > +     case METRICS_THROTTLER_RESIDENCY_THM_GFX:
> > +             *value = metrics->ThrottleResidency_THM_GFX;
> > +             break;
> > +     case METRICS_THROTTLER_RESIDENCY_THM_SOC:
> > +             *value = metrics->ThrottleResidency_THM_SOC;
> >               break;
> >       case METRICS_VOLTAGE_VDDGFX:
> >               *value = 0;
> > @@ -498,6 +525,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct
> > smu_context *smu,
> >               sizeof(uint16_t) * 16);
> >       gpu_metrics->average_dram_reads = metrics.DRAMReads;
> >       gpu_metrics->average_dram_writes = metrics.DRAMWrites;
> > +     gpu_metrics->average_ipu_reads = metrics.IpuReads;
> > +     gpu_metrics->average_ipu_writes = metrics.IpuWrites;
> >
> >       gpu_metrics->average_socket_power = metrics.SocketPower;
> >       gpu_metrics->average_ipu_power = metrics.IpuPower;
> > @@ -505,6 +534,7 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
> >       gpu_metrics->average_gfx_power = metrics.GfxPower;
> >       gpu_metrics->average_dgpu_power = metrics.dGpuPower;
> >       gpu_metrics->average_all_core_power = metrics.AllCorePower;
> > +     gpu_metrics->average_sys_power = metrics.Psys;
> >       memcpy(&gpu_metrics->average_core_power[0],
> >               &metrics.CorePower[0],
> >               sizeof(uint16_t) * 16);
> > @@ -515,6 +545,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct
> > smu_context *smu,
> >       gpu_metrics->average_fclk_frequency = metrics.FclkFrequency;
> >       gpu_metrics->average_vclk_frequency = metrics.VclkFrequency;
> >       gpu_metrics->average_ipuclk_frequency = metrics.IpuclkFrequency;
> > +     gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency;
> > +     gpu_metrics->average_mpipu_frequency =
> > metrics.MpipuclkFrequency;
> >
> >       memcpy(&gpu_metrics->current_coreclk[0],
> >               &metrics.CoreFrequency[0],
> > @@ -522,6 +554,14 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
> >       gpu_metrics->current_core_maxfreq =
> > metrics.InfrastructureCpuMaxFreq;
> >       gpu_metrics->current_gfx_maxfreq =
> > metrics.InfrastructureGfxMaxFreq;
> >
> > +     gpu_metrics->throttle_residency_prochot =
> > metrics.ThrottleResidency_PROCHOT;
> > +     gpu_metrics->throttle_residency_spl = metrics.ThrottleResidency_SPL;
> > +     gpu_metrics->throttle_residency_fppt =
> > metrics.ThrottleResidency_FPPT;
> > +     gpu_metrics->throttle_residency_sppt =
> > metrics.ThrottleResidency_SPPT;
> > +     gpu_metrics->throttle_residency_thm_core =
> > metrics.ThrottleResidency_THM_CORE;
> > +     gpu_metrics->throttle_residency_thm_gfx =
> > metrics.ThrottleResidency_THM_GFX;
> > +     gpu_metrics->throttle_residency_thm_soc = metrics.ThrottleResidency_THM_SOC;
> > +
> >       gpu_metrics->time_filter_alphavalue = metrics.FilterAlphaValue;
> >       gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
> >
> > --
> > 2.25.1
>
>



More information about the amd-gfx mailing list