[PATCH v3 3/3] drm/amd/pm: Use gpu_metrics_v1_4 for SMUv13.0.6

Lazar, Lijo lijo.lazar at amd.com
Tue Oct 10 04:51:40 UTC 2023



On 10/9/2023 8:23 PM, Asad Kamal wrote:
> Use gpu_metrics_v1_4 for SMUv13.0.6 to fill
> gpu metric info
> 
> v3: Removed filling of the instantaneous PCIe bandwidth
> gpu metric
> 
> Signed-off-by: Asad Kamal <asad.kamal at amd.com>

A special note inline.

Series is-
	Reviewed-by: Lijo Lazar <lijo.lazar at amd.com>

> ---
>   .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 65 ++++++++++++-------
>   1 file changed, 41 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index ce971a93d28b..7ab73112e4f3 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -279,7 +279,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
>   		return -ENOMEM;
>   	smu_table->metrics_time = 0;
>   
> -	smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_3);
> +	smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_4);
>   	smu_table->gpu_metrics_table =
>   		kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
>   	if (!smu_table->gpu_metrics_table) {
> @@ -1969,22 +1969,19 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu)
>   static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table)
>   {
>   	struct smu_table_context *smu_table = &smu->smu_table;
> -	struct gpu_metrics_v1_3 *gpu_metrics =
> -		(struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
> +	struct gpu_metrics_v1_4 *gpu_metrics =
> +		(struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table;
>   	struct amdgpu_device *adev = smu->adev;
> -	int ret = 0, inst0, xcc0;
> +	int ret = 0, xcc_id, inst, i;
>   	MetricsTable_t *metrics;
>   	u16 link_width_level;
>   
> -	inst0 = adev->sdma.instance[0].aid_id;
> -	xcc0 = GET_INST(GC, 0);
> -
>   	metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);

Please make sure the fix "drm/amd/pm: Fix a memory leak on an error
path" is included when pushing these changes, since the early return
below exits after the metrics buffer has been allocated here.

Thanks,
Lijo

>   	ret = smu_v13_0_6_get_metrics_table(smu, metrics, true);
>   	if (ret)
>   		return ret;
>   
> -	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
> +	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4);
>   
>   	gpu_metrics->temperature_hotspot =
>   		SMUQ10_ROUND(metrics->MaxSocketTemperature);
> @@ -2000,30 +1997,38 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
>   	gpu_metrics->average_umc_activity =
>   		SMUQ10_ROUND(metrics->DramBandwidthUtilization);
>   
> -	gpu_metrics->average_socket_power =
> +	gpu_metrics->curr_socket_power =
>   		SMUQ10_ROUND(metrics->SocketPower);
>   	/* Energy counter reported in 15.259uJ (2^-16) units */
>   	gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
>   
> -	gpu_metrics->current_gfxclk =
> -		SMUQ10_ROUND(metrics->GfxclkFrequency[xcc0]);
> -	gpu_metrics->current_socclk =
> -		SMUQ10_ROUND(metrics->SocclkFrequency[inst0]);
> -	gpu_metrics->current_uclk = SMUQ10_ROUND(metrics->UclkFrequency);
> -	gpu_metrics->current_vclk0 =
> -		SMUQ10_ROUND(metrics->VclkFrequency[inst0]);
> -	gpu_metrics->current_dclk0 =
> -		SMUQ10_ROUND(metrics->DclkFrequency[inst0]);
> +	for (i = 0; i < MAX_GFX_CLKS; i++) {
> +		xcc_id = GET_INST(GC, i);
> +		if (xcc_id >= 0)
> +			gpu_metrics->current_gfxclk[i] =
> +				SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]);
> +
> +		if (i < MAX_CLKS) {
> +			gpu_metrics->current_socclk[i] =
> +				SMUQ10_ROUND(metrics->SocclkFrequency[i]);
> +			inst = GET_INST(VCN, i);
> +			if (inst >= 0) {
> +				gpu_metrics->current_vclk0[i] =
> +					SMUQ10_ROUND(metrics->VclkFrequency[inst]);
> +				gpu_metrics->current_dclk0[i] =
> +					SMUQ10_ROUND(metrics->DclkFrequency[inst]);
> +			}
> +		}
> +	}
>   
> -	gpu_metrics->average_gfxclk_frequency = gpu_metrics->current_gfxclk;
> -	gpu_metrics->average_socclk_frequency = gpu_metrics->current_socclk;
> -	gpu_metrics->average_uclk_frequency = gpu_metrics->current_uclk;
> -	gpu_metrics->average_vclk0_frequency = gpu_metrics->current_vclk0;
> -	gpu_metrics->average_dclk0_frequency = gpu_metrics->current_dclk0;
> +	gpu_metrics->current_uclk = SMUQ10_ROUND(metrics->UclkFrequency);
>   
>   	/* Throttle status is not reported through metrics now */
>   	gpu_metrics->throttle_status = 0;
>   
> +	/* Clock Lock Status. Each bit corresponds to each GFXCLK instance */
> +	gpu_metrics->gfxclk_lock_status = metrics->GfxLockXCDMak >> GET_INST(GC, 0);
> +
>   	if (!(adev->flags & AMD_IS_APU)) {
>   		link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu);
>   		if (link_width_level > MAX_LINK_WIDTH)
> @@ -2033,6 +2038,8 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
>   			DECODE_LANE_WIDTH(link_width_level);
>   		gpu_metrics->pcie_link_speed =
>   			smu_v13_0_6_get_current_pcie_link_speed(smu);
> +		gpu_metrics->pcie_bandwidth_acc =
> +				SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]);
>   	}
>   
>   	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
> @@ -2042,12 +2049,22 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
>   	gpu_metrics->mem_activity_acc =
>   		SMUQ10_ROUND(metrics->DramBandwidthUtilizationAcc);
>   
> +	for (i = 0; i < NUM_XGMI_LINKS; i++) {
> +		gpu_metrics->xgmi_read_data_acc[i] =
> +			SMUQ10_ROUND(metrics->XgmiReadDataSizeAcc[i]);
> +		gpu_metrics->xgmi_write_data_acc[i] =
> +			SMUQ10_ROUND(metrics->XgmiWriteDataSizeAcc[i]);
> +	}
> +
> +	gpu_metrics->xgmi_link_width = SMUQ10_ROUND(metrics->XgmiWidth);
> +	gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(metrics->XgmiBitrate);
> +
>   	gpu_metrics->firmware_timestamp = metrics->Timestamp;
>   
>   	*table = (void *)gpu_metrics;
>   	kfree(metrics);
>   
> -	return sizeof(struct gpu_metrics_v1_3);
> +	return sizeof(*gpu_metrics);
>   }
>   
>   static int smu_v13_0_6_mode2_reset(struct smu_context *smu)


More information about the amd-gfx mailing list