[PATCH] drm/amdgpu: disable c-states on xgmi perfmons

Quan, Evan Evan.Quan at amd.com
Thu Oct 17 01:33:59 UTC 2019


Hi Jonathan,

By default, Vega20 still uses the old powerplay routines, so this will not work on Vega20.
As proposed before, logic similar to the following should be used:
/////////////////////////////////////////////////////////////////////////////////////////////
if (is_support_sw_smu(adev)) {
        r = smu_set_df_cstate(&adev->smu, DF_CSTATE_DISALLOW or DF_CSTATE_ALLOW);
} else if (adev->powerplay.pp_funcs &&
                   adev->powerplay.pp_funcs->set_df_cstate) {
        r = adev->powerplay.pp_funcs->set_df_cstate(
                adev->powerplay.pp_handle,
                DF_CSTATE_DISALLOW or DF_CSTATE_ALLOW);
}
////////////////////////////////////////////////////////////////////////////////////////////

Regards,
Evan
> -----Original Message-----
> From: Kim, Jonathan <Jonathan.Kim at amd.com>
> Sent: 2019年10月17日 8:50
> To: amd-gfx at lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Quan, Evan
> <Evan.Quan at amd.com>
> Subject: RE: [PATCH] drm/amdgpu: disable c-states on xgmi perfmons
> 
> + Felix
> 
> -----Original Message-----
> From: Kim, Jonathan <Jonathan.Kim at amd.com>
> Sent: Wednesday, October 16, 2019 8:49 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Felix.Keuhling at amd.com; Quan, Evan <Evan.Quan at amd.com>; Kim,
> Jonathan <Jonathan.Kim at amd.com>; Kim, Jonathan
> <Jonathan.Kim at amd.com>
> Subject: [PATCH] drm/amdgpu: disable c-states on xgmi perfmons
> 
> Reads or writes to DF registers while the GPU DF is in a c-state will result
> in a hang.  DF c-states should be disabled prior to the reads or writes, then
> re-enabled afterwards.
> 
> Change-Id: I6d5a83e4fe13e29c73dfb03a94fe7c611e867fec
> Signed-off-by: Jonathan Kim <Jonathan.Kim at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 21 ++++++++++++++++++++-
>  1 file changed, 20 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> index 16fbd2bc8ad1..9a58416662e0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> @@ -102,6 +102,9 @@ static uint64_t df_v3_6_get_fica(struct
> amdgpu_device *adev,
>  	address = adev->nbio.funcs->get_pcie_index_offset(adev);
>  	data = adev->nbio.funcs->get_pcie_data_offset(adev);
> 
> +	if (smu_set_df_cstate(&adev->smu, 0))
> +		return 0xFFFFFFFFFFFFFFFF;
> +
>  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
>  	WREG32(address,
> smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
>  	WREG32(data, ficaa_val);
> @@ -114,6 +117,8 @@ static uint64_t df_v3_6_get_fica(struct
> amdgpu_device *adev,
> 
>  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
> 
> +	smu_set_df_cstate(&adev->smu, 1);
> +
>  	return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val);  }
> 
> @@ -125,6 +130,9 @@ static void df_v3_6_set_fica(struct amdgpu_device
> *adev, uint32_t ficaa_val,
>  	address = adev->nbio.funcs->get_pcie_index_offset(adev);
>  	data = adev->nbio.funcs->get_pcie_data_offset(adev);
> 
> +	if (smu_set_df_cstate(&adev->smu, 0))
> +		return;
> +
>  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
>  	WREG32(address,
> smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
>  	WREG32(data, ficaa_val);
> @@ -134,8 +142,9 @@ static void df_v3_6_set_fica(struct amdgpu_device
> *adev, uint32_t ficaa_val,
> 
>  	WREG32(address,
> smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
>  	WREG32(data, ficadh_val);
> -
>  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
> +
> +	smu_set_df_cstate(&adev->smu, 1);
>  }
> 
>  /*
> @@ -153,12 +162,17 @@ static void df_v3_6_perfmon_rreg(struct
> amdgpu_device *adev,
>  	address = adev->nbio.funcs->get_pcie_index_offset(adev);
>  	data = adev->nbio.funcs->get_pcie_data_offset(adev);
> 
> +	if (smu_set_df_cstate(&adev->smu, 0))
> +		return;
> +
>  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
>  	WREG32(address, lo_addr);
>  	*lo_val = RREG32(data);
>  	WREG32(address, hi_addr);
>  	*hi_val = RREG32(data);
>  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
> +
> +	smu_set_df_cstate(&adev->smu, 1);
>  }
> 
>  /*
> @@ -175,12 +189,17 @@ static void df_v3_6_perfmon_wreg(struct
> amdgpu_device *adev, uint32_t lo_addr,
>  	address = adev->nbio.funcs->get_pcie_index_offset(adev);
>  	data = adev->nbio.funcs->get_pcie_data_offset(adev);
> 
> +	if (smu_set_df_cstate(&adev->smu, 0))
> +		return;
> +
>  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
>  	WREG32(address, lo_addr);
>  	WREG32(data, lo_val);
>  	WREG32(address, hi_addr);
>  	WREG32(data, hi_val);
>  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
> +
> +	smu_set_df_cstate(&adev->smu, 1);
>  }
> 
>  /* get the number of df counters available */
> --
> 2.17.1



More information about the amd-gfx mailing list