[PATCH v2] drm/amdgpu: Disable dpm_enabled flag while VF is in reset

Lazar, Lijo lijo.lazar at amd.com
Mon Aug 12 13:45:32 UTC 2024



On 8/12/2024 6:39 PM, Victor Skvortsov wrote:
> VFs do not perform HW fini/suspend in FLR, so the dpm_enabled
> is incorrectly kept enabled. Add interface to disable it in
> virt_pre_reset call.
> 
> v2: Made implementation generic for all asics
> 
> Signed-off-by: Victor Skvortsov <victor.skvortsov at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c     | 6 ++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c       | 8 ++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h       | 1 +
>  drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 +
>  drivers/gpu/drm/amd/pm/amdgpu_dpm.c            | 5 ++++-
>  5 files changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 29a4adee9286..a6b8d0ba4758 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -5289,10 +5289,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>  	if (reset_context->reset_req_dev == adev)
>  		job = reset_context->job;
>  
> -	if (amdgpu_sriov_vf(adev)) {
> -		/* stop the data exchange thread */
> -		amdgpu_virt_fini_data_exchange(adev);
> -	}
> +	if (amdgpu_sriov_vf(adev))
> +		amdgpu_virt_pre_reset(adev);
>  
>  	amdgpu_fence_driver_isr_toggle(adev, true);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index b287a82e6177..b6397d3229e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -33,6 +33,7 @@
>  #include "amdgpu.h"
>  #include "amdgpu_ras.h"
>  #include "amdgpu_reset.h"
> +#include "amdgpu_dpm.h"
>  #include "vi.h"
>  #include "soc15.h"
>  #include "nv.h"
> @@ -849,6 +850,13 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad
>  	return mode;
>  }
>  
> +void amdgpu_virt_pre_reset(struct amdgpu_device *adev)
> +{
> +	/* stop the data exchange thread */
> +	amdgpu_virt_fini_data_exchange(adev);
> +	amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR);
> +}
> +
>  void amdgpu_virt_post_reset(struct amdgpu_device *adev)
>  {
>  	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index b42a8854dca0..b650a2032c42 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -376,6 +376,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
>  		      u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
>  bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
>  			uint32_t ucode_id);
> +void amdgpu_virt_pre_reset(struct amdgpu_device *adev);
>  void amdgpu_virt_post_reset(struct amdgpu_device *adev);
>  bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
>  bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> index 4b20e2274313..19a48d98830a 100644
> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> @@ -218,6 +218,7 @@ enum pp_mp1_state {
>  	PP_MP1_STATE_SHUTDOWN,
>  	PP_MP1_STATE_UNLOAD,
>  	PP_MP1_STATE_RESET,
> +	PP_MP1_STATE_FLR,
>  };
>  
>  enum pp_df_cstate {
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index 8b7d6ed7e2ed..af39206a2c5f 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -168,7 +168,10 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
>  	int ret = 0;
>  	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
>  
> -	if (pp_funcs && pp_funcs->set_mp1_state) {
> +	if (amdgpu_sriov_vf(adev) && mp1_state == PP_MP1_STATE_FLR) {
> +		/* VF lost access to SMU */
> +		adev->pm.dpm_enabled = false;

For non-VF devices, PP_MP1_STATE_FLR needs to be a don't care.
Preferrably, something like

if (mp1_state == PP_MP1_STATE_FLR) {
	if (amdgpu_sriov_vf(adev))
		adev->pm.dpm_enabled = false;
}else { ..
}

Thanks,
Lijo

> +	} else if (pp_funcs && pp_funcs->set_mp1_state) {
>  		mutex_lock(&adev->pm.mutex);
>  
>  		ret = pp_funcs->set_mp1_state(


More information about the amd-gfx mailing list