[PATCH v2 1/2] drm/amd/pm: add support for checking SDMA reset capability
Huang, Tim
Tim.Huang at amd.com
Thu Feb 20 09:10:13 UTC 2025
[Public]
Hi Jesse,
> -----Original Message-----
> From: jesse.zhang at amd.com <jesse.zhang at amd.com>
> Sent: Thursday, February 20, 2025 4:31 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Zhu, Jiadong
> <Jiadong.Zhu at amd.com>; Huang, Tim <Tim.Huang at amd.com>; Zhang,
> Jesse(Jie) <Jesse.Zhang at amd.com>; Prosyak, Vitaly
> <Vitaly.Prosyak at amd.com>; Zhang, Jesse(Jie) <Jesse.Zhang at amd.com>
> Subject: [PATCH v2 1/2] drm/amd/pm: add support for checking SDMA reset
> capability
>
> From: "Jesse.zhang at amd.com" <Jesse.zhang at amd.com>
>
> This patch introduces a new function to check if the SMU supports resetting
> the SDMA engine.
> This capability check ensures that the driver does not attempt to reset the
> SDMA engine on hardware that does not support it.
>
> The following changes are included:
> - New function `amdgpu_dpm_reset_sdma_is_supported` to check SDMA
> reset
> support at the AMDGPU driver level.
> - New function `smu_reset_sdma_is_supported` to check SDMA reset support
> at the SMU level.
> - Implementation of `smu_v13_0_6_reset_sdma_is_supported` for the
> specific
> SMU version v13.0.6.
> - Updated `smu_v13_0_6_reset_sdma` to use the new capability check
> before
> attempting to reset the SDMA engine.
>
> Signed-off-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
> Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
> ---
> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 23
> +++++++++++++++++++
> drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 1 +
> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17
> ++++++++++++++
> drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 5
> ++++ .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 23
> ++++++++++++++++++-
> 5 files changed, 68 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index faae9bf48aa4..26209d5ff787 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -722,6 +722,29 @@ int amdgpu_dpm_send_rma_reason(struct
> amdgpu_device *adev)
> return ret;
> }
>
> +/**
> + * amdgpu_dpm_reset_sdma_is_supported - Check if SDMA reset is
> +supported
> + * @adev: amdgpu_device pointer
> + *
> + * This function checks if the SMU supports resetting the SDMA engine.
> + * It returns -EOPNOTSUPP if the hardware does not support software SMU
> +or
> + * if the feature is not supported.
> + */
> +int amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev) {
> + struct smu_context *smu = adev->powerplay.pp_handle;
> + int ret;
> +
> + if (!is_support_sw_smu(adev))
> + return -EOPNOTSUPP;
We may need to carefully set the return type and return value for this function, as it is used in your patch 2/2 like this:
if ((adev->gfx.mec_fw_version >= 0xb0) &&amdgpu_dpm_reset_sdma_is_supported(adev))
> +
> + mutex_lock(&adev->pm.mutex);
> + ret = smu_reset_sdma_is_supported(smu);
> + mutex_unlock(&adev->pm.mutex);
> +
> + return ret;
> +}
> +
> int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t
> inst_mask) {
> struct smu_context *smu = adev->powerplay.pp_handle; diff --git
> a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> index 1f5ac7e0230d..353a10119dc5 100644
> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> @@ -603,5 +603,6 @@ int amdgpu_dpm_set_pm_policy(struct
> amdgpu_device *adev, int policy_type, ssize_t
> amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev,
> enum pp_pm_policy p_type, char *buf); int
> amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask);
> +int amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev);
>
> #endif
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index 0b32c6cf6924..f860590ef893 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -3907,6 +3907,23 @@ int smu_send_rma_reason(struct smu_context
> *smu)
> return ret;
> }
>
> +/**
> + * smu_reset_sdma_is_supported - Check if SDMA reset is supported by
> +SMU
> + * @smu: smu_context pointer
> + *
> + * This function checks if the SMU supports resetting the SDMA engine.
> + * It returns 0 if supported, -EOPNOTSUPP otherwise.
> + */
> +int smu_reset_sdma_is_supported(struct smu_context *smu) {
> + int ret = 0;
> +
> + if (smu->ppt_funcs && smu->ppt_funcs->reset_sdma_is_supported)
> + ret = smu->ppt_funcs->reset_sdma_is_supported(smu);
> +
> + return ret;
> +}
> +
> int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) {
> int ret = 0;
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> index 3630593bce61..090a2b3b81a0 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> @@ -1376,6 +1376,10 @@ struct pptable_funcs {
> * @reset_sdma: message SMU to soft reset sdma instance.
> */
> int (*reset_sdma)(struct smu_context *smu, uint32_t inst_mask);
> + /**
> + * @reset_sdma_is_supported: Check if support resets the SDMA
> engine.
> + */
> + int (*reset_sdma_is_supported)(struct smu_context *smu);
>
> /**
> * @get_ecc_table: message SMU to get ECC INFO table.
> @@ -1637,6 +1641,7 @@ int smu_send_hbm_bad_pages_num(struct
> smu_context *smu, uint32_t size); int
> smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
> int smu_send_rma_reason(struct smu_context *smu); int
> smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask);
> +int smu_reset_sdma_is_supported(struct smu_context *smu);
> int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy
> p_type,
> int level);
> ssize_t smu_get_pm_policy_info(struct smu_context *smu, diff --git
> a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index 9f2de69f53b2..a200ac7b15a8 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -2902,11 +2902,31 @@ static int
> smu_v13_0_6_send_rma_reason(struct smu_context *smu)
> return ret;
> }
>
> +/**
> + * smu_v13_0_6_reset_sdma_is_supported - Check if SDMA reset is
> +supported
> + * @smu: smu_context pointer
> + *
> + * This function checks if the SMU supports resetting the SDMA engine.
> + * It returns -EOPNOTSUPP if the capability is not supported.
> + */
> +static int smu_v13_0_6_reset_sdma_is_supported(struct smu_context
> *smu)
> +{
> + int ret = 0;
> +
> + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET))) {
> + dev_info(smu->adev->dev,
> + "SDMA reset capability is not supported\n");
> + ret = EOPNOTSUPP;
Need to set the return value consistence with the return of amdgpu_dpm_reset_sdma_is_supported if not supported.
Tim
> + }
> +
> + return ret;
> +}
> +
> static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t
> inst_mask) {
> int ret = 0;
>
> - if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET)))
> + if (smu_v13_0_6_reset_sdma_is_supported(smu))
> return -EOPNOTSUPP;
>
> ret = smu_cmn_send_smc_msg_with_param(smu,
> @@ -3590,6 +3610,7 @@ static const struct pptable_funcs
> smu_v13_0_6_ppt_funcs = {
> .send_hbm_bad_pages_num =
> smu_v13_0_6_smu_send_hbm_bad_page_num,
> .send_rma_reason = smu_v13_0_6_send_rma_reason,
> .reset_sdma = smu_v13_0_6_reset_sdma,
> + .reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported,
> };
>
> void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
> --
> 2.25.1
More information about the amd-gfx
mailing list