[PATCH] drm/amdgpu: check whether smu is idle in sriov case

Alex Deucher alexdeucher at gmail.com
Thu Jul 6 14:05:21 UTC 2023


On Thu, Jul 6, 2023 at 8:01 AM Danijel Slivka <danijel.slivka at amd.com> wrote:
>
> Why:
> If the reg mmMP1_SMN_C2PMSG_90 is being programmed to 0x0 before
> guest initialization, then modprobe amdgpu will fail at smu hw_init.
> (the default mmMP1_SMN_C2PMSG_90 at a clean guest environment is 0x1).
>
> How to fix:
> This patch checks whether the SMU is idle by sending a test
> message to it. If the SMU is idle, it will respond.
>
> Signed-off-by: Danijel Slivka <danijel.slivka at amd.com>
> Signed-off-by: Nikola Prica <nikola.prica at amd.com>
> Signed-off-by: Jingwen Chen <Jingwen.Chen2 at amd.com>
> Signed-off-by: pengzhou <PengJu.Zhou at amd.com>
> ---
>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |  8 ++++
>  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 ++++
>  drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h  |  2 +
>  .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  1 +
>  .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c    |  5 +++
>  drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c        | 40 +++++++++++++++++++
>  drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h        |  2 +
>  7 files changed, 65 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index ce41a8309582..63ea4cd32ece 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -1443,6 +1443,14 @@ static int smu_start_smc_engine(struct smu_context *smu)
>                 }
>         }
>
> +       if (amdgpu_sriov_vf(adev) && smu->ppt_funcs->wait_smu_idle) {
> +               ret = smu->ppt_funcs->wait_smu_idle(smu);
> +               if (ret) {
> +                       dev_err(adev->dev, "SMU is not idle\n");
> +                       return ret;
> +               }
> +       }
> +
>         /*
>          * Send msg GetDriverIfVersion to check if the return value is equal
>          * with DRIVER_IF_VERSION of smc header.
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> index 6e2069dcb6b9..1bf87ad30d93 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> @@ -926,6 +926,13 @@ struct pptable_funcs {
>          */
>         int (*check_fw_status)(struct smu_context *smu);
>
> +       /**
> +        * @wait_smu_idle: wait for SMU idle status.
> +        *
> +        * Return: Zero if check passes, negative errno on failure.
> +        */
> +       int (*wait_smu_idle)(struct smu_context *smu);
> +
>         /**
>          * @set_mp1_state: put SMU into a correct state for comming
>          *                 resume from runpm or gpu reset.
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h
> index d466db6f0ad4..f3293ddd1a1b 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h
> @@ -165,6 +165,8 @@ int smu_v11_0_fini_power(struct smu_context *smu);
>
>  int smu_v11_0_check_fw_status(struct smu_context *smu);
>
> +int smu_v11_0_wait_smu_idle(struct smu_context *smu);
> +
>  int smu_v11_0_setup_pptable(struct smu_context *smu);
>
>  int smu_v11_0_get_vbios_bootup_values(struct smu_context *smu);
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> index c94d825a871b..f4e7596dcdcd 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> @@ -3503,6 +3503,7 @@ static const struct pptable_funcs navi10_ppt_funcs = {
>         .init_power = smu_v11_0_init_power,
>         .fini_power = smu_v11_0_fini_power,
>         .check_fw_status = smu_v11_0_check_fw_status,
> +       .wait_smu_idle = smu_v11_0_wait_smu_idle,

Drop the smu_v11_0 wrapper and just use the cmn function directly.

>         .setup_pptable = navi10_setup_pptable,
>         .get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values,
>         .check_fw_version = smu_v11_0_check_fw_version,
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
> index aa4a5498a12f..059dc2243c06 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
> @@ -195,6 +195,11 @@ int smu_v11_0_check_fw_status(struct smu_context *smu)
>         return -EIO;
>  }
>
> +int smu_v11_0_wait_smu_idle(struct smu_context *smu)
> +{
> +       return smu_wait_smu_idle(smu);
> +}

Drop this.

> +
>  int smu_v11_0_check_fw_version(struct smu_context *smu)
>  {
>         struct amdgpu_device *adev = smu->adev;
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
> index 3ecb900e6ecd..5dc81d7b04da 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
> @@ -313,6 +313,46 @@ int smu_cmn_wait_for_response(struct smu_context *smu)
>         return res;
>  }
>
> +/**
> + * smu_wait_smu_idle -- wait for smu to become idle
> + * @smu: pointer to an SMU context
> + *
> + * Send SMU_MSG_TestMessage to check whether SMU is idle.
> + * If SMU is idle, it will respond.
> + * The returned parameter will be the param you pass + 1.
> + *
> + * Return 0 on success, -errno on error, indicating the execution
> + * status and result of the message being waited for. See
> + * __smu_cmn_reg2errno() for details of the -errno.
> + */
> +int smu_wait_smu_idle(struct smu_context *smu)

Rename this to smu_cmn_wait_smu_idle() for consistency.

Alex

> +{
> +       u32 reg;
> +       u32 param = 0xff00011;
> +       uint32_t read_arg;
> +       int res, index;
> +
> +       index = smu_cmn_to_asic_specific_index(smu,
> +                                              CMN2ASIC_MAPPING_MSG,
> +                                              SMU_MSG_TestMessage);
> +
> +       __smu_cmn_send_msg(smu, index, param);
> +       reg = __smu_cmn_poll_stat(smu);
> +       res = __smu_cmn_reg2errno(smu, reg);
> +
> +       if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) &&
> +           res && (res != -ETIME)) {
> +               amdgpu_device_halt(smu->adev);
> +               WARN_ON(1);
> +       }
> +
> +       smu_cmn_read_arg(smu, &read_arg);
> +       if (read_arg == param + 1)
> +               return 0;
> +       return res;
> +}
> +
> +
>  /**
>   * smu_cmn_send_smc_msg_with_param -- send a message with parameter
>   * @smu: pointer to an SMU context
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
> index d7cd358a53bd..abe875513d77 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
> @@ -50,6 +50,8 @@ int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu,
>
>  int smu_cmn_wait_for_response(struct smu_context *smu);
>
> +int smu_wait_smu_idle(struct smu_context *smu);
> +
>  int smu_cmn_to_asic_specific_index(struct smu_context *smu,
>                                    enum smu_cmn2asic_mapping_type type,
>                                    uint32_t index);
> --
> 2.25.1
>


More information about the amd-gfx mailing list