[PATCH Review 1/2] drm/amdgpu: Optimze checking ras supported

Zhou1, Tao Tao.Zhou1 at amd.com
Tue Jun 13 07:08:17 UTC 2023


[AMD Official Use Only - General]

[Tao] There is a typo in the title: "Optimze" should be "Optimize".

> -----Original Message-----
> From: Stanley.Yang <Stanley.Yang at amd.com>
> Sent: Tuesday, June 13, 2023 11:53 AM
> To: amd-gfx at lists.freedesktop.org; Zhang, Hawking <Hawking.Zhang at amd.com>;
> Zhou1, Tao <Tao.Zhou1 at amd.com>
> Cc: Yang, Stanley <Stanley.Yang at amd.com>
> Subject: [PATCH Review 1/2] drm/amdgpu: Optimze checking ras supported
>
> Use "is_app_apu" to identify a device in native APU mode or carveout mode.
>
> Signed-off-by: Stanley.Yang <Stanley.Yang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c |  2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c |  8 +++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 34 ++++++++++++++-----------
>  3 files changed, 23 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index 78bacea951a9..352e958b190a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -1653,7 +1653,7 @@ int psp_ras_initialize(struct psp_context *psp)
>
>       if (amdgpu_ras_is_poison_mode_supported(adev))
>               ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
> -     if (!adev->gmc.xgmi.connected_to_cpu)
> +     if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
>               ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
>       ras_cmd->ras_in_message.init_flags.xcc_mask =
>               adev->gfx.xcc_mask;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 7a0924469e4f..56bb0db207b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1689,8 +1689,7 @@ static void
> amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
>               }
>       }
>
> -     if (!adev->gmc.xgmi.connected_to_cpu)
> -             amdgpu_umc_poison_handler(adev, false);
> +     amdgpu_umc_poison_handler(adev, false);
>
>       if (block_obj->hw_ops && block_obj->hw_ops-
> >handle_poison_consumption)
>               poison_stat = block_obj->hw_ops-
> >handle_poison_consumption(adev);
> @@ -2458,11 +2457,10 @@ static void amdgpu_ras_check_supported(struct
> amdgpu_device *adev)  {
>       adev->ras_hw_enabled = adev->ras_enabled = 0;
>
> -     if (!adev->is_atom_fw ||
> -         !amdgpu_ras_asic_supported(adev))
> +     if (!amdgpu_ras_asic_supported(adev))
>               return;
>
> -     if (!adev->gmc.xgmi.connected_to_cpu) {
> +     if (!adev->gmc.xgmi.connected_to_cpu && !adev-

[Tao] The tab should be replaced with a space.

> >gmc.is_app_apu) {
>               if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
>                       dev_info(adev->dev, "MEM ECC is active.\n");
>                       adev->ras_hw_enabled |= (1 <<
> AMDGPU_RAS_BLOCK__UMC |
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> index 1edf8e6aeb16..db0d94ca4ffc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> @@ -169,27 +169,31 @@ int amdgpu_umc_poison_handler(struct
> amdgpu_device *adev, bool reset)  {
>       int ret = AMDGPU_RAS_SUCCESS;
>
> -     if (!amdgpu_sriov_vf(adev)) {
> -             if (!adev->gmc.xgmi.connected_to_cpu) {
> -                     struct ras_err_data err_data = {0, 0, 0, NULL};
> -                     struct ras_common_if head = {
> -                             .block = AMDGPU_RAS_BLOCK__UMC,
> -                     };
> -                     struct ras_manager *obj = amdgpu_ras_find_obj(adev,
> &head);
> -
> -                     ret = amdgpu_umc_do_page_retirement(adev,
> &err_data, NULL, reset);
> -
> -                     if (ret == AMDGPU_RAS_SUCCESS && obj) {
> -                             obj->err_data.ue_count += err_data.ue_count;
> -                             obj->err_data.ce_count += err_data.ce_count;
> -                     }
> -             } else if (reset) {
> +     if (adev->gmc.xgmi.connected_to_cpu ||
> +             adev->gmc.is_app_apu) {
> +             if (reset) {
>                       /* MCA poison handler is only responsible for GPU reset,
>                        * let MCA notifier do page retirement.
>                        */
>                       kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
>                       amdgpu_ras_reset_gpu(adev);
>               }
> +             return ret;
> +     }
> +
> +     if (!amdgpu_sriov_vf(adev)) {
> +             struct ras_err_data err_data = {0, 0, 0, NULL};
> +             struct ras_common_if head = {
> +                     .block = AMDGPU_RAS_BLOCK__UMC,
> +             };
> +             struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
> +
> +             ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL,
> reset);
> +
> +             if (ret == AMDGPU_RAS_SUCCESS && obj) {
> +                     obj->err_data.ue_count += err_data.ue_count;
> +                     obj->err_data.ce_count += err_data.ce_count;
> +             }
>       } else {
>               if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
>                       adev->virt.ops->ras_poison_handler(adev);
> --
> 2.17.1



More information about the amd-gfx mailing list